--- linux-ec2-2.6.32.orig/Makefile
+++ linux-ec2-2.6.32/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 32
-EXTRAVERSION =
+EXTRAVERSION = .56+drm33.22
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
@@ -331,14 +331,23 @@
 AFLAGS_KERNEL	=
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage
 
+# Prefer linux-backports-modules
+ifneq ($(KBUILD_SRC),)
+ifneq ($(shell if test -e $(KBUILD_OUTPUT)/ubuntu-build; then echo yes; fi),yes)
+UBUNTUINCLUDE := -I/usr/src/linux-headers-lbm-$(KERNELRELEASE)
+endif
+endif
 # Use LINUXINCLUDE when you must reference the include/ directory.
 # Needed to be compatible with the O= option
-LINUXINCLUDE    := -Iinclude \
+LINUXINCLUDE    := $(UBUNTUINCLUDE) -Iinclude \
                    $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \
                    -I$(srctree)/arch/$(hdr-arch)/include \
                    -include include/linux/autoconf.h
 
+# UBUNTU: Include our third party driver stuff too
+LINUXINCLUDE += -Iubuntu/include $(if $(KBUILD_SRC),-I$(srctree)/ubuntu/include)
+
 KBUILD_CPPFLAGS := -D__KERNEL__
 
 KBUILD_CFLAGS   := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
@@ -464,12 +473,12 @@
 # Carefully list dependencies so we do not try to build scripts twice
 # in parallel
 PHONY += scripts
-scripts: scripts_basic include/config/auto.conf
+scripts: scripts_basic include/config/auto.conf include/config/tristate.conf
	$(Q)$(MAKE) $(build)=$(@)
 
 # Objects we will link into vmlinux / subdirs we need to visit
 init-y		:= init/
-drivers-y	:= drivers/ sound/ firmware/
+drivers-y	:= drivers/ sound/ firmware/ ubuntu/
 net-y		:= net/
 libs-y		:= lib/
 core-y		:= usr/
@@ -491,7 +500,7 @@
 # with it and forgot to run make oldconfig.
 # if auto.conf.cmd is missing then we are probably in a cleaned tree so
 # we execute the config step to be sure to catch updated Kconfig files
-include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
+include/config/%.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
	$(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig
 else
 # external modules needs include/linux/autoconf.h and include/config/auto.conf
@@ -537,6 +546,9 @@
 KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
 endif
 
+# This warning generated too much noise in a regular build.
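+# (cc-disable-warning only emits -Wno-<warning> when the compiler
+# actually supports the warning, so this stays safe on older gcc.)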
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
@@ -565,7 +577,7 @@
 KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
 
 # disable pointer signed / unsigned warnings in gcc 4.0
-KBUILD_CFLAGS += $(call cc-option,-Wno-pointer-sign,)
+KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
 
@@ -876,6 +888,9 @@
 PHONY += $(vmlinux-dirs)
 $(vmlinux-dirs): prepare scripts
	$(Q)$(MAKE) $(build)=$@
+ifdef CONFIG_MODULES
+	$(Q)$(MAKE) $(modbuiltin)=$@
+endif
 
 # Build the kernel release string
 #
@@ -1126,6 +1141,7 @@
 PHONY += modules
 modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
	$(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order
+	$(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.builtin) > $(objtree)/modules.builtin
	@$(kecho) '  Building modules, stage 2.';
	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modbuild
@@ -1155,6 +1171,7 @@
		ln -s $(objtree) $(MODLIB)/build ; \
	fi
	@cp -f $(objtree)/modules.order $(MODLIB)/
+	@cp -f $(objtree)/modules.builtin $(MODLIB)/
	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
 # This depmod is only for convenience to give the initial
@@ -1218,6 +1235,7 @@
		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
		-o -name '*.symtypes' -o -name 'modules.order' \
		-o -name 'Module.markers' -o -name '.tmp_*.o.*' \
+		-o -name 'modules.builtin' \
		-o -name '*.gcno' \) -type f -print | xargs rm -f
 
 # mrproper - Delete all generated files, including .config
@@ -1416,7 +1434,8 @@
 clean: rm-dirs  := $(MODVERDIR)
 clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers \
		   $(KBUILD_EXTMOD)/Module.markers \
-		   $(KBUILD_EXTMOD)/modules.order
+		   $(KBUILD_EXTMOD)/modules.order \
+		   $(KBUILD_EXTMOD)/modules.builtin
 
 clean: $(clean-dirs)
	$(call cmd,rmdirs)
	$(call cmd,rmfiles)
--- linux-ec2-2.6.32.orig/MAINTAINERS
+++ linux-ec2-2.6.32/MAINTAINERS
@@ -1974,6 +1974,12 @@
 S:	Maintained
 F:	drivers/platform/x86/eeepc-laptop.c
 
+EFIFB FRAMEBUFFER DRIVER
+L:	linux-fbdev@vger.kernel.org
+M:	Peter Jones
+S:	Maintained
+F:	drivers/video/efifb.c
+
 EFS FILESYSTEM
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan
@@ -5004,8 +5010,7 @@
 
 STABLE BRANCH
 M:	Greg Kroah-Hartman
-M:	Chris Wright
-L:	stable@kernel.org
+L:	stable@vger.kernel.org
 S:	Maintained
 
 STAGING SUBSYSTEM
@@ -5594,9 +5599,11 @@
 F:	drivers/net/wireless/rndis_wlan.c
 
 USB XHCI DRIVER
-M:	Sarah Sharp
+M:	Sarah Sharp
 L:	linux-usb@vger.kernel.org
 S:	Supported
+F:	drivers/usb/host/xhci*
+F:	drivers/usb/host/pci-quirks*
 
 USB ZC0301 DRIVER
 M:	Luca Risolia
@@ -5718,6 +5725,14 @@
 S:	Maintained
 F:	drivers/net/vmxnet3/
 
+VMware PVSCSI driver
+M:	Alok Kataria
+M:	VMware PV-Drivers
+L:	linux-scsi@vger.kernel.org
+S:	Maintained
+F:	drivers/scsi/vmw_pvscsi.c
+F:	drivers/scsi/vmw_pvscsi.h
+
 VOLTAGE AND CURRENT REGULATOR FRAMEWORK
 M:	Liam Girdwood
 M:	Mark Brown
--- linux-ec2-2.6.32.orig/sound/synth/emux/emux_hwdep.c
+++ linux-ec2-2.6.32/sound/synth/emux/emux_hwdep.c
@@ -128,6 +128,9 @@
	strcpy(hw->name, SNDRV_EMUX_HWDEP_NAME);
	hw->iface = SNDRV_HWDEP_IFACE_EMUX_WAVETABLE;
	hw->ops.ioctl = snd_emux_hwdep_ioctl;
+	/* The ioctl parameter types are compatible between 32- and
+	 * 64-bit architectures, so use the same function.
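+	 * (No argument translation is needed, so a dedicated compat
+	 * handler would be redundant.)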
+	 */
+	hw->ops.ioctl_compat = snd_emux_hwdep_ioctl;
	hw->exclusive = 1;
	hw->private_data = emu;
	if ((err = snd_card_register(emu->card)) < 0)
--- linux-ec2-2.6.32.orig/sound/mips/sgio2audio.c
+++ linux-ec2-2.6.32/sound/mips/sgio2audio.c
@@ -609,7 +609,7 @@
	/* alloc virtual 'dma' area */
	if (runtime->dma_area)
		vfree(runtime->dma_area);
-	runtime->dma_area = vmalloc(size);
+	runtime->dma_area = vmalloc_user(size);
	if (runtime->dma_area == NULL)
		return -ENOMEM;
	runtime->dma_bytes = size;
--- linux-ec2-2.6.32.orig/sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c
+++ linux-ec2-2.6.32/sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c
@@ -51,7 +51,7 @@
			return 0; /* already enough large */
		vfree(runtime->dma_area);
	}
-	runtime->dma_area = vmalloc_32(size);
+	runtime->dma_area = vmalloc_32_user(size);
	if (! runtime->dma_area)
		return -ENOMEM;
	runtime->dma_bytes = size;
--- linux-ec2-2.6.32.orig/sound/pci/cmipci.c
+++ linux-ec2-2.6.32/sound/pci/cmipci.c
@@ -941,13 +941,21 @@
					 struct snd_pcm_substream *substream)
 {
	size_t ptr;
-	unsigned int reg;
+	unsigned int reg, rem, tries;
+
	if (!rec->running)
		return 0;
 #if 1 // this seems better..
	reg = rec->ch ? CM_REG_CH1_FRAME2 : CM_REG_CH0_FRAME2;
-	ptr = rec->dma_size - (snd_cmipci_read_w(cm, reg) + 1);
-	ptr >>= rec->shift;
+	for (tries = 0; tries < 3; tries++) {
+		rem = snd_cmipci_read_w(cm, reg);
+		if (rem < rec->dma_size)
+			goto ok;
+	}
+	printk(KERN_ERR "cmipci: invalid PCM pointer: %#x\n", rem);
+	return SNDRV_PCM_POS_XRUN;
+ok:
+	ptr = (rec->dma_size - (rem + 1)) >> rec->shift;
 #else
	reg = rec->ch ? CM_REG_CH1_FRAME1 : CM_REG_CH0_FRAME1;
	ptr = snd_cmipci_read(cm, reg) - rec->offset;
--- linux-ec2-2.6.32.orig/sound/pci/via82xx.c
+++ linux-ec2-2.6.32/sound/pci/via82xx.c
@@ -1791,6 +1791,12 @@
		.type = AC97_TUNE_HP_ONLY
	},
	{
+		.subvendor = 0x110a,
+		.subdevice = 0x0079,
+		.name = "Fujitsu Siemens D1289",
+		.type = AC97_TUNE_HP_ONLY
+	},
+	{
		.subvendor = 0x1019,
		.subdevice = 0x0a81,
		.name = "ECS K7VTA3",
--- linux-ec2-2.6.32.orig/sound/pci/ens1370.c
+++ linux-ec2-2.6.32/sound/pci/ens1370.c
@@ -229,6 +229,7 @@
 #define ES_REG_1371_CODEC 0x14	/* W/R: Codec Read/Write register address */
 #define   ES_1371_CODEC_RDY	   (1<<31)	/* codec ready */
 #define   ES_1371_CODEC_WIP	   (1<<30)	/* codec register access in progress */
+#define   EV_1938_CODEC_MAGIC	   (1<<26)
 #define   ES_1371_CODEC_PIRD	   (1<<23)	/* codec read/write select register */
 #define   ES_1371_CODEC_WRITE(a,d) ((((a)&0x7f)<<16)|(((d)&0xffff)<<0))
 #define   ES_1371_CODEC_READS(a)   ((((a)&0x7f)<<16)|ES_1371_CODEC_PIRD)
@@ -603,12 +604,18 @@
 
 #ifdef CHIP1371
 
+static inline bool is_ev1938(struct ensoniq *ensoniq)
+{
+	return ensoniq->pci->device == 0x8938;
+}
+
 static void snd_es1371_codec_write(struct snd_ac97 *ac97,
				   unsigned short reg, unsigned short val)
 {
	struct ensoniq *ensoniq = ac97->private_data;
-	unsigned int t, x;
+	unsigned int t, x, flag;
 
+	flag = is_ev1938(ensoniq) ? EV_1938_CODEC_MAGIC : 0;
	mutex_lock(&ensoniq->src_mutex);
	for (t = 0; t < POLL_COUNT; t++) {
		if (!(inl(ES_REG(ensoniq, 1371_CODEC)) & ES_1371_CODEC_WIP)) {
@@ -630,7 +637,8 @@
			    0x00010000)
				break;
		}
-		outl(ES_1371_CODEC_WRITE(reg, val), ES_REG(ensoniq, 1371_CODEC));
+		outl(ES_1371_CODEC_WRITE(reg, val) | flag,
+		     ES_REG(ensoniq, 1371_CODEC));
		/* restore SRC reg */
		snd_es1371_wait_src_ready(ensoniq);
		outl(x, ES_REG(ensoniq, 1371_SMPRATE));
@@ -647,8 +655,9 @@
					  unsigned short reg)
 {
	struct ensoniq *ensoniq = ac97->private_data;
-	unsigned int t, x, fail = 0;
+	unsigned int t, x, flag, fail = 0;
 
+	flag = is_ev1938(ensoniq) ? EV_1938_CODEC_MAGIC : 0;
      __again:
	mutex_lock(&ensoniq->src_mutex);
	for (t = 0; t < POLL_COUNT; t++) {
@@ -671,7 +680,8 @@
			    0x00010000)
				break;
		}
-		outl(ES_1371_CODEC_READS(reg), ES_REG(ensoniq, 1371_CODEC));
+		outl(ES_1371_CODEC_READS(reg) | flag,
+		     ES_REG(ensoniq, 1371_CODEC));
		/* restore SRC reg */
		snd_es1371_wait_src_ready(ensoniq);
		outl(x, ES_REG(ensoniq, 1371_SMPRATE));
@@ -683,6 +693,11 @@
	/* now wait for the stinkin' data (RDY) */
	for (t = 0; t < POLL_COUNT; t++) {
		if ((x = inl(ES_REG(ensoniq, 1371_CODEC))) & ES_1371_CODEC_RDY) {
+			if (is_ev1938(ensoniq)) {
+				for (t = 0; t < 100; t++)
+					inl(ES_REG(ensoniq, CONTROL));
+				x = inl(ES_REG(ensoniq, 1371_CODEC));
+			}
			mutex_unlock(&ensoniq->src_mutex);
			return ES_1371_CODEC_READ(x);
		}
--- linux-ec2-2.6.32.orig/sound/pci/sis7019.c
+++ linux-ec2-2.6.32/sound/pci/sis7019.c
@@ -40,6 +40,7 @@
 static int index = SNDRV_DEFAULT_IDX1;	/* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;	/* ID for this card */
 static int enable = 1;
+static int codecs = 1;
 
 module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for SiS7019 Audio Accelerator.");
@@ -47,6 +48,8 @@
 MODULE_PARM_DESC(id, "ID string for SiS7019 Audio Accelerator.");
 module_param(enable, bool, 0444);
 MODULE_PARM_DESC(enable, "Enable SiS7019 Audio Accelerator.");
+module_param(codecs, int, 0444);
+MODULE_PARM_DESC(codecs, "Set bit to indicate that codec number is expected to be present (default 1)");
 
 static struct pci_device_id snd_sis7019_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_SI, 0x7019) },
@@ -139,6 +142,9 @@
	dma_addr_t silence_dma_addr;
 };
 
+/* These values are also used by the module param 'codecs' to indicate
+ * which codecs should be present.
+ */
 #define SIS_PRIMARY_CODEC_PRESENT	0x0001
 #define SIS_SECONDARY_CODEC_PRESENT	0x0002
 #define SIS_TERTIARY_CODEC_PRESENT	0x0004
@@ -1075,6 +1081,7 @@
 {
	unsigned long io = sis->ioport;
	void __iomem *ioaddr = sis->ioaddr;
+	unsigned long timeout;
	u16 status;
	int count;
	int i;
@@ -1101,21 +1108,45 @@
	while ((inw(io + SIS_AC97_STATUS) & SIS_AC97_STATUS_BUSY) && --count)
		udelay(1);
 
-	/* Now that we've finished the reset, find out what's attached.
+	/* Command complete, we can let go of the semaphore now.
	 */
-	status = inl(io + SIS_AC97_STATUS);
-	if (status & SIS_AC97_STATUS_CODEC_READY)
-		sis->codecs_present |= SIS_PRIMARY_CODEC_PRESENT;
-	if (status & SIS_AC97_STATUS_CODEC2_READY)
-		sis->codecs_present |= SIS_SECONDARY_CODEC_PRESENT;
-	if (status & SIS_AC97_STATUS_CODEC3_READY)
-		sis->codecs_present |= SIS_TERTIARY_CODEC_PRESENT;
+	outl(SIS_AC97_SEMA_RELEASE, io + SIS_AC97_SEMA);
+	if (!count)
+		return -EIO;
+
+	/* Now that we've finished the reset, find out what's attached.
+	 * There are some codec/board combinations that take an extremely
+	 * long time to come up. 350+ ms has been observed in the field,
+	 * so we'll give them up to 500ms.
+	 */
+	sis->codecs_present = 0;
+	timeout = msecs_to_jiffies(500) + jiffies;
+	while (time_before_eq(jiffies, timeout)) {
+		status = inl(io + SIS_AC97_STATUS);
+		if (status & SIS_AC97_STATUS_CODEC_READY)
+			sis->codecs_present |= SIS_PRIMARY_CODEC_PRESENT;
+		if (status & SIS_AC97_STATUS_CODEC2_READY)
+			sis->codecs_present |= SIS_SECONDARY_CODEC_PRESENT;
+		if (status & SIS_AC97_STATUS_CODEC3_READY)
+			sis->codecs_present |= SIS_TERTIARY_CODEC_PRESENT;
+
+		if (sis->codecs_present == codecs)
+			break;
 
-	/* All done, let go of the semaphore, and check for errors
+		msleep(1);
+	}
+
+	/* All done, check for errors.
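+	 * (Finding no codecs at all is fatal; a partial set only logs a
+	 * warning below.)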
	 */
-	outl(SIS_AC97_SEMA_RELEASE, io + SIS_AC97_SEMA);
-	if (!sis->codecs_present || !count)
+	if (!sis->codecs_present) {
+		printk(KERN_ERR "sis7019: could not find any codecs\n");
		return -EIO;
+	}
+
+	if (sis->codecs_present != codecs) {
+		printk(KERN_WARNING "sis7019: missing codecs, found %0x, expected %0x\n",
+		       sis->codecs_present, codecs);
+	}
 
	/* Let the hardware know that the audio driver is alive,
	 * and enable PCM slots on the AC-link for L/R playback (3 & 4) and
@@ -1387,6 +1418,17 @@
	if (!enable)
		goto error_out;
 
+	/* The user can specify which codecs should be present so that we
+	 * can wait for them to show up if they are slow to recover from
+	 * the AC97 cold reset. We default to a single codec, the primary.
+	 *
+	 * We assume that SIS_PRIMARY_*_PRESENT matches bits 0-2.
+	 */
+	codecs &= SIS_PRIMARY_CODEC_PRESENT | SIS_SECONDARY_CODEC_PRESENT |
+		  SIS_TERTIARY_CODEC_PRESENT;
+	if (!codecs)
+		codecs = SIS_PRIMARY_CODEC_PRESENT;
+
	rc = snd_card_create(index, id, THIS_MODULE, sizeof(*sis), &card);
	if (rc < 0)
		goto error_out;
--- linux-ec2-2.6.32.orig/sound/pci/intel8x0.c
+++ linux-ec2-2.6.32/sound/pci/intel8x0.c
@@ -1776,6 +1776,12 @@
	},
	{
		.subvendor = 0x1014,
+		.subdevice = 0x0534,
+		.name = "ThinkPad X31",
+		.type = AC97_TUNE_INV_EAPD
+	},
+	{
+		.subvendor = 0x1014,
		.subdevice = 0x1f00,
		.name = "MS-9128",
		.type = AC97_TUNE_ALC_JACK
@@ -1859,6 +1865,12 @@
		.type = AC97_TUNE_HP_ONLY
	},
	{
+		.subvendor = 0x1028,
+		.subdevice = 0x0182,
+		.name = "Dell Latitude D610",	/* STAC9750/51 */
+		.type = AC97_TUNE_HP_ONLY
+	},
+	{
		.subvendor = 0x1028,
		.subdevice = 0x0186,
		.name = "Dell Latitude D810", /* cf. Malone #41015 */
--- linux-ec2-2.6.32.orig/sound/pci/maestro3.c
+++ linux-ec2-2.6.32/sound/pci/maestro3.c
@@ -849,6 +849,7 @@
	struct snd_kcontrol *master_switch;
	struct snd_kcontrol *master_volume;
	struct tasklet_struct hwvol_tq;
+	unsigned int in_suspend;
 
 #ifdef CONFIG_PM
	u16 *suspend_mem;
@@ -884,6 +885,7 @@
 MODULE_DEVICE_TABLE(pci, snd_m3_ids);
 
 static struct snd_pci_quirk m3_amp_quirk_list[] __devinitdata = {
+	SND_PCI_QUIRK(0x0E11, 0x0094, "Compaq Evo N600c", 0x0c),
	SND_PCI_QUIRK(0x10f7, 0x833e, "Panasonic CF-28", 0x0d),
	SND_PCI_QUIRK(0x10f7, 0x833d, "Panasonic CF-72", 0x0d),
	SND_PCI_QUIRK(0x1033, 0x80f1, "NEC LM800J/7", 0x03),
@@ -1613,6 +1615,11 @@
	outb(0x88, chip->iobase + SHADOW_MIX_REG_MASTER);
	outb(0x88, chip->iobase + HW_VOL_COUNTER_MASTER);
 
+	/* Ignore spurious HV interrupts during suspend / resume, this avoids
+	   mistaking them for a mute button press.
+	 */
+	if (chip->in_suspend)
+		return;
+
	if (!chip->master_switch || !chip->master_volume)
		return;
 
@@ -2424,6 +2431,7 @@
	if (chip->suspend_mem == NULL)
		return 0;
 
+	chip->in_suspend = 1;
	snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
	snd_pcm_suspend_all(chip->pcm);
	snd_ac97_suspend(chip->ac97);
@@ -2497,6 +2505,7 @@
	snd_m3_hv_init(chip);
 
	snd_power_change_state(card, SNDRV_CTL_POWER_D0);
+	chip->in_suspend = 0;
	return 0;
 }
 #endif /* CONFIG_PM */
--- linux-ec2-2.6.32.orig/sound/pci/atiixp.c
+++ linux-ec2-2.6.32/sound/pci/atiixp.c
@@ -297,6 +297,7 @@
 MODULE_DEVICE_TABLE(pci, snd_atiixp_ids);
 
 static struct snd_pci_quirk atiixp_quirks[] __devinitdata = {
+	SND_PCI_QUIRK(0x105b, 0x0c81, "Foxconn RC4107MA-RS2", 0),
	SND_PCI_QUIRK(0x15bd, 0x3100, "DFI RS482", 0),
	{ } /* terminator */
 };
--- linux-ec2-2.6.32.orig/sound/pci/ice1712/maya44.c
+++ linux-ec2-2.6.32/sound/pci/ice1712/maya44.c
@@ -347,7 +347,7 @@
 
 /* known working input slots (0-4) */
 #define MAYA_LINE_IN	1	/* in-2 */
-#define MAYA_MIC_IN	4	/* in-5 */
+#define MAYA_MIC_IN	3	/* in-4 */
 
 static void wm8776_select_input(struct snd_maya44 *chip, int idx, int line)
 {
@@ -393,8 +393,8 @@
	int changed;
 
	mutex_lock(&chip->mutex);
-	changed = maya_set_gpio_bits(chip->ice, GPIO_MIC_RELAY,
-				     sel ? GPIO_MIC_RELAY : 0);
+	changed = maya_set_gpio_bits(chip->ice, 1 << GPIO_MIC_RELAY,
+				     sel ? (1 << GPIO_MIC_RELAY) : 0);
	wm8776_select_input(chip, 0, sel ? MAYA_MIC_IN : MAYA_LINE_IN);
	mutex_unlock(&chip->mutex);
	return changed;
--- linux-ec2-2.6.32.orig/sound/pci/ice1712/juli.c
+++ linux-ec2-2.6.32/sound/pci/ice1712/juli.c
@@ -504,6 +504,31 @@
 }
 
 /*
+ * suspend/resume
+ * */
+
+#ifdef CONFIG_PM
+static int juli_resume(struct snd_ice1712 *ice)
+{
+	struct snd_akm4xxx *ak = ice->akm;
+	struct juli_spec *spec = ice->spec;
+	/* akm4358 un-reset, un-mute */
+	snd_akm4xxx_reset(ak, 0);
+	/* reinit ak4114 */
+	snd_ak4114_reinit(spec->ak4114);
+	return 0;
+}
+
+static int juli_suspend(struct snd_ice1712 *ice)
+{
+	struct snd_akm4xxx *ak = ice->akm;
+	/* akm4358 reset and soft-mute */
+	snd_akm4xxx_reset(ak, 1);
+	return 0;
+}
+#endif
+
+/*
  * initialize the chip
  */
 
@@ -646,6 +671,13 @@
	ice->set_spdif_clock = juli_set_spdif_clock;
 
	ice->spdif.ops.open = juli_spdif_in_open;
+
+#ifdef CONFIG_PM
+	ice->pm_resume = juli_resume;
+	ice->pm_suspend = juli_suspend;
+	ice->pm_suspend_enabled = 1;
+#endif
+
	return 0;
 }
 
--- linux-ec2-2.6.32.orig/sound/pci/ice1712/amp.c
+++ linux-ec2-2.6.32/sound/pci/ice1712/amp.c
@@ -69,8 +69,11 @@
 
 static int __devinit snd_vt1724_amp_add_controls(struct snd_ice1712 *ice)
 {
-	/* we use pins 39 and 41 of the VT1616 for left and right read outputs */
-	snd_ac97_write_cache(ice->ac97, 0x5a, snd_ac97_read(ice->ac97, 0x5a) & ~0x8000);
+	if (ice->ac97)
+		/* we use pins 39 and 41 of the VT1616 for left and right
+		   read outputs */
+		snd_ac97_write_cache(ice->ac97, 0x5a,
+			snd_ac97_read(ice->ac97, 0x5a) & ~0x8000);
	return 0;
 }
 
--- linux-ec2-2.6.32.orig/sound/pci/lx6464es/lx_core.c
+++ linux-ec2-2.6.32/sound/pci/lx6464es/lx_core.c
@@ -80,8 +80,12 @@
 
 void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len)
 {
-	void __iomem *address = lx_dsp_register(chip, port);
-	memcpy_fromio(data, address, len*sizeof(u32));
+	u32 __iomem *address = lx_dsp_register(chip, port);
+	int i;
+
+	/* we cannot use memcpy_fromio */
+	for (i = 0; i != len; ++i)
+		data[i] = ioread32(address + i);
 }
 
 
@@ -94,8 +98,12 @@
 void lx_dsp_reg_writebuf(struct lx6464es *chip, int port,
			 const u32 *data, u32 len)
 {
-	void __iomem *address = lx_dsp_register(chip, port);
-	memcpy_toio(address, data, len*sizeof(u32));
+	u32 __iomem *address = lx_dsp_register(chip, port);
+	int i;
+
+	/* we cannot use memcpy_toio */
+	for (i = 0; i != len; ++i)
+		iowrite32(data[i], address + i);
 }
 
--- linux-ec2-2.6.32.orig/sound/pci/au88x0/au88x0_pcm.c
+++ linux-ec2-2.6.32/sound/pci/au88x0/au88x0_pcm.c
@@ -42,11 +42,7 @@
	.rate_min = 5000,
	.rate_max = 48000,
	.channels_min = 1,
-#ifdef CHIP_AU8830
-	.channels_max = 4,
-#else
	.channels_max = 2,
-#endif
	.buffer_bytes_max = 0x10000,
	.period_bytes_min = 0x1,
	.period_bytes_max = 0x1000,
@@ -115,6 +111,17 @@
	.periods_max = 64,
 };
 #endif
+#ifdef CHIP_AU8830
+static unsigned int au8830_channels[3] = {
+	1, 2, 4,
+};
+
+static struct snd_pcm_hw_constraint_list hw_constraints_au8830_channels = {
+	.count = ARRAY_SIZE(au8830_channels),
+	.list = au8830_channels,
+	.mask = 0,
+};
+#endif
 /* open callback */
 static int snd_vortex_pcm_open(struct snd_pcm_substream *substream)
 {
@@ -156,6 +163,15 @@
	if (VORTEX_PCM_TYPE(substream->pcm) == VORTEX_PCM_ADB
	    || VORTEX_PCM_TYPE(substream->pcm) == VORTEX_PCM_I2S)
		runtime->hw = snd_vortex_playback_hw_adb;
+#ifdef CHIP_AU8830
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+	    VORTEX_PCM_TYPE(substream->pcm) == VORTEX_PCM_ADB) {
+		runtime->hw.channels_max = 4;
+		snd_pcm_hw_constraint_list(runtime, 0,
+			SNDRV_PCM_HW_PARAM_CHANNELS,
+			&hw_constraints_au8830_channels);
+	}
+#endif
	substream->runtime->private_data = NULL;
 }
 #ifndef CHIP_AU8810
--- linux-ec2-2.6.32.orig/sound/pci/rme9652/hdsp.c
+++ linux-ec2-2.6.32/sound/pci/rme9652/hdsp.c
@@ -4610,6 +4610,7 @@
	if (err < 0)
		return err;
 
+	memset(&info, 0, sizeof(info));
	spin_lock_irqsave(&hdsp->lock, flags);
	info.pref_sync_ref = (unsigned char)hdsp_pref_sync_ref(hdsp);
	info.wordclock_sync_check = (unsigned char)hdsp_wc_sync_check(hdsp);
--- linux-ec2-2.6.32.orig/sound/pci/rme9652/hdspm.c
+++ linux-ec2-2.6.32/sound/pci/rme9652/hdspm.c
@@ -4127,6 +4127,7 @@
 
	case SNDRV_HDSPM_IOCTL_GET_CONFIG_INFO:
 
+		memset(&info, 0, sizeof(info));
		spin_lock_irq(&hdspm->lock);
		info.pref_sync_ref = hdspm_pref_sync_ref(hdspm);
		info.wordclock_sync_check = hdspm_wc_sync_check(hdspm);
--- linux-ec2-2.6.32.orig/sound/pci/ctxfi/ctatc.c
+++ linux-ec2-2.6.32/sound/pci/ctxfi/ctatc.c
@@ -166,18 +166,7 @@
 
 static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index)
 {
-	struct ct_vm *vm;
-	void *kvirt_addr;
-	unsigned long phys_addr;
-
-	vm = atc->vm;
-	kvirt_addr = vm->get_ptp_virt(vm, index);
-	if (kvirt_addr == NULL)
-		phys_addr = (~0UL);
-	else
-		phys_addr = virt_to_phys(kvirt_addr);
-
-	return phys_addr;
+	return atc->vm->get_ptp_phys(atc->vm, index);
 }
 
 static unsigned int convert_format(snd_pcm_format_t snd_format)
@@ -879,7 +868,7 @@
	mutex_lock(&atc->atc_mutex);
	dao->ops->get_spos(dao, &status);
	if (((status >> 24) & IEC958_AES3_CON_FS) != iec958_con_fs) {
-		status &= ((~IEC958_AES3_CON_FS) << 24);
+		status &= ~(IEC958_AES3_CON_FS << 24);
		status |= (iec958_con_fs << 24);
		dao->ops->set_spos(dao, status);
		dao->ops->commit_write(dao);
@@ -1669,7 +1658,7 @@
	}
 
	/* Set up device virtual memory management object */
-	err = ct_vm_create(&atc->vm);
+	err = ct_vm_create(&atc->vm, pci);
	if (err < 0)
		goto error1;
 
--- linux-ec2-2.6.32.orig/sound/pci/ctxfi/ctdaio.c
+++ linux-ec2-2.6.32/sound/pci/ctxfi/ctdaio.c
@@ -176,6 +176,7 @@
	if (!entry)
		return -ENOMEM;
 
+	dao->ops->clear_left_input(dao);
	/* Program master and conjugate resources */
	input->ops->master(input);
	daio->rscl.ops->master(&daio->rscl);
@@ -204,6 +205,7 @@
	if (!entry)
		return -ENOMEM;
 
+	dao->ops->clear_right_input(dao);
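+	/* (As with the left channel above, clearing any previously routed
+	 *  input first avoids a stale connection when re-programming.) */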
	/* Program master and conjugate resources */
	input->ops->master(input);
	daio->rscr.ops->master(&daio->rscr);
--- linux-ec2-2.6.32.orig/sound/pci/ctxfi/ctvmem.c
+++ linux-ec2-2.6.32/sound/pci/ctxfi/ctvmem.c
@@ -138,7 +138,7 @@
		return NULL;
	}
 
-	ptp = vm->ptp[0];
+	ptp = (unsigned long *)vm->ptp[0].area;
	pte_start = (block->addr >> CT_PAGE_SHIFT);
	pages = block->size >> CT_PAGE_SHIFT;
	for (i = 0; i < pages; i++) {
@@ -158,25 +158,25 @@
 }
 
 /*
 *
- * return the host (kmalloced) addr of the @index-th device
- * page talbe page on success, or NULL on failure.
- * The first returned NULL indicates the termination.
+ * return the host physical addr of the @index-th device
+ * page table page on success, or ~0UL on failure.
+ * The first returned ~0UL indicates the termination.
 *
 */
-static void *
-ct_get_ptp_virt(struct ct_vm *vm, int index)
+static dma_addr_t
+ct_get_ptp_phys(struct ct_vm *vm, int index)
 {
-	void *addr;
+	dma_addr_t addr;
 
-	addr = (index >= CT_PTP_NUM) ? NULL : vm->ptp[index];
+	addr = (index >= CT_PTP_NUM) ? ~0UL : vm->ptp[index].addr;
	return addr;
 }
 
-int ct_vm_create(struct ct_vm **rvm)
+int ct_vm_create(struct ct_vm **rvm, struct pci_dev *pci)
 {
	struct ct_vm *vm;
	struct ct_vm_block *block;
-	int i;
+	int i, err = 0;
 
	*rvm = NULL;
 
@@ -188,23 +188,21 @@
 
	/* Allocate page table pages */
	for (i = 0; i < CT_PTP_NUM; i++) {
-		vm->ptp[i] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!vm->ptp[i])
+		err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV,
+					  snd_dma_pci_data(pci),
+					  PAGE_SIZE, &vm->ptp[i]);
+		if (err < 0)
			break;
	}
-	if (!i) {
+	if (err < 0) {
		/* no page table pages are allocated */
-		kfree(vm);
+		ct_vm_destroy(vm);
		return -ENOMEM;
	}
	vm->size = CT_ADDRS_PER_PAGE * i;
-	/* Initialise remaining ptps */
-	for (; i < CT_PTP_NUM; i++)
-		vm->ptp[i] = NULL;
-
	vm->map = ct_vm_map;
	vm->unmap = ct_vm_unmap;
-	vm->get_ptp_virt = ct_get_ptp_virt;
+	vm->get_ptp_phys = ct_get_ptp_phys;
	INIT_LIST_HEAD(&vm->unused);
	INIT_LIST_HEAD(&vm->used);
	block = kzalloc(sizeof(*block), GFP_KERNEL);
@@ -242,7 +240,7 @@
 
	/* free allocated page table pages */
	for (i = 0; i < CT_PTP_NUM; i++)
-		kfree(vm->ptp[i]);
+		snd_dma_free_pages(&vm->ptp[i]);
 
	vm->size = 0;
 
--- linux-ec2-2.6.32.orig/sound/pci/ctxfi/ctmixer.c
+++ linux-ec2-2.6.32/sound/pci/ctxfi/ctmixer.c
@@ -566,19 +566,6 @@
	return 0;
 }
 
-static int ct_spdif_default_get(struct snd_kcontrol *kcontrol,
-				struct snd_ctl_elem_value *ucontrol)
-{
-	unsigned int status = SNDRV_PCM_DEFAULT_CON_SPDIF;
-
-	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
-	ucontrol->value.iec958.status[1] = (status >> 8) & 0xff;
-	ucontrol->value.iec958.status[2] = (status >> 16) & 0xff;
-	ucontrol->value.iec958.status[3] = (status >> 24) & 0xff;
-
-	return 0;
-}
-
 static int ct_spdif_get(struct snd_kcontrol *kcontrol,
			struct snd_ctl_elem_value *ucontrol)
 {
@@ -586,6 +573,10 @@
	unsigned int status;
 
	atc->spdif_out_get_status(atc, &status);
+
+	if (status == 0)
+		status = SNDRV_PCM_DEFAULT_CON_SPDIF;
+
	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
	ucontrol->value.iec958.status[1] = (status >> 8) & 0xff;
	ucontrol->value.iec958.status[2] = (status >> 16) & 0xff;
@@ -629,7 +620,7 @@
	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT),
	.count		= 1,
	.info		= ct_spdif_info,
-	.get		= ct_spdif_default_get,
+	.get		= ct_spdif_get,
	.put		= ct_spdif_put,
	.private_value	= MIXER_IEC958_DEFAULT
 };
--- linux-ec2-2.6.32.orig/sound/pci/ctxfi/ctvmem.h
+++ linux-ec2-2.6.32/sound/pci/ctxfi/ctvmem.h
@@ -22,6 +22,8 @@
 
 #include <linux/mutex.h>
 #include <linux/list.h>
+#include <linux/pci.h>
+#include <sound/memalloc.h>
 
 /* The chip can handle the page table of 4k pages
 * (emu20k1 can handle even 8k pages, but we don't use it right now)
@@ -41,7 +43,7 @@
 
 /* Virtual memory management object for card device */
 struct ct_vm {
-	void *ptp[CT_PTP_NUM];		/* Device page table pages */
+	struct snd_dma_buffer ptp[CT_PTP_NUM];	/* Device page table pages */
	unsigned int size;		/* Available addr space in bytes */
	struct list_head unused;	/* List of unused blocks */
	struct list_head used;		/* List of used blocks */
@@ -52,10 +54,10 @@
		   int size);
	/* Unmap device logical addr area. */
	void (*unmap)(struct ct_vm *, struct ct_vm_block *block);
-	void *(*get_ptp_virt)(struct ct_vm *vm, int index);
+	dma_addr_t (*get_ptp_phys)(struct ct_vm *vm, int index);
 };
 
-int ct_vm_create(struct ct_vm **rvm);
+int ct_vm_create(struct ct_vm **rvm, struct pci_dev *pci);
 void ct_vm_destroy(struct ct_vm *vm);
 
 #endif /* CTVMEM_H */
--- linux-ec2-2.6.32.orig/sound/pci/hda/hda_local.h
+++ linux-ec2-2.6.32/sound/pci/hda/hda_local.h
@@ -408,7 +408,12 @@
 }
 
 /* get the widget type from widget capability bits */
-#define get_wcaps_type(wcaps) (((wcaps) & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT)
+static inline int get_wcaps_type(unsigned int wcaps)
+{
+	if (!wcaps)
+		return -1; /* invalid type */
+	return (wcaps & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT;
+}
 
 static inline unsigned int get_wcaps_channels(u32 wcaps)
 {
@@ -437,6 +442,15 @@
 static inline int snd_hda_create_hwdep(struct hda_codec *codec) { return 0; }
 #endif
 
+#ifdef CONFIG_SND_HDA_POWER_SAVE
+int snd_hda_hwdep_add_power_sysfs(struct hda_codec *codec);
+#else
+static inline int snd_hda_hwdep_add_power_sysfs(struct hda_codec *codec)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SND_HDA_RECONFIG
 int snd_hda_hwdep_add_sysfs(struct hda_codec *codec);
 #else
--- linux-ec2-2.6.32.orig/sound/pci/hda/hda_codec.c
+++ linux-ec2-2.6.32/sound/pci/hda/hda_codec.c
@@ -515,6 +515,7 @@
	struct hda_codec *codec;
	list_for_each_entry(codec, &bus->codec_list, list) {
		snd_hda_hwdep_add_sysfs(codec);
+		snd_hda_hwdep_add_power_sysfs(codec);
	}
	return 0;
 }
@@ -2452,9 +2453,11 @@
			    codec->afg ? codec->afg : codec->mfg,
			    AC_PWRST_D3);
 #ifdef CONFIG_SND_HDA_POWER_SAVE
+	snd_hda_update_power_acct(codec);
	cancel_delayed_work(&codec->power_work);
	codec->power_on = 0;
	codec->power_transition = 0;
+	codec->power_jiffies = jiffies;
 #endif
 }
 
@@ -3207,6 +3210,17 @@
 {
	codec->power_count++;
	codec->power_on = 1;
+	codec->power_jiffies = jiffies;
+}
+
+void snd_hda_update_power_acct(struct hda_codec *codec)
+{
+	unsigned long delta = jiffies - codec->power_jiffies;
+	if (codec->power_on)
+		codec->power_on_acct += delta;
+	else
+		codec->power_off_acct += delta;
+	codec->power_jiffies += delta;
 }
 
 void snd_hda_power_up(struct hda_codec *codec)
@@ -3217,7 +3231,9 @@
	if (codec->power_on || codec->power_transition)
		return;
 
+	snd_hda_update_power_acct(codec);
	codec->power_on = 1;
+	codec->power_jiffies = jiffies;
	if (bus->ops.pm_notify)
		bus->ops.pm_notify(bus);
	hda_call_codec_resume(codec);
--- linux-ec2-2.6.32.orig/sound/pci/hda/patch_conexant.c
+++ linux-ec2-2.6.32/sound/pci/hda/patch_conexant.c
@@ -366,10 +366,16 @@
	struct conexant_spec *spec;
	struct conexant_jack *jack;
	const char *name;
-	int err;
+	int i, err;
 
	spec = codec->spec;
	snd_array_init(&spec->jacks, sizeof(*jack), 32);
+
+	jack = spec->jacks.list;
+	for (i = 0; i < spec->jacks.used; i++, jack++)
+		if (jack->nid == nid)
+			return 0; /* already present */
+
	jack = snd_array_new(&spec->jacks);
"Headphone" : "Mic" ; @@ -1175,9 +1181,12 @@ switch (codec->subsystem_id >> 16) { case 0x103c: - /* HP laptop has a really bad sound over 0dB on NID 0x17. - * Fix max PCM level to 0 dB - * (originall it has 0x2b steps with 0dB offset 0x14) + case 0x1631: + case 0x1734: + case 0x17aa: + /* HP, Packard Bell, Fujitsu-Siemens & Lenovo laptops have + * really bad sound over 0dB on NID 0x17. Fix max PCM level to + * 0 dB (originally it has 0x2b steps with 0dB offset 0x14) */ snd_hda_override_amp_caps(codec, 0x17, HDA_INPUT, (0x14 << AC_AMPCAP_OFFSET_SHIFT) | @@ -1581,6 +1590,21 @@ #endif } spec->vmaster_nid = 0x13; + + switch (codec->subsystem_id >> 16) { + case 0x103c: + /* HP laptops have really bad sound over 0 dB on NID 0x10. + * Fix max PCM level to 0 dB (originally it has 0x1e steps + * with 0 dB offset 0x17) + */ + snd_hda_override_amp_caps(codec, 0x10, HDA_INPUT, + (0x17 << AC_AMPCAP_OFFSET_SHIFT) | + (0x17 << AC_AMPCAP_NUM_STEPS_SHIFT) | + (0x05 << AC_AMPCAP_STEP_SIZE_SHIFT) | + (1 << AC_AMPCAP_MUTE_SHIFT)); + break; + } + return 0; } @@ -2333,6 +2357,8 @@ SND_PCI_QUIRK(0x1028, 0x02f5, "Dell", CXT5066_DELL_LAPTOP), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT5066_OLPC_XO_1_5), + SND_PCI_QUIRK(0x1179, 0xff50, "Toshiba Satellite P500-PSPGSC-01800T", CXT5066_OLPC_XO_1_5), + SND_PCI_QUIRK(0x1179, 0xffe0, "Toshiba Satellite Pro T130-15F", CXT5066_OLPC_XO_1_5), {} }; --- linux-ec2-2.6.32.orig/sound/pci/hda/hda_codec.h +++ linux-ec2-2.6.32/sound/pci/hda/hda_codec.h @@ -811,6 +811,9 @@ unsigned int power_transition :1; /* power-state in transition */ int power_count; /* current (global) power refcount */ struct delayed_work power_work; /* delayed task for powerdown */ + unsigned long power_on_acct; + unsigned long power_off_acct; + unsigned long power_jiffies; #endif /* codec-specific additional proc output */ @@ -933,6 +936,7 @@ void snd_hda_power_up(struct hda_codec *codec); void snd_hda_power_down(struct hda_codec *codec); #define snd_hda_codec_needs_resume(codec) codec->power_count +void snd_hda_update_power_acct(struct hda_codec *codec); #else static inline void snd_hda_power_up(struct hda_codec *codec) {} static inline void snd_hda_power_down(struct hda_codec *codec) {} --- linux-ec2-2.6.32.orig/sound/pci/hda/hda_hwdep.c +++ linux-ec2-2.6.32/sound/pci/hda/hda_hwdep.c @@ -154,6 +154,44 @@ return 0; } +#ifdef CONFIG_SND_HDA_POWER_SAVE +static ssize_t power_on_acct_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct snd_hwdep *hwdep = dev_get_drvdata(dev); + struct hda_codec *codec = hwdep->private_data; + snd_hda_update_power_acct(codec); + return sprintf(buf, "%u\n", jiffies_to_msecs(codec->power_on_acct)); +} + +static ssize_t power_off_acct_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct snd_hwdep *hwdep = dev_get_drvdata(dev); + struct hda_codec *codec = hwdep->private_data; + snd_hda_update_power_acct(codec); + return sprintf(buf, "%u\n", jiffies_to_msecs(codec->power_off_acct)); +} + +static struct device_attribute power_attrs[] = { + __ATTR_RO(power_on_acct), + __ATTR_RO(power_off_acct), +}; + +int snd_hda_hwdep_add_power_sysfs(struct hda_codec *codec) +{ + struct snd_hwdep *hwdep = codec->hwdep; + int i; + + for (i = 0; i < ARRAY_SIZE(power_attrs); i++) + snd_add_device_sysfs_file(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, + hwdep->device, &power_attrs[i]); + return 0; +} +#endif /* CONFIG_SND_HDA_POWER_SAVE */ + #ifdef CONFIG_SND_HDA_RECONFIG /* --- linux-ec2-2.6.32.orig/sound/pci/hda/hda_proc.c +++ 
@@ -39,6 +39,8 @@
		[AC_WID_BEEP] = "Beep Generator Widget",
		[AC_WID_VENDOR] = "Vendor Defined Widget",
	};
+	if (wid_value == -1)
+		return "UNKNOWN Widget";
	wid_value &= 0xf;
	if (names[wid_value])
		return names[wid_value];
--- linux-ec2-2.6.32.orig/sound/pci/hda/patch_intelhdmi.c
+++ linux-ec2-2.6.32/sound/pci/hda/patch_intelhdmi.c
@@ -684,7 +684,7 @@
 { .id = 0x80862801, .name = "G45 DEVBLC", .patch = patch_intel_hdmi },
 { .id = 0x80862802, .name = "G45 DEVCTG", .patch = patch_intel_hdmi },
 { .id = 0x80862803, .name = "G45 DEVELK", .patch = patch_intel_hdmi },
-{ .id = 0x80862804, .name = "G45 DEVIBX", .patch = patch_intel_hdmi },
+{ .id = 0x80862804, .name = "G45 DEVIBX", .patch = patch_intel_hdmi_ibexpeak },
 { .id = 0x80860054, .name = "Q57 DEVIBX", .patch = patch_intel_hdmi_ibexpeak },
 { .id = 0x10951392, .name = "SiI1392 HDMI", .patch = patch_intel_hdmi },
 {} /* terminator */
--- linux-ec2-2.6.32.orig/sound/pci/hda/hda_eld.c
+++ linux-ec2-2.6.32/sound/pci/hda/hda_eld.c
@@ -383,7 +383,7 @@
	snd_print_pcm_rates(a->rates, buf, sizeof(buf));
 
	if (a->format == AUDIO_CODING_TYPE_LPCM)
-		snd_print_pcm_bits(a->sample_bits, buf2 + 8, sizeof(buf2 - 8));
+		snd_print_pcm_bits(a->sample_bits, buf2 + 8, sizeof(buf2) - 8);
	else if (a->max_bitrate)
		snprintf(buf2, sizeof(buf2),
			", max bitrate = %d", a->max_bitrate);
--- linux-ec2-2.6.32.orig/sound/pci/hda/patch_realtek.c
+++ linux-ec2-2.6.32/sound/pci/hda/patch_realtek.c
@@ -400,6 +400,8 @@
	unsigned int mux_idx = snd_ctl_get_ioffidx(kcontrol, &uinfo->id);
	if (mux_idx >= spec->num_mux_defs)
		mux_idx = 0;
+	if (!spec->input_mux[mux_idx].num_items && mux_idx > 0)
+		mux_idx = 0;
	return snd_hda_input_mux_info(&spec->input_mux[mux_idx], uinfo);
 }
 
@@ -428,6 +430,10 @@
	mux_idx = adc_idx >= spec->num_mux_defs ? 0 : adc_idx;
	imux = &spec->input_mux[mux_idx];
+	if (!imux->num_items && mux_idx > 0)
+		imux = &spec->input_mux[0];
+	if (!imux->num_items)
+		return 0;
 
	type = get_wcaps_type(get_wcaps(codec, nid));
	if (type == AC_WID_AUD_MIX) {
@@ -1149,7 +1155,7 @@
	case 0x10ec0883:
	case 0x10ec0885:
	case 0x10ec0887:
-	case 0x10ec0889:
+	/*case 0x10ec0889:*/ /* this causes an SPDIF problem */
		alc889_coef_init(codec);
		break;
	case 0x10ec0888:
@@ -1333,7 +1339,9 @@
	 * 15   : 1 --> enable the function "Mute internal speaker
	 *	        when the external headphone out jack is plugged"
	 */
-	if (!spec->autocfg.hp_pins[0]) {
+	if (!spec->autocfg.hp_pins[0] &&
+	    !(spec->autocfg.line_out_pins[0] &&
+	      spec->autocfg.line_out_type == AUTO_PIN_HP_OUT)) {
		hda_nid_t nid;
		tmp = (ass >> 11) & 0x3;	/* HP to chassis */
		if (tmp == 0)
@@ -2401,6 +2409,8 @@
	"Speaker Playback Switch",
	"Mono Playback Switch",
	"IEC958 Playback Switch",
+	"Line-Out Playback Switch",
+	"PCM Playback Switch",
	NULL,
 };
 
@@ -3965,10 +3975,11 @@
	SND_PCI_QUIRK(0x1695, 0x4012, "EPox EP-5LDA", ALC880_5ST_DIG),
	SND_PCI_QUIRK(0x1734, 0x107c, "FSC F1734", ALC880_F1734),
	SND_PCI_QUIRK(0x1734, 0x1094, "FSC Amilo M1451G", ALC880_FUJITSU),
-	SND_PCI_QUIRK(0x1734, 0x10ac, "FSC", ALC880_UNIWILL),
+	SND_PCI_QUIRK(0x1734, 0x10ac, "FSC AMILO Xi 1526", ALC880_F1734),
	SND_PCI_QUIRK(0x1734, 0x10b0, "Fujitsu", ALC880_FUJITSU),
	SND_PCI_QUIRK(0x1854, 0x0018, "LG LW20", ALC880_LG_LW),
	SND_PCI_QUIRK(0x1854, 0x003b, "LG", ALC880_LG),
+	SND_PCI_QUIRK(0x1854, 0x005f, "LG P1 Express", ALC880_LG),
	SND_PCI_QUIRK(0x1854, 0x0068, "LG w1", ALC880_LG),
	SND_PCI_QUIRK(0x1854, 0x0077, "LG LW25", ALC880_LG_LW),
	SND_PCI_QUIRK(0x19db, 0x4188, "TCL S700", ALC880_TCL_S700),
@@ -6246,6 +6257,7 @@
 
 static struct snd_pci_quirk alc260_cfg_tbl[] = {
	SND_PCI_QUIRK(0x1025, 0x007b, "Acer C20x", ALC260_ACER),
+	SND_PCI_QUIRK(0x1025, 0x007f, "Acer", ALC260_WILL),
	SND_PCI_QUIRK(0x1025, 0x008f, "Acer", ALC260_ACER),
	SND_PCI_QUIRK(0x1509, 0x4540, "Favorit 100XS", ALC260_FAVORIT100),
	SND_PCI_QUIRK(0x103c, 0x2808, "HP d5700", ALC260_HP_3013),
@@ -6275,7 +6287,7 @@
	.num_dacs = ARRAY_SIZE(alc260_dac_nids),
	.dac_nids = alc260_dac_nids,
	.num_adc_nids = ARRAY_SIZE(alc260_dual_adc_nids),
-	.adc_nids = alc260_adc_nids,
+	.adc_nids = alc260_dual_adc_nids,
	.num_channel_mode = ARRAY_SIZE(alc260_modes),
	.channel_mode = alc260_modes,
	.input_mux = &alc260_capture_source,
@@ -6448,6 +6460,7 @@
 
	spec->stream_analog_playback = &alc260_pcm_analog_playback;
	spec->stream_analog_capture = &alc260_pcm_analog_capture;
+	spec->stream_analog_alt_capture = &alc260_pcm_analog_capture;
 
	spec->stream_digital_playback = &alc260_pcm_digital_playback;
	spec->stream_digital_capture = &alc260_pcm_digital_capture;
@@ -6581,7 +6594,7 @@
	.num_items = 4,
	.items = {
		{ "Mic", 0x0 },
-		{ "iMic", 0x1 },
+		{ "Int Mic", 0x1 },
		{ "Line", 0x2 },
		{ "CD", 0x4 },
	},
@@ -7040,8 +7053,8 @@
	HDA_BIND_MUTE  ("Surround Playback Switch", 0x0d, 0x02, HDA_INPUT),
	HDA_CODEC_VOLUME("LFE Playback Volume", 0x0e, 0x00, HDA_OUTPUT),
	HDA_BIND_MUTE  ("LFE Playback Switch", 0x0e, 0x02, HDA_INPUT),
-	HDA_CODEC_VOLUME("HP Playback Volume", 0x0f, 0x00, HDA_OUTPUT),
-	HDA_BIND_MUTE  ("HP Playback Switch", 0x0f, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0f, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE  ("Headphone Playback Switch", 0x0f, 0x02, HDA_INPUT),
	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
	HDA_CODEC_MUTE ("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
@@ -7428,6 +7441,7 @@
	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
	{0x14, AC_VERB_SET_CONNECT_SEL, 0x03},
+	{0x14, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
	/* Front Mic pin: input vref at 80% */
	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
@@ -7552,6 +7566,27 @@
	spec->autocfg.speaker_pins[0] = 0x14;
 }
 
+static void alc885_mb5_automute(struct hda_codec *codec)
+{
+	unsigned int present;
+
+	present = snd_hda_codec_read(codec, 0x14, 0,
+				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+	snd_hda_codec_amp_stereo(codec, 0x18, HDA_OUTPUT, 0,
+				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
+				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+
+}
+
+static void alc885_mb5_unsol_event(struct hda_codec *codec,
+				   unsigned int res)
+{
+	/* Headphone insertion or removal. */
+	if ((res >> 26) == ALC880_HP_EVENT)
+		alc885_mb5_automute(codec);
+}
+
 static struct hda_verb alc882_targa_verbs[] = {
	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
@@ -8008,8 +8043,8 @@
	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
-	HDA_CODEC_VOLUME("iMic Playback Volume", 0x0b, 0x1, HDA_INPUT),
-	HDA_CODEC_MUTE("iMic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
	{ } /* end */
 };
 
@@ -8839,7 +8874,7 @@
	SND_PCI_QUIRK(0x1462, 0x040d, "MSI", ALC883_TARGA_2ch_DIG),
	SND_PCI_QUIRK(0x1462, 0x0579, "MSI", ALC883_TARGA_2ch_DIG),
	SND_PCI_QUIRK(0x1462, 0x28fb, "Targa T8", ALC882_TARGA), /* MSI-1049 T8 */
-	SND_PCI_QUIRK(0x1462, 0x2fb3, "MSI", ALC883_TARGA_2ch_DIG),
+	SND_PCI_QUIRK(0x1462, 0x2fb3, "MSI", ALC882_AUTO),
	SND_PCI_QUIRK(0x1462, 0x6668, "MSI", ALC882_6ST_DIG),
	SND_PCI_QUIRK(0x1462, 0x3729, "MSI S420", ALC883_TARGA_DIG),
	SND_PCI_QUIRK(0x1462, 0x3783, "NEC S970", ALC883_TARGA_DIG),
@@ -8866,6 +8901,7 @@
	SND_PCI_QUIRK(0x1462, 0xaa08, "MSI", ALC883_TARGA_2ch_DIG),
 
	SND_PCI_QUIRK(0x147b, 0x1083, "Abit IP35-PRO", ALC883_6ST_DIG),
+	SND_PCI_QUIRK(0x1558, 0x0571, "Clevo laptop M570U", ALC883_3ST_6ch_DIG),
	SND_PCI_QUIRK(0x1558, 0x0721, "Clevo laptop M720R", ALC883_CLEVO_M720),
	SND_PCI_QUIRK(0x1558, 0x0722, "Clevo laptop M720SR", ALC883_CLEVO_M720),
	SND_PCI_QUIRK(0x1558, 0x5409, "Clevo laptop M540R", ALC883_CLEVO_M540R),
@@ -8893,7 +8929,7 @@
	SND_PCI_QUIRK(0x8086, 0x0022, "DX58SO", ALC889_INTEL),
	SND_PCI_QUIRK(0x8086, 0x0021, "Intel IbexPeak", ALC889A_INTEL),
	SND_PCI_QUIRK(0x8086, 0x3b56, "Intel IbexPeak", ALC889A_INTEL),
-	SND_PCI_QUIRK(0x8086, 0xd601, "D102GGC", ALC883_3ST_6ch),
+	SND_PCI_QUIRK(0x8086, 0xd601, "D102GGC", ALC882_6ST_DIG),
	{}
 };
 
@@ -8907,10 +8943,12 @@
	SND_PCI_QUIRK(0x106b, 0x1000, "iMac 24", ALC885_IMAC24),
	SND_PCI_QUIRK(0x106b, 0x2800, "AppleTV", ALC885_IMAC24),
	SND_PCI_QUIRK(0x106b, 0x2c00, "MacbookPro rev3", ALC885_MBP3),
+	SND_PCI_QUIRK(0x106b, 0x3000, "iMac", ALC889A_MB31),
	SND_PCI_QUIRK(0x106b, 0x3600, "Macbook 3,1", ALC889A_MB31),
	SND_PCI_QUIRK(0x106b, 0x3800, "MacbookPro 4,1", ALC885_MBP3),
	SND_PCI_QUIRK(0x106b, 0x3e00, "iMac 24 Aluminum", ALC885_IMAC24),
	SND_PCI_QUIRK(0x106b, 0x3f00, "Macbook 5,1", ALC885_MB5),
+	SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC885_MB5),
	/* FIXME: HP jack sense seems not working for MBP 5,1 or 5,2,
	 * so apparently no perfect solution yet
	 */
@@ -8994,6 +9032,8 @@
.input_mux = &mb5_capture_source, .dig_out_nid = ALC882_DIGOUT_NID, .dig_in_nid = ALC882_DIGIN_NID, + .unsol_event = alc885_mb5_unsol_event, + .init_hook = alc885_mb5_automute, }, [ALC885_MACPRO] = { .mixers = { alc882_macpro_mixer }, @@ -9141,6 +9181,8 @@ .dac_nids = alc883_dac_nids, .num_adc_nids = ARRAY_SIZE(alc889_adc_nids), .adc_nids = alc889_adc_nids, + .capsrc_nids = alc889_capsrc_nids, + .capsrc_nids = alc889_capsrc_nids, .dig_out_nid = ALC883_DIGOUT_NID, .dig_in_nid = ALC883_DIGIN_NID, .slave_dig_outs = alc883_slave_dig_outs, @@ -9187,6 +9229,7 @@ .dac_nids = alc883_dac_nids, .adc_nids = alc883_adc_nids_alt, .num_adc_nids = ARRAY_SIZE(alc883_adc_nids_alt), + .capsrc_nids = alc883_capsrc_nids, .dig_out_nid = ALC883_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes), .channel_mode = alc883_3ST_2ch_modes, @@ -9333,6 +9376,7 @@ .dac_nids = alc883_dac_nids, .adc_nids = alc883_adc_nids_alt, .num_adc_nids = ARRAY_SIZE(alc883_adc_nids_alt), + .capsrc_nids = alc883_capsrc_nids, .num_channel_mode = ARRAY_SIZE(alc883_sixstack_modes), .channel_mode = alc883_sixstack_modes, .input_mux = &alc883_capture_source, @@ -9394,6 +9438,7 @@ .dac_nids = alc883_dac_nids, .adc_nids = alc883_adc_nids_alt, .num_adc_nids = ARRAY_SIZE(alc883_adc_nids_alt), + .capsrc_nids = alc883_capsrc_nids, .num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes), .channel_mode = alc883_3ST_2ch_modes, .input_mux = &alc883_lenovo_101e_capture_source, @@ -9573,6 +9618,7 @@ alc880_gpio1_init_verbs }, .adc_nids = alc883_adc_nids, .num_adc_nids = ARRAY_SIZE(alc883_adc_nids), + .capsrc_nids = alc883_capsrc_nids, .dac_nids = alc883_dac_nids, .num_dacs = ARRAY_SIZE(alc883_dac_nids), .channel_mode = alc889A_mb31_6ch_modes, @@ -9711,6 +9757,8 @@ continue; mux_idx = c >= spec->num_mux_defs ? 
		mux_idx = c >= spec->num_mux_defs ? 0 : c;
		imux = &spec->input_mux[mux_idx];
+		if (!imux->num_items && mux_idx > 0)
+			imux = &spec->input_mux[0];
		for (idx = 0; idx < conns; idx++) {
			/* if the current connection is the selected one,
			 * unmute it as default - otherwise mute it
@@ -10146,7 +10194,7 @@
	struct alc_spec *spec = codec->spec;
 
	spec->autocfg.hp_pins[0] = 0x15;
-	spec->autocfg.speaker_pins[0] = 0x0c; /* HACK: not actually a pin */
+	spec->autocfg.speaker_pins[0] = 0x14;
 }
 
 static struct snd_kcontrol_new alc262_hp_t5735_mixer[] = {
@@ -10581,6 +10629,13 @@
	{}
 };
 
+static struct hda_verb alc262_lenovo_3000_init_verbs[] = {
+	/* Front Mic pin: input vref at 50% */
+	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF50},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+	{}
+};
+
 static struct hda_input_mux alc262_fujitsu_capture_source = {
	.num_items = 3,
	.items = {
@@ -11580,9 +11635,9 @@
		.num_channel_mode = ARRAY_SIZE(alc262_modes),
		.channel_mode = alc262_modes,
		.input_mux = &alc262_capture_source,
-		.unsol_event = alc_automute_amp_unsol_event,
+		.unsol_event = alc_sku_unsol_event,
		.setup = alc262_hp_t5735_setup,
-		.init_hook = alc_automute_amp,
+		.init_hook = alc_inithook,
	},
	[ALC262_HP_RP5700] = {
		.mixers = { alc262_hp_rp5700_mixer },
@@ -11648,7 +11703,8 @@
	[ALC262_LENOVO_3000] = {
		.mixers = { alc262_lenovo_3000_mixer },
		.init_verbs = { alc262_init_verbs, alc262_EAPD_verbs,
-				alc262_lenovo_3000_unsol_verbs },
+				alc262_lenovo_3000_unsol_verbs,
+				alc262_lenovo_3000_init_verbs },
		.num_dacs = ARRAY_SIZE(alc262_dac_nids),
		.dac_nids = alc262_dac_nids,
		.hp_nid = 0x03,
@@ -12338,6 +12394,9 @@
		dac = 0x02;
		break;
	case 0x15:
+	case 0x1a: /* ALC259/269 only */
+	case 0x1b: /* ALC259/269 only */
+	case 0x21: /* ALC269vb has this pin, too */
		dac = 0x03;
		break;
	default:
@@ -14679,6 +14738,8 @@
	spec->stream_digital_playback = &alc861_pcm_digital_playback;
	spec->stream_digital_capture = &alc861_pcm_digital_capture;
 
+	if (!spec->cap_mixer)
+		set_capture_mixer(codec);
	set_beep_amp(spec, 0x23, 0, HDA_OUTPUT);
 
	spec->vmaster_nid = 0x03;
@@ -15317,7 +15378,7 @@
 static int alc861vd_auto_create_input_ctls(struct hda_codec *codec,
						const struct auto_pin_cfg *cfg)
 {
-	return alc_auto_create_input_ctls(codec, cfg, 0x15, 0x09, 0);
+	return alc_auto_create_input_ctls(codec, cfg, 0x15, 0x22, 0);
 }
 
 
@@ -17204,6 +17265,8 @@
		return 0x02;
	else if (nid >= 0x0c && nid <= 0x0e)
		return nid - 0x0c + 0x02;
+	else if (nid == 0x26) /* ALC887-VD has this DAC too */
+		return 0x25;
	else
		return 0;
 }
@@ -17212,7 +17275,7 @@
 static hda_nid_t alc662_dac_to_mix(struct hda_codec *codec, hda_nid_t pin,
				   hda_nid_t dac)
 {
-	hda_nid_t mix[4];
+	hda_nid_t mix[5];
	int i, num;
 
	num = snd_hda_get_connections(codec, pin, mix, ARRAY_SIZE(mix));
@@ -17623,6 +17686,8 @@
	  .patch = patch_alc882 },
	{ .id = 0x10ec0662, .rev = 0x100101, .name = "ALC662 rev1",
	  .patch = patch_alc662 },
+	{ .id = 0x10ec0662, .rev = 0x100300, .name = "ALC662 rev3",
+	  .patch = patch_alc662 },
	{ .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
	{ .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
	{ .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 },
--- linux-ec2-2.6.32.orig/sound/pci/hda/patch_cirrus.c
+++ linux-ec2-2.6.32/sound/pci/hda/patch_cirrus.c
@@ -65,7 +65,9 @@
 
 /* available models */
 enum {
+	CS420X_MBP53,
	CS420X_MBP55,
+	CS420X_IMAC27,
	CS420X_AUTO,
	CS420X_MODELS
 };
@@ -507,7 +509,7 @@
			int index, unsigned int pval, int dir,
			struct snd_kcontrol **kctlp)
 {
-	char tmp[32];
+	char tmp[44];
	struct snd_kcontrol_new knew =
		HDA_CODEC_VOLUME_IDX(tmp, index, 0, 0, HDA_OUTPUT);
	knew.private_value = pval;
@@ -832,7 +834,9 @@
				    AC_VERB_SET_PIN_WIDGET_CONTROL,
				    hp_present ? 0 : PIN_OUT);
	}
-	if (spec->board_config == CS420X_MBP55) {
+	if (spec->board_config == CS420X_MBP53 ||
+	    spec->board_config == CS420X_MBP55 ||
+	    spec->board_config == CS420X_IMAC27) {
		unsigned int gpio = hp_present ? 0x02 : 0x08;
		snd_hda_codec_write(codec, 0x01, 0,
				    AC_VERB_SET_GPIO_DATA, gpio);
@@ -1077,13 +1081,19 @@
 }
 
 static const char *cs420x_models[CS420X_MODELS] = {
+	[CS420X_MBP53] = "mbp53",
	[CS420X_MBP55] = "mbp55",
+	[CS420X_IMAC27] = "imac27",
	[CS420X_AUTO] = "auto",
 };
 
 static struct snd_pci_quirk cs420x_cfg_tbl[] = {
+	SND_PCI_QUIRK(0x10de, 0x0ac0, "MacBookPro 5,3", CS420X_MBP53),
+	SND_PCI_QUIRK(0x10de, 0x0d94, "MacBookAir 3,1(2)", CS420X_MBP55),
	SND_PCI_QUIRK(0x10de, 0xcb79, "MacBookPro 5,5", CS420X_MBP55),
+	SND_PCI_QUIRK(0x10de, 0xcb89, "MacBookPro 7,1", CS420X_MBP55),
+	SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),
	{} /* terminator */
 };
 
@@ -1092,6 +1102,20 @@
	u32 val;
 };
 
+static struct cs_pincfg mbp53_pincfgs[] = {
+	{ 0x09, 0x012b4050 },
+	{ 0x0a, 0x90100141 },
+	{ 0x0b, 0x90100140 },
+	{ 0x0c, 0x018b3020 },
+	{ 0x0d, 0x90a00110 },
+	{ 0x0e, 0x400000f0 },
+	{ 0x0f, 0x01cbe030 },
+	{ 0x10, 0x014be060 },
+	{ 0x12, 0x400000f0 },
+	{ 0x15, 0x400000f0 },
+	{} /* terminator */
+};
+
 static struct cs_pincfg mbp55_pincfgs[] = {
	{ 0x09, 0x012b4030 },
	{ 0x0a, 0x90100121 },
@@ -1106,8 +1130,24 @@
	{} /* terminator */
 };
 
+static struct cs_pincfg imac27_pincfgs[] = {
+	{ 0x09, 0x012b4050 },
+	{ 0x0a, 0x90100140 },
+	{ 0x0b, 0x90100142 },
+	{ 0x0c, 0x018b3020 },
+	{ 0x0d, 0x90a00110 },
+	{ 0x0e, 0x400000f0 },
+	{ 0x0f, 0x01cbe030 },
+	{ 0x10, 0x014be060 },
+	{ 0x12, 0x01ab9070 },
+	{ 0x15, 0x400000f0 },
+	{} /* terminator */
+};
+
 static struct cs_pincfg *cs_pincfgs[CS420X_MODELS] = {
+	[CS420X_MBP53] = mbp53_pincfgs,
	[CS420X_MBP55] = mbp55_pincfgs,
+	[CS420X_IMAC27] = imac27_pincfgs,
 };
 
 static void fix_pincfg(struct hda_codec *codec, int model)
@@ -1137,6 +1177,8 @@
		fix_pincfg(codec, spec->board_config);
 
	switch (spec->board_config) {
+	case CS420X_IMAC27:
+	case CS420X_MBP53:
	case CS420X_MBP55:
		/* GPIO1 = headphones */
		/* GPIO3 = speakers */
--- linux-ec2-2.6.32.orig/sound/pci/hda/patch_analog.c
+++ linux-ec2-2.6.32/sound/pci/hda/patch_analog.c
@@ -1003,7 +1003,7 @@
	SND_PCI_QUIRK(0x1043, 0x81cb, "ASUS M2N", AD1986A_3STACK),
	SND_PCI_QUIRK(0x1043, 0x8234, "ASUS M2N", AD1986A_3STACK),
	SND_PCI_QUIRK(0x10de, 0xcb84, "ASUS A8N-VM", AD1986A_3STACK),
-	SND_PCI_QUIRK(0x1179, 0xff40, "Toshiba", AD1986A_LAPTOP_EAPD),
+	SND_PCI_QUIRK(0x1179, 0xff40, "Toshiba Satellite L40-10Q", AD1986A_3STACK),
	SND_PCI_QUIRK(0x144d, 0xb03c, "Samsung R55", AD1986A_3STACK),
	SND_PCI_QUIRK(0x144d, 0xc01e, "FSC V2060", AD1986A_LAPTOP),
	SND_PCI_QUIRK(0x144d, 0xc024, "Samsung P50", AD1986A_SAMSUNG_P50),
@@ -1789,6 +1789,14 @@
	case AD1981_THINKPAD:
		spec->mixers[0] = ad1981_thinkpad_mixers;
		spec->input_mux = &ad1981_thinkpad_capture_source;
+		/* set the upper-limit for mixer amp to 0dB for avoiding the
+		 * possible damage by overloading
+		 */
+		snd_hda_override_amp_caps(codec, 0x11, HDA_INPUT,
+					  (0x17 << AC_AMPCAP_OFFSET_SHIFT) |
+					  (0x17 << AC_AMPCAP_NUM_STEPS_SHIFT) |
+					  (0x05 << AC_AMPCAP_STEP_SIZE_SHIFT) |
+					  (1 << AC_AMPCAP_MUTE_SHIFT));
		break;
	case AD1981_TOSHIBA:
		spec->mixers[0] = ad1981_hp_mixers;
@@ -3502,6 +3510,7 @@
	/* Lenovo Thinkpad T61/X61 */
	SND_PCI_QUIRK_VENDOR(0x17aa, "Lenovo Thinkpad", AD1984_THINKPAD),
	SND_PCI_QUIRK(0x1028, 0x0214, "Dell T3400", AD1984_DELL_DESKTOP),
+	SND_PCI_QUIRK(0x1028, 0x0233, "Dell Latitude E6400", AD1984_DELL_DESKTOP),
"Dell Latitude E6400", AD1984_DELL_DESKTOP), {} }; --- linux-ec2-2.6.32.orig/sound/pci/hda/patch_sigmatel.c +++ linux-ec2-2.6.32/sound/pci/hda/patch_sigmatel.c @@ -727,7 +727,7 @@ struct sigmatel_spec *spec = codec->spec; unsigned int adc_idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id); const struct hda_input_mux *imux = spec->input_mux; - unsigned int idx, prev_idx; + unsigned int idx, prev_idx, didx; idx = ucontrol->value.enumerated.item[0]; if (idx >= imux->num_items) @@ -739,7 +739,8 @@ snd_hda_codec_write_cache(codec, spec->mux_nids[adc_idx], 0, AC_VERB_SET_CONNECT_SEL, imux->items[idx].index); - if (prev_idx >= spec->num_analog_muxes) { + if (prev_idx >= spec->num_analog_muxes && + spec->mux_nids[adc_idx] != spec->dmux_nids[adc_idx]) { imux = spec->dinput_mux; /* 0 = analog */ snd_hda_codec_write_cache(codec, @@ -749,9 +750,13 @@ } } else { imux = spec->dinput_mux; + /* first dimux item is hardcoded to select analog imux, + * so lets skip it + */ + didx = idx - spec->num_analog_muxes + 1; snd_hda_codec_write_cache(codec, spec->dmux_nids[adc_idx], 0, AC_VERB_SET_CONNECT_SEL, - imux->items[idx - 1].index); + imux->items[didx].index); } spec->cur_mux[adc_idx] = idx; return 1; @@ -1592,12 +1597,18 @@ "Dell Studio 1555", STAC_DELL_M6_DMIC), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02bd, "Dell Studio 1557", STAC_DELL_M6_DMIC), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02fe, + "Dell Studio XPS 1645", STAC_DELL_M6_DMIC), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0413, + "Dell Studio 1558", STAC_DELL_M6_DMIC), {} /* terminator */ }; static struct snd_pci_quirk stac92hd73xx_codec_id_cfg_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02a1, "Alienware M17x", STAC_ALIENWARE_M17X), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x043a, + "Alienware M17x", STAC_ALIENWARE_M17X), {} /* terminator */ }; @@ -1712,6 +1723,8 @@ "HP HDX", STAC_HP_HDX), /* HDX16 */ SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3620, "HP dv6", STAC_HP_DV5), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3061, + "HP dv6", STAC_HP_DV5), /* HP dv6-1110ax */ SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x7010, "HP", STAC_HP_DV5), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0233, @@ -2053,12 +2066,12 @@ SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_INTEL, 0xff00, 0x2000, "Intel D965", STAC_D965_3ST), /* Dell 3 stack systems */ - SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f7, "Dell XPS M1730", STAC_DELL_3ST), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01dd, "Dell Dimension E520", STAC_DELL_3ST), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01ed, "Dell ", STAC_DELL_3ST), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f4, "Dell ", STAC_DELL_3ST), /* Dell 3 stack systems with verb table in BIOS */ SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f3, "Dell Inspiron 1420", STAC_DELL_BIOS), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f7, "Dell XPS M1730", STAC_DELL_BIOS), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0227, "Dell Vostro 1400 ", STAC_DELL_BIOS), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x022e, "Dell ", STAC_DELL_BIOS), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x022f, "Dell Inspiron 1525", STAC_DELL_BIOS), --- linux-ec2-2.6.32.orig/sound/pci/hda/hda_intel.c +++ linux-ec2-2.6.32/sound/pci/hda/hda_intel.c @@ -116,6 +116,7 @@ "{Intel, ICH9}," "{Intel, ICH10}," "{Intel, PCH}," + "{Intel, CPT}," "{Intel, SCH}," "{ATI, SB450}," "{ATI, SB600}," @@ -437,6 +438,7 @@ /* driver types */ enum { AZX_DRIVER_ICH, + AZX_DRIVER_PCH, AZX_DRIVER_SCH, AZX_DRIVER_ATI, AZX_DRIVER_ATIHDMI, @@ -451,6 +453,7 @@ static char *driver_short_names[] __devinitdata = { [AZX_DRIVER_ICH] = "HDA Intel", + [AZX_DRIVER_PCH] = "HDA Intel PCH", [AZX_DRIVER_SCH] = "HDA Intel MID", 
[AZX_DRIVER_ATI] = "HDA ATI SB", [AZX_DRIVER_ATIHDMI] = "HDA ATI HDMI", @@ -1039,6 +1042,7 @@ 0x01, NVIDIA_HDA_ENABLE_COHBIT); break; case AZX_DRIVER_SCH: + case AZX_DRIVER_PCH: pci_read_config_word(chip->pci, INTEL_SCH_HDA_DEVC, &snoop); if (snoop & INTEL_SCH_HDA_DEVC_NOSNOOP) { pci_write_config_word(chip->pci, INTEL_SCH_HDA_DEVC, @@ -1858,6 +1862,9 @@ if (!bdl_pos_adj[chip->dev_index]) return 1; /* no delayed ack */ + if (WARN_ONCE(!azx_dev->period_bytes, + "hda-intel: zero azx_dev->period_bytes")) + return 0; /* this shouldn't happen! */ if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2) return 0; /* NG - it's below the period boundary */ return 1; /* OK, it's fine */ @@ -1944,6 +1951,7 @@ struct azx_pcm *apcm; int pcm_dev = cpcm->device; int s, err; + size_t prealloc_min = 64*1024; /* 64KB */ if (pcm_dev >= AZX_MAX_PCMS) { snd_printk(KERN_ERR SFX "Invalid PCM device number %d\n", @@ -1977,10 +1985,21 @@ if (cpcm->stream[s].substreams) snd_pcm_set_ops(pcm, s, &azx_pcm_ops); } + /* buffer pre-allocation */ + + /* subtle, don't allocate a big buffer for modems... + * also, don't just test 32BIT_MASK, since azx supports + * 64-bit DMA in some cases. + */ + /* lennart wants a 2.2MB buffer for 2sec of 48khz */ + if (pcm->dev_class == SNDRV_PCM_CLASS_GENERIC && + chip->pci->dma_mask >= DMA_32BIT_MASK) + prealloc_min = 4 * 1024 * 1024; /* 4MB */ + snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG, snd_dma_pci_data(chip->pci), - 1024 * 64, 32 * 1024 * 1024); + prealloc_min, 32 * 1024 * 1024); return 0; } @@ -2219,9 +2238,27 @@ * white/black-listing for position_fix */ static struct snd_pci_quirk position_fix_list[] __devinitdata = { + SND_PCI_QUIRK(0x1025, 0x009f, "Acer Aspire 5110", POS_FIX_LPIB), SND_PCI_QUIRK(0x1028, 0x01cc, "Dell D820", POS_FIX_LPIB), SND_PCI_QUIRK(0x1028, 0x01de, "Dell Precision 390", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1028, 0x01f6, "Dell Latitude 131L", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1028, 0x0470, "Dell Inspiron 1120", POS_FIX_LPIB), + SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1028, 0x01f6, "Dell Latitude 131L", POS_FIX_LPIB), SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1043, 0x81e7, "ASUS M2V", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1043, 0x8410, "ASUS", POS_FIX_LPIB), + SND_PCI_QUIRK(0x104d, 0x9069, "Sony VPCS11V9E", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1106, 0x3288, "ASUS M2V-MX SE", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1179, 0xff10, "Toshiba A100-259", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1297, 0x3166, "Shuttle", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1458, 0xa022, "ga-ma770-ud3", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1462, 0x1002, "MSI Wind U115", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1565, 0x820f, "Biostar Microtech", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1565, 0x8218, "Biostar Microtech", POS_FIX_LPIB), + SND_PCI_QUIRK(0x8086, 0x2503, "DG965OT AAD63733-203", POS_FIX_LPIB), + SND_PCI_QUIRK(0x8086, 0xd601, "eMachines T5212", POS_FIX_LPIB), {} }; @@ -2309,6 +2346,7 @@ static struct snd_pci_quirk msi_white_list[] __devinitdata = { SND_PCI_QUIRK(0x103c, 0x30f7, "HP Pavilion dv4t-1300", 1), SND_PCI_QUIRK(0x103c, 0x3607, "HP Compa CQ40", 1), + SND_PCI_QUIRK(0x107b, 0x0380, "Gateway M-6866", 1), {} }; @@ -2325,6 +2363,13 @@ "hda_intel: msi for device %04x:%04x set to %d\n", q->subvendor, q->subdevice, q->value); chip->msi = q->value; + return; + } + + /* NVidia chipsets seem to cause troubles with MSI */ + if (chip->driver_type == AZX_DRIVER_NVIDIA) { + printk(KERN_INFO 
"hda_intel: Disable MSI for Nvidia chipset\n"); + chip->msi = 0; } } @@ -2374,6 +2419,7 @@ if (bdl_pos_adj[dev] < 0) { switch (chip->driver_type) { case AZX_DRIVER_ICH: + case AZX_DRIVER_PCH: bdl_pos_adj[dev] = 1; break; default: @@ -2436,6 +2482,11 @@ } } + /* disable 64bit DMA address for Teradici */ + /* it does not work with device 6549:1200 subsys e4a2:040b */ + if (chip->driver_type == AZX_DRIVER_TERA) + gcap &= ~ICH6_GCAP_64OK; + /* allow 64bit DMA address if supported by H/W */ if ((gcap & ICH6_GCAP_64OK) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64))) pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64)); @@ -2643,6 +2694,9 @@ { PCI_DEVICE(0x8086, 0x3a6e), .driver_data = AZX_DRIVER_ICH }, /* PCH */ { PCI_DEVICE(0x8086, 0x3b56), .driver_data = AZX_DRIVER_ICH }, + { PCI_DEVICE(0x8086, 0x3b57), .driver_data = AZX_DRIVER_ICH }, + /* CPT */ + { PCI_DEVICE(0x8086, 0x1c20), .driver_data = AZX_DRIVER_PCH }, /* SCH */ { PCI_DEVICE(0x8086, 0x811b), .driver_data = AZX_DRIVER_SCH }, /* ATI SB 450/600 */ @@ -2689,6 +2743,9 @@ { PCI_DEVICE(0x10de, 0x0ac1), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0ac2), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0ac3), .driver_data = AZX_DRIVER_NVIDIA }, + { PCI_DEVICE(0x10de, 0x0be2), .driver_data = AZX_DRIVER_NVIDIA }, + { PCI_DEVICE(0x10de, 0x0be3), .driver_data = AZX_DRIVER_NVIDIA }, + { PCI_DEVICE(0x10de, 0x0be4), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0d94), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0d95), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0d96), .driver_data = AZX_DRIVER_NVIDIA }, --- linux-ec2-2.6.32.orig/sound/pci/ac97/ac97_patch.c +++ linux-ec2-2.6.32/sound/pci/ac97/ac97_patch.c @@ -1867,11 +1867,14 @@ 0x10140523, /* Thinkpad R40 */ 0x10140534, /* Thinkpad X31 */ 0x10140537, /* Thinkpad T41p */ + 0x1014053e, /* Thinkpad R40e */ 0x10140554, /* Thinkpad T42p/R50p */ 0x10140567, /* Thinkpad T43p 2668-G7U */ 0x10140581, /* Thinkpad X41-2527 */ + 0x10280160, /* Dell Dimension 2400 */ 0x104380b0, /* Asus A7V8X-MX */ 0x11790241, /* Toshiba Satellite A-15 S127 */ + 0x1179ff10, /* Toshiba P500 */ 0x144dc01a, /* Samsung NP-X20C004/SEG */ 0 /* end */ }; @@ -1912,6 +1915,7 @@ 0x103c0944, /* HP nc6220 */ 0x103c0934, /* HP nc8220 */ 0x103c006d, /* HP nx9105 */ + 0x103c300d, /* HP Compaq dc5100 SFF(PT003AW) */ 0x17340088, /* FSC Scenic-W */ 0 /* end */ }; --- linux-ec2-2.6.32.orig/sound/pci/riptide/riptide.c +++ linux-ec2-2.6.32/sound/pci/riptide/riptide.c @@ -1224,15 +1224,14 @@ firmware.firmware.ASIC, firmware.firmware.CODEC, firmware.firmware.AUXDSP, firmware.firmware.PROG); + if (!chip) + return 1; + for (i = 0; i < FIRMWARE_VERSIONS; i++) { if (!memcmp(&firmware_versions[i], &firmware, sizeof(firmware))) - break; - } - if (i >= FIRMWARE_VERSIONS) - return 0; /* no match */ + return 1; /* OK */ - if (!chip) - return 1; /* OK */ + } snd_printdd("Writing Firmware\n"); if (!chip->fw_entry) { --- linux-ec2-2.6.32.orig/sound/pci/emu10k1/emu10k1.c +++ linux-ec2-2.6.32/sound/pci/emu10k1/emu10k1.c @@ -52,6 +52,7 @@ static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128}; static int enable_ir[SNDRV_CARDS]; static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */ +static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ... 
(SNDRV_CARDS - 1)] = 2}; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard."); @@ -73,6 +74,8 @@ MODULE_PARM_DESC(enable_ir, "Enable IR."); module_param_array(subsystem, uint, NULL, 0444); MODULE_PARM_DESC(subsystem, "Force card subsystem model."); +module_param_array(delay_pcm_irq, uint, NULL, 0444); +MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 2)."); /* * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400 */ @@ -127,6 +130,7 @@ &emu)) < 0) goto error; card->private_data = emu; + emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f; if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0) goto error; if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0) --- linux-ec2-2.6.32.orig/sound/pci/emu10k1/memory.c +++ linux-ec2-2.6.32/sound/pci/emu10k1/memory.c @@ -309,8 +309,10 @@ if (snd_BUG_ON(!hdr)) return NULL; + idx = runtime->period_size >= runtime->buffer_size ? + (emu->delay_pcm_irq * 2) : 0; mutex_lock(&hdr->block_mutex); - blk = search_empty(emu, runtime->dma_bytes); + blk = search_empty(emu, runtime->dma_bytes + idx); if (blk == NULL) { mutex_unlock(&hdr->block_mutex); return NULL; --- linux-ec2-2.6.32.orig/sound/pci/emu10k1/emupcm.c +++ linux-ec2-2.6.32/sound/pci/emu10k1/emupcm.c @@ -332,7 +332,7 @@ evoice->epcm->ccca_start_addr = start_addr + ccis; if (extra) { start_addr += ccis; - end_addr += ccis; + end_addr += ccis + emu->delay_pcm_irq; } if (stereo && !extra) { snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK); @@ -360,7 +360,9 @@ /* Assumption that PT is already 0 so no harm overwriting */ snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]); snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24)); - snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24)); + snd_emu10k1_ptr_write(emu, PSST, voice, + (start_addr + (extra ? 
emu->delay_pcm_irq : 0)) | + (send_amount[2] << 24)); if (emu->card_capabilities->emu_model) pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */ else @@ -732,6 +734,23 @@ snd_emu10k1_ptr_write(emu, IP, voice, 0); } +static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu, + struct snd_emu10k1_pcm *epcm, + struct snd_pcm_substream *substream, + struct snd_pcm_runtime *runtime) +{ + unsigned int ptr, period_pos; + + /* try to synchronize the current position for the interrupt + source voice */ + period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt; + period_pos %= runtime->period_size; + ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number); + ptr &= ~0x00ffffff; + ptr |= epcm->ccca_start_addr + period_pos; + snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr); +} + static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, int cmd) { @@ -753,6 +772,8 @@ /* follow thru */ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: + if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE) + snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime); mix = &emu->pcm_mixer[substream->number]; snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix); snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix); @@ -869,8 +890,9 @@ #endif /* printk(KERN_DEBUG - "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n", - ptr, runtime->buffer_size, runtime->period_size); + "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n", + (long)ptr, (long)runtime->buffer_size, + (long)runtime->period_size); */ return ptr; } --- linux-ec2-2.6.32.orig/sound/pci/oxygen/oxygen.c +++ linux-ec2-2.6.32/sound/pci/oxygen/oxygen.c @@ -393,6 +393,10 @@ chip->model.suspend = claro_suspend; chip->model.resume = claro_resume; chip->model.set_adc_params = set_ak5385_params; + chip->model.device_config = PLAYBACK_0_TO_I2S | + PLAYBACK_1_TO_SPDIF | + CAPTURE_0_FROM_I2S_2 | + CAPTURE_1_FROM_SPDIF; break; } if (id->driver_data == MODEL_MERIDIAN || --- linux-ec2-2.6.32.orig/sound/pci/echoaudio/echoaudio.c +++ linux-ec2-2.6.32/sound/pci/echoaudio/echoaudio.c @@ -1821,7 +1821,9 @@ /* The hardware doesn't tell us which substream caused the irq, thus we have to check all running substreams. 
*/ for (ss = 0; ss < DSP_MAXPIPES; ss++) { - if ((substream = chip->substream[ss])) { + substream = chip->substream[ss]; + if (substream && ((struct audiopipe *)substream->runtime-> + private_data)->state == PIPE_STATE_STARTED) { period = pcm_pointer(substream) / substream->runtime->period_size; if (period != chip->last_period[ss]) { --- linux-ec2-2.6.32.orig/sound/pci/mixart/mixart.c +++ linux-ec2-2.6.32/sound/pci/mixart/mixart.c @@ -1161,13 +1161,15 @@ unsigned long count, unsigned long pos) { struct mixart_mgr *mgr = entry->private_data; + unsigned long maxsize; - count = count & ~3; /* make sure the read size is a multiple of 4 bytes */ - if(count <= 0) + if (pos >= MIXART_BA0_SIZE) return 0; - if(pos + count > MIXART_BA0_SIZE) - count = (long)(MIXART_BA0_SIZE - pos); - if(copy_to_user_fromio(buf, MIXART_MEM( mgr, pos ), count)) + maxsize = MIXART_BA0_SIZE - pos; + if (count > maxsize) + count = maxsize; + count = count & ~3; /* make sure the read size is a multiple of 4 bytes */ + if (copy_to_user_fromio(buf, MIXART_MEM(mgr, pos), count)) return -EFAULT; return count; } @@ -1180,13 +1182,15 @@ unsigned long count, unsigned long pos) { struct mixart_mgr *mgr = entry->private_data; + unsigned long maxsize; - count = count & ~3; /* make sure the read size is a multiple of 4 bytes */ - if(count <= 0) + if (pos >= MIXART_BA1_SIZE) return 0; - if(pos + count > MIXART_BA1_SIZE) - count = (long)(MIXART_BA1_SIZE - pos); - if(copy_to_user_fromio(buf, MIXART_REG( mgr, pos ), count)) + maxsize = MIXART_BA1_SIZE - pos; + if (count > maxsize) + count = maxsize; + count = count & ~3; /* make sure the read size is a multiple of 4 bytes */ + if (copy_to_user_fromio(buf, MIXART_REG(mgr, pos), count)) return -EFAULT; return count; } --- linux-ec2-2.6.32.orig/sound/soc/soc-jack.c +++ linux-ec2-2.6.32/sound/soc/soc-jack.c @@ -94,7 +94,7 @@ snd_soc_dapm_sync(codec); - snd_jack_report(jack->jack, status); + snd_jack_report(jack->jack, jack->status); out: mutex_unlock(&codec->mutex); --- linux-ec2-2.6.32.orig/sound/soc/codecs/ak4642.c +++ linux-ec2-2.6.32/sound/soc/codecs/ak4642.c @@ -93,17 +93,17 @@ /* * ak4642 register cache */ -static const u16 ak4642_reg[AK4642_CACHEREGNUM] = { - 0x0000, 0x0000, 0x0001, 0x0000, - 0x0002, 0x0000, 0x0000, 0x0000, - 0x00e1, 0x00e1, 0x0018, 0x0000, - 0x00e1, 0x0018, 0x0011, 0x0008, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, +static const u8 ak4642_reg[AK4642_CACHEREGNUM] = { + 0x00, 0x00, 0x01, 0x00, + 0x02, 0x00, 0x00, 0x00, + 0xe1, 0xe1, 0x18, 0x00, + 0xe1, 0x18, 0x11, 0x08, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, }; /* --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm_hubs.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm_hubs.c @@ -593,12 +593,12 @@ { "SPKL", "Input Switch", "MIXINL" }, { "SPKL", "IN1LP Switch", "IN1LP" }, - { "SPKL", "Output Switch", "Left Output Mixer" }, + { "SPKL", "Output Switch", "Left Output PGA" }, { "SPKL", NULL, "TOCLK" }, { "SPKR", "Input Switch", "MIXINR" }, { "SPKR", "IN1RP Switch", "IN1RP" }, - { "SPKR", "Output Switch", "Right Output Mixer" }, + { "SPKR", "Output Switch", "Right Output PGA" }, { "SPKR", NULL, "TOCLK" }, { "SPKL Boost", "Direct Voice Switch", "Direct Voice" }, @@ -620,8 +620,8 @@ { "SPKOUTRP", NULL, "SPKR Driver" }, { "SPKOUTRN", NULL, "SPKR Driver" }, - { "Left Headphone Mux", "Mixer", "Left Output 
Mixer" }, - { "Right Headphone Mux", "Mixer", "Right Output Mixer" }, + { "Left Headphone Mux", "Mixer", "Left Output PGA" }, + { "Right Headphone Mux", "Mixer", "Right Output PGA" }, { "Headphone PGA", NULL, "Left Headphone Mux" }, { "Headphone PGA", NULL, "Right Headphone Mux" }, @@ -639,17 +639,17 @@ static const struct snd_soc_dapm_route lineout1_diff_routes[] = { { "LINEOUT1 Mixer", "IN1L Switch", "IN1L PGA" }, { "LINEOUT1 Mixer", "IN1R Switch", "IN1R PGA" }, - { "LINEOUT1 Mixer", "Output Switch", "Left Output Mixer" }, + { "LINEOUT1 Mixer", "Output Switch", "Left Output PGA" }, { "LINEOUT1N Driver", NULL, "LINEOUT1 Mixer" }, { "LINEOUT1P Driver", NULL, "LINEOUT1 Mixer" }, }; static const struct snd_soc_dapm_route lineout1_se_routes[] = { - { "LINEOUT1N Mixer", "Left Output Switch", "Left Output Mixer" }, - { "LINEOUT1N Mixer", "Right Output Switch", "Left Output Mixer" }, + { "LINEOUT1N Mixer", "Left Output Switch", "Left Output PGA" }, + { "LINEOUT1N Mixer", "Right Output Switch", "Right Output PGA" }, - { "LINEOUT1P Mixer", "Left Output Switch", "Left Output Mixer" }, + { "LINEOUT1P Mixer", "Left Output Switch", "Left Output PGA" }, { "LINEOUT1N Driver", NULL, "LINEOUT1N Mixer" }, { "LINEOUT1P Driver", NULL, "LINEOUT1P Mixer" }, @@ -658,17 +658,17 @@ static const struct snd_soc_dapm_route lineout2_diff_routes[] = { { "LINEOUT2 Mixer", "IN2L Switch", "IN2L PGA" }, { "LINEOUT2 Mixer", "IN2R Switch", "IN2R PGA" }, - { "LINEOUT2 Mixer", "Output Switch", "Right Output Mixer" }, + { "LINEOUT2 Mixer", "Output Switch", "Right Output PGA" }, { "LINEOUT2N Driver", NULL, "LINEOUT2 Mixer" }, { "LINEOUT2P Driver", NULL, "LINEOUT2 Mixer" }, }; static const struct snd_soc_dapm_route lineout2_se_routes[] = { - { "LINEOUT2N Mixer", "Left Output Switch", "Left Output Mixer" }, - { "LINEOUT2N Mixer", "Right Output Switch", "Left Output Mixer" }, + { "LINEOUT2N Mixer", "Left Output Switch", "Left Output PGA" }, + { "LINEOUT2N Mixer", "Right Output Switch", "Right Output PGA" }, - { "LINEOUT2P Mixer", "Right Output Switch", "Right Output Mixer" }, + { "LINEOUT2P Mixer", "Right Output Switch", "Right Output PGA" }, { "LINEOUT2N Driver", NULL, "LINEOUT2N Mixer" }, { "LINEOUT2P Driver", NULL, "LINEOUT2P Mixer" }, @@ -686,17 +686,21 @@ snd_soc_update_bits(codec, WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, WM8993_IN2_VU, WM8993_IN2_VU); + snd_soc_update_bits(codec, WM8993_SPEAKER_VOLUME_LEFT, + WM8993_SPKOUT_VU, WM8993_SPKOUT_VU); snd_soc_update_bits(codec, WM8993_SPEAKER_VOLUME_RIGHT, WM8993_SPKOUT_VU, WM8993_SPKOUT_VU); snd_soc_update_bits(codec, WM8993_LEFT_OUTPUT_VOLUME, - WM8993_HPOUT1L_ZC, WM8993_HPOUT1L_ZC); + WM8993_HPOUT1_VU | WM8993_HPOUT1L_ZC, + WM8993_HPOUT1_VU | WM8993_HPOUT1L_ZC); snd_soc_update_bits(codec, WM8993_RIGHT_OUTPUT_VOLUME, WM8993_HPOUT1_VU | WM8993_HPOUT1R_ZC, WM8993_HPOUT1_VU | WM8993_HPOUT1R_ZC); snd_soc_update_bits(codec, WM8993_LEFT_OPGA_VOLUME, - WM8993_MIXOUTL_ZC, WM8993_MIXOUTL_ZC); + WM8993_MIXOUTL_ZC | WM8993_MIXOUT_VU, + WM8993_MIXOUTL_ZC | WM8993_MIXOUT_VU); snd_soc_update_bits(codec, WM8993_RIGHT_OPGA_VOLUME, WM8993_MIXOUTR_ZC | WM8993_MIXOUT_VU, WM8993_MIXOUTR_ZC | WM8993_MIXOUT_VU); --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8510.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8510.c @@ -425,23 +425,23 @@ /* filter coefficient */ switch (params_rate(params)) { - case SNDRV_PCM_RATE_8000: + case 8000: adn |= 0x5 << 1; break; - case SNDRV_PCM_RATE_11025: + case 11025: adn |= 0x4 << 1; break; - case SNDRV_PCM_RATE_16000: + case 16000: adn |= 0x3 << 1; break; - case 
SNDRV_PCM_RATE_22050: + case 22050: adn |= 0x2 << 1; break; - case SNDRV_PCM_RATE_32000: + case 32000: adn |= 0x1 << 1; break; - case SNDRV_PCM_RATE_44100: - case SNDRV_PCM_RATE_48000: + case 44100: + case 48000: break; } --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8974.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8974.c @@ -47,7 +47,7 @@ }; #define WM8974_POWER1_BIASEN 0x08 -#define WM8974_POWER1_BUFIOEN 0x10 +#define WM8974_POWER1_BUFIOEN 0x04 struct wm8974_priv { struct snd_soc_codec codec; @@ -480,23 +480,23 @@ /* filter coefficient */ switch (params_rate(params)) { - case SNDRV_PCM_RATE_8000: + case 8000: adn |= 0x5 << 1; break; - case SNDRV_PCM_RATE_11025: + case 11025: adn |= 0x4 << 1; break; - case SNDRV_PCM_RATE_16000: + case 16000: adn |= 0x3 << 1; break; - case SNDRV_PCM_RATE_22050: + case 22050: adn |= 0x2 << 1; break; - case SNDRV_PCM_RATE_32000: + case 32000: adn |= 0x1 << 1; break; - case SNDRV_PCM_RATE_44100: - case SNDRV_PCM_RATE_48000: + case 44100: + case 48000: break; } --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8940.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8940.c @@ -379,23 +379,23 @@ iface |= (1 << 9); switch (params_rate(params)) { - case SNDRV_PCM_RATE_8000: + case 8000: addcntrl |= (0x5 << 1); break; - case SNDRV_PCM_RATE_11025: + case 11025: addcntrl |= (0x4 << 1); break; - case SNDRV_PCM_RATE_16000: + case 16000: addcntrl |= (0x3 << 1); break; - case SNDRV_PCM_RATE_22050: + case 22050: addcntrl |= (0x2 << 1); break; - case SNDRV_PCM_RATE_32000: + case 32000: addcntrl |= (0x1 << 1); break; - case SNDRV_PCM_RATE_44100: - case SNDRV_PCM_RATE_48000: + case 44100: + case 48000: break; } ret = snd_soc_write(codec, WM8940_ADDCNTRL, addcntrl); @@ -472,6 +472,7 @@ ret = snd_soc_write(codec, WM8940_POWER1, pwr_reg); break; } + codec->bias_level = level; return ret; } --- linux-ec2-2.6.32.orig/sound/soc/codecs/ak4535.c +++ linux-ec2-2.6.32/sound/soc/codecs/ak4535.c @@ -40,11 +40,11 @@ /* * ak4535 register cache */ -static const u16 ak4535_reg[AK4535_CACHEREGNUM] = { - 0x0000, 0x0080, 0x0000, 0x0003, - 0x0002, 0x0000, 0x0011, 0x0001, - 0x0000, 0x0040, 0x0036, 0x0010, - 0x0000, 0x0000, 0x0057, 0x0000, +static const u8 ak4535_reg[AK4535_CACHEREGNUM] = { + 0x00, 0x80, 0x00, 0x03, + 0x02, 0x00, 0x11, 0x01, + 0x00, 0x40, 0x36, 0x10, + 0x00, 0x00, 0x57, 0x00, }; /* --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8990.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8990.c @@ -110,21 +110,21 @@ #define wm8990_reset(c) snd_soc_write(c, WM8990_RESET, 0) -static const DECLARE_TLV_DB_LINEAR(rec_mix_tlv, -1500, 600); +static const DECLARE_TLV_DB_SCALE(rec_mix_tlv, -1500, 600, 0); -static const DECLARE_TLV_DB_LINEAR(in_pga_tlv, -1650, 3000); +static const DECLARE_TLV_DB_SCALE(in_pga_tlv, -1650, 3000, 0); -static const DECLARE_TLV_DB_LINEAR(out_mix_tlv, 0, -2100); +static const DECLARE_TLV_DB_SCALE(out_mix_tlv, 0, -2100, 0); -static const DECLARE_TLV_DB_LINEAR(out_pga_tlv, -7300, 600); +static const DECLARE_TLV_DB_SCALE(out_pga_tlv, -7300, 600, 0); -static const DECLARE_TLV_DB_LINEAR(out_omix_tlv, -600, 0); +static const DECLARE_TLV_DB_SCALE(out_omix_tlv, -600, 0, 0); -static const DECLARE_TLV_DB_LINEAR(out_dac_tlv, -7163, 0); +static const DECLARE_TLV_DB_SCALE(out_dac_tlv, -7163, 0, 0); -static const DECLARE_TLV_DB_LINEAR(in_adc_tlv, -7163, 1763); +static const DECLARE_TLV_DB_SCALE(in_adc_tlv, -7163, 1763, 0); -static const DECLARE_TLV_DB_LINEAR(out_sidetone_tlv, -3600, 0); +static const DECLARE_TLV_DB_SCALE(out_sidetone_tlv, -3600, 0, 0); static int wm899x_outpga_put_volsw_vu(struct snd_kcontrol 
*kcontrol, struct snd_ctl_elem_value *ucontrol) @@ -450,7 +450,7 @@ /* INMIX dB values */ static const unsigned int in_mix_tlv[] = { TLV_DB_RANGE_HEAD(1), - 0, 7, TLV_DB_LINEAR_ITEM(-1200, 600), + 0, 7, TLV_DB_SCALE_ITEM(-1200, 600, 0), }; /* Left In PGA Connections */ @@ -1185,7 +1185,7 @@ WM8990_VMIDTOG); /* Delay to allow output caps to discharge */ - msleep(msecs_to_jiffies(300)); + msleep(300); /* Disable VMIDTOG */ snd_soc_write(codec, WM8990_ANTIPOP2, WM8990_SOFTST | @@ -1197,17 +1197,17 @@ /* Enable outputs */ snd_soc_write(codec, WM8990_POWER_MANAGEMENT_1, 0x1b00); - msleep(msecs_to_jiffies(50)); + msleep(50); /* Enable VMID at 2x50k */ snd_soc_write(codec, WM8990_POWER_MANAGEMENT_1, 0x1f02); - msleep(msecs_to_jiffies(100)); + msleep(100); /* Enable VREF */ snd_soc_write(codec, WM8990_POWER_MANAGEMENT_1, 0x1f03); - msleep(msecs_to_jiffies(600)); + msleep(600); /* Enable BUFIOEN */ snd_soc_write(codec, WM8990_ANTIPOP2, WM8990_SOFTST | @@ -1252,7 +1252,7 @@ /* Disable VMID */ snd_soc_write(codec, WM8990_POWER_MANAGEMENT_1, 0x1f01); - msleep(msecs_to_jiffies(300)); + msleep(300); /* Enable all output discharge bits */ snd_soc_write(codec, WM8990_ANTIPOP1, WM8990_DIS_LLINE | --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8400.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8400.c @@ -106,21 +106,21 @@ wm8400_reset_codec_reg_cache(wm8400->wm8400); } -static const DECLARE_TLV_DB_LINEAR(rec_mix_tlv, -1500, 600); +static const DECLARE_TLV_DB_SCALE(rec_mix_tlv, -1500, 600, 0); -static const DECLARE_TLV_DB_LINEAR(in_pga_tlv, -1650, 3000); +static const DECLARE_TLV_DB_SCALE(in_pga_tlv, -1650, 3000, 0); -static const DECLARE_TLV_DB_LINEAR(out_mix_tlv, -2100, 0); +static const DECLARE_TLV_DB_SCALE(out_mix_tlv, -2100, 0, 0); -static const DECLARE_TLV_DB_LINEAR(out_pga_tlv, -7300, 600); +static const DECLARE_TLV_DB_SCALE(out_pga_tlv, -7300, 600, 0); -static const DECLARE_TLV_DB_LINEAR(out_omix_tlv, -600, 0); +static const DECLARE_TLV_DB_SCALE(out_omix_tlv, -600, 0, 0); -static const DECLARE_TLV_DB_LINEAR(out_dac_tlv, -7163, 0); +static const DECLARE_TLV_DB_SCALE(out_dac_tlv, -7163, 0, 0); -static const DECLARE_TLV_DB_LINEAR(in_adc_tlv, -7163, 1763); +static const DECLARE_TLV_DB_SCALE(in_adc_tlv, -7163, 1763, 0); -static const DECLARE_TLV_DB_LINEAR(out_sidetone_tlv, -3600, 0); +static const DECLARE_TLV_DB_SCALE(out_sidetone_tlv, -3600, 0, 0); static int wm8400_outpga_put_volsw_vu(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) @@ -439,7 +439,7 @@ /* INMIX dB values */ static const unsigned int in_mix_tlv[] = { TLV_DB_RANGE_HEAD(1), - 0,7, TLV_DB_LINEAR_ITEM(-1200, 600), + 0,7, TLV_DB_SCALE_ITEM(-1200, 600, 0), }; /* Left In PGA Connections */ --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8350.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8350.c @@ -423,8 +423,8 @@ SOC_ENUM_SINGLE(WM8350_INPUT_MIXER_VOLUME, 15, 2, wm8350_lr), }; -static DECLARE_TLV_DB_LINEAR(pre_amp_tlv, -1200, 3525); -static DECLARE_TLV_DB_LINEAR(out_pga_tlv, -5700, 600); +static DECLARE_TLV_DB_SCALE(pre_amp_tlv, -1200, 3525, 0); +static DECLARE_TLV_DB_SCALE(out_pga_tlv, -5700, 600, 0); static DECLARE_TLV_DB_SCALE(dac_pcm_tlv, -7163, 36, 1); static DECLARE_TLV_DB_SCALE(adc_pcm_tlv, -12700, 50, 1); static DECLARE_TLV_DB_SCALE(out_mix_tlv, -1500, 300, 1); @@ -925,7 +925,7 @@ iface |= 0x3 << 8; break; case SND_SOC_DAIFMT_DSP_B: - iface |= 0x3 << 8; /* lg not sure which mode */ + iface |= 0x3 << 8 | WM8350_AIF_LRCLK_INV; break; default: return -EINVAL; --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm9712.c +++ 
linux-ec2-2.6.32/sound/soc/codecs/wm9712.c @@ -464,7 +464,8 @@ { u16 *cache = codec->reg_cache; - soc_ac97_ops.write(codec->ac97, reg, val); + if (reg < 0x7c) + soc_ac97_ops.write(codec->ac97, reg, val); reg = reg >> 1; if (reg < (ARRAY_SIZE(wm9712_reg))) cache[reg] = val; --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8903.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8903.c @@ -1506,7 +1506,7 @@ struct i2c_client *i2c = codec->control_data; int i; u16 *reg_cache = codec->reg_cache; - u16 *tmp_cache = kmemdup(codec->reg_cache, sizeof(wm8903_reg_defaults), + u16 *tmp_cache = kmemdup(reg_cache, sizeof(wm8903_reg_defaults), GFP_KERNEL); /* Bring the codec back up to standby first to minimise pop/clicks */ @@ -1518,6 +1518,7 @@ for (i = 2; i < ARRAY_SIZE(wm8903_reg_defaults); i++) if (tmp_cache[i] != reg_cache[i]) snd_soc_write(codec, i, tmp_cache[i]); + kfree(tmp_cache); } else { dev_err(&i2c->dev, "Failed to allocate temporary cache\n"); } --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8776.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8776.c @@ -93,7 +93,6 @@ static const struct snd_soc_dapm_widget wm8776_dapm_widgets[] = { SND_SOC_DAPM_INPUT("AUX"), -SND_SOC_DAPM_INPUT("AUX"), SND_SOC_DAPM_INPUT("AIN1"), SND_SOC_DAPM_INPUT("AIN2"), @@ -178,13 +177,6 @@ case SND_SOC_DAIFMT_LEFT_J: iface |= 0x0001; break; - /* FIXME: CHECK A/B */ - case SND_SOC_DAIFMT_DSP_A: - iface |= 0x0003; - break; - case SND_SOC_DAIFMT_DSP_B: - iface |= 0x0007; - break; default: return -EINVAL; } --- linux-ec2-2.6.32.orig/sound/soc/codecs/wm8580.c +++ linux-ec2-2.6.32/sound/soc/codecs/wm8580.c @@ -268,9 +268,9 @@ SOC_DOUBLE("DAC3 Invert Switch", WM8580_DAC_CONTROL4, 4, 5, 1, 0), SOC_SINGLE("DAC ZC Switch", WM8580_DAC_CONTROL5, 5, 1, 0), -SOC_SINGLE("DAC1 Switch", WM8580_DAC_CONTROL5, 0, 1, 0), -SOC_SINGLE("DAC2 Switch", WM8580_DAC_CONTROL5, 1, 1, 0), -SOC_SINGLE("DAC3 Switch", WM8580_DAC_CONTROL5, 2, 1, 0), +SOC_SINGLE("DAC1 Switch", WM8580_DAC_CONTROL5, 0, 1, 1), +SOC_SINGLE("DAC2 Switch", WM8580_DAC_CONTROL5, 1, 1, 1), +SOC_SINGLE("DAC3 Switch", WM8580_DAC_CONTROL5, 2, 1, 1), SOC_DOUBLE("ADC Mute Switch", WM8580_ADC_CONTROL1, 0, 1, 1, 0), SOC_SINGLE("ADC High-Pass Filter Switch", WM8580_ADC_CONTROL1, 4, 1, 0), --- linux-ec2-2.6.32.orig/sound/soc/codecs/ak4104.c +++ linux-ec2-2.6.32/sound/soc/codecs/ak4104.c @@ -90,12 +90,10 @@ if (reg >= codec->reg_cache_size) return -EINVAL; - reg &= AK4104_REG_MASK; - reg |= AK4104_WRITE; - /* only write to the hardware if value has changed */ if (cache[reg] != value) { - u8 tmp[2] = { reg, value }; + u8 tmp[2] = { (reg & AK4104_REG_MASK) | AK4104_WRITE, value }; + if (spi_write(spi, tmp, sizeof(tmp))) { dev_err(&spi->dev, "SPI write failed\n"); return -EIO; --- linux-ec2-2.6.32.orig/sound/soc/blackfin/bf5xx-ac97.c +++ linux-ec2-2.6.32/sound/soc/blackfin/bf5xx-ac97.c @@ -260,9 +260,9 @@ pr_debug("%s : sport %d\n", __func__, dai->id); if (!dai->active) return 0; - if (dai->capture.active) + if (dai->capture_active) sport_rx_stop(sport); - if (dai->playback.active) + if (dai->playback_active) sport_tx_stop(sport); return 0; } --- linux-ec2-2.6.32.orig/sound/soc/blackfin/bf5xx-i2s-pcm.c +++ linux-ec2-2.6.32/sound/soc/blackfin/bf5xx-i2s-pcm.c @@ -139,11 +139,20 @@ pr_debug("%s enter\n", __func__); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { diff = sport_curr_offset_tx(sport); - frames = bytes_to_frames(substream->runtime, diff); } else { diff = sport_curr_offset_rx(sport); - frames = bytes_to_frames(substream->runtime, diff); } + + /* + * TX at least can report one frame beyond the end 
of the + * buffer if we hit the wraparound case - clamp to within the + * buffer as the ALSA APIs require. + */ + if (diff == snd_pcm_lib_buffer_bytes(substream)) + diff = 0; + + frames = bytes_to_frames(substream->runtime, diff); + return frames; } --- linux-ec2-2.6.32.orig/sound/core/rawmidi.c +++ linux-ec2-2.6.32/sound/core/rawmidi.c @@ -530,13 +530,15 @@ { struct snd_rawmidi_file *rfile; struct snd_rawmidi *rmidi; + struct module *module; rfile = file->private_data; rmidi = rfile->rmidi; rawmidi_release_priv(rfile); kfree(rfile); + module = rmidi->card->module; snd_card_file_remove(rmidi->card, file); - module_put(rmidi->card->module); + module_put(module); return 0; } --- linux-ec2-2.6.32.orig/sound/core/init.c +++ linux-ec2-2.6.32/sound/core/init.c @@ -848,6 +848,7 @@ return -ENOMEM; mfile->file = file; mfile->disconnected_f_op = NULL; + INIT_LIST_HEAD(&mfile->shutdown_list); spin_lock(&card->files_lock); if (card->shutdown) { spin_unlock(&card->files_lock); @@ -883,6 +884,9 @@ list_for_each_entry(mfile, &card->files_list, list) { if (mfile->file == file) { list_del(&mfile->list); + spin_lock(&shutdown_lock); + list_del(&mfile->shutdown_list); + spin_unlock(&shutdown_lock); if (mfile->disconnected_f_op) fops_put(mfile->disconnected_f_op); found = mfile; --- linux-ec2-2.6.32.orig/sound/core/control.c +++ linux-ec2-2.6.32/sound/core/control.c @@ -31,6 +31,7 @@ /* max number of user-defined controls */ #define MAX_USER_CONTROLS 32 +#define MAX_CONTROL_COUNT 1028 struct snd_kctl_ioctl { struct list_head list; /* list of all ioctls */ @@ -190,6 +191,10 @@ if (snd_BUG_ON(!control || !control->count)) return NULL; + + if (control->count > MAX_CONTROL_COUNT) + return NULL; + kctl = kzalloc(sizeof(*kctl) + sizeof(struct snd_kcontrol_volatile) * control->count, GFP_KERNEL); if (kctl == NULL) { snd_printk(KERN_ERR "Cannot allocate control instance\n"); --- linux-ec2-2.6.32.orig/sound/core/timer.c +++ linux-ec2-2.6.32/sound/core/timer.c @@ -530,6 +530,8 @@ if (err < 0) return err; timer = timeri->timer; + if (!timer) + return -EINVAL; spin_lock_irqsave(&timer->lock, flags); timeri->cticks = timeri->ticks; timeri->pticks = 0; --- linux-ec2-2.6.32.orig/sound/core/hrtimer.c +++ linux-ec2-2.6.32/sound/core/hrtimer.c @@ -37,14 +37,23 @@ struct snd_hrtimer { struct snd_timer *timer; struct hrtimer hrt; + atomic_t running; }; static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt) { struct snd_hrtimer *stime = container_of(hrt, struct snd_hrtimer, hrt); struct snd_timer *t = stime->timer; - hrtimer_forward_now(hrt, ns_to_ktime(t->sticks * resolution)); - snd_timer_interrupt(stime->timer, t->sticks); + unsigned long oruns; + + if (!atomic_read(&stime->running)) + return HRTIMER_NORESTART; + + oruns = hrtimer_forward_now(hrt, ns_to_ktime(t->sticks * resolution)); + snd_timer_interrupt(stime->timer, t->sticks * oruns); + + if (!atomic_read(&stime->running)) + return HRTIMER_NORESTART; return HRTIMER_RESTART; } @@ -58,6 +67,7 @@ hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stime->timer = t; stime->hrt.function = snd_hrtimer_callback; + atomic_set(&stime->running, 0); t->private_data = stime; return 0; } @@ -78,16 +88,18 @@ { struct snd_hrtimer *stime = t->private_data; + atomic_set(&stime->running, 0); + hrtimer_cancel(&stime->hrt); hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); + atomic_set(&stime->running, 1); return 0; } static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; - - 
hrtimer_cancel(&stime->hrt); + atomic_set(&stime->running, 0); return 0; } --- linux-ec2-2.6.32.orig/sound/core/pcm_compat.c +++ linux-ec2-2.6.32/sound/core/pcm_compat.c @@ -341,7 +341,7 @@ kfree(bufs); return -EFAULT; } - bufs[ch] = compat_ptr(ptr); + bufs[i] = compat_ptr(ptr); bufptr++; } if (dir == SNDRV_PCM_STREAM_PLAYBACK) --- linux-ec2-2.6.32.orig/sound/core/pcm_native.c +++ linux-ec2-2.6.32/sound/core/pcm_native.c @@ -314,10 +314,10 @@ if (!params->info) params->info = hw->info & ~SNDRV_PCM_INFO_FIFO_IN_FRAMES; if (!params->fifo_size) { - if (snd_mask_min(¶ms->masks[SNDRV_PCM_HW_PARAM_FORMAT]) == - snd_mask_max(¶ms->masks[SNDRV_PCM_HW_PARAM_FORMAT]) && - snd_mask_min(¶ms->masks[SNDRV_PCM_HW_PARAM_CHANNELS]) == - snd_mask_max(¶ms->masks[SNDRV_PCM_HW_PARAM_CHANNELS])) { + m = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + i = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); + if (snd_mask_min(m) == snd_mask_max(m) && + snd_interval_min(i) == snd_interval_max(i)) { changed = substream->ops->ioctl(substream, SNDRV_PCM_IOCTL1_FIFO_SIZE, params); if (changed < 0) @@ -972,6 +972,10 @@ { if (substream->runtime->trigger_master != substream) return 0; + /* some drivers might use hw_ptr to recover from the pause - + update the hw_ptr now */ + if (push) + snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta betwen the current jiffies, this gives a large enough * delta, effectively to skip the check once. --- linux-ec2-2.6.32.orig/sound/core/seq/oss/seq_oss_init.c +++ linux-ec2-2.6.32/sound/core/seq/oss/seq_oss_init.c @@ -280,13 +280,10 @@ return 0; _error: - snd_seq_oss_writeq_delete(dp->writeq); - snd_seq_oss_readq_delete(dp->readq); snd_seq_oss_synth_cleanup(dp); snd_seq_oss_midi_cleanup(dp); - delete_port(dp); delete_seq_queue(dp->queue); - kfree(dp); + delete_port(dp); return rc; } @@ -349,8 +346,10 @@ static int delete_port(struct seq_oss_devinfo *dp) { - if (dp->port < 0) + if (dp->port < 0) { + kfree(dp); return 0; + } debug_printk(("delete_port %i\n", dp->port)); return snd_seq_event_port_detach(dp->cseq, dp->port); --- linux-ec2-2.6.32.orig/sound/oss/sb_mixer.c +++ linux-ec2-2.6.32/sound/oss/sb_mixer.c @@ -230,7 +230,7 @@ return 1; } -static void change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval) +static void __change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval) { unsigned char mask; int shift; @@ -282,7 +282,7 @@ return -EINVAL; val = sb_getmixer(devc, regoffs); - change_bits(devc, &val, dev, LEFT_CHN, left); + __change_bits(devc, &val, dev, LEFT_CHN, left); if ((*devc->iomap)[dev][RIGHT_CHN].regno != regoffs) /* * Change register @@ -302,7 +302,7 @@ * Read the new one */ } - change_bits(devc, &val, dev, RIGHT_CHN, right); + __change_bits(devc, &val, dev, RIGHT_CHN, right); sb_setmixer(devc, regoffs, val); --- linux-ec2-2.6.32.orig/sound/oss/sequencer.c +++ linux-ec2-2.6.32/sound/oss/sequencer.c @@ -241,7 +241,7 @@ return -ENXIO; fmt = (*(short *) &event_rec[0]) & 0xffff; - err = synth_devs[dev]->load_patch(dev, fmt, buf, p + 4, c, 0); + err = synth_devs[dev]->load_patch(dev, fmt, buf + p, c, 0); if (err < 0) return err; --- linux-ec2-2.6.32.orig/sound/oss/midi_synth.h +++ linux-ec2-2.6.32/sound/oss/midi_synth.h @@ -8,7 +8,7 @@ void midi_synth_close (int dev); void midi_synth_hw_control (int dev, unsigned char *event); int midi_synth_load_patch (int dev, int format, const char __user * addr, - int offs, int count, int pmgr_flag); + int count, int pmgr_flag); void 
midi_synth_panning (int dev, int channel, int pressure); void midi_synth_aftertouch (int dev, int channel, int pressure); void midi_synth_controller (int dev, int channel, int ctrl_num, int value); --- linux-ec2-2.6.32.orig/sound/oss/ad1848.c +++ linux-ec2-2.6.32/sound/oss/ad1848.c @@ -457,7 +457,7 @@ return mask; } -static void change_bits(ad1848_info * devc, unsigned char *regval, +static void __change_bits(ad1848_info * devc, unsigned char *regval, unsigned char *muteval, int dev, int chn, int newval) { unsigned char mask; @@ -515,10 +515,10 @@ if (muteregoffs != regoffs) { muteval = ad_read(devc, muteregoffs); - change_bits(devc, &val, &muteval, dev, channel, value); + __change_bits(devc, &val, &muteval, dev, channel, value); } else - change_bits(devc, &val, &val, dev, channel, value); + __change_bits(devc, &val, &val, dev, channel, value); spin_lock_irqsave(&devc->lock,flags); ad_write(devc, regoffs, val); --- linux-ec2-2.6.32.orig/sound/oss/dev_table.h +++ linux-ec2-2.6.32/sound/oss/dev_table.h @@ -271,7 +271,7 @@ void (*reset) (int dev); void (*hw_control) (int dev, unsigned char *event); int (*load_patch) (int dev, int format, const char __user *addr, - int offs, int count, int pmgr_flag); + int count, int pmgr_flag); void (*aftertouch) (int dev, int voice, int pressure); void (*controller) (int dev, int voice, int ctrl_num, int value); void (*panning) (int dev, int voice, int value); --- linux-ec2-2.6.32.orig/sound/oss/soundcard.c +++ linux-ec2-2.6.32/sound/oss/soundcard.c @@ -87,7 +87,7 @@ int i, n; for (i = 0; i < num_mixer_volumes; i++) { - if (strcmp(name, mixer_vols[i].name) == 0) { + if (strncmp(name, mixer_vols[i].name, 32) == 0) { if (present) mixer_vols[i].num = i; return mixer_vols[i].levels; @@ -99,7 +99,7 @@ } n = num_mixer_volumes++; - strcpy(mixer_vols[n].name, name); + strncpy(mixer_vols[n].name, name, 32); if (present) mixer_vols[n].num = n; --- linux-ec2-2.6.32.orig/sound/oss/opl3.c +++ linux-ec2-2.6.32/sound/oss/opl3.c @@ -819,7 +819,7 @@ } static int opl3_load_patch(int dev, int format, const char __user *addr, - int offs, int count, int pmgr_flag) + int count, int pmgr_flag) { struct sbi_instrument ins; @@ -829,11 +829,7 @@ return -EINVAL; } - /* - * What the fuck is going on here? We leave junk in the beginning - * of ins and then check the field pretty close to that beginning? 
- */ - if(copy_from_user(&((char *) &ins)[offs], addr + offs, sizeof(ins) - offs)) + if (copy_from_user(&ins, addr, sizeof(ins))) return -EFAULT; if (ins.channel < 0 || ins.channel >= SBFM_MAXINSTR) @@ -848,6 +844,10 @@ static void opl3_panning(int dev, int voice, int value) { + + if (voice < 0 || voice >= devc->nr_voice) + return; + devc->voc[voice].panning = value; } @@ -1065,8 +1065,15 @@ static void opl3_setup_voice(int dev, int voice, int chn) { - struct channel_info *info = - &synth_devs[dev]->chn_info[chn]; + struct channel_info *info; + + if (voice < 0 || voice >= devc->nr_voice) + return; + + if (chn < 0 || chn > 15) + return; + + info = &synth_devs[dev]->chn_info[chn]; opl3_set_instr(dev, voice, info->pgm_num); --- linux-ec2-2.6.32.orig/sound/oss/midi_synth.c +++ linux-ec2-2.6.32/sound/oss/midi_synth.c @@ -476,7 +476,7 @@ int midi_synth_load_patch(int dev, int format, const char __user *addr, - int offs, int count, int pmgr_flag) + int count, int pmgr_flag) { int orig_dev = synth_devs[dev]->midi_dev; @@ -491,39 +491,37 @@ if (!prefix_cmd(orig_dev, 0xf0)) return 0; + /* Invalid patch format */ if (format != SYSEX_PATCH) - { -/* printk("MIDI Error: Invalid patch format (key) 0x%x\n", format);*/ return -EINVAL; - } + + /* Patch header too short */ if (count < hdr_size) - { -/* printk("MIDI Error: Patch header too short\n");*/ return -EINVAL; - } + count -= hdr_size; /* - * Copy the header from user space but ignore the first bytes which have - * been transferred already. + * Copy the header from user space */ - if(copy_from_user(&((char *) &sysex)[offs], &(addr)[offs], hdr_size - offs)) + if (copy_from_user(&sysex, addr, hdr_size)) return -EFAULT; - - if (count < sysex.len) - { -/* printk(KERN_WARNING "MIDI Warning: Sysex record too short (%d<%d)\n", count, (int) sysex.len);*/ + + /* Sysex record too short */ + if ((unsigned)count < (unsigned)sysex.len) sysex.len = count; - } - left = sysex.len; - src_offs = 0; + + left = sysex.len; + src_offs = 0; for (i = 0; i < left && !signal_pending(current); i++) { unsigned char data; - get_user(*(unsigned char *) &data, (unsigned char __user *) &((addr)[hdr_size + i])); + if (get_user(data, + (unsigned char __user *)(addr + hdr_size + i))) + return -EFAULT; eox_seen = (i > 0 && data & 0x80); /* End of sysex */ --- linux-ec2-2.6.32.orig/sound/ppc/tumbler.c +++ linux-ec2-2.6.32/sound/ppc/tumbler.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ #define DBG(fmt...) #endif +#define IS_G4DA (machine_is_compatible("PowerMac3,4")) + /* i2c address for tumbler */ #define TAS_I2C_ADDR 0x34 @@ -1134,7 +1137,8 @@ gp->inactive_val = (*base) ? 
0x4 : 0x5; } else { const u32 *prop = NULL; - gp->active_state = 0; + gp->active_state = IS_G4DA + && !strncmp(device, "keywest-gpio1", 13); gp->active_val = 0x4; gp->inactive_val = 0x5; /* Here are some crude hacks to extract the GPIO polarity and @@ -1312,6 +1316,9 @@ if (irq <= NO_IRQ) irq = tumbler_find_device("line-output-detect", NULL, &mix->line_detect, 1); + if (IS_G4DA && irq <= NO_IRQ) + irq = tumbler_find_device("keywest-gpio16", + NULL, &mix->line_detect, 1); mix->lineout_irq = irq; tumbler_reset_audio(chip); --- linux-ec2-2.6.32.orig/sound/usb/usbaudio.c +++ linux-ec2-2.6.32/sound/usb/usbaudio.c @@ -752,7 +752,7 @@ return 0; /* already large enough */ vfree(runtime->dma_area); } - runtime->dma_area = vmalloc(size); + runtime->dma_area = vmalloc_user(size); if (!runtime->dma_area) return -ENOMEM; runtime->dma_bytes = size; @@ -1936,7 +1936,7 @@ struct snd_usb_stream *as = snd_pcm_substream_chip(substream); struct snd_usb_substream *subs = &as->substream[direction]; - if (subs->interface >= 0) { + if (!as->chip->shutdown && subs->interface >= 0) { usb_set_interface(subs->dev, subs->interface, 0); subs->interface = -1; } @@ -3326,6 +3326,32 @@ } /* + * This call will put the synth in "USB send" mode, i.e it will send MIDI + * messages through USB (this is disabled at startup). The synth will + * acknowledge by sending a sysex on endpoint 0x85 and by displaying a USB + * sign on its LCD. Values here are chosen based on sniffing USB traffic + * under Windows. + */ +static int snd_usb_accessmusic_boot_quirk(struct usb_device *dev) +{ + int err, actual_length; + + /* "midi send" enable */ + static const u8 seq[] = { 0x4e, 0x73, 0x52, 0x01 }; + + void *buf = kmemdup(seq, ARRAY_SIZE(seq), GFP_KERNEL); + if (!buf) + return -ENOMEM; + err = usb_interrupt_msg(dev, usb_sndintpipe(dev, 0x05), buf, + ARRAY_SIZE(seq), &actual_length, 1000); + kfree(buf); + if (err < 0) + return err; + + return 0; +} + +/* * Setup quirks */ #define AUDIOPHILE_SET 0x01 /* if set, parse device_setup */ @@ -3616,6 +3642,12 @@ goto __err_val; } + /* Access Music VirusTI Desktop */ + if (id == USB_ID(0x133e, 0x0815)) { + if (snd_usb_accessmusic_boot_quirk(dev) < 0) + goto __err_val; + } + /* * found a config. now register to ALSA */ --- linux-ec2-2.6.32.orig/sound/usb/usbmidi.c +++ linux-ec2-2.6.32/sound/usb/usbmidi.c @@ -931,6 +931,8 @@ DEFINE_WAIT(wait); long timeout = msecs_to_jiffies(50); + if (ep->umidi->disconnected) + return; /* * The substream buffer is empty, but some data might still be in the * currently active URBs, so we have to wait for those to complete. @@ -1075,14 +1077,21 @@ * Frees an output endpoint. * May be called when ep hasn't been initialized completely. 
*/ -static void snd_usbmidi_out_endpoint_delete(struct snd_usb_midi_out_endpoint* ep) +static void snd_usbmidi_out_endpoint_clear(struct snd_usb_midi_out_endpoint *ep) { unsigned int i; for (i = 0; i < OUTPUT_URBS; ++i) - if (ep->urbs[i].urb) + if (ep->urbs[i].urb) { free_urb_and_buffer(ep->umidi, ep->urbs[i].urb, ep->max_transfer); + ep->urbs[i].urb = NULL; + } +} + +static void snd_usbmidi_out_endpoint_delete(struct snd_usb_midi_out_endpoint *ep) +{ + snd_usbmidi_out_endpoint_clear(ep); kfree(ep); } @@ -1201,15 +1210,18 @@ usb_kill_urb(ep->out->urbs[j].urb); if (umidi->usb_protocol_ops->finish_out_endpoint) umidi->usb_protocol_ops->finish_out_endpoint(ep->out); + ep->out->active_urbs = 0; + if (ep->out->drain_urbs) { + ep->out->drain_urbs = 0; + wake_up(&ep->out->drain_wait); + } } if (ep->in) for (j = 0; j < INPUT_URBS; ++j) usb_kill_urb(ep->in->urbs[j]); /* free endpoints here; later call can result in Oops */ - if (ep->out) { - snd_usbmidi_out_endpoint_delete(ep->out); - ep->out = NULL; - } + if (ep->out) + snd_usbmidi_out_endpoint_clear(ep->out); if (ep->in) { snd_usbmidi_in_endpoint_delete(ep->in); ep->in = NULL; @@ -1360,6 +1372,12 @@ EXTERNAL_PORT(0x086a, 0x0001, 8, "%s Broadcast"), EXTERNAL_PORT(0x086a, 0x0002, 8, "%s Broadcast"), EXTERNAL_PORT(0x086a, 0x0003, 4, "%s Broadcast"), + /* Access Music Virus TI */ + EXTERNAL_PORT(0x133e, 0x0815, 0, "%s MIDI"), + PORT_INFO(0x133e, 0x0815, 1, "%s Synth", 0, + SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | + SNDRV_SEQ_PORT_TYPE_HARDWARE | + SNDRV_SEQ_PORT_TYPE_SYNTHESIZER), }; static struct port_info *find_port_info(struct snd_usb_midi* umidi, int number) --- linux-ec2-2.6.32.orig/sound/usb/usbquirks.h +++ linux-ec2-2.6.32/sound/usb/usbquirks.h @@ -2050,6 +2050,33 @@ } }, +/* Access Music devices */ +{ + /* VirusTI Desktop */ + USB_DEVICE_VENDOR_SPEC(0x133e, 0x0815), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 3, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = &(const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0003, + .in_cables = 0x0003 + } + }, + { + .ifnum = 4, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, + /* */ { /* aka. 
Serato Scratch Live DJ Box */ --- linux-ec2-2.6.32.orig/sound/usb/usx2y/us122l.c +++ linux-ec2-2.6.32/sound/usb/usx2y/us122l.c @@ -234,29 +234,26 @@ struct file *file, poll_table *wait) { struct us122l *us122l = hw->private_data; - struct usb_stream *s = us122l->sk.s; unsigned *polled; unsigned int mask; poll_wait(file, &us122l->sk.sleep, wait); - switch (s->state) { - case usb_stream_ready: - if (us122l->first == file) - polled = &s->periods_polled; - else - polled = &us122l->second_periods_polled; - if (*polled != s->periods_done) { - *polled = s->periods_done; - mask = POLLIN | POLLOUT | POLLWRNORM; - break; + mask = POLLIN | POLLOUT | POLLWRNORM | POLLERR; + if (mutex_trylock(&us122l->mutex)) { + struct usb_stream *s = us122l->sk.s; + if (s && s->state == usb_stream_ready) { + if (us122l->first == file) + polled = &s->periods_polled; + else + polled = &us122l->second_periods_polled; + if (*polled != s->periods_done) { + *polled = s->periods_done; + mask = POLLIN | POLLOUT | POLLWRNORM; + } else + mask = 0; } - /* Fall through */ - mask = 0; - break; - default: - mask = POLLIN | POLLOUT | POLLWRNORM | POLLERR; - break; + mutex_unlock(&us122l->mutex); } return mask; } @@ -342,6 +339,7 @@ { struct usb_stream_config *cfg; struct us122l *us122l = hw->private_data; + struct usb_stream *s; unsigned min_period_frames; int err = 0; bool high_speed; @@ -387,18 +385,18 @@ snd_power_wait(hw->card, SNDRV_CTL_POWER_D0); mutex_lock(&us122l->mutex); + s = us122l->sk.s; if (!us122l->master) us122l->master = file; else if (us122l->master != file) { - if (memcmp(cfg, &us122l->sk.s->cfg, sizeof(*cfg))) { + if (!s || memcmp(cfg, &s->cfg, sizeof(*cfg))) { err = -EIO; goto unlock; } us122l->slave = file; } - if (!us122l->sk.s || - memcmp(cfg, &us122l->sk.s->cfg, sizeof(*cfg)) || - us122l->sk.s->state == usb_stream_xrun) { + if (!s || memcmp(cfg, &s->cfg, sizeof(*cfg)) || + s->state == usb_stream_xrun) { us122l_stop(us122l); if (!us122l_start(us122l, cfg->sample_rate, cfg->period_frames)) err = -EIO; @@ -409,6 +407,7 @@ mutex_unlock(&us122l->mutex); free: kfree(cfg); + wake_up_all(&us122l->sk.sleep); return err; } --- linux-ec2-2.6.32.orig/sound/usb/usx2y/usb_stream.c +++ linux-ec2-2.6.32/sound/usb/usx2y/usb_stream.c @@ -673,7 +673,7 @@ inurb->transfer_buffer_length = inurb->number_of_packets * inurb->iso_frame_desc[0].length; - preempt_disable(); + if (u == 0) { int now; struct usb_device *dev = inurb->dev; @@ -685,19 +685,17 @@ } err = usb_submit_urb(inurb, GFP_ATOMIC); if (err < 0) { - preempt_enable(); snd_printk(KERN_ERR"usb_submit_urb(sk->inurb[%i])" " returned %i\n", u, err); return err; } err = usb_submit_urb(outurb, GFP_ATOMIC); if (err < 0) { - preempt_enable(); snd_printk(KERN_ERR"usb_submit_urb(sk->outurb[%i])" " returned %i\n", u, err); return err; } - preempt_enable(); + if (inurb->start_frame != outurb->start_frame) { snd_printd(KERN_DEBUG "u[%i] start_frames differ in:%u out:%u\n", --- linux-ec2-2.6.32.orig/sound/usb/caiaq/midi.c +++ linux-ec2-2.6.32/sound/usb/caiaq/midi.c @@ -135,7 +135,7 @@ if (ret < 0) return ret; - strcpy(rmidi->name, device->product_name); + strlcpy(rmidi->name, device->product_name, sizeof(rmidi->name)); rmidi->info_flags = SNDRV_RAWMIDI_INFO_DUPLEX; rmidi->private_data = device; --- linux-ec2-2.6.32.orig/sound/usb/caiaq/device.h +++ linux-ec2-2.6.32/sound/usb/caiaq/device.h @@ -92,6 +92,7 @@ int input_panic, output_panic, warned; char *audio_in_buf, *audio_out_buf; unsigned int samplerates, bpp; + unsigned long outurb_active_mask; struct snd_pcm_substream 
*sub_playback[MAX_STREAMS]; struct snd_pcm_substream *sub_capture[MAX_STREAMS]; --- linux-ec2-2.6.32.orig/sound/usb/caiaq/input.c +++ linux-ec2-2.6.32/sound/usb/caiaq/input.c @@ -28,7 +28,7 @@ static unsigned short keycode_rk2[] = { KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7 }; static unsigned short keycode_rk3[] = { KEY_1, KEY_2, KEY_3, KEY_4, - KEY_5, KEY_6, KEY_7, KEY_5, KEY_6 }; + KEY_5, KEY_6, KEY_7, KEY_8, KEY_9 }; static unsigned short keycode_kore[] = { KEY_FN_F1, /* "menu" */ --- linux-ec2-2.6.32.orig/sound/usb/caiaq/audio.c +++ linux-ec2-2.6.32/sound/usb/caiaq/audio.c @@ -138,8 +138,12 @@ for (i = 0; i < N_URBS; i++) { usb_kill_urb(dev->data_urbs_in[i]); - usb_kill_urb(dev->data_urbs_out[i]); + + if (test_bit(i, &dev->outurb_active_mask)) + usb_kill_urb(dev->data_urbs_out[i]); } + + dev->outurb_active_mask = 0; } static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream) @@ -466,8 +470,9 @@ { struct snd_usb_caiaq_cb_info *info = urb->context; struct snd_usb_caiaqdev *dev; - struct urb *out; - int frame, len, send_it = 0, outframe = 0; + struct urb *out = NULL; + int i, frame, len, send_it = 0, outframe = 0; + size_t offset = 0; if (urb->status || !info) return; @@ -477,7 +482,17 @@ if (!dev->streaming) return; - out = dev->data_urbs_out[info->index]; + /* find an output urb that is not in use */ + for (i = 0; i < N_URBS; i++) + if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) { + out = dev->data_urbs_out[i]; + break; + } + + if (!out) { + log("Unable to find an output urb to use\n"); + goto requeue; + } /* read the recently received packet and send back one which has * the same layout */ @@ -488,7 +503,8 @@ len = urb->iso_frame_desc[outframe].actual_length; out->iso_frame_desc[outframe].length = len; out->iso_frame_desc[outframe].actual_length = 0; - out->iso_frame_desc[outframe].offset = BYTES_PER_FRAME * frame; + out->iso_frame_desc[outframe].offset = offset; + offset += len; if (len > 0) { spin_lock(&dev->spinlock); @@ -504,11 +520,15 @@ } if (send_it) { - out->number_of_packets = FRAMES_PER_URB; + out->number_of_packets = outframe; out->transfer_flags = URB_ISO_ASAP; usb_submit_urb(out, GFP_ATOMIC); + } else { + struct snd_usb_caiaq_cb_info *oinfo = out->context; + clear_bit(oinfo->index, &dev->outurb_active_mask); } +requeue: /* re-submit inbound urb */ for (frame = 0; frame < FRAMES_PER_URB; frame++) { urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame; @@ -530,6 +550,8 @@ dev->output_running = 1; wake_up(&dev->prepare_wait_queue); } + + clear_bit(info->index, &dev->outurb_active_mask); } static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret) @@ -639,7 +661,7 @@ } dev->pcm->private_data = dev; - strcpy(dev->pcm->name, dev->product_name); + strlcpy(dev->pcm->name, dev->product_name, sizeof(dev->pcm->name)); memset(dev->sub_playback, 0, sizeof(dev->sub_playback)); memset(dev->sub_capture, 0, sizeof(dev->sub_capture)); @@ -680,6 +702,9 @@ if (!dev->data_cb_info) return -ENOMEM; + dev->outurb_active_mask = 0; + BUILD_BUG_ON(N_URBS > (sizeof(dev->outurb_active_mask) * 8)); + for (i = 0; i < N_URBS; i++) { dev->data_cb_info[i].dev = dev; dev->data_cb_info[i].index = i; --- linux-ec2-2.6.32.orig/debian.ec2/control.stub.in +++ linux-ec2-2.6.32/debian.ec2/control.stub.in @@ -0,0 +1,58 @@ +Source: SRCPKGNAME +Section: devel +Priority: optional +Maintainer: Ubuntu Kernel Team +Standards-Version: 3.8.4.0 +Build-Depends: debhelper (>= 5), cpio, module-init-tools, kernel-wedge (>= 2.24ubuntu1), makedumpfile [amd64 i386], 
gawk +Build-Depends-Indep: xmlto, docbook-utils, ghostscript, transfig, bzip2, sharutils +Build-Conflicts: findutils (= 4.4.1-1ubuntu1) +Vcs-Git: http://kernel.ubuntu.com/git-repos/ubuntu/ubuntu-karmic.git + +Package: SRCPKGNAME-source-PKGVER +Architecture: all +Section: devel +Priority: optional +Provides: SRCPKGNAME-source, SRCPKGNAME-source-2.6 +Depends: ${misc:Depends}, binutils, bzip2, coreutils | fileutils (>= 4.0) +Recommends: libc-dev, gcc, make +Suggests: libncurses-dev | ncurses-dev, kernel-package, libqt3-dev +Description: Linux kernel source for version PKGVER with Ubuntu patches + This package provides the source code for the Linux kernel version + PKGVER. + . + This package is mainly meant for other packages to use, in order to build + custom flavours. + . + If you wish to use this package to create a custom Linux kernel, then it + is suggested that you investigate the package kernel-package, which has + been designed to ease the task of creating kernel image packages. + . + If you are simply trying to build third-party modules for your kernel, + you do not want this package. Install the appropriate linux-headers + package instead. + +Package: SRCPKGNAME-doc +Architecture: all +Section: doc +Priority: optional +Depends: ${misc:Depends} +Conflicts: SRCPKGNAME-doc-2.6 +Replaces: SRCPKGNAME-doc-2.6 +Description: Linux kernel specific documentation for version PKGVER + This package provides the various documents in the PKGVER kernel + Documentation/ subdirectory. These document kernel subsystems, APIs, device + drivers, and so on. See + /usr/share/doc/SRCPKGNAME-doc/00-INDEX for a list of what is + contained in each file. + +Package: linux-headers-PKGVER-ABINUM +Architecture: all +Section: devel +Priority: optional +Depends: ${misc:Depends}, coreutils | fileutils (>= 4.0) +#Provides: linux-headers, linux-headers-2.6 +Description: Header files related to Linux kernel version PKGVER + This package provides kernel header files for version PKGVER, for sites + that want the latest kernel headers. 
Please read + /usr/share/doc/linux-headers-PKGVER-ABINUM/debian.README.gz for details + --- linux-ec2-2.6.32.orig/debian.ec2/deviations.txt +++ linux-ec2-2.6.32/debian.ec2/deviations.txt @@ -0,0 +1,98 @@ +XEN PATCHSET +F: drivers/pci/msi.c +F: drivers/hwmon/coretemp.c +F: lib/swiotlb.c +F: arch/x86/kernel/pci-dma.c +F: arch/x86/kernel/ioport.c +F: arch/x86/kernel/smp.c +F: arch/x86/kernel/process_32.c +F: arch/x86/kernel/setup.c +F: arch/x86/kernel/msr.c +F: arch/x86/kernel/quirks.c +F: arch/x86/kernel/entry_64.S +F: arch/x86/kernel/process.c +F: arch/x86/kernel/pci-nommu.c +F: arch/x86/kernel/head64.c +F: arch/x86/kernel/head.c +F: arch/x86/kernel/head_32.S +F: arch/x86/kernel/irq.c +F: arch/x86/kernel/early_printk.c +F: arch/x86/kernel/process_64.c +F: arch/x86/kernel/mpparse.c +F: arch/x86/kernel/acpi/sleep.c +F: arch/x86/kernel/entry_32.S +F: arch/x86/kernel/traps.c +F: arch/x86/kernel/apic/ipi.c +F: arch/x86/kernel/apic/probe_32.c +F: arch/x86/kernel/apic/io_apic.c +F: arch/x86/kernel/apic/apic.c +F: arch/x86/kernel/microcode_core.c +F: arch/x86/kernel/head_64.S +F: arch/x86/kernel/x86_init.c +F: arch/x86/kernel/ldt.c +F: arch/x86/kernel/head32.c +F: arch/x86/kernel/vsyscall_64.c +F: arch/x86/kernel/cpu/common.c +F: arch/x86/kernel/cpu/mtrr/main.c +F: arch/x86/kernel/e820.c +F: arch/x86/kernel/time.c +F: arch/x86/pci/irq.c +F: arch/x86/mm/init.c +F: arch/x86/mm/iomap_32.c +F: arch/x86/mm/highmem_32.c +F: arch/x86/mm/init_32.c +F: arch/x86/mm/pgtable.c +F: arch/x86/mm/pgtable_32.c +F: arch/x86/mm/ioremap.c +F: arch/x86/mm/pageattr.c +F: arch/x86/mm/pat.c +F: arch/x86/mm/init_64.c +F: arch/x86/mm/fault.c +F: arch/x86/vdso/vdso32-setup.c +F: arch/x86/ia32/ia32entry.S +F: mm/tmem.c +F: arch/x86/include/asm/agp.h +F: arch/x86/include/asm/desc.h +F: arch/x86/include/asm/dma-mapping.h +F: arch/x86/include/asm/fixmap.h +F: arch/x86/include/asm/gnttab_dma.h +F: arch/x86/include/asm/highmem.h +F: arch/x86/include/asm/hypercall_32.h +F: arch/x86/include/asm/hypercall_64.h +F: arch/x86/include/asm/hypercall.h +F: arch/x86/include/asm/hypervisor.h +F: arch/x86/include/asm/io.h +F: arch/x86/include/asm/ipi.h +F: arch/x86/include/asm/irqflags.h +F: arch/x86/include/asm/irq_vectors.h +F: arch/x86/include/asm/mach_traps.h +F: arch/x86/include/asm/maddr_32.h +F: arch/x86/include/asm/maddr_64.h +F: arch/x86/include/asm/maddr.h +F: arch/x86/include/asm/mmu_context.h +F: arch/x86/include/asm/pci.h +F: arch/x86/include/asm/pgalloc.h +F: arch/x86/include/asm/pgtable_32.h +F: arch/x86/include/asm/pgtable-3level.h +F: arch/x86/include/asm/pgtable-3level_types.h +F: arch/x86/include/asm/pgtable_64.h +F: arch/x86/include/asm/pgtable_64_types.h +F: arch/x86/include/asm/pgtable.h +F: arch/x86/include/asm/pgtable_types.h +F: arch/x86/include/asm/processor.h +F: arch/x86/include/asm/setup.h +F: arch/x86/include/asm/smp.h +F: arch/x86/include/asm/smp-processor-id.h +F: arch/x86/include/asm/spinlock.h +F: arch/x86/include/asm/swiotlb.h +F: arch/x86/include/asm/synch_bitops.h +F: arch/x86/include/asm/system_64.h +F: arch/x86/include/asm/system.h +F: arch/x86/include/asm/tlbflush.h +F: arch/x86/include/asm/vga.h +F: arch/x86/include/asm/xenoprof.h +F: arch/x86/include/asm/xor_64.h +F: arch/x86/include/asm/xor.h +F: drivers/xen/* +F: drivers/block/xen-blkfront.c +F: drivers/net/xen-netfront.c --- linux-ec2-2.6.32.orig/debian.ec2/control.stub +++ linux-ec2-2.6.32/debian.ec2/control.stub @@ -0,0 +1,112 @@ +Source: linux-ec2 +Section: devel +Priority: optional +Maintainer: Ubuntu Kernel Team +Standards-Version: 3.8.4.0 
+Build-Depends: debhelper (>= 5), cpio, module-init-tools, kernel-wedge (>= 2.24ubuntu1), makedumpfile [amd64 i386], gawk +Build-Depends-Indep: xmlto, docbook-utils, ghostscript, transfig, bzip2, sharutils +Build-Conflicts: findutils (= 4.4.1-1ubuntu1) +Vcs-Git: http://kernel.ubuntu.com/git-repos/ubuntu/ubuntu-karmic.git + +Package: linux-ec2-source-2.6.32 +Architecture: all +Section: devel +Priority: optional +Provides: linux-ec2-source, linux-ec2-source-2.6 +Depends: ${misc:Depends}, binutils, bzip2, coreutils | fileutils (>= 4.0) +Recommends: libc-dev, gcc, make +Suggests: libncurses-dev | ncurses-dev, kernel-package, libqt3-dev +Description: Linux kernel source for version 2.6.32 with Ubuntu patches + This package provides the source code for the Linux kernel version + 2.6.32. + . + This package is mainly meant for other packages to use, in order to build + custom flavours. + . + If you wish to use this package to create a custom Linux kernel, then it + is suggested that you investigate the package kernel-package, which has + been designed to ease the task of creating kernel image packages. + . + If you are simply trying to build third-party modules for your kernel, + you do not want this package. Install the appropriate linux-headers + package instead. + +Package: linux-ec2-doc +Architecture: all +Section: doc +Priority: optional +Depends: ${misc:Depends} +Conflicts: linux-ec2-doc-2.6 +Replaces: linux-ec2-doc-2.6 +Description: Linux kernel specific documentation for version 2.6.32 + This package provides the various documents in the 2.6.32 kernel + Documentation/ subdirectory. These document kernel subsystems, APIs, device + drivers, and so on. See + /usr/share/doc/linux-ec2-doc/00-INDEX for a list of what is + contained in each file. + +Package: linux-headers-2.6.32-343 +Architecture: all +Section: devel +Priority: optional +Depends: ${misc:Depends}, coreutils | fileutils (>= 4.0) +#Provides: linux-headers, linux-headers-2.6 +Description: Header files related to Linux kernel version 2.6.32 + This package provides kernel header files for version 2.6.32, for sites + that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-2.6.32-343/debian.README.gz for details + + +Package: linux-image-2.6.32-343-ec2 +Architecture: i386 amd64 +Section: admin +Priority: optional +Pre-Depends: dpkg (>= 1.10.24) +Provides: linux-image, linux-image-2.6, fuse-module, kvm-api-4, redhat-cluster-modules, ivtv-modules, ndiswrapper-modules-1.9 +Depends: ${misc:Depends}, initramfs-tools (>= 0.36ubuntu6), coreutils | fileutils (>= 4.0), module-init-tools (>= 3.3-pre11-4ubuntu3) +Conflicts: hotplug (<< 0.0.20040105-1) +Recommends: +Suggests: fdutils, linux-ec2-doc-2.6.32 | linux-ec2-source-2.6.32 +Description: Linux kernel image for version 2.6.32 on x86/x86_64 + This package contains the Linux kernel image for version 2.6.32 on + x86/x86_64. + . + Also includes the corresponding System.map file, the modules built by the + packager, and scripts that try to ensure that the system is not left in an + unbootable state after an update. + . + Supports Generic processors. + . + Geared toward desktop systems. + . + You likely do not want to install this package directly. Instead, install + the linux-ec2 meta-package, which will ensure that upgrades work + correctly, and that supporting packages are also installed. 
+ +Package: linux-headers-2.6.32-343-ec2 +Architecture: i386 amd64 +Section: devel +Priority: optional +Depends: ${misc:Depends}, coreutils | fileutils (>= 4.0), linux-headers-2.6.32-343, ${shlibs:Depends} +Description: Linux kernel headers for version 2.6.32 on x86/x86_64 + This package provides kernel header files for version 2.6.32 on + x86/x86_64. + . + This is for sites that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-2.6.32-343/debian.README.gz for details. + +Package: linux-image-2.6.32-343-ec2-dbgsym +Architecture: i386 amd64 +Section: devel +Priority: optional +Depends: ${misc:Depends} +Provides: linux-debug +Description: Linux kernel debug image for version 2.6.32 on x86/x86_64 + This package provides a kernel debug image for version 2.6.32 on + x86/x86_64. + . + This is for sites that wish to debug the kernel. + . + The kernel image contained in this package is NOT meant to boot from. It + is uncompressed, and unstripped. This package also includes the + unstripped modules. --- linux-ec2-2.6.32.orig/debian.ec2/copyright +++ linux-ec2-2.6.32/debian.ec2/copyright @@ -0,0 +1,30 @@ +This is the Ubuntu prepackaged version of the Linux kernel. +Linux was written by Linus Torvalds +and others. + +This package was put together by the Ubuntu Kernel Team, from +sources retrieved from upstream linux git. +The sources may be found at most Linux ftp sites, including +ftp://ftp.kernel.org/pub/linux/kernel/ + +This package is currently maintained by the +Ubuntu Kernel Team + +Linux is copyrighted by Linus Torvalds and others. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 dated June, 1991. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. + +On Ubuntu Linux systems, the complete text of the GNU General +Public License v2 can be found in `/usr/share/common-licenses/GPL-2'. --- linux-ec2-2.6.32.orig/debian.ec2/NOTES +++ linux-ec2-2.6.32/debian.ec2/NOTES @@ -0,0 +1,4 @@ +eSCO patch removed. Replaced upstream with a disable_esco module parm. +airprime: Module gone, use option driver instead +AppArmor: Patch is all there and ported. 
Oopses when enabled, so defaults + to off (can still be enabled with apparmor=1) --- linux-ec2-2.6.32.orig/debian.ec2/changelog +++ linux-ec2-2.6.32/debian.ec2/changelog @@ -0,0 +1,10288 @@ +linux-ec2 (2.6.32-343.45) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebase to Ubuntu-2.6.32-39.86 + * Release Tracking Bug + - LP: #931914 + + [ Ubuntu: 2.6.32-39.86 ] + + * ext4: fix undefined behavior in ext4_fill_flex_info() + - LP: #922315 + * ALSA: snd-usb-us122l: Delete calls to preempt_disable + - LP: #922315 + * ALSA: ice1724 - Check for ac97 to avoid kernel oops + - LP: #922315 + * ALSA: hda - Return the error from get_wcaps_type() for invalid NIDs + - LP: #922315 + * HID: bump maximum global item tag report size to 96 bytes + - LP: #724831, #922315 + * UBI: fix use-after-free on error path + - LP: #922315 + * PCI: Fix PCI_EXP_TYPE_RC_EC value + - LP: #922315 + * PCI: msi: Disable msi interrupts when we initialize a pci device + - LP: #922315 + * xen/xenbus: Reject replies with payload > XENSTORE_PAYLOAD_MAX. + - LP: #922315 + * ima: free duplicate measurement memory + - LP: #922315 + * PNP: work around Dell 1536/1546 BIOS MMCONFIG bug that breaks USB + - LP: #922315 + * x86: Fix mmap random address range + - LP: #922315 + * UBI: fix nameless volumes handling + - LP: #922315 + * i2c: Fix error value returned by several bus drivers + - LP: #922315 + * V4L/DVB: v4l2-ioctl: integer overflow in video_usercopy() + - LP: #922315 + * svcrpc: fix double-free on shutdown of nfsd after changing pool mode + - LP: #922315 + * svcrpc: destroy server sockets all at once + - LP: #922315 + * nfsd: Fix oops when parsing a 0 length export + - LP: #922315 + * USB: cdc-wdm: fix misuse of logical operation in place of bitop + - LP: #922315 + * fix cputime overflow in uptime_proc_show + - LP: #922315 + * USB: Fix 'bad dma' problem on WDM device disconnect + - LP: #922315 + * block: add and use scsi_blk_cmd_ioctl + - LP: #922315 + * kernel.h: add printk_ratelimited and pr_<level>_rl + - LP: #922315 + * ALSA: HDA: Fix internal microphone on Dell Studio 16 XPS 1645 + - LP: #795823, #922315 + * sym53c8xx: Fix NULL pointer dereference in slave_destroy + - LP: #922315 + * score: fix off-by-one index into syscall table + - LP: #922315 + * kprobes: initialize before using a hlist + - LP: #922315 + * Linux 2.6.32.55 + - LP: #922315 + * eCryptfs: Sanitize write counts of /dev/ecryptfs + - LP: #926321 + * ecryptfs: Improve metadata read failure logging + - LP: #926321 + * eCryptfs: Make truncate path killable + - LP: #926321 + * crypto: sha512 - make it work, undo percpu message schedule + - LP: #926321 + * crypto: sha512 - reduce stack usage to safe number + - LP: #926321 + * block: fail SCSI passthrough ioctls on partition devices + - LP: #926321 + * dm: do not forward ioctls from logical volumes to the underlying device + - LP: #926321 + * USB: ftdi_sio: fix TIOCSSERIAL baud_base handling + - LP: #926321 + * USB: ftdi_sio: add PID for TI XDS100v2 / BeagleBone A3 + - LP: #926321 + * USB: serial: ftdi additional IDs + - LP: #926321 + * USB: ftdi_sio: Add more identifiers + - LP: #926321 + * USB: cdc-wdm: updating desc->length must be protected by spin_lock + - LP: #926321 + * usb: io_ti: Make edge_remove_sysfs_attrs the port_remove method.
+ - LP: #926321 + * USB: usbsevseg: fix max length + - LP: #926321 + * hwmon: (f71805f) Fix clamping of temperature limits + - LP: #926321 + * hwmon: (sht15) fix bad error code + - LP: #926321 + * USB: serial: CP210x: Added USB-ID for the Link Instruments MSO-19 + - LP: #926321 + * USB: cp210x: do not map baud rates to B0 + - LP: #926321 + * USB: ftdi_sio: fix initial baud rate + - LP: #926321 + * Linux 2.6.32.56 + - LP: #926321 + * drm: Fix authentication kernel crash + - LP: #931415 + * Linux 2.6.32.56+drm33.22 + - LP: #931415 + + -- Stefan Bader Tue, 14 Feb 2012 15:14:37 +0100 + +linux-ec2 (2.6.32-342.44) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to Ubuntu-2.6.32-38.85 + * Release Tracking Bug + - LP: #921882 + + [ Ubuntu: 2.6.32-38.85 ] + + * Revert "ARM: 7220/1: mmc: mmci: Fixup error handling for dma" + - LP: #921113 + + [ Ubuntu: 2.6.32-38.84 ] + + * KVM: x86: Prevent starting PIT timers in the absence of irqchip support + - LP: #911303 + - CVE-2011-4622 + * cfq-iosched: fix cfq_cic_link() race confition + - LP: #913463 + * SCSI: mpt2sas: _scsih_smart_predicted_fault uses GFP_KERNEL in + interrupt context + - LP: #913463 + * MXC PWM: should active during DOZE/WAIT/DBG mode + - LP: #913463 + * vfs: __read_cache_page should use gfp argument rather than GFP_KERNEL + - LP: #913463 + * ARM:imx:fix pwm period value + - LP: #913463 + * ARM: 7220/1: mmc: mmci: Fixup error handling for dma + - LP: #913463 + * watchdog: hpwdt: Changes to handle NX secure bit in 32bit path + - LP: #913463 + * hung_task: fix false positive during vfork + - LP: #913463 + * ath9k: Fix kernel panic in AR2427 in AP mode + - LP: #913463 + * Linux 2.6.32.53 + - LP: #913463 + * MAINTAINERS: stable: Update address + - LP: #915900 + * Documentation: Update stable address + - LP: #915900 + * firmware: Fix an oops on reading fw_priv->fw in sysfs loading file + - LP: #915900 + * offb: Fix setting of the pseudo-palette for >8bpp + - LP: #915900 + * offb: Fix bug in calculating requested vram size + - LP: #915900 + * asix: new device id + - LP: #915900 + * reiserfs: Fix quota mount option parsing + - LP: #915900 + * reiserfs: Force inode evictions before umount to avoid crash + - LP: #915900 + * USB: update documentation for usbmon + - LP: #915900 + * drivers/usb/class/cdc-acm.c: clear dangling pointer + - LP: #915900 + * USB: isight: fix kernel bug when loading firmware + - LP: #915900 + * usb: usb-storage doesn't support dynamic id currently, the patch + disables the feature to fix an oops + - LP: #915900 + * USB: add quirk for another camera + - LP: #915900 + * USB: omninet: fix write_room + - LP: #915900 + * USB: Add USB-ID for Multiplex RC serial adapter to cp210x.c + - LP: #915900 + * asix: fix infinite loop in rx_fixup() + - LP: #915900 + * PM / Sleep: Fix race between CPU hotplug and freezer + - LP: #915900 + * SCSI: scsi_dh: check queuedata pointer before proceeding further + - LP: #915900 + * xfs: validate acl count + - LP: #915900 + * xfs: fix acl count validation in xfs_acl_from_disk() + - LP: #915900 + * Linux 2.6.32.54 + - LP: #915900 + + -- Stefan Bader Thu, 26 Jan 2012 15:17:59 +0100 + +linux-ec2 (2.6.32-342.43) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to Ubuntu-2.6.32-38.83 + * XEN: x86/mpparse: Account for bus types other than ISA and PCI + - LP: #902317 + * Release Tracking Bug + - LP: #911230 + + [ Ubuntu: 2.6.32-38.83 ] + + * Revert "clockevents: Set noop handler in clockevents_exchange_device()" + - LP: #911392 + * Linux 2.6.32.52 + - LP: #911392 + + [ Ubuntu: 
2.6.32-38.82 ] + + * SAUCE: netns: Add quota for number of NET_NS instances. + * [Config] CONFIG_NET_NS=y + - LP: #790863 + * Revert "core: Fix memory leak/corruption on VLAN GRO_DROP, + CVE-2011-1576" + * hfs: fix hfs_find_init() sb->ext_tree NULL ptr oops, CVE-2011-2203 + - LP: #899466 + - CVE-2011-2203 + * net: ipv4: relax AF_INET check in bind() + - LP: #900396 + * KEYS: Fix a NULL pointer deref in the user-defined key type, + CVE-2011-4110 + - LP: #894369 + - CVE-2011-4110 + * i2c-algo-bit: Generate correct i2c address sequence for 10-bit target + - LP: #902317 + * eCryptfs: Extend array bounds for all filename chars + - LP: #902317 + * PCI hotplug: shpchp: don't blindly claim non-AMD 0x7450 device IDs + - LP: #902317 + * ARM: 7161/1: errata: no automatic store buffer drain + - LP: #902317 + * ALSA: lx6464es - fix device communication via command bus + - LP: #902317 + * SUNRPC: Ensure we return EAGAIN in xs_nospace if congestion is cleared + - LP: #902317 + * timekeeping: add arch_offset hook to ktime_get functions + - LP: #902317 + * p54spi: Add missing spin_lock_init + - LP: #902317 + * p54spi: Fix workqueue deadlock + - LP: #902317 + * nl80211: fix MAC address validation + - LP: #902317 + * gro: reset vlan_tci on reuse + - LP: #902317 + * staging: usbip: bugfix for deadlock + - LP: #902317 + * staging: comedi: fix oops for USB DAQ devices. + - LP: #902317 + * Staging: comedi: fix signal handling in read and write + - LP: #902317 + * USB: whci-hcd: fix endian conversion in qset_clear() + - LP: #902317 + * usb: ftdi_sio: add PID for Propox ISPcable III + - LP: #902317 + * usb: option: add SIMCom SIM5218 + - LP: #902317 + * USB: usb-storage: unusual_devs entry for Kingston DT 101 G2 + - LP: #902317 + * SCSI: scsi_lib: fix potential NULL dereference + - LP: #902317 + * SCSI: Silencing 'killing requests for dead queue' + - LP: #902317 + * cifs: fix cifs stable patch cifs-fix-oplock-break-handling-try-2.patch + - LP: #902317 + * sched, x86: Avoid unnecessary overflow in sched_clock + - LP: #902317 + * x86/mpparse: Account for bus types other than ISA and PCI + - LP: #902317 + * oprofile, x86: Fix crash when unloading module (nmi timer mode) + - LP: #902317 + * genirq: Fix race condition when stopping the irq thread + - LP: #902317 + * tick-broadcast: Stop active broadcast device when replacing it + - LP: #902317 + * clockevents: Set noop handler in clockevents_exchange_device() + - LP: #902317 + * Linux 2.6.32.50 + - LP: #902317 + * nfsd4: permit read opens of executable-only files + - LP: #833300 + * ipv6: Allow inet6_dump_addr() to handle more than 64 addresses + - LP: #863394 + * ALSA: sis7019 - give slow codecs more time to reset + - LP: #907774 + * ALSA: hda/realtek - Fix Oops in alc_mux_select() + - LP: #907774 + * ARM: davinci: dm646x evm: wrong register used in + setup_vpif_input_channel_mode + - LP: #907774 + * oprofile: Free potentially owned tasks in case of errors + - LP: #907774 + * oprofile: Fix locking dependency in sync_start() + - LP: #907774 + * percpu: fix chunk range calculation + - LP: #907774 + * xfrm: Fix key lengths for rfc3686(ctr(aes)) + - LP: #907774 + * linux/log2.h: Fix rounddown_pow_of_two(1) + - LP: #907774 + * jbd/jbd2: validate sb->s_first in journal_get_superblock() + - LP: #907774 + * hfs: fix hfs_find_init() sb->ext_tree NULL ptr oops + - LP: #907774 + * export __get_user_pages_fast() function + - LP: #907774 + * oprofile, x86: Fix nmi-unsafe callgraph support + - LP: #907774 + * oprofile, x86: Fix crash when unloading module (timer mode) + - LP: 
#907774 + * ext4: avoid hangs in ext4_da_should_update_i_disksize() + - LP: #907774 + * USB: cdc-acm: add IDs for Motorola H24 HSPA USB module. + - LP: #907774 + * Linux 2.6.32.51 + - LP: #907774 + + -- Stefan Bader Wed, 04 Jan 2012 17:57:42 +0100 + +linux-ec2 (2.6.32-341.42) lucid-proposed; urgency=low + + [Stefan Bader] + + * Release Tracking Bug + - LP: #900752 + * XEN: xen-blkfront: fix data size for xenbus_gather in blkfront_connect + - LP: #888042 + + -- Stefan Bader Tue, 06 Dec 2011 14:30:05 +0100 + +linux-ec2 (2.6.32-341.41) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Release Tracking Bug + - LP: #899737 + * Revert "SAUCE: XEN: Import 26ab8408157fbbc669e1aab1f271bb3cb1c433d0" + - LP: #888042 + * Rebased to Ubuntu-Ubuntu-2.6.32-37.81 + + [ Ubuntu: 2.6.32-37.81 ] + + * (pre-stable) sparc: sigutil: Include + - LP: #899311 + + [ Ubuntu: 2.6.32-37.80 ] + + * Revert "x86, hotplug: Use mwait to offline a processor, fix the legacy + case" + - LP: #888042 + * Revert "usb: musb: restore INDEX register in resume path" + - LP: #888042 + * Revert "MIPS: MTX-1: Make au1000_eth probe all PHY + - LP: #888042 + * Revert "ALSA: hda: Fix quirk for Dell Inspiron 910" + - LP: #875300 + * USB: ftdi_sio: add Calao reference board support + - LP: #888042 + * USB: EHCI: Do not rely on PORT_SUSPEND to stop USB resuming in + ehci_bus_resume(). + - LP: #888042 + * rt2x00: do not drop usb dev reference counter on suspend + - LP: #888042 + * atm: br2684: Fix oops due to skb->dev being NULL + - LP: #888042 + * sparc: Allow handling signals when stack is corrupted. + - LP: #888042 + * sparc: fix array bounds error setting up PCIC NMI trap + - LP: #888042 + * net: Fix IPv6 GSO type checks in Intel ethernet drivers + - LP: #888042 + * ipv6: Add GSO support on forwarding path + - LP: #888042 + * GRO: fix merging a paged skb after non-paged skbs + - LP: #888042 + * xen-blkfront: fix data size for xenbus_gather in blkfront_connect + - LP: #888042 + * md/linear: avoid corrupting structure while waiting for rcu_free to + complete. + - LP: #888042 + * powerpc/pci: Check devices status property when scanning OF tree + - LP: #888042 + * xen: x86_32: do not enable iterrupts when returning from exception in + interrupt context + - LP: #888042 + * xen/smp: Warn user why they keel over - nosmp or noapic and what to use + instead. + - LP: #888042 + * ARM: davinci: da850 EVM: read mac address from SPI flash + - LP: #888042 + * md: Fix handling for devices from 2TB to 4TB in 0.90 metadata. + - LP: #888042 + * net/9p: fix client code to fail more gracefully on protocol error + - LP: #888042 + * fs/9p: Fid is not valid after a failed clunk. + - LP: #888042 + * net/9p: Fix the msize calculation. + - LP: #888042 + * irda: fix smsc-ircc2 section mismatch warning + - LP: #888042 + * qla2xxx: Correct inadvertent loop state transitions during port-update + handling. 
+ - LP: #888042 + * e1000: Fix driver to be used on PA RISC C8000 workstations + - LP: #888042 + * ASoC: Fix reporting of partial jack updates + - LP: #888042 + * ALSA: HDA: Cirrus - fix "Surround Speaker" volume control name + - LP: #888042 + * b43: Fix beacon problem in ad-hoc mode + - LP: #888042 + * wireless: Reset beacon_found while updating regulatory + - LP: #888042 + * USB: PL2303: correctly handle baudrates above 115200 + - LP: #888042 + * ASIX: Add AX88772B USB ID + - LP: #888042 + * hvc_console: Improve tty/console put_chars handling + - LP: #888042 + * TPM: Call tpm_transmit with correct size + - LP: #888042 + * TPM: Zero buffer after copying to userspace + - LP: #888042 + * libiscsi_tcp: fix LLD data allocation + - LP: #888042 + * cnic: Improve NETDEV_UP event handling + - LP: #888042 + * ALSA: hda/realtek - Avoid bogus HP-pin assignment + - LP: #888042 + * 3w-9xxx: fix iommu_iova leak + - LP: #888042 + * aacraid: reset should disable MSI interrupt + - LP: #888042 + * libsas: fix failure to revalidate domain for anything but the first + expander child. + - LP: #888042 + * cfg80211: Fix validation of AKM suites + - LP: #888042 + * libsas: fix panic when single phy is disabled on a wide port + - LP: #888042 + * ahci: Enable SB600 64bit DMA on Asus M3A + - LP: #888042 + * HID: usbhid: Add support for SiGma Micro chip + - LP: #888042 + * hwmon: (w83627ehf) Properly report thermal diode sensors + - LP: #888042 + * x25: Prevent skb overreads when checking call user data + - LP: #888042 + * staging: quatech_usb2: Potential lost wakeup scenario in TIOCMIWAIT + - LP: #888042 + * USB: qcserial: add device ID for "HP un2430 Mobile Broadband Module" + - LP: #888042 + * xhci-mem.c: Check for ring->first_seg != NULL + - LP: #888042 + * ipr: Always initiate hard reset in kdump kernel + - LP: #888042 + * libsas: set sas_address and device type of rphy + - LP: #888042 + * ALSA: HDA: Add new revision for ALC662 + - LP: #877373, #888042 + * x86: Fix compilation bug in kprobes' twobyte_is_boostable + - LP: #888042 + * epoll: fix spurious lockdep warnings + - LP: #888042 + * usbmon vs. 
tcpdump: fix dropped packet count + - LP: #888042 + * USB: storage: Use normalized sense when emulating autosense + - LP: #888042 + * USB: pid_ns: ensure pid is not freed during kill_pid_info_as_uid + - LP: #888042 + * usb: cdc-acm: Owen SI-30 support + - LP: #888042 + * USB: add RESET_RESUME for webcams shown to be quirky + - LP: #888042 + * USB: pl2303: add id for SMART device + - LP: #888042 + * USB: ftdi_sio: add PID for Sony Ericsson Urban + - LP: #888042 + * USB: ftdi_sio: Support TI/Luminary Micro Stellaris BD-ICDI Board + - LP: #888042 + * QE/FHCI: fixed the CONTROL bug + - LP: #888042 + * Update email address for stable patch submission + - LP: #888042 + * kobj_uevent: Ignore if some listeners cannot handle message + - LP: #888042 + * kmod: prevent kmod_loop_msg overflow in __request_module() + - LP: #888042 + * time: Change jiffies_to_clock_t() argument type to unsigned long + - LP: #888042 + * nfsd4: Remove check for a 32-bit cookie in nfsd4_readdir() + - LP: #888042 + * nfsd4: ignore WANT bits in open downgrade + - LP: #888042 + * ASoC: ak4642: fixup cache register table + - LP: #888042 + * ASoC: ak4535: fixup cache register table + - LP: #888042 + * KVM: s390: check cpu_id prior to using it + - LP: #888042 + * ccwgroup: move attributes to attribute group + - LP: #888042 + * iommu/amd: Fix wrong shift direction + - LP: #888042 + * carminefb: Fix module parameters permissions + - LP: #888042 + * uvcvideo: Set alternate setting 0 on resume if the bus has been reset + - LP: #888042 + * tuner_xc2028: Allow selection of the frequency adjustment code for + XC3028 + - LP: #888042 + * plat-mxc: iomux-v3.h: implicitly enable pull-up/down when that's + desired + - LP: #888042 + * um: fix ubd cow size + - LP: #888042 + * cfq: calculate the seek_mean per cfq_queue not per cfq_io_context + - LP: #888042 + * cfq: merge cooperating cfq_queues + - LP: #888042 + * cfq: change the meaning of the cfqq_coop flag + - LP: #888042 + * cfq: break apart merged cfqqs if they stop cooperating + - LP: #888042 + * cfq-iosched: get rid of the coop_preempt flag + - LP: #888042 + * cfq: Don't allow queue merges for queues that have no process + references + - LP: #888042 + * xen/timer: Missing IRQF_NO_SUSPEND in timer code broke suspend. + - LP: #888042 + * KVM: x86: Reset tsc_timestamp on TSC writes + - LP: #888042 + * watchdog: mtx1-wdt: fix build failure + - LP: #888042 + * kcore: fix test for end of list + - LP: #888042 + * thinkpad-acpi: module autoloading for newer Lenovo ThinkPads. 
+ - LP: #888042 + * scm: lower SCM_MAX_FD + - LP: #888042 + * NLM: Don't hang forever on NLM unlock requests + - LP: #888042 + * ext4: fix BUG_ON() in ext4_ext_insert_extent() + - LP: #888042 + * drivers/net/rionet.c: fix ethernet address macros for LE platforms + - LP: #888042 + * ext2,ext3,ext4: don't inherit APPEND_FL or IMMUTABLE_FL for new inodes + - LP: #888042 + * USB: Serial: Add device ID for Sierra Wireless MC8305 + - LP: #888042 + * USB: Serial: Add PID(0xF7C0) to FTDI SIO driver for a + zeitcontrol-device + - LP: #888042 + * Linux 2.6.32.47 + - LP: #888042 + * Linux 2.6.32.48 + - LP: #888042 + * drm: radeon: fix sign bug + - LP: #888042 + * drm/radeon/kms: prefer high post dividers in legacy pll algo + - LP: #888042 + * Linux 2.6.32.48+drm33.21 + - LP: #888042 + * crypto: ghash - Avoid null pointer dereference if no key is set + - LP: #887299 + - CVE-2011-4081 + * xfs: Fix possible memory corruption in xfs_readlink, CVE-2011-4077 + - LP: #887298 + - CVE-2011-4077 + * jbd/jbd2: validate sb->s_first in journal_get_superblock() + - LP: #893148 + - CVE-2011-4132 + * hfs: add sanity check for file name length, CVE-2011-4330 + - LP: #894374 + - CVE-2011-4330 + * ipv6: udp: fix the wrong headroom check + - LP: #894373 + - CVE-2011-4326 + * st: fix race in st_scsi_execute_end + - LP: #897377 + * Make scsi_free_queue() kill pending SCSI commands + - LP: #897377 + * NFS/sunrpc: don't use a credential with extra groups. + - LP: #897377 + * netlink: validate NLA_MSECS length + - LP: #897377 + * mtd: mtdchar: add missing initializer on raw write + - LP: #897377 + * PM / Suspend: Off by one in pm_suspend() + - LP: #897377 + * kbuild: Disable -Wunused-but-set-variable for gcc 4.6.0 + - LP: #897377 + * ASoC: wm8940: Properly set codec->dapm.bias_level + - LP: #897377 + * md/raid5: abort any pending parity operations when array fails. 
+ - LP: #897377 + * Remove the old V4L1 v4lgrab.c file + - LP: #897377 + * genirq: Add IRQF_RESUME_EARLY and resume such IRQs earlier + - LP: #897377 + * mm: avoid null pointer access in vm_struct via /proc/vmallocinfo + - LP: #897377 + * kbuild: Fix passing -Wno-* options to gcc 4.4+ + - LP: #897377 + * USB: serial: pl2303: rm duplicate id + - LP: #897377 + * USB: Fix Corruption issue in USB ftdi driver ftdi_sio.c + - LP: #897377 + * usb-storage: Accept 8020i-protocol commands longer than 12 bytes + - LP: #897377 + * USB: add quirk for Logitech C600 web cam + - LP: #897377 + * USB: quirks: adding more quirky webcams to avoid squeaky audio + - LP: #897377 + * Linux 2.6.32.49 + - LP: #897377 + + -- Stefan Bader Mon, 05 Dec 2011 16:18:55 +0100 + +linux-ec2 (2.6.32-340.40) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to Ubuntu-2.6.32-36.79 + * Release Tracking Bug + - LP: #888700 + + [ Ubuntu: 2.6.32-36.79 ] + + * net_sched: Fix qdisc_notify() - CVE-2011-2525 + - LP: #869250 + - CVE-2011-2525 + * ipv6: restore correct ECN handling on TCP xmit + - LP: #872179 + * nl80211: fix overflow in ssid_len - CVE-2011-2517 + - LP: #869245 + - CVE-2011-2517 + * vm: fix vm_pgoff wrap in stack expansion - CVE-2011-2496 + - LP: #869243 + - CVE-2011-2496 + * vm: fix vm_pgoff wrap in upward expansion - CVE-2011-2496 + - LP: #869243 + - CVE-2011-2496 + * drm: mm: fix range restricted allocations + - LP: #873130 + * NLM: Don't hang forever on NLM unlock requests - CVE-2011-2491 + - LP: #869237 + - CVE-2011-2491 + + -- Stefan Bader Mon, 14 Nov 2011 17:04:46 +0100 + +linux-ec2 (2.6.32-319.39) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to 2.6.32-35.78 + * Release Tracking Bug + - LP: #873063 + + [ Ubuntu: 2.6.32-35.78 ] + + * [Config] Force perf to use libiberty for demangling + - LP: #783660 + * [Config] Simplify binary-udebs dependencies + - LP: #832352 + * [Config] kernel preparation cannot be parallelized + - LP: #832352 + * [Config] Linearize module/abi checks + - LP: #832352 + * [Config] Linearize and simplify tree preparation rules + - LP: #832352 + * [Config] Build kernel image in parallel with modules + - LP: #832352 + * [Config] Set concurrency for kmake invocations + - LP: #832352 + * [Config] Improve install-arch-headers speed + - LP: #832352 + * [Config] Fix binary-perarch dependencies + - LP: #832352 + * [Config] Removed stamp-flavours target + - LP: #832352 + * [Config] Serialize binary indep targets + - LP: #832352 + * [Config] Use build stamp directly + - LP: #832352 + * [Config] Restore prepare-% target + - LP: #832352 + * [Config] Fix binary-% build target + * [Config] Fix install-headers target + - LP: #832352 + * SAUCE: igb: Protect stats update + - LP: #829566 + * SAUCE: rtl8192se spams log + - LP: #859702 + * Add mount option to check uid of device being mounted = expect uid, + CVE-2011-1833 + - LP: #732628 + - CVE-2011-1833 + * crypto: Move md5_transform to lib/md5.c + - LP: #827462 + * net: Compute protocol sequence numbers and fragment IDs using MD5. 
+ - LP: #827462 + * ALSA: timer - Fix Oops at closing slave timer + - LP: #827462 + * ALSA: snd-usb-caiaq: Fix keymap for RigKontrol3 + - LP: #827462 + * powerpc: Fix device tree claim code + - LP: #827462 + * powerpc: pseries: Fix kexec on machines with more than 4TB of RAM + - LP: #827462 + * Linux 2.6.32.45+drm33.19 + - LP: #827462 + * ipv6: make fragment identifications less predictable, CVE-2011-2699 + - LP: #827685 + - CVE-2011-2699 + * tunnels: fix netns vs proto registration ordering + - LP: #823296 + * Fix broken backport for IPv6 tunnels in 2.6.32-longterm kernels. + * USB: xhci: fix OS want to own HC + - LP: #837669 + * USB: assign instead of equal in usbtmc.c + - LP: #837669 + * USB: usb-storage: unusual_devs entry for ARM V2M motherboard. + - LP: #837669 + * USB: Serial: Added device ID for Qualcomm Modem in Sagemcom's HiLo3G + - LP: #837669 + * atm: br2864: sent packets truncated in VC routed mode + - LP: #837669 + * hwmon: (ibmaem) add missing kfree + - LP: #837669 + * ALSA: snd-usb-caiaq: Correct offset fields of outbound iso_frame_desc + - LP: #837669 + * mm: fix wrong vmap address calculations with odd NR_CPUS values + - LP: #837669 + * perf tools: do not look at ./config for configuration + - LP: #837669 + * fs/partitions/efi.c: corrupted GUID partition tables can cause kernel + oops + - LP: #837669 + * befs: Validate length of long symbolic links. + - LP: #837669 + * ALSA: snd_usb_caiaq: track submitted output urbs + - LP: #837669 + * ALSA: ac97: Add HP Compaq dc5100 SFF(PT003AW) to Headphone Jack Sense + whitelist + - LP: #826081, #837669 + * futex: Fix regression with read only mappings + - LP: #837669 + * x86-32, vdso: On system call restart after SYSENTER, use int $0x80 + - LP: #837669 + * x86, UV: Remove UV delay in starting slave cpus + - LP: #837669 + * drm/ttm: fix ttm_bo_add_ttm(user) failure path + - LP: #837669 + * fuse: check size of FUSE_NOTIFY_INVAL_ENTRY message + - LP: #837669 + * igb: Fix lack of flush after register write and before delay + - LP: #837669 + * Linux 2.6.32.46 + - LP: #837669 + * cifs: fix possible memory corruption in CIFSFindNext, CVE-2011-3191 + - LP: #834135 + - CVE-2011-3191 + * Bluetooth: Prevent buffer overflow in l2cap config request, + CVE-2011-2497 + - LP: #838423 + - CVE-2011-2497 + * core: Fix memory leak/corruption on VLAN GRO_DROP, CVE-2011-1576 + - LP: #844361 + - CVE-2011-1576 + * ext4: Fix max file size and logical block counting of extent format + file, CVE-2011-2695 + - LP: #819574 + - CVE-2011-2695 + * drm/i915: prepare for fair lru eviction + - LP: #843904 + * drm/i915: Move the eviction logic to its own file. + - LP: #843904 + * drm/i915: Implement fair lru eviction across both rings. (v2) + - LP: #843904 + * drm/i915: Maintain LRU order of inactive objects upon access by CPU + (v2) + - LP: #843904 + * drm/i915/evict: Ensure we completely cleanup on failure + - LP: #843904 + * drm/i915: Periodically flush the active lists and requests + - LP: #843904 + * Make TASKSTATS require root access, CVE-2011-2494 + - LP: #866021 + - CVE-2011-2494 + * proc: fix a race in do_io_accounting(), CVE-2011-2495 + - LP: #866025 + - CVE-2011-2495 + * drm/i915: Remove BUG_ON from i915_gem_evict_something + - LP: #828550 + * drm/i915: Hold a reference to the object whilst unbinding the eviction + list + - LP: #843904 + * drm/i915: Fix refleak during eviction. 
+ - LP: #843904 + * Linux 2.6.32.46+drm33.20 + + [ Ubuntu: 2.6.32-34.77 ] + + * Revert "drm/i915: Remove BUG_ON from i915_gem_evict_something" + * Revert "drm/i915: Periodically flush the active lists and requests" + * Revert "drm/i915/evict: Ensure we completely cleanup on failure" + * Revert "drm/i915: Maintain LRU order of inactive objects upon access by + CPU (v2)" + * Revert "drm/i915: Implement fair lru eviction across both rings. (v2)" + * Revert "drm/i915: Move the eviction logic to its own file." + * Revert "drm/i915: prepare for fair lru eviction" + + -- Stefan Bader Fri, 14 Oct 2011 17:14:29 +0100 + +linux-ec2 (2.6.32-318.38) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to 2.6.32-34.76 + * Release Tracking Bug + - LP: #837804 + + [ Ubuntu: 2.6.32-34.76 ] + + * Revert "drm/nv50-nvc0: work around an evo channel hang that some people + see" + * Revert "eCryptfs: Handle failed metadata read in lookup" + * Revert "tunnels: fix netns vs proto registration ordering" + + [ Ubuntu: 2.6.32-34.75 ] + + * drm/i915: Remove BUG_ON from i915_gem_evict_something + - LP: #828550 + + -- Stefan Bader Thu, 01 Sep 2011 16:20:32 +0200 + +linux-ec2 (2.6.32-318.37) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Release Tracking Bug + - LP: #829162 + * XEN: exec: delay address limit change until point of no return + - LP: #802383 + * Rebased to 2.6.32-34.74 + + [ Ubuntu: 2.6.32-34.74 ] + + * proc: fix oops on invalid /proc//maps access, CVE-2011-1020 + - LP: #813026 + - CVE-2011-1020 + + [ Ubuntu: 2.6.32-34.73 ] + + * SAUCE: rtl8192se: Force a build for a 2.6/3.0 kernel + - LP: #805494 + * [Config] Add enic/fnic to udebs + - LP: #801610 + * tty: icount changeover for other main devices, CVE-2010-4076, + CVE-2010-4077 + - LP: #720189 + - CVE-2010-4077 + * fs/partitions/efi.c: corrupted GUID partition tables can cause kernel + oops + - LP: #795418 + - CVE-2011-1577 + * ftrace: Only update the function code on write to filter files + - LP: #802383 + * kmemleak: Do not return a pointer to an object that kmemleak did not + get + - LP: #802383 + * CPU hotplug, re-create sysfs directory and symlinks + - LP: #802383 + * Fix memory leak in cpufreq_stat + - LP: #802383 + * powerpc/kexec: Fix memory corruption from unallocated slaves + - LP: #802383 + * powerpc/oprofile: Handle events that raise an exception without + overflowing + - LP: #802383 + * mtd: mtdconcat: fix NAND OOB write + - LP: #802383 + * x86, 64-bit: Fix copy_[to/from]_user() checks for the userspace address + limit + - LP: #802383 + * ext3: Fix fs corruption when make_indexed_dir() fails + - LP: #802383 + * jbd: Fix forever sleeping process in do_get_write_access() + - LP: #802383 + * jbd: fix fsync() tid wraparound bug + - LP: #802383 + * ext4: release page cache in ext4_mb_load_buddy error path + - LP: #802383 + * Fix Ultrastor asm snippet + - LP: #802383 + * x86, amd: Do not enable ARAT feature on AMD processors below family + 0x12 + - LP: #802383 + * x86, amd: Use _safe() msr access for GartTlbWlk disable code + - LP: #802383 + * rcu: Fix unpaired rcu_irq_enter() from locking selftests + - LP: #802383 + * staging: usbip: fix wrong endian conversion + - LP: #802383 + * Fix for buffer overflow in ldm_frag_add not sufficient + - LP: #802383 + * seqlock: Don't smp_rmb in seqlock reader spin loop + - LP: #802383 + * ALSA: HDA: Use one dmic only for Dell Studio 1558 + - LP: #731706, #802383 + * ASoC: Ensure output PGA is enabled for line outputs in wm_hubs + - LP: #802383 + * ASoC: Add some missing volume update bit sets 
for wm_hubs devices + - LP: #802383 + * mm/page_alloc.c: prevent unending loop in __alloc_pages_slowpath() + - LP: #802383 + * loop: limit 'max_part' module param to DISK_MAX_PARTS + - LP: #802383 + * loop: handle on-demand devices correctly + - LP: #802383 + * USB: CP210x Add 4 Device IDs for AC-Services Devices + - LP: #802383 + * USB: moto_modem: Add USB identifier for the Motorola VE240. + - LP: #802383 + * USB: serial: ftdi_sio: adding support for TavIR STK500 + - LP: #802383 + * USB: gamin_gps: Fix for data transfer problems in native mode + - LP: #802383 + * usb/gadget: at91sam9g20 fix end point max packet size + - LP: #802383 + * usb: gadget: rndis: don't test against req->length + - LP: #802383 + * OHCI: fix regression caused by nVidia shutdown workaround + - LP: #802383 + * p54usb: add zoom 4410 usbid + - LP: #802383 + * eCryptfs: Allow 2 scatterlist entries for encrypted filenames + - LP: #802383 + * UBIFS: fix a rare memory leak in ro to rw remounting path + - LP: #802383 + * i8k: Avoid lahf in 64-bit code + - LP: #802383 + * cpuidle: menu: fixed wrapping timers at 4.294 seconds + - LP: #802383 + * dm table: reject devices without request fns + - LP: #802383 + * atm: expose ATM device index in sysfs + - LP: #802383 + * brd: limit 'max_part' module param to DISK_MAX_PARTS + - LP: #802383 + * brd: handle on-demand devices correctly + - LP: #802383 + * SUNRPC: Deal with the lack of a SYN_SENT sk->sk_state_change + callback... + - LP: #802383 + * PCI: Add quirk for setting valid class for TI816X Endpoint + - LP: #802383 + * xen mmu: fix a race window causing leave_mm BUG() + - LP: #802383 + * netfilter: nf_conntrack_reasm: properly handle packets fragmented into + a single fragment + - LP: #802383 + * fix memory leak in scsi_report_lun_scan + - LP: #802383 + * fix refcounting bug in scsi_get_host_dev + - LP: #802383 + * fix duplicate removal on error path in scsi_sysfs_add_sdev + - LP: #802383 + * UBIFS: fix shrinker object count reports + - LP: #802383 + * UBIFS: fix memory leak on error path + - LP: #802383 + * nbd: limit module parameters to a sane value + - LP: #802383 + * mm: fix ENOSPC returned by handle_mm_fault() + - LP: #802383 + * PCI: Set PCIE maxpayload for card during hotplug insertion + - LP: #802383 + * nl80211: fix check for valid SSID size in scan operations + - LP: #802383 + * lockdep: Fix lock_is_held() on recursion + - LP: #802383 + * drm/i915: Add a no lvds quirk for the Asus EeeBox PC EB1007 + - LP: #802383 + * drm/radeon/kms: fix for radeon on systems >4GB without hardware iommu + - LP: #802383 + * fat: Fix corrupt inode flags when remove ATTR_SYS flag + - LP: #802383 + * xen: off by one errors in multicalls.c + - LP: #802383 + * x86/amd-iommu: Fix 3 possible endless loops + - LP: #802383 + * USB: cdc-acm: Adding second ACM channel support for Nokia E7 and C7 + - LP: #802383 + * USB: core: Tolerate protocol stall during hub and port status read + - LP: #802383 + * USB: serial: add another 4N-GALAXY.DE PID to ftdi_sio driver + - LP: #802383 + * ALSA: hda: Fix quirk for Dell Inspiron 910 + - LP: #792712, #802383 + * oprofile, dcookies: Fix possible circular locking dependency + - LP: #802383 + * CPUFREQ: Remove cpufreq_stats sysfs entries on module unload. 
+ - LP: #802383 + * md: check ->hot_remove_disk when removing disk + - LP: #802383 + * md/raid5: fix raid5_set_bi_hw_segments + - LP: #802383 + * md/raid5: fix FUA request handling in ops_run_io() + - LP: #802383 + * ata: use pci_dev->revision + - LP: #802383 + * pata_cmd64x: fix PIO setup + - LP: #802383 + * pata_cmd64x: cmd648_bmdma_stop() fix + - LP: #802383 + * pata_cmd64x: remove unused definitions + - LP: #802383 + * pata_cm64x: fix boot crash on parisc + - LP: #802383 + * ACPI: use _HID when supplied by root-level devices + - LP: #802383 + * xfs: properly account for reclaimed inodes + - LP: #802383 + * exec: delay address limit change until point of no return + - LP: #802383 + * netfilter: IPv6: initialize TOS field in REJECT target module + - LP: #802383 + * netfilter: IPv6: fix DSCP mangle code + - LP: #802383 + * genirq: Add IRQF_FORCE_RESUME + - LP: #802383 + * xen: Use IRQF_FORCE_RESUME + - LP: #802383 + * time: Compensate for rounding on odd-frequency clocksources + - LP: #802383 + * Linux 2.6.32.42 + - LP: #802383 + * taskstats: don't allow duplicate entries in listener mode, + CVE-2011-2484 + - LP: #806390 + - CVE-2011-2484 + * drm_mm: extract check_free_mm_node + - LP: #599017, #807508 + * drm: implement helper functions for scanning lru list + - LP: #599017, #807508 + * drm/i915: prepare for fair lru eviction + - LP: #599017, #807508 + * drm/i915: Move the eviction logic to its own file. + - LP: #599017, #807508 + * drm/i915: Implement fair lru eviction across both rings. (v2) + - LP: #599017, #807508 + * drm/i915: Maintain LRU order of inactive objects upon access by CPU + (v2) + - LP: #599017, #807508 + * drm/i915/evict: Ensure we completely cleanup on failure + - LP: #599017, #807508 + * drm/i915: Periodically flush the active lists and requests + - LP: #599017, #807508 + * Linux 2.6.32.42+drm33.19 + - LP: #807508 + * net: add limit for socket backlog CVE-2010-4251 + - LP: #807462 + * tcp: use limited socket backlog CVE-2010-4251 + - LP: #807462 + * ipv6: udp: Optimise multicast reception + - LP: #807462 + * ipv4: udp: Optimise multicast reception + - LP: #807462 + * udp: multicast RX should increment SNMP/sk_drops counter in allocation + failures CVE-2010-4251 + - LP: #807462 + * udp: use limited socket backlog CVE-2010-4251 + - LP: #807462 + * llc: use limited socket backlog CVE-2010-4251 + - LP: #807462 + * sctp: use limited socket backlog CVE-2010-4251 + - LP: #807462 + * tipc: use limited socket backlog CVE-2010-4251 + - LP: #807462 + * x25: use limited socket backlog CVE-2010-4251 + - LP: #807462 + * net: backlog functions rename CVE-2010-4251 + - LP: #807462 + * net: sk_add_backlog() take rmem_alloc into account CVE-2010-4805 + - LP: #809318 + * ksm: fix NULL pointer dereference in scan_get_next_rmap_item() + - LP: #810425 + * migrate: don't account swapcache as shmem + - LP: #810425 + * clocksource: Make watchdog robust vs. interruption + - LP: #810425 + * TTY: ldisc, do not close until there are readers + - LP: #810425 + * xhci: Reject double add of active endpoints. 
+ - LP: #810425 + * PM: Free memory bitmaps if opening /dev/snapshot fails + - LP: #810425 + * ath5k: fix memory leak when fewer than N_PD_CURVES are in use + - LP: #810425 + * mm: fix negative commitlimit when gigantic hugepages are allocated + - LP: #810425 + * uvcvideo: Remove buffers from the queues when freeing + - LP: #810425 + * watchdog: mtx1-wdt: request gpio before using it + - LP: #810425 + * debugobjects: Fix boot crash when kmemleak and debugobjects enabled + - LP: #810425 + * cfq-iosched: fix locking around ioc->ioc_data assignment + - LP: #810425 + * cfq-iosched: fix a rcu warning + - LP: #810425 + * i2c-taos-evm: Fix log messages + - LP: #810425 + * md: avoid endless recovery loop when waiting for fail device to + complete. + - LP: #810425 + * SUNRPC: Ensure the RPC client only quits on fatal signals + - LP: #810425 + * 6pack,mkiss: fix lock inconsistency + - LP: #810425 + * USB: don't let errors prevent system sleep + - LP: #810425 + * USB: don't let the hub driver prevent system sleep + - LP: #810425 + * uml: fix CONFIG_STATIC_LINK=y build failure with newer glibc + - LP: #810425 + * um: os-linux/mem.c needs sys/stat.h + - LP: #810425 + * inet_diag: fix inet_diag_bc_audit() + - LP: #810425 + * PM / Hibernate: Avoid hitting OOM during preallocation of memory + - LP: #810425 + * PM / Hibernate: Fix free_unnecessary_pages() + - LP: #810425 + * bug.h: Add WARN_RATELIMIT + - LP: #810425 + * net: filter: Use WARN_RATELIMIT + - LP: #810425 + * af_packet: prevent information leak + - LP: #810425 + * net/ipv4: Check for mistakenly passed in non-IPv4 address + - LP: #810425 + * ipv6/udp: Use the correct variable to determine non-blocking condition + - LP: #810425 + * udp/recvmsg: Clear MSG_TRUNC flag when starting over for a new packet + - LP: #810425 + * mm: prevent concurrent unmap_mapping_range() on the same inode + - LP: #810425 + * xen: set max_pfn_mapped to the last pfn mapped + - LP: #810425 + * xen: partially revert "xen: set max_pfn_mapped to the last pfn mapped" + - LP: #810425 + * Linux 2.6.32.43 + - LP: #810425 + * eCryptfs: Handle failed metadata read in lookup + - LP: #509180 + * pagemap: close races with suid execve, CVE-2011-1020 + - LP: #813026 + - CVE-2011-1020 + * report errors in /proc/*/*map* sanely, CVE-2011-1020 + - LP: #813026 + - CVE-2011-1020 + * close race in /proc/*/environ, CVE-2011-1020 + - LP: #813026 + - CVE-2011-1020 + * auxv: require the target to be tracable (or yourself), CVE-2011-1020 + - LP: #813026 + - CVE-2011-1020 + * deal with races in /proc/*/{syscall, stack, personality}, CVE-2011-1020 + - LP: #813026 + - CVE-2011-1020 + * rose_loopback_timer sets VC number <= ROSE_DEFAULT_MAXVC, CVE-2011-1493 + - LP: #816550 + - CVE-2011-1493 + * rose: Add length checks to CALL_REQUEST parsing, CVE-2011-1493 + - LP: #816550 + - CVE-2011-1493 + * Bluetooth: l2cap and rfcomm: fix 1 byte infoleak to userspace. 
+ - LP: #819569 + - CVE-2011-2492 + * drm/nv50-nvc0: work around an evo channel hang that some people see + - LP: #583760 + * ASoC: Fix Blackfin I2S _pointer() implementation return in bounds + values + - LP: #823296 + * v4l2-ioctl.c: prefill tuner type for g_frequency and g/s_tuner + - LP: #823296 + * pvrusb2: fix g/s_tuner support + - LP: #823296 + * bttv: fix s_tuner for radio + - LP: #823296 + * gro: Only reset frag0 when skb can be pulled + - LP: #823296 + * NFSv4.1: update nfs4_fattr_bitmap_maxsz + - LP: #823296 + * SUNRPC: Fix a race between work-queue and rpc_killall_tasks + - LP: #823296 + * SUNRPC: Fix use of static variable in rpcb_getport_async + - LP: #823296 + * si4713-i2c: avoid potential buffer overflow on si4713 + - LP: #823296 + * hwmon: (max1111) Fix race condition causing NULL pointer exception + - LP: #823296 + * bridge: send proper message_age in config BPDU + - LP: #823296 + * davinci: DM365 EVM: fix video input mux bits + - LP: #823296 + * libata: fix unexpectedly frozen port after ata_eh_reset() + - LP: #823296 + * x86: Make Dell Latitude E5420 use reboot=pci + - LP: #823296 + * USB: pl2303: add AdLink ND-6530 USB IDs + - LP: #823296 + * USB: pl2303.h: checkpatch cleanups + - LP: #823296 + * USB: serial: add IDs for WinChipHead USB->RS232 adapter + - LP: #823296 + * staging: comedi: fix infoleak to userspace + - LP: #823296 + * USB: OHCI: fix another regression for NVIDIA controllers + - LP: #823296 + * usb: musb: restore INDEX register in resume path + - LP: #823296 + * USB: dummy-hcd needs the has_tt flag + - LP: #823296 + * ARM: pxa/cm-x300: fix V3020 RTC functionality + - LP: #823296 + * jme: Fix unmap error (Causing system freeze) + - LP: #823296 + * libsas: remove expander from dev list on error + - LP: #823296 + * mac80211: Restart STA timers only on associated state + - LP: #823296 + * Blacklist Traxdata CDR4120 and IOMEGA Zip drive to avoid lock ups. 
+ - LP: #823296 + * ses: requesting a fault indication + - LP: #823296 + * pmcraid: reject negative request size + - LP: #823296 + * kexec, x86: Fix incorrect jump back address if not preserving context + - LP: #823296 + * powerpc/kdump: Fix timeout in crash_kexec_wait_realmode + - LP: #823296 + * PCI: ARI is a PCIe v2 feature + - LP: #823296 + * cciss: do not attempt to read from a write-only register + - LP: #823296 + * xtensa: prevent arbitrary read in ptrace + - LP: #823296 + * ext3: Fix oops in ext3_try_to_allocate_with_rsv() + - LP: #823296 + * svcrpc: fix list-corrupting race on nfsd shutdown + - LP: #823296 + * EHCI: only power off port if over-current is active + - LP: #823296 + * EHCI: fix direction handling for interrupt data toggles + - LP: #823296 + * powerpc/pseries/hvconsole: Fix dropped console output + - LP: #823296 + * x86: Hpet: Avoid the comparator readback penalty + - LP: #823296 + * x86: HPET: Chose a paranoid safe value for the ETIME check + - LP: #823296 + * cifs: clean up cifs_find_smb_ses (try #2) + - LP: #823296 + * cifs: fix NULL pointer dereference in cifs_find_smb_ses + - LP: #823296 + * cifs: check for NULL session password + - LP: #823296 + * gre: fix netns vs proto registration ordering + - LP: #823296 + * netns xfrm: fixup xfrm6_tunnel error propagation + - LP: #823296 + * tunnels: fix netns vs proto registration ordering + - LP: #823296 + * alpha: fix several security issues + - LP: #823296 + * proc: restrict access to /proc/PID/io + - LP: #823296 + * ALSA: sound/core/pcm_compat.c: adjust array index + - LP: #823296 + * dm mpath: fix potential NULL pointer in feature arg processing + - LP: #823296 + * dm: fix idr leak on module removal + - LP: #823296 + * perf: overflow/perf_count_sw_cpu_clock crashes recent kernels + - LP: #823296 + * atm: [br2684] allow routed mode operation again + - LP: #823296 + * Linux 2.6.32.44 + - LP: #823296 + + [ Ubuntu: 2.6.32-33.72 ] + + * Revert "fix oops in scsi_run_queue()" + - LP: #811745 + * Revert "put stricter guards on queue dead checks" + - LP: #811745 + + [ Ubuntu: 2.6.32-33.71 ] + + * splice: direct_splice_actor() should not use pos in sd + - LP: #588861 + + -- Stefan Bader Thu, 18 Aug 2011 11:08:03 +0200 + +linux-ec2 (2.6.32-317.36) lucid-proposed; urgency=low + + [Stefan Bader] + + * Release Tracking Bug + - LP: #807505 + + [ Stefan Bader ] + + * Revert "XEN: x86: Flush TLB if PGD entry is changed in i386 PAE mode" + - LP: #805209 + * Rebased to 2.6.32-33.70 + + [ Ubuntu: 2.6.32-33.70 ] + + * Revert "x86: Flush TLB if PGD entry is changed in i386 PAE mode" + - LP: #805209 + + -- Stefan Bader Fri, 08 Jul 2011 09:57:36 +0000 + +linux-ec2 (2.6.32-317.35) lucid-proposed; urgency=low + + [Stefan Bader] + + * Release Tracking Bug + - LP: #806968 + * Rebased to 2.6.32-33.69 + + [ Ubuntu: 2.6.32-33.69 ] + + * Revert "af_unix: Only allow recv on connected seqpacket sockets." + + [ Ubuntu: 2.6.32-33.68 ] + + * Fix abi directory + + [ Ubuntu: 2.6.32-33.67 ] + + * Revert "iwlagn: Support new 5000 microcode." + + -- Stefan Bader Thu, 07 Jul 2011 15:09:59 +0200 + +linux-ec2 (2.6.32-317.34) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Release Tracking Bug + - LP: #794420 + * Rebased to 2.6.32-33.66 + + [ Ubuntu: 2.6.32-33.66 ] + + * Revert "xhci: Fix full speed bInterval encoding." 
+ * Revert "USB: xhci - fix math in xhci_get_endpoint_interval()" + * Revert "USB: xhci - fix unsafe macro definitions" + + -- Stefan Bader Wed, 08 Jun 2011 09:58:47 +0200 + +linux-ec2 (2.6.32-317.33) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Release Tracking Bug + - LP: #793566 + * Rebased to 2.6.32-33.65 + + [ Ubuntu: 2.6.32-33.65 ] + + * xhci: Fix full speed bInterval encoding. + - LP: #792959 + + -- Stefan Bader Mon, 06 Jun 2011 14:06:42 +0000 + +linux-ec2 (2.6.32-317.32) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * SAUCE: XEN: Sync with x86, cpu: AMD errata checking framework + - LP: #770050 + * SAUCE: XEN: Sync with x86, cpu: Clean up AMD erratum 400 workaround + - LP: #770050 + + [ Upstream Kernel Changes ] + + * Rebased to 2.6.32-33.64 + + [ Ubuntu: 2.6.32-33.64 ] + + * Release Tracking Bug + - LP: #789325 + * SAUCE: (no-up) Fix up KVM: VMX: Fix host userspace gsbase corruption + - LP: #787675 + * SAUCE: vesafb: mtrr module parameter is uint, not bool + - LP: #778043 + * Revert "(pre-stable): input: Support Clickpad devices in ClickZone + mode" + - LP: #780588 + * Revert "GFS2: Fix writing to non-page aligned gfs2_quota structures" + - LP: #780588 + * Revert "mmc: build fix: mmc_pm_notify is only available with + CONFIG_PM=y" + - LP: #780588 + * Revert "mmc: fix all hangs related to mmc/sd card insert/removal during + suspend/resume" + - LP: #780588 + * Revert "econet: fix CVE-2010-3848" + - LP: #780588 + * Revert "dell-laptop: Add another Dell laptop family to the DMI + whitelist" + - LP: #780588 + * Revert "dell-laptop: Add another Dell laptop family to the DMI + whitelist" + - LP: #780588 + * Revert "xen: set max_pfn_mapped to the last pfn mapped" + * cifs: always do is_path_accessible check in cifs_mount + - LP: #770050 + * video: sn9c102: world-wirtable sysfs files + - LP: #770050 + * UBIFS: restrict world-writable debugfs files + - LP: #770050 + * NET: cdc-phonet, handle empty phonet header + - LP: #770050 + * x86: Fix a bogus unwind annotation in lib/semaphore_32.S + - LP: #770050 + * tioca: Fix assignment from incompatible pointer warnings + - LP: #770050 + * mca.c: Fix cast from integer to pointer warning + - LP: #770050 + * ramfs: fix memleak on no-mmu arch + - LP: #770050 + * MAINTAINERS: update STABLE BRANCH info + - LP: #770050 + * UBIFS: fix oops when R/O file-system is fsync'ed + - LP: #770050 + * x86, cpu: AMD errata checking framework + - LP: #770050 + * x86, cpu: Clean up AMD erratum 400 workaround + - LP: #770050 + * x86, AMD: Set ARAT feature on AMD processors + - LP: #770050 + * x86, amd: Disable GartTlbWlkErr when BIOS forgets it + - LP: #770050 + * USB: ftdi_sio: Added IDs for CTI USB Serial Devices + - LP: #770050 + * USB: ftdi_sio: add PID for OCT DK201 docking station + - LP: #770050 + * USB: ftdi_sio: add ids for Hameg HO720 and HO730 + - LP: #770050 + * USB: option: Add new ONDA vendor id and product id for ONDA MT825UP + - LP: #770050 + * USB: option: Added support for Samsung GT-B3730/GT-B3710 LTE USB modem. 
+ - LP: #770050 + * next_pidmap: fix overflow condition + - LP: #770050 + * proc: do proper range check on readdir offset + - LP: #770050 + * USB: EHCI: unlink unused QHs when the controller is stopped + - LP: #770050 + * USB: fix formatting of SuperSpeed endpoints in /proc/bus/usb/devices + - LP: #770050 + * USB: xhci - fix unsafe macro definitions + - LP: #770050 + * USB: xhci - fix math in xhci_get_endpoint_interval() + - LP: #770050 + * x86, cpu: Fix regression in AMD errata checking code + - LP: #770050 + * Linux 2.6.32.39 + - LP: #770050 + * fs/partitions/ldm.c: fix oops caused by corrupted partition table, + CVE-2011-1017 + - LP: #771382 + - CVE-2011-1017 + * drm/i915: set DIDL using the ACPI video output device _ADR method + return. + - LP: #775547 + * drm/radeon/kms: MC vram map needs to be >= pci aperture size + - LP: #775547 + * drm/radeon/kms: make sure blit addr masks are 64 bit + - LP: #775547 + * drm/radeon/kms: fix handling of tex lookup disable in cs checker on + r2xx + - LP: #775547 + * drm/i915: Free hardware status page on unload when physically mapped + - LP: #775547 + * drm/i915/overlay: Ensure that the reg_bo is in the GTT prior to + writing. + - LP: #775547 + * drm/radeon/kms/atom: set sane defaults in atombios_get_encoder_mode() + - LP: #775547 + * drm/radeon/kms: fix typos in disabled vbios code + - LP: #775547 + * drm/radeon/kms: add workaround for dce3 ddc line vbios bug + - LP: #775547 + * drm/radeon/kms: fix interlaced and doublescan handling + - LP: #775547 + * drm/i915/sdvo: Always add a 30ms delay to make SDVO TV detection + reliable + - LP: #775547 + * drm/radeon/kms: don't apply 7xx HDP flush workaround on AGP + - LP: #775547 + * drm/ttm: Fix two race conditions + fix busy codepaths + - LP: #775547 + * drm/i915: overlay on gen2 can't address above 1G + - LP: #775547 + * drm/i915: fix memory corruption with GM965 and >4GB RAM + - LP: #775547 + * drm/radeon: add quirk to make HP nx6125 laptop resume. + - LP: #775547 + * drm/radeon/kms: add quirk to make HP DV5000 laptop resume + - LP: #775547 + * ath: add missing regdomain pair 0x5c mapping + - LP: #780588 + * block, blk-sysfs: Fix an err return path in blk_register_queue() + - LP: #780588 + * p54: Initialize extra_len in p54_tx_80211 + - LP: #780588 + * intel-iommu: Unlink domain from iommu + - LP: #780588 + * intel-iommu: Fix get_domain_for_dev() error path + - LP: #780588 + * NFS: nfs_wcc_update_inode() should set nfsi->attr_gencount + - LP: #780588 + * serial/imx: read cts state only after acking cts change irq + - LP: #780588 + * ASoC: Fix output PGA enabling in wm_hubs CODECs + - LP: #780588 + * kconfig: Avoid buffer underrun in choice input + - LP: #780588 + * UBIFS: fix master node recovery + - LP: #780588 + * Remove extra struct page member from the buffer info structure + - LP: #780588 + * dasd: correct device table + - LP: #780588 + * iwlagn: Support new 5000 microcode. 
+ - LP: #780588 + * atl1c: duplicate atl1c_get_tpd + - LP: #780588 + * udp: Fix bogus UFO packet generation + - LP: #780588 + * slub: fix panic with DISCONTIGMEM + - LP: #780588 + * set memory ranges in N_NORMAL_MEMORY when onlined + - LP: #780588 + * FLEXCOP-PCI: fix __xlate_proc_name-warning for flexcop-pci + - LP: #780588 + * m68k/mm: Set all online nodes in N_NORMAL_MEMORY + - LP: #780588 + * nfs: don't lose MS_SYNCHRONOUS on remount of noac mount + - LP: #780588 + * NFSv4.1: Ensure state manager thread dies on last umount + - LP: #780588 + * agp: fix arbitrary kernel memory writes + - LP: #780588 + * agp: fix OOM and buffer overflow + - LP: #780588 + * Input: xen-kbdfront - fix mouse getting stuck after save/restore + - LP: #780588 + * pmcraid: reject negative request size + - LP: #780588 + * mpt2sas: prevent heap overflows and unchecked reads + - LP: #780588 + * put stricter guards on queue dead checks + - LP: #780588 + * mmc: sdhci-pci: Fix error case in sdhci_pci_probe_slot() + - LP: #780588 + * mmc: sdhci: Check mrq->cmd in sdhci_tasklet_finish + - LP: #780588 + * mmc: sdhci: Check mrq != NULL in sdhci_tasklet_finish + - LP: #780588 + * USB: fix regression in usbip by setting has_tt flag + - LP: #780588 + * af_unix: Only allow recv on connected seqpacket sockets. + - LP: #780588 + * ARM: 6891/1: prevent heap corruption in OABI semtimedop + - LP: #780588 + * i8k: Tell gcc that *regs gets clobbered + - LP: #780588 + * Fix gcc 4.5.1 miscompiling drivers/char/i8k.c (again) + - LP: #780588 + * Open with O_CREAT flag set fails to open existing files on non writable + directories + - LP: #780588 + * can: Add missing socket check in can/bcm release. + - LP: #780588 + * netxen: module firmware hints + - LP: #780588 + * bnx2x: declare MODULE_FIRMWARE + - LP: #780588 + * cxgb3: declare MODULE_FIRMWARE + - LP: #780588 + * myri10ge: declare MODULE_FIRMWARE + - LP: #780588 + * netx: declare MODULE_FIRMWARE + - LP: #780588 + * pcnet-cs: declare MODULE_FIRMWARE + - LP: #780588 + * spider-net: declare MODULE_FIRMWARE + - LP: #780588 + * tms380tr: declare MODULE_FIRMWARE + - LP: #780588 + * Input: Add support of Synaptics Clickpad device + - LP: #780588 + * Input: elantech - do not advertise relative events + - LP: #780588 + * Input: elantech - fix firmware version check + - LP: #780588 + * Input: elantech - allow forcing Elantech protocol + - LP: #780588 + * Input: elantech - ignore high bits in the position coordinates + - LP: #780588 + * Input: elantech - use all 3 bytes when checking version + - LP: #780588 + * Input: elantech - relax signature checks + - LP: #780588 + * Input: elantech - discard the first 2 positions on some firmwares + - LP: #780588 + * Staging: rtl8192su: check for skb == NULL + - LP: #780588 + * Staging: rtl8192su: Clean up in case of an error in module + initialisation + - LP: #780588 + * Staging: rtl8192su: Fix procfs code for interfaces not named wlan0 + - LP: #780588 + * Staging: rtl8192su: remove device ids + - LP: #780588 + * Staging: rtl8192su: add device ids + - LP: #780588 + * USB: retain USB device power/wakeup setting across reconfiguration + - LP: #780588 + * USB: don't enable remote wakeup by default + - LP: #780588 + * USB: teach "devices" file about Wireless and SuperSpeed USB + - LP: #780588 + * GFS2: Clean up gfs2_adjust_quota() and do_glock() + - LP: #780588 + * GFS2: Fix writing to non-page aligned gfs2_quota structures + - LP: #780588 + * GFS2: BUG in gfs2_adjust_quota + - LP: #780588 + * SUNRPC: fix NFS client over TCP hangs due to packet loss (Bug 
16494) + - LP: #780588 + * nfs4: Ensure that ACL pages sent over NFS were not allocated from the + slab (v3) + - LP: #780588 + * nfs: fix compilation warning + - LP: #780588 + * Fix corrupted OSF partition table parsing + - LP: #780588 + * Increase OSF partition limit from 8 to 18 + - LP: #780588 + * Please add support for Microsoft MN-120 PCMCIA network card + - LP: #780588 + * hwmon: (applesmc) Add iMac9,1 and MacBookPro2,2 support + - LP: #780588 + * hwmon: (applesmc) Add support for MacBook Pro 5,3 and 5,4 + - LP: #780588 + * hwmon: (applesmc) Add generic support for MacBook Pro 6 + - LP: #780588 + * hwmon: (applesmc) Add generic support for MacBook Pro 7 + - LP: #780588 + * hwmon: (applesmc) Add MacBookAir3,1(3,2) support + - LP: #780588 + * ALSA: emux: Add trivial compat ioctl handler + - LP: #780588 + * ALSA: powermac - Reverse HP detection on G4 DA + - LP: #780588 + * ALSA: powermac - Lineout detection on G4 DA + - LP: #780588 + * ALSA: hda - Add support for the new 27 inch IMacs + - LP: #780588 + * ALSA: hda - MacBookPro 5,3 line-in support + - LP: #780588 + * ALSA: hda - Add model=mbp55 entry for MacBookPro 7,1 + - LP: #780588 + * ALSA: hda - MacBookAir3,1(3,2) alsa support + - LP: #780588 + * virtio_net: fix oom handling on tx + - LP: #780588 + * mac80211: Add define for TX headroom reserved by mac80211 itself. + - LP: #780588 + * rt2x00: Centralize setting of extra TX headroom requested by rt2x00. + - LP: #780588 + * rt2x00: Properly request tx headroom for alignment operations. + - LP: #780588 + * rt2x00: use correct headroom for transmission + - LP: #780588 + * Bluetooth: Add support Bluetooth controller of MacbookPro 6,2 + - LP: #780588 + * Bluetooth: Add support Bluetooth controller of MacbookPro 7,1 + - LP: #780588 + * Bluetooth: Add MacBookAir3,1(2) support + - LP: #780588 + * perf tools: Display better error messages on missing packages + - LP: #780588 + * perf tools: Add 'make DEBUG=1' to remove the -O6 cflag + - LP: #780588 + * perf tools: Test -fstack-protector-all compiler option for inclusion in + CFLAGS + - LP: #780588 + * perf tools: Support static build + - LP: #780588 + * perf tools: Add V=2 option to help debug config issues + - LP: #780588 + * perf tools: Suggest static libraries as well + - LP: #780588 + * perf: Use default compiler mode by default + - LP: #780588 + * perf tools: Move QUIET_STDERR def to before first use + - LP: #780588 + * perf tools: Check if /dev/null can be used as the -o gcc argument + - LP: #780588 + * perf symbols: allow forcing use of cplus_demangle + - LP: #780588 + * V4L/DVB: Add Elgato EyeTV Diversity to dibcom driver + - LP: #780588 + * mmc: fix all hangs related to mmc/sd card insert/removal during + suspend/resume + - LP: #780588 + * mmc: build fix: mmc_pm_notify is only available with CONFIG_PM=y + - LP: #780588 + * b43: Fix warning at drivers/mmc/core/core.c:237 in mmc_wait_for_cmd + - LP: #780588 + * econet: Fix redeclaration of symbol len + - LP: #780588 + * econet: fix CVE-2010-3848 + - LP: #780588 + * dell-laptop: Add another Dell laptop to the DMI whitelist + - LP: #780588 + * dell-laptop: Add another Dell laptop family to the DMI whitelist + - LP: #780588 + * scsi_dh_emc: fix mode select request setup + - LP: #780588 + * scsi_dh_emc: request flag cleanup + - LP: #780588 + * cifs: fix another memleak, in cifs_root_iget + - LP: #780588 + * e1000e: Reset 82577/82578 PHY before first PHY register read + - LP: #780588 + * e1000: fix Tx hangs by disabling 64-bit DMA + - LP: #780588 + * btrfs: Require CAP_SYS_ADMIN for 
+    filesystem rebalance
+    - LP: #780588
+  * af_unix: limit recursion level
+    - LP: #780588
+  * init, sched: Fix race between init and kthreadd
+    - LP: #780588
+  * backlight: MacBookAir3,1(3,2) mbp-nvidia-bl support
+    - LP: #780588
+  * bonding: Ensure that we unshare skbs prior to calling pskb_may_pull
+    - LP: #780588
+  * HID: add MacBookAir 3,1 and 3,2 support
+    - LP: #780588
+  * intel-iommu: Force-disable IOMMU for iGFX on broken Cantiga revisions.
+    - LP: #780588
+  * ipg: Remove device claimed by dl2k from pci id table
+    - LP: #780588
+  * ipv6: Silence privacy extensions initialization
+    - LP: #780588
+  * l2tp: Fix UDP socket reference count bugs in the pppol2tp driver
+    - LP: #780588
+  * MIPS: DMA: Fix computation of DMA flags from device's
+    coherent_dma_mask.
+    - LP: #780588
+  * mpt2sas: fix the incorrect scsi_dma_map error checking
+    - LP: #780588
+  * Phonet: device notifier only runs on initial namespace
+    - LP: #780588
+  * powerpc/boot/dts: Install dts from the right directory
+    - LP: #780588
+  * rt2500usb: fallback to SW encryption for TKIP+AES
+    - LP: #780588
+  * sata_via: Delay on vt6420 when starting ATAPI DMA write
+    - LP: #780588
+  * tehuti: Firmware filename is tehuti/bdx.bin
+    - LP: #780588
+  * wireless: b43: fix error path in SDIO
+    - LP: #780588
+  * libata: set queue DMA alignment to sector size for ATAPI too
+    - LP: #780588
+  * usb: musb: core: set has_tt flag
+    - LP: #780588
+  * iwlwifi: fix skb usage after free
+    - LP: #780588
+  * can: add missing socket check in can/raw release
+    - LP: #780588
+  * fix oops in scsi_run_queue()
+    - LP: #780588
+  * Linux 2.6.32.40
+    - LP: #780588
+  * PCI: allow matching of prefetchable resources to non-prefetchable
+    windows
+    - LP: #424142
+  * cifs: check for bytes_remaining going to zero in CIFS_SessSetup
+    - LP: #788602
+  * Validate size of EFI GUID partition entries.
+    - LP: #788602
+  * dccp: handle invalid feature options length
+    - LP: #788602
+  * CIFS: Fix memory over bound bug in cifs_parse_mount_options
+    - LP: #788602
+  * Fix time() inconsistencies caused by intermediate xtime_cache values
+    being read
+    - LP: #788602
+  * ehea: fix wrongly reported speed and port
+    - LP: #788602
+  * NET: slip, fix ldisc->open retval
+    - LP: #788602
+  * ne-h8300: Fix regression caused during net_device_ops conversion
+    - LP: #788602
+  * hydra: Fix regression caused during net_device_ops conversion
+    - LP: #788602
+  * libertas: fix cmdpendingq locking
+    - LP: #788602
+  * zorro8390: Fix regression caused during net_device_ops conversion
+    - LP: #788602
+  * cifs: add fallback in is_path_accessible for old servers
+    - LP: #788602
+  * x86, AMD: Fix ARAT feature setting again
+    - LP: #788602
+  * clocksource: Install completely before selecting
+    - LP: #788602
+  * tick: Clear broadcast active bit when switching to oneshot
+    - LP: #788602
+  * x86, apic: Fix spurious error interrupts triggering on all non-boot APs
+    - LP: #788602
+  * x86, mce, AMD: Fix leaving freed data in a list
+    - LP: #788602
+  * megaraid_sas: Sanity check user supplied length before passing it to
+    dma_alloc_coherent()
+    - LP: #788602
+  * vmxnet3: Fix inconsistent LRO state after initialization
+    - LP: #788602
+  * netxen: Remove references to unified firmware file
+    - LP: #788602
+  * Linux 2.6.32.41
+    - LP: #788602
+  * drm/radeon/kms: fix bad shift in atom iio table parser
+    - LP: #788602
+  * Linux 2.6.32.41+drm33.18
+    - LP: #788602
+
+ -- Stefan Bader <stefan.bader@canonical.com>  Wed, 01 Jun 2011 16:22:08 +0200
+
+linux-ec2 (2.6.32-316.31) lucid-proposed; urgency=low
+
+  [ Stefan Bader ]
+
+  * XEN: x86: Flush TLB if PGD entry is changed in i386 PAE mode
+    - LP: #742056
+  * XEN: x86: Emit "mem=nopentium ignored" warning when not supported
+    - LP: #742056
+  * XEN: x86: Fix panic when handling "mem={invalid}" param
+    - LP: #742056
+  * XEN: x86/mm: Handle mm_fault_error() in kernel space
+    - LP: #742056
+  * SAUCE: XEN: Do not force sched_clock_stable
+    - LP: #727459
+
+ -- Stefan Bader <stefan.bader@canonical.com>  Mon, 02 May 2011 15:17:30 +0200
+
+linux-ec2 (2.6.32-316.30) lucid-proposed; urgency=low
+
+  [ Brad Figg ]
+
+  * Release Tracking Bug
+    - LP: #767978
+
+  [ Brad Figg ]
+
+  * Rebased to 2.6.32-32.62
+
+  [ Ubuntu: 2.6.32-32.62 ]
+
+  * Release Tracking Bug
+    - LP: #767370
+  * (config) Disable CONFIG_NET_NS
+    - LP: #720095
+  * Revert "drm/radeon/kms: Fix retrying ttm_bo_init() after it failed
+    once."
+    - LP: #736234
+  * Revert "drm/radeon: fall back to GTT if bo creation/validation in VRAM
+    fails."
+    - LP: #736234
+  * x86: pvclock: Move scale_delta into common header
+  * KVM: x86: Fix a possible backwards warp of kvmclock
+  * KVM: x86: Fix kvmclock bug
+  * cpuset: add a missing unlock in cpuset_write_resmask()
+    - LP: #736234
+  * keyboard: integer underflow bug
+    - LP: #736234
+  * RxRPC: Fix v1 keys
+    - LP: #736234
+  * ixgbe: fix for 82599 erratum on Header Splitting
+    - LP: #736234
+  * mm: fix possible cause of a page_mapped BUG
+    - LP: #736234
+  * powerpc/kdump: CPUs assume the context of the oopsing CPU
+    - LP: #736234
+  * powerpc/kdump: Use chip->shutdown to disable IRQs
+    - LP: #736234
+  * powerpc: Use more accurate limit for first segment memory allocations
+    - LP: #736234
+  * powerpc/pseries: Add hcall to read 4 ptes at a time in real mode
+    - LP: #736234
+  * powerpc/kexec: Speedup kexec hash PTE tear down
+    - LP: #736234
+  * powerpc/crashdump: Do not fail on NULL pointer dereferencing
+    - LP: #736234
+  * powerpc/kexec: Fix orphaned offline CPUs across kexec
+    - LP: #736234
+  * netfilter: nf_log: avoid oops in (un)bind with invalid nfproto values
+    - LP: #736234
+  * nfsd: wrong index used in inner loop
+    - LP: #736234
+  * r8169: use RxFIFO overflow workaround for 8168c chipset.
+    - LP: #736234
+  * Staging: comedi: jr3_pci: Don't ioremap too much space. Check result.
+    - LP: #736234
+  * net: don't allow CAP_NET_ADMIN to load non-netdev kernel modules,
+    CVE-2011-1019
+    - LP: #736234
+    - CVE-2011-1019
+  * ip6ip6: autoload ip6 tunnel
+    - LP: #736234
+  * Linux 2.6.32.33
+    - LP: #736234
+  * drm/radeon: fall back to GTT if bo creation/validation in VRAM fails.
+    - LP: #652934, #736234
+  * drm/radeon/kms: Fix retrying ttm_bo_init() after it failed once.
+    - LP: #652934, #736234
+  * drm: fix unsigned vs signed comparison issue in modeset ctl ioctl,
+    CVE-2011-1013
+    - LP: #736234
+    - CVE-2011-1013
+  * Linux 2.6.32.33+drm33.15
+    - LP: #736234
+  * econet: Fix crash in aun_incoming(). CVE-2010-4342
+    - LP: #736394
+    - CVE-2010-4342
+  * igb: only use vlan_gro_receive if vlans are registered, CVE-2010-4263
+    - LP: #737024
+    - CVE-2010-4263
+  * irda: prevent integer underflow in IRLMP_ENUMDEVICES, CVE-2010-4529
+    - LP: #737823
+    - CVE-2010-4529
+  * hwmon/f71882fg: Set platform drvdata to NULL later
+    - LP: #742056
+  * mtd: add "platform:" prefix for platform modalias
+    - LP: #742056
+  * libata: no special completion processing for EH commands
+    - LP: #742056
+  * MIPS: MTX-1: Make au1000_eth probe all PHY addresses
+    - LP: #742056
+  * x86/mm: Handle mm_fault_error() in kernel space
+    - LP: #742056
+  * ftrace: Fix memory leak with function graph and cpu hotplug
+    - LP: #742056
+  * x86: Fix panic when handling "mem={invalid}" param
+    - LP: #553464, #742056
+  * x86: Emit "mem=nopentium ignored" warning when not supported
+    - LP: #553464, #742056
+  * ahci: AHCI and RAID mode SATA patch for Intel Patsburg DeviceIDs
+    - LP: #742056
+  * ahci: AHCI mode SATA patch for Intel DH89xxCC DeviceIDs
+    - LP: #742056
+  * ahci: AHCI mode SATA patch for Intel Patsburg SATA RAID controller
+    - LP: #742056
+  * RDMA/cma: Fix crash in request handlers
+    - LP: #742056
+  * IB/cm: Bump reference count on cm_id before invoking callback
+    - LP: #742056
+  * ath9k_hw: Fix incorrect macversion and macrev checks
+    - LP: #742056
+  * USB: serial/kobil_sct, fix potential tty NULL dereference
+    - LP: #742056
+  * USB: serial: ch341: add new id
+    - LP: #742056
+  * xhci: Fix cycle bit calculation during stall handling.
+    - LP: #742056
+  * ALSA: hda - fix digital mic selection in mixer on 92HD8X codecs
+    - LP: #742056
+  * PCI: remove quirk for pre-production systems
+    - LP: #742056
+  * PCI: add more checking to ICH region quirks
+    - LP: #742056
+  * PCI: do not create quirk I/O regions below PCIBIOS_MIN_IO for ICH
+    - LP: #742056
+  * PCI: sysfs: Fix failure path for addition of "vpd" attribute
+    - LP: #742056
+  * ALSA: ctxfi - Fix incorrect SPDIF status bit mask
+    - LP: #742056
+  * ALSA: ctxfi - Fix SPDIF status retrieval
+    - LP: #742056
+  * ALSA: ctxfi - Clear input settings before initialization
+    - LP: #742056
+  * SUNRPC: Ensure we always run the tk_callback before tk_action
+    - LP: #742056
+  * perf, powerpc: Handle events that raise an exception without
+    overflowing
+    - LP: #742056
+  * ext3: Always set dx_node's fake_dirent explicitly.
+    - LP: #742056
+  * call_function_many: fix list delete vs add race
+    - LP: #742056
+  * call_function_many: add missing ordering
+    - LP: #742056
+  * x86: Flush TLB if PGD entry is changed in i386 PAE mode
+    - LP: #742056
+  * isdn: avoid calling tty_ldisc_flush() in atomic context
+    - LP: #742056
+  * smp_call_function_many: handle concurrent clearing of mask
+    - LP: #742056
+  * fix per-cpu flag problem in the cpu affinity checkers
+    - LP: #742056
+  * i2c: Fix typo in instantiating-devices document
+    - LP: #742056
+  * mmc: sdio: remember new card RCA when redetecting card
+    - LP: #742056
+  * powerpc/kexec: Fix race in kexec shutdown
+    - LP: #742056
+  * powerpc/kdump: Fix race in kdump shutdown
+    - LP: #742056
+  * powerpc: rtas_flash needs to use rtas_data_buf
+    - LP: #742056
+  * x86, binutils, xen: Fix another wrong size directive
+    - LP: #742056
+  * hwmon: (sht15) Fix integer overflow in humidity calculation
+    - LP: #742056
+  * Linux 2.6.32.34
+    - LP: #742056
+  * Linux 2.6.32.35
+    - LP: #742056
+  * aio: wake all waiters when destroying ctx
+    - LP: #744921
+  * shmem: let shared anonymous be nonlinear again
+    - LP: #744921
+  * PCI hotplug: acpiphp: set current_state to D0 in register_slot
+    - LP: #744921
+  * xen: set max_pfn_mapped to the last pfn mapped
+    - LP: #744921
+  * PCI: return correct value when writing to the "reset" attribute
+    - LP: #744921
+  * Prevent rt_sigqueueinfo and rt_tgsigqueueinfo from spoofing the signal
+    code
+    - LP: #744921
+  * ext3: skip orphan cleanup on rocompat fs
+    - LP: #744921
+  * procfs: fix /proc/<pid>/maps heap check
+    - LP: #744921
+  * proc: protect mm start_code/end_code in /proc/pid/stat, CVE-2011-0726
+    - LP: #744921
+    - CVE-2011-0726
+  * fbcon: Bugfix soft cursor detection in Tile Blitting
+    - LP: #744921
+  * nfsd41: modify the members value of nfsd4_op_flags
+    - LP: #744921
+  * nfsd: wrong index used in inner loop
+    - LP: #744921
+  * uvcvideo: Fix uvc_fixup_video_ctrl() format search
+    - LP: #744921
+  * ehci-hcd: Bug fix: don't set a QH's Halt bit
+    - LP: #744921
+  * USB: uss720 fixup refcount position
+    - LP: #744921
+  * USB: cdc-acm: fix memory corruption / panic
+    - LP: #744921
+  * USB: cdc-acm: fix potential null-pointer dereference
+    - LP: #744921
+  * USB: cdc-acm: fix potential null-pointer dereference on disconnect
+    - LP: #744921
+  * Input: xen-kbdfront - advertise either absolute or relative coordinates
+    - LP: #744921
+  * SUNRPC: Never reuse the socket port after an xs_close()
+    - LP: #744921
+  * fs: call security_d_instantiate in d_obtain_alias V2
+    - LP: #744921
+  * dcdbas: force SMI to happen when expected
+    - LP: #744921
+  * Linux 2.6.32.36
+    - LP: #744921
+  * drm/radeon/kms: check AA resolve registers on r300,
+    CVE-2011-1016
+    - LP: #745686
+    - CVE-2011-1016
+  * drm/radeon: fix regression with AA resolve checking, CVE-2011-1016
+    - LP: #745686
+    - CVE-2011-1016
+  * xen: events: do not unmask event channels on resume
+    - LP: #681083
+  * drm/radeon/kms: check AA resolve registers on r300
+    - LP: #754584
+  * drm/radeon: fix regression with AA resolve checking
+    - LP: #754584
+  * Linux 2.6.32.36+drm33.16
+    - LP: #754584
+  * ALSA: hda - Fix SPDIF out regression on ALC889
+    - LP: #764685
+  * ALSA: Fix yet another race in disconnection
+    - LP: #764685
+  * perf: Better fit max unprivileged mlock pages for tools needs
+    - LP: #764685
+  * myri10ge: fix rmmod crash
+    - LP: #764685
+  * cciss: fix lost command issue
+    - LP: #764685
+  * sound/oss/opl3: validate voice and channel indexes
+    - LP: #764685
+  * mac80211: initialize sta->last_rx in sta_info_alloc
+    - LP: #764685
+  * ses: show devices for enclosures with no page 7
+    - LP: #764685
+  * ses: Avoid kernel panic when lun 0 is not mapped
+    - LP: #764685
+  * eCryptfs: Unlock page in write_begin error path
+    - LP: #764685
+  * eCryptfs: ecryptfs_keyring_auth_tok_for_sig() bug fix
+    - LP: #764685
+  * staging: usbip: bugfixes related to kthread conversion
+    - LP: #764685
+  * staging: usbip: bugfix add number of packets for isochronous frames
+    - LP: #764685
+  * staging: usbip: bugfix for isochronous packets and optimization
+    - LP: #764685
+  * staging: hv: Fix GARP not sent after Quick Migration
+    - LP: #764685
+  * staging: hv: use sync_bitops when interacting with the hypervisor
+    - LP: #764685
+  * Relax si_code check in rt_sigqueueinfo and rt_tgsigqueueinfo
+    - LP: #764685
+  * xfs: prevent leaking uninitialized stack memory in FSGEOMETRY_V1
+    - LP: #764685
+  * irda: validate peer name and attribute lengths
+    - LP: #764685
+  * irda: prevent heap corruption on invalid nickname
+    - LP: #764685
+  * nilfs2: fix data loss in mmap page write for hole blocks
+    - LP: #764685
+  * ASoC: Explicitly say registerless widgets have no register
+    - LP: #764685
+  * ALSA: ens1371: fix Creative Ectiva support
+    - LP: #764685
+  * ROSE: prevent heap corruption with bad facilities
+    - LP: #764685
+  * Btrfs: Fix uninitialized root flags for subvolumes
+    - LP: #764685
+  * x86, mtrr, pat: Fix one cpu getting out of sync during resume
+    - LP: #764685
+  * ath9k: fix a chip wakeup related crash in ath9k_start
+    - LP: #764685
+  * UBIFS: do not read flash unnecessarily
+    - LP: #764685
+  * UBIFS: fix oops on error path in read_pnode
+    - LP: #764685
+  * UBIFS: fix debugging failure in dbg_check_space_info
+    - LP: #764685
+  * quota: Don't write quota info in dquot_commit()
+    - LP: #764685
+  * mm: avoid wrapping vm_pgoff in mremap()
+    - LP: #764685
+  * p54usb: IDs for two new devices
+    - LP: #764685
+  * b43: allocate receive buffers big enough for max frame len + offset
+    - LP: #764685
+  * Bluetooth: sco: fix information leak to userspace
+    - LP: #764685
+  * bridge: netfilter: fix information leak
+    - LP: #764685
+  * Bluetooth: bnep: fix buffer overflow
+    - LP: #764685
+  * Bluetooth: add support for Apple MacBook Pro 8,2
+    - LP: #764685
+  * Treat writes as new when holes span across page boundaries
+    - LP: #764685
+  * char/tpm: Fix unitialized usage of data buffer
+    - LP: #764685
+  * netfilter: ip_tables: fix infoleak to userspace
+    - LP: #764685
+  * netfilter: arp_tables: fix infoleak to userspace
+    - LP: #764685
+  * netfilter: ipt_CLUSTERIP: fix buffer overflow
+    - LP: #764685
+  * ipv6: netfilter: ip6_tables: fix infoleak to userspace
+    - LP: #764685
+  * mfd: ab3100: world-writable debugfs
+    *_priv files
+    - LP: #764685
+  * drivers/rtc/rtc-ds1511.c: world-writable sysfs nvram file
+    - LP: #764685
+  * drivers/misc/ep93xx_pwm.c: world-writable sysfs files
+    - LP: #764685
+  * econet: 4 byte infoleak to the network
+    - LP: #764685
+  * sound/oss: remove offset from load_patch callbacks
+    - LP: #764685
+  * sound: oss: midi_synth: check get_user() return value
+    - LP: #764685
+  * repair gdbstub to match the gdbserial protocol specification
+    - LP: #764685
+  * gro: Reset dev pointer on reuse
+    - LP: #764685
+  * gro: reset skb_iif on reuse
+    - LP: #764685
+  * x86, amd-ucode: Remove needless log messages
+    - LP: #764685
+  * x86, microcode, AMD: Extend ucode size verification
+    - LP: #764685
+  * powerpc/kexec: Add ifdef CONFIG_PPC_STD_MMU_64 to PPC64 code
+    - LP: #764685
+  * powerpc: Fix default_machine_crash_shutdown #ifdef botch
+    - LP: #764685
+  * Squashfs: handle corruption of directory structure
+    - LP: #764685
+  * sctp: fix to calc the INIT/INIT-ACK chunk length correctly is set
+    - LP: #764685
+  * atm/solos-pci: Don't include frame pseudo-header on transmit hex-dump
+    - LP: #764685
+  * ext4: fix credits computing for indirect mapped files
+    - LP: #764685
+  * nfsd: fix auth_domain reference leak on nlm operations
+    - LP: #764685
+  * CAN: Use inode instead of kernel address for /proc file
+    - LP: #764685
+  * exec: make argv/envp memory visible to oom-killer
+    - LP: #764685
+  * exec: copy-and-paste the fixes into compat_do_execve() paths
+    - LP: #764685
+  * xfs: zero proper structure size for geometry calls
+    - LP: #764685
+  * Linux 2.6.32.37
+    - LP: #764685
+  * Linux 2.6.32.38
+    - LP: #764685
+
+ -- Brad Figg <brad.figg@canonical.com>  Wed, 20 Apr 2011 18:25:39 -0700
+
+linux-ec2 (2.6.32-315.29) lucid-proposed; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebased to 2.6.32-31.61
+
+  [ Ubuntu: 2.6.32-31.61 ]
+
+  * Release Tracking Bug
+    - LP: #754842
+  * x86, quirk: Fix SB600 revision check
+    - LP: #742056
+
+ -- Stefan Bader <stefan.bader@canonical.com>  Thu, 14 Apr 2011 18:28:11 +0200
+
+linux-ec2 (2.6.32-315.28) lucid-proposed; urgency=low
+
+  [ Upstream Kernel Changes ]
+
+  * Rebased to 2.6.32-31.60
+
+  [ Ubuntu: 2.6.32-31.60 ]
+
+  * Release Tracking Bug
+    - LP: #734950
+  * SAUCE: Clear new_profile in error path
+    - LP: #732700
+  * [Config] CONFIG_BOOT_PRINTK_DELAY=y
+    - LP: #733191
+  * Revert "drm/radeon/bo: add some fallback placements for VRAM only
+    objects."
+    - LP: #652934
+  * drm/radeon: fall back to GTT if bo creation/validation in VRAM fails.
+    - LP: #652934
+  * drm/radeon/kms: Fix retrying ttm_bo_init() after it failed once.
+    - LP: #652934
+  * xfs: always use iget in bulkstat
+    - LP: #692848
+  * drm/radeon/kms: make the mac rv630 quirk generic
+    - LP: #728687
+  * drm/radeon/kms: add pll debugging output
+    - LP: #728687
+  * drm/radeon: remove 0x4243 pci id
+    - LP: #728687
+  * drm/radeon/kms: fix s/r issues with bios scratch regs
+    - LP: #728687
+  * drm/i915/lvds: Add AOpen i915GMm-HFS to the list of false-positive LVDS
+    - LP: #728687
+  * drm/i915: Add dependency on CONFIG_TMPFS
+    - LP: #728687
+  * Linux 2.6.32.29+drm33.14
+    - LP: #728687
+  * NFSD: memory corruption due to writing beyond the stat array
+    - LP: #728687
+  * mptfusion: mptctl_release is required in mptctl.c
+    - LP: #728687
+  * mptfusion: Fix Incorrect return value in mptscsih_dev_reset
+    - LP: #728687
+  * ocfs2_connection_find() returns pointer to bad structure
+    - LP: #728687
+  * x25: decrement netdev reference counts on unload
+    - LP: #728687
+  * x86, hpet: Disable per-cpu hpet timer if ARAT is supported
+    - LP: #728687
+  * OHCI: work around for nVidia shutdown problem
+    - LP: #728687
+  * x86/pvclock: Zero last_value on resume
+    - LP: #728687
+  * av7110: check for negative array offset
+    - LP: #728687
+  * CRED: Fix get_task_cred() and task_state() to not resurrect dead
+    credentials
+    - LP: #728687
+  * bonding/vlan: Avoid mangled NAs on slaves without VLAN tag insertion
+    - LP: #728687
+  * CRED: Fix kernel panic upon security_file_alloc() failure.
+    - LP: #728687
+  * CRED: Fix BUG() upon security_cred_alloc_blank() failure
+    - LP: #728687
+  * CRED: Fix memory and refcount leaks upon security_prepare_creds()
+    failure
+    - LP: #728687
+  * sendfile(): check f_op.splice_write() rather than f_op.sendpage()
+    - LP: #728687
+  * isdn: hisax: Replace the bogus access to irq stats
+    - LP: #728687
+  * ixgbe: add support for 82599 based Express Module X520-P2
+    - LP: #728687
+  * ixgbe: prevent speculative processing of descriptors before ready
+    - LP: #728687
+  * scsi_dh_alua: add netapp to dev list
+    - LP: #728687
+  * scsi_dh_alua: Add IBM Power Virtual SCSI ALUA device to dev list
+    - LP: #728687
+  * dm raid1: fail writes if errors are not handled and log fails
+    - LP: #728687
+  * GFS2: Fix bmap allocation corner-case bug
+    - LP: #728687
+  * dm raid1: fix null pointer dereference in suspend
+    - LP: #728687
+  * sunrpc/cache: fix module refcnt leak in a failure path
+    - LP: #728687
+  * be2net: Maintain tx and rx counters in driver
+    - LP: #728687
+  * tcp: Make TCP_MAXSEG minimum more correct.
+    - LP: #728687
+  * nfsd: correctly handle return value from nfsd_map_name_to_*
+    - LP: #728687
+  * s390: remove task_show_regs
+    - LP: #728687
+  * PM / Hibernate: Return error code when alloc_image_page() fails
+    - LP: #728687
+  * fs/partitions: Validate map_count in Mac partition tables
+    - LP: #728687
+  * ALSA: HDA: Add position_fix quirk for an Asus device
+    - LP: #718402, #728687
+  * ALSA: caiaq - Fix possible string-buffer overflow
+    - LP: #728687
+  * radio-aimslab.c needs #include <linux/io.h>
+    - LP: #728687
+  * ARM: Ensure predictable endian state on signal handler entry
+    - LP: #728687
+  * acer-wmi: Fix capitalisation of GUID
+    - LP: #728687
+  * eCryptfs: Copy up lower inode attrs in getattr
+    - LP: #728687
+  * platform: x86: acer-wmi: world-writable sysfs threeg file
+    - LP: #728687
+  * platform: x86: asus_acpi: world-writable procfs files
+    - LP: #728687
+  * platform: x86: tc1100-wmi: world-writable sysfs wireless and jogdial
+    files
+    - LP: #728687
+  * genirq: Disable the SHIRQ_DEBUG call in request_threaded_irq for now
+    - LP: #728687
+  * usb: musb: omap2430: fix kernel panic on reboot
+    - LP: #728687
+  * USB: add quirks entry for Keytouch QWERTY Panel
+    - LP: #728687
+  * USB: Add Samsung SGH-I500/Android modem ID switch to visor driver
+    - LP: #728687
+  * USB: Add quirk for Samsung Android phone modem
+    - LP: #728687
+  * p54pci: update receive dma buffers before and after processing
+    - LP: #728687
+  * sierra: add new ID for Airprime/Sierra USB IP modem
+    - LP: #728687
+  * staging: usbip: vhci: update reference count for usb_device
+    - LP: #728687
+  * staging: usbip: vhci: give back URBs from in-flight unlink requests
+    - LP: #728687
+  * staging: usbip: vhci: refuse to enqueue for dead connections
+    - LP: #728687
+  * staging: usbip: vhci: use urb->dev->portnum to find port
+    - LP: #728687
+  * epoll: prevent creating circular epoll structures
+    - LP: #728687
+  * ldm: corrupted partition table can cause kernel oops
+    - LP: #728687
+  * md: correctly handle probe of an 'mdp' device.
+    - LP: #728687
+  * x86 quirk: Fix polarity for IRQ0 pin2 override on SB800 systems
+    - LP: #728687
+  * xhci: Avoid BUG() in interrupt context
+    - LP: #728687
+  * xhci: Clarify some expressions in the TRB math
+    - LP: #728687
+  * xhci: Fix errors in the running total calculations in the TRB math
+    - LP: #728687
+  * xhci: Fix an error in count_sg_trbs_needed()
+    - LP: #728687
+  * x25: Do not reference freed memory.
+    - LP: #728687
+  * Linux 2.6.32.30
+    - LP: #728687
+  * Linux 2.6.32.31
+    - LP: #728687
+  * Ocfs2/refcounttree: Fix a bug for refcounttree to writeback clusters in
+    a right number.
+    - LP: #731226
+  * mfd: Fix NULL pointer due to non-initialized ucb1x00-ts absinfo
+    - LP: #731226
+  * x86: Use u32 instead of long to set reset vector back to 0
+    - LP: #731226
+  * fuse: fix hang of single threaded fuseblk filesystem
+    - LP: #731226
+  * clockevents: Prevent oneshot mode when broadcast device is periodic
+    - LP: #731226
+  * ext2: Fix link count corruption under heavy link+rename load
+    - LP: #731226
+  * sctp: Fix oops when sending queued ASCONF chunks
+    - LP: #731226
+  * virtio: set pci bus master enable bit
+    - LP: #731226
+  * netxen: fix set mac addr
+    - LP: #731226
+  * HID: add support for Acan FG-8100 barcode reader
+    - LP: #731226
+  * p54usb: add Senao NUB-350 usbid
+    - LP: #731226
+  * dccp: fix oops on Reset after close
+    - LP: #731226
+  * e1000e: disable broken PHY wakeup for ICH10 LOMs, use MAC wakeup
+    instead
+    - LP: #731226
+  * r8169: disable ASPM
+    - LP: #731226
+  * usb: iowarrior: don't trust report_size for buffer size
+    - LP: #731226
+  * arp_notify: unconditionally send gratuitous ARP for
+    NETDEV_NOTIFY_PEERS.
+    - LP: #731226
+  * CIFS: Fix oplock break handling (try #2)
+    - LP: #731226
+  * Linux 2.6.32.32
+    - LP: #731226
+
+ -- Stefan Bader <stefan.bader@canonical.com>  Fri, 18 Mar 2011 18:23:16 +0100
+
+linux-ec2 (2.6.32-314.27) lucid-proposed; urgency=low
+
+  [ Steve Conklin ]
+
+  * Release Tracking Bug
+    - LP: #728089
+
+  [ Stefan Bader ]
+
+  * SAUCE: XEN: Import f9dee15f07cc3e6625734f3cf667054a5ff5ac2b
+    - LP: #717177
+  * SAUCE: XEN: Import 65d64895d4ed4250448ac6906de25777470b5180
+    - LP: #717177
+  * SAUCE: XEN: Import cf383fb0c59287a81d6e71ebb7fafffd104a3983
+    - LP: #717177
+  * SAUCE: XEN: Import ffd91af58e011821332cc56019d1709f7d9a8a54
+    - LP: #717177
+  * SAUCE: XEN: Import 97a21d3ae6477ef00e39f21e47ce767a8ca1a22a
+    - LP: #717177
+  * SAUCE: XEN: Import b5d6492343e5897eba863ed1d08ca51552657d1f
+    - LP: #717177
+  * SAUCE: XEN: Import 9fb860778805809529f4c27b896370f2a945e945
+    - LP: #717177
+  * SAUCE: XEN: Import 20af80bf13116eb2d0d4bd6a307a4260cd277095
+    - LP: #717177
+  * SAUCE: XEN: Import 481710c9b5d154e81d388e9460fa2851a4ea7c94
+    - LP: #717177
+  * SAUCE: XEN: Import 9512754e274895426d5305d09a4c667d8621f420
+    - LP: #717177
+  * SAUCE: XEN: Update import 2444b629ee518745f54c2f7cf9e4de3d48e834cb
+    - LP: #717177
+  * SAUCE: XEN: Import 28995d0c0ef1958ecffdcc80091c46233daea29a
+    - LP: #717177
+  * SAUCE: XEN: Import fc04955bae3662eecaa8216694fd82b36256cfc6
+    - LP: #717177
+  * SAUCE: XEN: Import 4bbd3ea6754dc11fdb96a8ab9dfa13e24b1a6599
+    - LP: #717177
+  * SAUCE: XEN: Import 5a6ca212d5400dfcd8734da1f282b83a01a04d9e
+    - LP: #717177
+  * SAUCE: XEN: Import cde0c191416d1470a2aa30d893f7ea987363386b
+    - LP: #717177
+  * SAUCE: XEN: Import fe304738902627ec07c5195664ddb7a6f7750272
+    - LP: #717177
+  * SAUCE: XEN: Import 941f392437b3005d2d220a7254d48fbc342b6829
+    - LP: #717177
+  * SAUCE: XEN: Import 7ad50ac196fde9cf57a55b5908764227d9d7f8e6
+    - LP: #717177
+  * SAUCE: XEN: Import eeb31bcc98f8497dc83c5e7c8ef5c9a9438cb689
+    - LP: #717177
+  * SAUCE: XEN: Import d5324680becdc93bd37cbbc9581c727959c57ad9
+    - LP: #717177
+  * SAUCE: XEN: Import f2bb11b1573330fca6ddba33a5cff3695d1a690c
+    - LP: #717177
+  * SAUCE: XEN: Import 7e3ff59bf997c554ebd14b17b9ba4246448e80c7
+    - LP: #717177
+  * SAUCE: XEN: Import c19aaf77a85ba61de84de2bf6922e269d78223a0
+    - LP: #717177
+  * SAUCE: XEN: Import 77b49ffdcd22f66180778085716512d508ab2617
+    - LP: #717177
+  * SAUCE: XEN: Import 34ebf644aafc699db42a5c2d109b6f057ee6b4f3
+    - LP: #717177
+  * SAUCE: XEN: Import c4dad435c692830c634ab7001b62b6a90b5ae530
+    - LP: #717177
+  * SAUCE: XEN: Import 52c82b295371158c61f8b06d7a03daaaeedfea00
+    - LP: #717177
+  * SAUCE: XEN: Import f3a8bea7094f487ed6139a6b6254d0e4147a866d
+    - LP: #717177
+  * SAUCE: XEN: Import 2e911f0f7f1590fc8c4cb2a331ac1c1799a8385b
+    - LP: #717177
+  * SAUCE: XEN: Import 7aa0c2220a675805f38b43313228544a0cce7488
+    - LP: #717177
+  * SAUCE: XEN: Import a805c0009da4770f72dab3f3c71cd263f79894aa
+    - LP: #717177
+  * SAUCE: XEN: Import 26ab8408157fbbc669e1aab1f271bb3cb1c433d0
+    - LP: #717177
+  * SAUCE: XEN: Import ca51d9445c9b51d6b1c800c9c04e5472d55f0845
+    - LP: #717177
+  * SAUCE: XEN: Import 831d52bc153971b70e64eccfbed2b232394f22f8
+    - LP: #723819
+
+  [ Upstream Kernel Changes ]
+
+  * Rebased to 2.6.32-30.59
+
+  [ Ubuntu: 2.6.32-30.59 ]
+
+  * Release Tracking Bug
+    - LP: #727336
+  * [Config] CONFIG_IRQ_TIME_ACCOUNTING=n
+    - LP: #723819
+  * virtio_net: Add schedule check to napi_enable call
+    - LP: #579276
+  * NFS: fix the return value of nfs_file_fsync()
+    - LP: #585657
+  * block: check for proper length of iov entries earlier in
+    blk_rq_map_user_iov(), CVE-2010-4163
+    - LP: #721504
+    - CVE-2010-4163
+  * filter: make sure filters dont read uninitialized memory
+    - LP: #721282
+    - CVE-2010-4158
+  * tty: Make tiocgicount a handler, CVE-2010-4076, CVE-2010-4077
+    - LP: #720189
+    - CVE-2010-4077
+  * staging: usbip: remove double giveback of URB
+    - LP: #723819
+  * USB: EHCI: ASPM quirk of ISOC on AMD SB800
+    - LP: #723819
+  * rt2x00: add device id for windy31 usb device
+    - LP: #723819
+  * ALSA: snd-usb-us122l: Fix missing NULL checks
+    - LP: #723819
+  * hwmon: (via686a) Initialize fan_div values
+    - LP: #723819
+  * USB: serial: handle Data Carrier Detect changes
+    - LP: #723819
+  * USB: CP210x Add two device IDs
+    - LP: #723819
+  * USB: CP210x Removed incorrect device ID
+    - LP: #723819
+  * USB: usb-storage: unusual_devs update for Cypress ATACB
+    - LP: #723819
+  * USB: usb-storage: unusual_devs update for TrekStor DataStation maxi g.u
+    external hard drive enclosure
+    - LP: #723819
+  * USB: usb-storage: unusual_devs entry for CamSport Evo
+    - LP: #723819
+  * USB: usb-storage: unusual_devs entry for Coby MP3 player
+    - LP: #723819
+  * USB: serial: Updated support for ICOM devices
+    - LP: #723819
+  * USB: adding USB support for Cinterion's HC2x, EU3 and PH8 products
+    - LP: #723819
+  * USB: EHCI: ASPM quirk of ISOC on AMD Hudson
+    - LP: #723819
+  * USB: EHCI: fix DMA deallocation bug
+    - LP: #723819
+  * USB: g_printer: fix bug in module parameter definitions
+    - LP: #723819
+  * USB: io_edgeport: fix the reported firmware major and minor
+    - LP: #723819
+  * USB: ti_usb: fix module removal
+    - LP: #723819
+  * USB: Storage: Add unusual_devs entry for VTech Kidizoom
+    - LP: #723819
+  * USB: ftdi_sio: add ST Micro Connect Lite uart support
+    - LP: #723819
+  * USB: cdc-acm: Adding second ACM channel support for Nokia N8
+    - LP: #723819
+  * USB: ftdi_sio: Add VID=0x0647, PID=0x0100 for Acton Research
+    spectrograph
+    - LP: #723819
+  * USB: prevent buggy hubs from crashing the USB stack
+    - LP: #723819
+  * staging: comedi: add support for newer jr3 1-channel pci board
+    - LP: #723819
+  * staging: comedi: ni_labpc: Use shared IRQ for PCMCIA card
+    - LP: #723819
+  * Staging: hv: fix sysfs symlink on hv block device
+    - LP: #723819
+  * staging: hv: Enable sending GARP packet after live migration
+    - LP: #723819
+  * hvc_iucv: allocate memory buffers for IUCV in zone DMA
+    - LP: #723819
+  * iwlagn: enable only rfkill interrupt when device is down
+    - LP: #723819
+  * ath9k: Fix bug in delimiter padding computation
+    - LP: #723819
+  * correct vdso version string
+    - LP: #723819
+  * fix medium error problems with some arrays which can cause data
+    corruption
+    - LP: #723819
+  * libsas: fix runaway error handler problem
+    - LP: #723819
+  * mpt2sas: Fix device removal handshake for zoned devices
+    - LP: #723819
+  * mpt2sas: Correct resizing calculation for max_queue_depth
+    - LP: #723819
+  * mpt2sas: Kernel Panic during Large Topology discovery
+    - LP: #723819
+  * radio-aimslab.c: Fix gcc 4.5+ bug
+    - LP: #723819
+  * em28xx: Fix audio input for Terratec Grabby
+    - LP: #723819
+  * ALSA : au88x0 - Limit number of channels to fix Oops via OSS emu
+    - LP: #723819
+  * ALSA: HDA: Fix dmesg output of HDMI supported bits
+    - LP: #723819
+  * ALSA: hda - Fix memory leaks in conexant jack arrays
+    - LP: #723819
+  * input: bcm5974: Add support for MacBookAir3
+    - LP: #723819
+  * ALSA: hrtimer: handle delayed timer interrupts
+    - LP: #723819
+  * ASoC: WM8990: msleep() takes milliseconds not jiffies
+    - LP: #723819
+  * ASoC: Blackfin AC97: fix build error after multi-component update
+    - LP: #723819
+  * NFS: Fix "kernel BUG at fs/aio.c:554!"
+    - LP: #723819
+  * rtc-cmos: fix suspend/resume
+    - LP: #723819
+  * iwlagn: Re-enable RF_KILL interrupt when down
+    - LP: #723819
+  * rapidio: fix hang on RapidIO doorbell queue full condition
+    - LP: #723819
+  * PCI: pci-stub: ignore zero-length id parameters
+    - LP: #723819
+  * virtio: remove virtio-pci root device
+    - LP: #723819
+  * ds2760_battery: Fix calculation of time_to_empty_now
+    - LP: #723819
+  * p54: fix sequence no. accounting off-by-one error
+    - LP: #723819
+  * i2c: Unregister dummy devices last on adapter removal
+    - LP: #723819
+  * serial: unbreak billionton CF card
+    - LP: #723819
+  * ptrace: use safer wake up on ptrace_detach()
+    - LP: #723819
+  * x86, mtrr: Avoid MTRR reprogramming on BP during boot on UP platforms
+    - LP: #723819
+  * fix jiffy calculations in calibrate_delay_direct to handle overflow
+    - LP: #723819
+  * USB: serial: pl2303: Hybrid reader Uniform HCR331
+    - LP: #723819
+  * drivers: update to pl2303 usb-serial to support Motorola cables
+    - LP: #723819
+  * klist: Fix object alignment on 64-bit.
+    - LP: #723819
+  * powerpc: Fix some 6xx/7xxx CPU setup functions
+    - LP: #723819
+  * parisc : Remove broken line wrapping handling pdc_iodc_print()
+    - LP: #723819
+  * kernel/smp.c: fix smp_call_function_many() SMP race
+    - LP: #723819
+  * hostap_cs: fix sleeping function called from invalid context
+    - LP: #723819
+  * md: fix regression with re-adding devices to arrays with no metadata
+    - LP: #723819
+  * pata_mpc52xx: inherit from ata_bmdma_port_ops
+    - LP: #723819
+  * TPM: Long default timeout fix
+    - LP: #723819
+  * tpm_tis: Use timeouts returned from TPM
+    - LP: #723819
+  * SELinux: define permissions for DCB netlink messages
+    - LP: #723819
+  * SELinux: do not compute transition labels on mountpoint labeled
+    filesystems
+    - LP: #723819
+  * ieee80211: correct IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK macro
+    - LP: #723819
+  * dm: dont take i_mutex to change device size
+    - LP: #723819
+  * dm mpath: disable blk_abort_queue
+    - LP: #723819
+  * x86, mm: avoid possible bogus tlb entries by clearing prev mm_cpumask
+    after switching mm
+    - LP: #723819
+  * usb: Realloc xHCI structures after a hub is verified.
+    - LP: #723819
+  * sched: Remove USER_SCHED
+    - LP: #723819
+  * sched: Remove remaining USER_SCHED code
+    - LP: #723819
+  * sched: Move sched_avg_update() to update_cpu_load()
+    - LP: #723819
+  * sched: Increment cache_nice_tries only on periodic lb
+    - LP: #723819
+  * sched: Try not to migrate higher priority RT tasks
+    - LP: #723819
+  * sched: Give CPU bound RT tasks preference
+    - LP: #723819
+  * sched: suppress RCU lockdep splat in task_fork_fair
+    - LP: #723819
+  * sched: fix RCU lockdep splat from task_group()
+    - LP: #723819
+  * sched: Do not consider SCHED_IDLE tasks to be cache hot
+    - LP: #723819
+  * sched: Set group_imb only a task can be pulled from the busiest cpu
+    - LP: #723819
+  * sched: Force balancing on newidle balance if local group has capacity
+    - LP: #723819
+  * sched: Drop group_capacity to 1 only if local group has extra capacity
+    - LP: #723819
+  * sched: Fix softirq time accounting
+    - LP: #723819
+  * sched: Consolidate account_system_vtime extern declaration
+    - LP: #723819
+  * sched: Remove unused PF_ALIGNWARN flag
+    - LP: #723819
+  * sched: Add a PF flag for ksoftirqd identification
+    - LP: #723819
+  * sched: Add IRQ_TIME_ACCOUNTING, finer accounting of irq time
+    - LP: #723819
+  * x86: Add IRQ_TIME_ACCOUNTING
+    - LP: #723819
+  * sched: Do not account irq time to current task
+    - LP: #723819
+  * sched: Remove irq time from available CPU power
+    - LP: #723819
+  * sched: Call tick_check_idle before __irq_enter
+    - LP: #723819
+  * sched: Export account_system_vtime()
+    - LP: #723819
+  * sched, cgroup: Fixup broken cgroup movement
+    - LP: #723819
+  * sched: Use group weight, idle cpu metrics to fix imbalances during idle
+    - LP: #723819
+  * sched: Fix cross-sched-class wakeup preemption
+    - LP: #723819
+  * sched: Fix volanomark performance regression
+    - LP: #723819
+  * sched: Fix idle balancing
+    - LP: #723819
+  * sched: Fix wake_affine() vs RT tasks
+    - LP: #723819
+  * sched: Remove some dead code
+    - LP: #723819
+  * kernel/user.c: add lock release annotation on free_user()
+    - LP: #723819
+  * Linux 2.6.32.29
+    - LP: #723819
+  * rds: Integer overflow in RDS cmsg handling, CVE-2010-4175
+    - LP: #721455
+    - CVE-2010-4175
+
+ -- Stefan Bader <stefan.bader@canonical.com>  Wed, 02 Mar 2011 14:59:29 +0100
+
+linux-ec2 (2.6.32-313.26) lucid-proposed; urgency=low
+
+  [ Brad Figg ]
+
+  * Release Tracking Bug
+    - LP: #716657
+
+  [ Brad Figg ]
+
+  * Release Tracking Bug
+    - LP: #712864
+
+  [ Brad Figg ]
+
+  * Rebased to 2.6.32-29.58
+
+  [ Ubuntu: 2.6.32-29.58 ]
+
+  * Release Tracking Bug
+    - LP: #716551
+  * net: fix rds_iovec page count overflow, CVE-2010-3865
+    - LP: #709153
+    - CVE-2010-3865
+  * net: ax25: fix information leak to userland, CVE-2010-3875
+    - LP: #710714
+    - CVE-2010-3875
+  * net: ax25: fix information leak to userland harder, CVE-2010-3875
+    - LP: #710714
+    - CVE-2010-3875
+  * net: packet: fix information leak to userland, CVE-2010-3876
+    - LP: #710714
+    - CVE-2010-3876
+  * net: tipc: fix information leak to userland, CVE-2010-3877
+    - LP: #711291
+    - CVE-2010-3877
+  * inet_diag: Make sure we actually run the same bytecode we audited,
+    CVE-2010-3880
+    - LP: #711865
+    - CVE-2010-3880
+
+ -- Brad Figg <brad.figg@canonical.com>  Thu, 10 Feb 2011 11:03:57 -0800
+
+linux-ec2 (2.6.32-313.25) lucid-proposed; urgency=low
+
+  [ Brad Figg ]
+
+  * Tracking Bug
+    - LP: #708890
+
+  [ Andrew Dickinson ]
+
+  * SAUCE: sched: Prevent divide by zero when cpu_power is 0
+    - LP: #614853
+
+  [ Brad Figg ]
+
+  * Rebased to 2.6.32-29.57
+
+  [ Stefan Bader ]
+
+  * SAUCE: sched: Try tp catch cpu_power being set to 0
+    - LP: #614853
+
+  [ Upstream Kernel Changes ]
+
+  * SRU: xen: events: do not unmask event channels on resume
+    - LP: #681083
+
+  [ Ubuntu: 2.6.32-29.57 ]
+
+  * Tracking Bug
+    - LP: #708864
+  * [Config] Set CONFIG_NR_CPUS=256 for amd64 server
+    - LP: #706058
+  * Input: i8042 - introduce 'notimeout' blacklist for Dell Vostro V13
+    - LP: #380126
+  * tun: avoid BUG, dump packet on GSO errors
+    - LP: #698883
+  * TTY: Fix error return from tty_ldisc_open()
+    - LP: #705045
+  * x86, hotplug: Use mwait to offline a processor, fix the legacy case
+    - LP: #705045
+  * fuse: verify ioctl retries
+    - LP: #705045
+  * fuse: fix ioctl when server is 32bit
+    - LP: #705045
+  * ALSA: hda: Use model=lg quirk for LG P1 Express to enable playback and
+    capture
+    - LP: #595482, #705045
+  * nohz: Fix printk_needs_cpu() return value on offline cpus
+    - LP: #705045
+  * nohz: Fix get_next_timer_interrupt() vs cpu hotplug
+    - LP: #705045
+  * nfsd: Fix possible BUG_ON firing in set_change_info
+    - LP: #705045
+  * NFS: Fix fcntl F_GETLK not reporting some conflicts
+    - LP: #705045
+  * sunrpc: prevent use-after-free on clearing XPT_BUSY
+    - LP: #705045
+  * hwmon: (adm1026) Allow 1 as a valid divider value
+    - LP: #705045
+  * hwmon: (adm1026) Fix setting fan_div
+    - LP: #705045
+  * amd64_edac: Fix interleaving check
+    - LP: #705045
+  * IB/uverbs: Handle large number of entries in poll CQ
+    - LP: #705045
+  * PM / Hibernate: Fix PM_POST_* notification with user-space suspend
+    - LP: #705045
+  * ACPICA: Fix Scope() op in module level code
+    - LP: #705045
+  * ACPI: EC: Add another dmi match entry for MSI hardware
+    - LP: #705045
+  * orinoco: fix TKIP countermeasure behaviour
+    - LP: #705045
+  * orinoco: clear countermeasure setting on commit
+    - LP: #705045
+  * x86, amd: Fix panic on AMD CPU family 0x15
+    - LP: #705045
+  * md: fix bug with re-adding of partially recovered device.
+    - LP: #705045
+  * tracing: Fix panic when lseek() called on "trace" opened for writing
+    - LP: #705045
+  * x86, gcc-4.6: Use gcc -m options when building vdso
+    - LP: #705045
+  * x86: Enable the intr-remap fault handling after local APIC setup
+    - LP: #705045
+  * x86, vt-d: Handle previous faults after enabling fault handling
+    - LP: #705045
+  * x86, vt-d: Fix the vt-d fault handling irq migration in the x2apic mode
+    - LP: #705045
+  * x86, vt-d: Quirk for masking vtd spec errors to platform error handling
+    logic
+    - LP: #705045
+  * hvc_console: Fix race between hvc_close and hvc_remove
+    - LP: #705045
+  * hvc_console: Fix race between hvc_close and hvc_remove, again
+    - LP: #705045
+  * HID: hidraw: fix window in hidraw_release
+    - LP: #705045
+  * bfa: fix system crash when reading sysfs fc_host statistics
+    - LP: #705045
+  * net: release dst entry while cache-hot for GSO case too
+    - LP: #705045
+  * install_special_mapping skips security_file_mmap check.
+    - LP: #705045
+  * USB: misc: uss720.c: add another vendor/product ID
+    - LP: #705045
+  * USB: ftdi_sio: Add D.O.Tec PID
+    - LP: #705045
+  * USB: usb-storage: unusual_devs entry for the Samsung YP-CP3
+    - LP: #705045
+  * p54usb: add 5 more USBIDs
+    - LP: #705045
+  * p54usb: New USB ID for Gemtek WUBI-100GW
+    - LP: #705045
+  * sound: Prevent buffer overflow in OSS load_mixer_volumes
+    - LP: #705045
+  * mv_xor: fix race in tasklet function
+    - LP: #705045
+  * ima: fix add LSM rule bug
+    - LP: #705045
+  * ALSA: hda: Use LPIB for Dell Latitude 131L
+    - LP: #530346, #705045
+  * ALSA: hda: Use LPIB quirk for Dell Inspiron m101z/1120
+    - LP: #705045
+  * block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead
+    - LP: #705045
+  * sctp: Fix a race between ICMP protocol unreachable and connect()
+    - LP: #705045
+  * posix-cpu-timers: workaround to suppress the problems with mt exec
+    - LP: #705045
+  * Linux 2.6.32.28
+    - LP: #705045
+  * dell-laptop: Add another Dell laptop family to the DMI whitelist
+    - LP: #693078
+  * dell-laptop: Add another Dell laptop family to the DMI whitelist
+    - LP: #693078
+  * drm/ttm: Clear the ghost cpu_writers flag on
+    ttm_buffer_object_transfer.
+    - LP: #708769
+  * drm/kms: remove spaces from connector names (v2)
+    - LP: #708769
+  * Linux 2.6.32.28+drm33.13
+    - LP: #708769
+
+  [ Ubuntu: 2.6.32-28.56 ]
+
+  * Tracking Bug
+    - LP: #705565
+  * Just a build number increment for a new upload. There was an issue
+    in the previous upload that prevented ARMEL from building. The
+    issue has been resolved in the PPA and a new upload should produce
+    the requisite images.
+
+  [ Ubuntu: 2.6.32-28.55 ]
+
+  * Another version bump because of abi check failure
+  * Tracking Bug
+    - LP: #699885
+
+  [ Ubuntu: 2.6.32-28.54 ]
+
+  * Another version bump because of upload failure
+
+  [ Ubuntu: 2.6.32-28.53 ]
+
+  * Another version bump because of upload failure
+
+ -- Brad Figg <brad.figg@canonical.com>  Wed, 19 Jan 2011 12:23:49 -0800
+
+linux-ec2 (2.6.32-312.24) lucid-proposed; urgency=low
+
+  [ Brad Figg ]
+
+  * Tracking bug
+    - LP: #698345
+
+  [ Brad Figg ]
+
+  * Rebased to 2.6.32-28.52
+
+  [ Ubuntu: 2.6.32-28.52 ]
+
+
+  [ Ubuntu: 2.6.32-28.51 ]
+
+  * bumped version due to build fail
+
+  [ Ubuntu: 2.6.32-28.50 ]
+
+  * SAUCE: Change nodelayacct boot parameter polarity.
+    - LP: #493156
+  * [Config] CONFIG_TASK_DELAY_ACCT=y
+    - LP: #493156
+  * ipc: initialize structure memory to zero for compat functions
+  * tcp: Increase TCP_MAXSEG socket option minimum.
+    - CVE-2010-4165
+  * perf_events: Fix perf_counter_mmap() hook in mprotect()
+    - CVE-2010-4169
+  * af_unix: limit unix_tot_inflight
+    - CVE-2010-4249
+  * AppArmor: fix the upper bound check for the next/check table
+    - LP: #581525
+  * NFS: Fix panic after nfs_umount()
+    - LP: #683938
+  * block: Ensure physical block size is unsigned int
+    - LP: #688669
+  * block: limit vec count in bio_kmalloc() and bio_alloc_map_data()
+    - LP: #688669
+  * block: take care not to overflow when calculating total iov length
+    - LP: #688669
+  * block: check for proper length of iov entries in blk_rq_map_user_iov()
+    - LP: #688669
+  * jme: Fix PHY power-off error
+    - LP: #688669
+  * irda: Fix parameter extraction stack overflow
+    - LP: #688669
+  * irda: Fix heap memory corruption in iriap.c
+    - LP: #688669
+  * i2c-pca-platform: Change device name of request_irq
+    - LP: #688669
+  * microblaze: Fix build with make 3.82
+    - LP: #688669
+  * Staging: asus_oled: fix up some sysfs attribute permissions
+    - LP: #688669
+  * Staging: asus_oled: fix up my fixup for some sysfs attribute
+    permissions
+    - LP: #688669
+  * Staging: line6: fix up some sysfs attribute permissions
+    - LP: #688669
+  * hpet: fix unwanted interrupt due to stale irq status bit
+    - LP: #688669
+  * hpet: unmap unused I/O space
+    - LP: #688669
+  * olpc_battery: Fix endian neutral breakage for s16 values
+    - LP: #688669
+  * percpu: fix list_head init bug in __percpu_counter_init()
+    - LP: #688669
+  * um: remove PAGE_SIZE alignment in linker script causing kernel
+    segfault.
+    - LP: #688669
+  * um: fix global timer issue when using CONFIG_NO_HZ
+    - LP: #688669
+  * numa: fix slab_node(MPOL_BIND)
+    - LP: #688669
+  * hwmon: (lm85) Fix ADT7468 frequency table
+    - LP: #688669
+  * mm: fix return value of scan_lru_pages in memory unplug
+    - LP: #688669
+  * mm: fix is_mem_section_removable() page_order BUG_ON check
+    - LP: #688669
+  * ssb: b43-pci-bridge: Add new vendor for BCM4318
+    - LP: #688669
+  * sgi-xpc: XPC fails to discover partitions with all nasids above 128
+    - LP: #688669
+  * xen: ensure that all event channels start off bound to VCPU 0
+    - LP: #688669
+  * xen: don't bother to stop other cpus on shutdown/reboot
+    - LP: #688669
+  * sys_semctl: fix kernel stack leakage
+    - LP: #688669
+  * net: NETIF_F_HW_CSUM does not imply FCoE CRC offload
+    - LP: #688669
+  * drivers/char/vt_ioctl.c: fix VT_OPENQRY error value
+    - LP: #688669
+  * viafb: use proper register for colour when doing fill ops
+    - LP: #688669
+  * eCryptfs: Clear LOOKUP_OPEN flag when creating lower file
+    - LP: #688669
+  * md/raid1: really fix recovery looping when single good device fails.
+    - LP: #688669
+  * md: fix return value of rdev_size_change()
+    - LP: #688669
+  * x86: AMD Northbridge: Verify NB's node is online
+    - LP: #688669
+  * tty: prevent DOS in the flush_to_ldisc
+    - LP: #688669
+  * TTY: restore tty_ldisc_wait_idle
+    - LP: #688669
+  * tty_ldisc: Fix BUG() on hangup
+    - LP: #688669
+  * TTY: ldisc, fix open flag handling
+    - LP: #688669
+  * KVM: VMX: fix vmx null pointer dereference on debug register access
+    - LP: #688669
+    - CVE-2010-0435
+  * KVM: x86: fix information leak to userland
+    - LP: #688669
+  * firewire: cdev: fix information leak
+    - LP: #688669
+  * firewire: core: fix an information leak
+    - LP: #688669
+  * firewire: ohci: fix buffer overflow in AR split packet handling
+    - LP: #688669
+  * firewire: ohci: fix race in AR split packet handling
+    - LP: #688669
+  * ALSA: ac97: Apply quirk for Dell Latitude D610 binding Master and
+    Headphone controls
+    - LP: #669279, #688669
+  * ALSA: HDA: Add an extra DAC for Realtek ALC887-VD
+    - LP: #688669
+  * ALSA: hda: Use "alienware" model quirk for another SSID
+    - LP: #683695, #688669
+  * netfilter: nf_conntrack: allow nf_ct_alloc_hashtable() to get highmem
+    pages
+    - LP: #688669
+  * latencytop: fix per task accumulator
+    - LP: #688669
+  * mm/vfs: revalidate page->mapping in do_generic_file_read()
+    - LP: #688669
+  * bio: take care not overflow page count when mapping/copying user data
+    - LP: #688669
+  * libata-scsi passthru: fix bug which truncated LBA48 return values
+    - LP: #688669
+  * libata: fix NULL sdev dereference race in atapi_qc_complete()
+    - LP: #688669
+  * PCI: fix size checks for mmap() on /proc/bus/pci files
+    - LP: #688669
+  * PCI: fix offset check for sysfs mmapped files
+    - LP: #688669
+  * efifb: check that the base address is plausible on pci systems
+    - LP: #688669
+  * USB: gadget: AT91: fix typo in atmel_usba_udc driver
+    - LP: #688669
+  * USB: ftdi_sio: add device IDs for Milkymist One JTAG/serial
+    - LP: #688669
+  * USB: option: fix when the driver is loaded incorrectly for some Huawei
+    devices.
+    - LP: #688669
+  * usb: misc: sisusbvga: fix information leak to userland
+    - LP: #688669
+  * usb: misc: iowarrior: fix information leak to userland
+    - LP: #688669
+  * usb: core: fix information leak to userland
+    - LP: #688669
+  * USB: EHCI: fix obscure race in ehci_endpoint_disable
+    - LP: #688669
+  * USB: storage: sierra_ms: fix sysfs file attribute
+    - LP: #688669
+  * USB: atm: ueagle-atm: fix up some permissions on the sysfs files
+    - LP: #688669
+  * USB: misc: cypress_cy7c63: fix up some sysfs attribute permissions
+    - LP: #688669
+  * USB: misc: usbled: fix up some sysfs attribute permissions
+    - LP: #688669
+  * USB: ftdi_sio: revert "USB: ftdi_sio: fix DTR/RTS line modes"
+    - LP: #688669
+  * USB: misc: trancevibrator: fix up a sysfs attribute permission
+    - LP: #688669
+  * USB: misc: usbsevseg: fix up some sysfs attribute permissions
+    - LP: #688669
+  * USB: ftdi_sio: Add ID for RT Systems USB-29B radio cable
+    - LP: #688669
+  * USB: serial: ftdi_sio: Vardaan USB RS422/485 converter PID added
+    - LP: #688669
+  * acpi-cpufreq: fix a memleak when unloading driver
+    - LP: #688669
+  * ACPI: EC: add Vista incompatibility DMI entry for Toshiba Satellite
+    L355
+    - LP: #688669
+  * fuse: fix attributes after open(O_TRUNC)
+    - LP: #688669
+  * do_exit(): make sure that we run with get_fs() == USER_DS
+    - LP: #688669
+  * uml: disable winch irq before freeing handler data
+    - LP: #688669
+  * backlight: grab ops_lock before testing bd->ops
+    - LP: #688669
+  * nommu: yield CPU while disposing VM
+    - LP: #688669
+  * DECnet: don't leak uninitialized stack byte
+    - LP: #688669
+  * ARM: 6489/1: thumb2: fix incorrect optimisation in usracc
+    - LP: #688669
+  * ARM: 6482/2: Fix find_next_zero_bit and related assembly
+    - LP: #688669
+  * Staging: frontier: fix up some sysfs attribute permissions
+    - LP: #688669
+  * staging: rtl8187se: Change panic to warn when RF switch turned off
+    - LP: #688669
+  * HID: hidraw, fix a NULL pointer dereference in hidraw_ioctl
+    - LP: #688669
+  * HID: hidraw, fix a NULL pointer dereference in hidraw_write
+    - LP: #688669
+  * gianfar: Fix crashes on RX path (Was Re: [Bugme-new] [Bug 19692] New:
+    linux-2.6.36-rc5 crash with gianfar ethernet at full line rate traffic)
+    - LP: #688669
+  * Limit sysctl_tcp_mem and sysctl_udp_mem initializers to prevent integer
+    overflows.
+    - LP: #688669
+  * sparc64: Fix race in signal instruction flushing.
+    - LP: #688669
+  * sparc: Don't mask signal when we can't setup signal frame.
+    - LP: #688669
+  * sparc: Prevent no-handler signal syscall restart recursion.
+    - LP: #688669
+  * x86, UV: Delete unneeded boot messages
+    - LP: #688669
+  * x86, UV: Fix initialization of max_pnode
+    - LP: #688669
+  * drivers/video/efifb.c: support framebuffer for NVIDIA 9400M in MacBook
+    Pro 5,1
+    - LP: #688669
+  * efifb: support the EFI framebuffer on more Apple hardware
+    - LP: #688669
+  * V4L/DVB (13154): uvcvideo: Handle garbage at the end of streaming
+    interface descriptors
+    - LP: #688669
+  * Input: i8042 - add Sony VAIO VPCZ122GX to nomux list
+    - LP: #688669
+  * x25: Patch to fix bug 15678 - x25 accesses fields beyond end of packet.
+    - LP: #688669
+  * memory corruption in X.25 facilities parsing
+    - LP: #688669
+  * can-bcm: fix minor heap overflow
+    - LP: #688669
+  * V4L/DVB: ivtvfb: prevent reading uninitialized stack memory
+    - LP: #688669
+  * x25: Prevent crashing when parsing bad X.25 facilities
+    - LP: #688669
+  * crypto: padlock - Fix AES-CBC handling on odd-block-sized input
+    - LP: #688669
+  * x86-32: Separate 1:1 pagetables from swapper_pg_dir
+    - LP: #688669
+  * x86, mm: Fix CONFIG_VMSPLIT_1G and 2G_OPT trampoline
+    - LP: #688669
+  * x86-32: Fix dummy trampoline-related inline stubs
+    - LP: #688669
+  * rds: Integer overflow in RDS cmsg handling
+    - LP: #688669
+  * net: Truncate recvfrom and sendto length to INT_MAX.
+    - LP: #688669
+  * net: Limit socket I/O iovec total length to INT_MAX.
+    - LP: #688669
+  * nmi: fix clock comparator revalidation
+    - LP: #688669
+  * UV - XPC: pass nasid instead of nid to gru_create_message_queue
+    - LP: #688669
+  * x86: uv: XPC receive message reuse triggers invalid BUG_ON()
+    - LP: #688669
+  * X86: uv: xpc_make_first_contact hang due to not accepting ACTIVE state
+    - LP: #688669
+  * x86: uv: xpc NULL deref when mesq becomes empty
+    - LP: #688669
+  * Linux 2.6.32.27
+    - LP: #688669
+
+ -- Brad Figg <brad.figg@canonical.com>  Thu, 06 Jan 2011 14:07:09 -0800
+
+linux-ec2 (2.6.32-311.23) lucid-proposed; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebased to 2.6.32-27.49
+
+  [ Upstream Kernel Changes ]
+
+  * xen, compat: Test %rax for the syscall number, not %eax
+    - CVE-2010-3301
+  * xen, compat: Retruncate rax after ia32 syscall entry tracing
+    - CVE-2010-3301
+  * xen: Match cpu_get_cap() with upstream changes
+    - LP: #668380
+
+  [ Ubuntu: 2.6.32-27.49 ]
+
+  * Revert "SAUCE: AF_ECONET prevent kernel stack overflow"
+  * Revert "SAUCE: AF_ECONET SIOCSIFADDR ioctl does not check privileges"
+  * Revert "SAUCE: AF_ECONET saddr->cookie prevent NULL pointer
+    dereference"
+  * [Config] Added be2net, be2scsi to udebs
+    - LP: #628776
+  * [Config] Use correct be2iscsi module name in d-i/modules/scsi-modules
+    - LP: #628776
+  * Revert "(pre-stable) ACPI: enable repeated PCIEXP wakeup by clearing
+    PCIEXP_WAKE_STS on resume"
+  * Revert "mm: (pre-stable) Move vma_stack_continue into mm.h"
+  * x86, cpu: After uncapping CPUID, re-run CPU feature detection
+    - LP: #668380
+  * ALSA: sound/pci/rme9652: prevent reading uninitialized stack memory
+    - LP: #668380
+  * ALSA: oxygen: fix analog capture on Claro halo cards
+    - LP: #668380
+  * ALSA: hda - Add Dell Latitude E6400 model quirk
+    - LP: #643891, #668380
+  * ALSA: rawmidi: fix oops (use after free) when unloading a driver module
+    - LP: #668380
+  * USB: fix bug in initialization of interface minor numbers
+    - LP: #668380
+  * usb: musb: gadget: fix kernel panic if using out ep with FIFO_TXRX
+    style
+    - LP: #668380
+  * usb: musb: gadget: restart request on clearing endpoint halt
+    - LP: #668380
+  * oprofile: Add Support for Intel CPU Family 6 / Model 29
+    - LP: #668380
+  * RDMA/cxgb3: Turn off RX coalescing for iWARP connections
+    - LP: #668380
+  * mmc: sdhci-s3c: fix NULL ptr access in sdhci_s3c_remove
+    - LP: #668380
+  * x86/amd-iommu: Set iommu configuration flags in enable-loop
+    - LP: #668380
+  * x86/amd-iommu: Fix rounding-bug in __unmap_single
+    - LP: #668380
+  * x86/amd-iommu: Work around S3 BIOS bug
+    - LP: #668380
+  * tracing/x86: Don't use mcount in pvclock.c
+    - LP: #668380
+  * tracing/x86: Don't use mcount in kvmclock.c
+    - LP: #668380
+  * v4l1: fix 32-bit compat microcode loading translation
+    - LP: #668380
+  * V4L/DVB: cx231xx: Avoid an OOPS when card is
+    unknown (card=0)
+    - LP: #668380
+  * V4L/DVB (13966): DVB-T regression fix for saa7134 cards
+    - LP: #668380
+  * Input: joydev - fix JSIOCSAXMAP ioctl
+    - LP: #668380
+  * x86, hpet: Fix bogus error check in hpet_assign_irq()
+    - LP: #668380
+  * x86, irq: Plug memory leak in sparse irq
+    - LP: #668380
+  * ubd: fix incorrect sector handling during request restart
+    - LP: #668380
+  * ring-buffer: Fix typo of time extends per page
+    - LP: #668380
+  * dmaengine: fix interrupt clearing for mv_xor
+    - LP: #668380
+  * hrtimer: Preserve timer state in remove_hrtimer()
+    - LP: #668380
+  * i2c-pca: Fix waitforcompletion() return value
+    - LP: #668380
+  * wext: fix potential private ioctl memory content leak
+    - LP: #668380
+  * atl1: fix resume
+    - LP: #668380
+  * x86, AMD, MCE thresholding: Fix the MCi_MISCj iteration order
+    - LP: #668380
+  * De-pessimize rds_page_copy_user
+    - LP: #668380
+  * xfrm4: strip ECN and IP Precedence bits in policy lookup
+    - LP: #668380
+  * tcp: Fix >4GB writes on 64-bit.
+    - LP: #668380
+  * net: Fix the condition passed to sk_wait_event()
+    - LP: #668380
+  * Phonet: Correct header retrieval after pskb_may_pull
+    - LP: #668380
+  * net: Fix IPv6 PMTU disc. w/ asymmetric routes
+    - LP: #668380
+  * ip: fix truesize mismatch in ip fragmentation
+    - LP: #668380
+  * net: clear heap allocations for privileged ethtool actions
+    - LP: #668380
+  * tcp: Fix race in tcp_poll
+    - LP: #668380
+  * netxen: dont set skb->truesize
+    - LP: #668380
+  * net: blackhole route should always be recalculated
+    - LP: #668380
+  * skge: add quirk to limit DMA
+    - LP: #668380
+  * r8169: allocate with GFP_KERNEL flag when able to sleep
+    - LP: #668380
+  * bsg: fix incorrect device_status value
+    - LP: #668380
+  * r6040: fix r6040_multicast_list
+    - LP: #668380
+  * r6040: Fix multicast list iteration when hash filter is used
+    - LP: #668380
+  * powerpc: Initialise paca->kstack before early_setup_secondary
+    - LP: #668380
+  * powerpc: Don't use kernel stack with translation off
+    - LP: #668380
+  * b44: fix carrier detection on bind
+    - LP: #668380
+  * ACPI: enable repeated PCIEXP wakeup by clearing PCIEXP_WAKE_STS on
+    resume
+    - LP: #613381, #668380
+  * intel_idle: PCI quirk to prevent Lenovo Ideapad s10-3 boot hang
+    - LP: #668380
+  * ACPI: EC: add Vista incompatibility DMI entry for Toshiba Satellite
+    L355
+    - LP: #668380
+  * ACPI: delete ZEPTO idle=nomwait DMI quirk
+    - LP: #668380
+  * ACPI: Disable Windows Vista compatibility for Toshiba P305D
+    - LP: #668380
+  * x86: detect scattered cpuid features earlier
+    - LP: #668380
+  * fix 2.6.32.23 suspend regression caused by commit 6f6198a
+    - LP: #668380
+  * setup_arg_pages: diagnose excessive argument size
+    - LP: #668380
+  * execve: improve interactivity with large arguments
+    - LP: #668380
+  * execve: make responsive to SIGKILL with large arguments
+    - LP: #668380
+  * Phonet: disable network namespace support
+    - LP: #668380
+  * mm: Move vma_stack_continue into mm.h
+    - LP: #668380
+  * Linux 2.6.32.25
+    - LP: #668380
+  * xfs: validate untrusted inode numbers during lookup
+    - CVE-2010-2943
+  * xfs: rename XFS_IGET_BULKSTAT to XFS_IGET_UNTRUSTED
+    - CVE-2010-2943
+  * xfs: remove block number from inode lookup code
+    - CVE-2010-2943
+  * xfs: fix untrusted inode number lookup
+    - CVE-2010-2943
+  * drm/i915: Sanity check pread/pwrite
+    - CVE-2010-2962
+  * drm/i915: Rephrase pwrite bounds checking to avoid any potential
+    overflow
+    - CVE-2010-2962
+  * net: clear heap allocation for ETHTOOL_GRXCLSRLALL
+    - CVE-2010-3861
+  * ipc: shm: fix information leak
+    to userland
+    - CVE-2010-4072
+  * staging: usbip: Notify usb core of port status changes
+    - LP: #681132
+  * staging: usbip: Process event flags without delay
+    - LP: #681132
+  * powerpc/perf: Fix sampling enable for PPC970
+    - LP: #681132
+  * pcmcia: synclink_cs: fix information leak to userland
+    - LP: #681132
+  * sched: Fix string comparison in /proc/sched_features
+    - LP: #681132
+  * bluetooth: Fix missing NULL check
+    - LP: #681132
+  * futex: Fix errors in nested key ref-counting
+    - LP: #681132
+  * mm, x86: Saving vmcore with non-lazy freeing of vmas
+    - LP: #681132
+  * x86, cpu: Fix renamed, not-yet-shipping AMD CPUID feature bit
+    - LP: #681132
+  * x86, kexec: Make sure to stop all CPUs before exiting the kernel
+    - LP: #681132
+  * x86, olpc: Don't retry EC commands forever
+    - LP: #681132
+  * x86, mtrr: Assume SYS_CFG[Tom2ForceMemTypeWB] exists on all future AMD
+    CPUs
+    - LP: #681132
+  * x86, intr-remap: Set redirection hint in the IRTE
+    - LP: #681132
+  * x86, kdump: Change copy_oldmem_page() to use cached addressing
+    - LP: #681132
+  * KVM: SVM: Fix wrong intercept masks on 32 bit
+    - LP: #681132
+  * KVM: MMU: fix direct sps access corrupted
+    - LP: #681132
+  * KVM: MMU: fix conflict access permissions in direct sp
+    - LP: #681132
+  * KVM: VMX: Fix host GDT.LIMIT corruption
+    - LP: #681132
+  * KVM: SVM: Adjust tsc_offset only if tsc_unstable
+    - LP: #681132
+  * KVM: x86: Fix SVM VMCB reset
+    - LP: #681132
+  * KVM: x86: Move TSC reset out of vmcb_init
+    - LP: #681132
+  * KVM: Fix fs/gs reload oops with invalid ldt
+    - LP: #681132
+  * KVM: Correct ordering of ldt reload wrt fs/gs reload
+  * KVM: VMX: Fix host userspace gsbase corruption
+  * pipe: fix failure to return error code on ->confirm()
+    - LP: #681132
+  * p54usb: fix off-by-one on !CONFIG_PM
+    - LP: #681132
+  * p54usb: add five more USBIDs
+    - LP: #681132
+  * drivers/net/wireless/p54/eeprom.c: Return -ENOMEM on memory allocation
+    failure
+    - LP: #681132
+  * USB: ftdi_sio: Add PID for accesio products
+    - LP: #681132
+  * USB: add PID for FTDI based OpenDCC hardware
+    - LP: #681132
+  * USB: ftdi_sio: new VID/PIDs for various Papouch devices
+    - LP: #681132
+  * USB: ftdi_sio: add device ids for ScienceScope
+    - LP: #681132
+  * usb: musb: blackfin: call gpio_free() on error path in
+    musb_platform_init()
+    - LP: #681132
+  * USB: option: Add more ZTE modem USB id's
+    - LP: #681132
+  * USB: cp210x: Add Renesas RX-Stick device ID
+    - LP: #681132
+  * USB: cp210x: Add WAGO 750-923 Service Cable device ID
+    - LP: #681132
+  * USB: atmel_usba_udc: force vbus_pin at -EINVAL when gpio_request
+    failled
+    - LP: #681132
+  * USB: disable endpoints after unbinding interfaces, not before
+    - LP: #681132
+  * USB: opticon: Fix long-standing bugs in opticon driver
+    - LP: #681132
+  * USB: accept some invalid ep0-maxpacket values
+    - LP: #681132
+  * sd name space exhaustion causes system hang
+    - LP: #681132
+  * libsas: fix NCQ mixing with non-NCQ
+    - LP: #681132
+  * gdth: integer overflow in ioctl
+    - LP: #681132
+  * Fix race when removing SCSI devices
+    - LP: #681132
+  * Fix regressions in scsi_internal_device_block
+    - LP: #681132
+  * sgi-xp: incoming XPC channel messages can come in after the channel's
+    partition structures have been torn down
+    - LP: #681132
+  * Linux 2.6.32.26
+    - LP: #681132
+  * drm/radeon: fix PCI ID 5657 to be an RV410
+    - LP: #683257
+  * Linux 2.6.32.26+drm33.12
+    - LP: #683257
+  * econet: disallow NULL remote addr for sendmsg(), fixes CVE-2010-3849
+    - CVE-2010-3849
+  * econet: fix CVE-2010-3850
+    - CVE-2010-3850
* econet: fix CVE-2010-3848 + - CVE-2010-3848 + + -- Stefan Bader <stefan.bader@canonical.com> Wed, 01 Dec 2010 08:13:05 +0100 + +linux-ec2 (2.6.32-310.21) lucid-security; urgency=low + + [ Upstream Kernel Changes ] + + * Rebased to 2.6.32-26.48 + + [ Ubuntu: 2.6.32-26.48 ] + + * SAUCE: AF_ECONET prevent kernel stack overflow + - CVE-2010-3848 + * SAUCE: AF_ECONET SIOCSIFADDR ioctl does not check privileges + - CVE-2010-3850 + * SAUCE: AF_ECONET saddr->cookie prevent NULL pointer dereference + - CVE-2010-3849 + + -- Stefan Bader <stefan.bader@canonical.com> Wed, 24 Nov 2010 10:34:37 +0100 + +linux-ec2 (2.6.32-310.20) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to 2.6.32-26.47 + + [ Ubuntu: 2.6.32-26.47 ] + + * Revert "SAUCE: ALSA: HDA: Enable internal mic on Dell E6410 and Dell + E6510" + * Revert "[Config] Added be2net, be2scsi to udebs" + * Revert "(ore-stable) ALSA: hda - Apply ALC269 VAIO fix-up to all Sony + laptops with ALC269" + * Revert "(pre-stable) ALSA: HDA: Correctly apply position_fix quirks for + ATI and VIA controllers" + * Revert "ALSA: hda: Use LPIB for another mainboard" + * Revert "ALSA: hda: Use LPIB for ASUS M2V" + * Revert "ALSA: hda: Use LPIB for an ASUS device" + * Buglink Fixup for reverted unverified fixes + + -- Stefan Bader <stefan.bader@canonical.com> Fri, 19 Nov 2010 17:03:52 +0100 + +linux-ec2 (2.6.32-310.19) lucid-proposed; urgency=low + + [ Stefan Bader ] + + * Rebased to 2.6.32-26.46 + + [ Ubuntu: 2.6.32-26.46 ] + + * SAUCE: ALSA: HDA: Enable internal mic on Dell E6410 and Dell E6510 + - See: #605047, #628961 + * [Config] Added be2net, be2scsi to udebs + - See: #628776 + * Revert "(pre-stable) drm/i915: add PANEL_UNLOCK_REGS definition" + - LP: #645444 + * Revert "(pre-stable) drm/i915: make sure we shut off the panel in eDP + configs" + - LP: #645444 + * Revert "(pre-stable) drm/i915: make sure eDP panel is turned on" + - LP: #645444 + * Revert "(pre-stable) drm/radeon/kms: initialize set_surface_reg reg for + rs600 asic" + - LP: #645371 + * Revert "drm/nouveau: Fix fbcon corruption with font width not divisible + by 8" + - LP: #663176 + * mmc: fix all hangs related to mmc/sd card insert/removal during + suspend/resume + - LP: #477106 + * mmc: build fix: mmc_pm_notify is only available with CONFIG_PM=y + - LP: #477106 + * hwmon: (k8temp) Differentiate between AM2 and ASB1 + - LP: #644694 + * xen: handle events as edge-triggered + - LP: #644694 + * xen: use percpu interrupts for IPIs and VIRQs + - LP: #644694 + * ALSA: hda - Rename iMic to Int Mic on Lenovo NB0763 + - LP: #605101, #644694 + * sata_mv: fix broken DSM/TRIM support (v2) + - LP: #644694 + * x86, tsc, sched: Recompute cyc2ns_offset's during resume from sleep + states + - LP: #644694 + * PCI: MSI: Remove unsafe and unnecessary hardware access + - LP: #644694 + * PCI: MSI: Restore read_msi_msg_desc(); add get_cached_msi_msg_desc() + - LP: #644694 + * sched: kill migration thread in CPU_POST_DEAD instead of CPU_DEAD + - LP: #644694 + * sched: revert stable c6fc81a sched: Fix a race between ttwu() and + migrate_task() + - LP: #644694 + * staging: hv: Fix missing functions for net_device_ops + - LP: #644694 + * staging: hv: Fixed bounce kmap problem by using correct index + - LP: #644694 + * staging: hv: Fixed the value of the 64bit-hole inside ring buffer + - LP: #644694 + * staging: hv: Increased storvsc ringbuffer and max_io_requests + - LP: #644694 + * staging: hv: Fixed lockup problem with bounce_buffer scatter list + - LP: #644694 + * fuse: flush background queue on connection close + - LP: #644694 + * ath9k_hw: fix parsing of HT40 5 GHz CTLs + - LP: #644694 + 
* ocfs2: Fix incorrect checksum validation error + - LP: #644694 + * USB: ehci-ppc-of: problems in unwind + - LP: #644694 + * USB: Fix kernel oops with g_ether and Windows + - LP: #644694 + * USB: CP210x Add new device ID + - LP: #644694 + * USB: cp210x: Add B&G H3000 link cable ID + - LP: #644694 + * USB: ftdi_sio: Added custom PIDs for ChamSys products + - LP: #644694 + * USB: serial: Extra device/vendor ID for mos7840 driver + - LP: #644694 + * usb: serial: mos7840: Add USB ID to support the B&B Electronics + USOPTL4-2P. + - LP: #644694 + * USB: mos7840: fix DMA buffers on stack and endianess bugs + - LP: #644694 + * usb: serial: mos7840: Add USB IDs to support more B&B USB/RS485 + converters. + - LP: #644694 + * USB: Exposing second ACM channel as tty for Nokia S60 phones. + - LP: #644694 + * USB: cdc-acm: add another device quirk + - LP: #644694 + * USB: Expose vendor-specific ACM channel on Nokia 5230 + - LP: #644694 + * USB: cdc-acm: Adding second ACM channel support for various Nokia and + one Samsung phones + - LP: #644694 + * USB: cdc-acm: Add pseudo modem without AT command capabilities + - LP: #644694 + * USB: cdc-acm: Fixing crash when ACM probing interfaces with no endpoint + descriptors. + - LP: #644694 + * ALSA: hda - Fix auto-parser of ALC269vb for HP pin NID 0x21 + - LP: #644694 + * ALSA: seq/oss - Fix double-free at error path of snd_seq_oss_open() + - LP: #644694 + * sysfs: checking for NULL instead of ERR_PTR + - LP: #644694 + * tun: Don't add sysfs attributes to devices without sysfs directories + - LP: #644694 + * oprofile: fix crash when accessing freed task structs + - LP: #644694 + * oprofile, x86: fix init_sysfs error handling + - LP: #644694 + * oprofile, x86: fix init_sysfs() function stub + - LP: #644694 + * HID: usbhid: initialize interface pointers early enough + - LP: #644694 + * HID: fix suspend crash by moving initializations earlier + - LP: #644694 + * libata: skip EH autopsy and recovery during suspend + - LP: #644694 + * tracing: Fix a race in function profile + - LP: #644694 + * tracing: Do not allow llseek to set_ftrace_filter + - LP: #644694 + * tracing: t_start: reset FTRACE_ITER_HASH in case of seek/pread + - LP: #644694 + * irda: off by one + - LP: #644694 + * gcov: fix null-pointer dereference for certain module types + - LP: #644694 + * tmio_mmc: don't clear unhandled pending interrupts + - LP: #644694 + * mmc: fix the use of kunmap_atomic() in tmio_mmc.h + - LP: #644694 + * bounce: call flush_dcache_page() after bounce_copy_vec() + - LP: #644694 + * kernel/groups.c: fix integer overflow in groups_search + - LP: #644694 + * binfmt_misc: fix binfmt_misc priority + - LP: #644694 + * Input: i8042 - fix device removal on unload + - LP: #644694 + * memory hotplug: fix next block calculation in is_removable + - LP: #644694 + * perf: Initialize callchains roots's childen hits + - LP: #644694 + * p54: fix tx feedback status flag check + - LP: #644694 + * ath5k: check return value of ieee80211_get_tx_rate + - LP: #644694 + * wireless extensions: fix kernel heap content leak + - LP: #644694 + * x86, tsc: Fix a preemption leak in restore_sched_clock_state() + - LP: #644694 + * sched: Protect task->cpus_allowed access in sched_getaffinity() + - LP: #644694 + * sched: Protect sched_rr_get_param() access to task->sched_class + - LP: #644694 + * sched: Consolidate select_task_rq() callers + - LP: #644694 + * sched: Remove unused cpu_nr_migrations() + - LP: #644694 + * sched: Remove rq->clock coupling from set_task_cpu() + - LP: #644694 + * sched: Clean up 
ttwu() rq locking + - LP: #644694 + * sched: Sanitize fork() handling + - LP: #644694 + * sched: Remove forced2_migrations stats + - LP: #644694 + * sched: Make wakeup side and atomic variants of completion API irq safe + - LP: #644694 + * sched: Use rcu in sys_sched_getscheduler/sys_sched_getparam() + - LP: #644694 + * sched: Use rcu in sched_get/set_affinity() + - LP: #644694 + * sched: Use rcu in sched_get_rr_param() + - LP: #644694 + * sched: Fix set_cpu_active() in cpu_down() + - LP: #644694 + * sched: Use TASK_WAKING for fork wakups + - LP: #644694 + * sched: Ensure set_task_cpu() is never called on blocked tasks + - LP: #644694 + * sched: Make warning less noisy + - LP: #644694 + * sched: Fix broken assertion + - LP: #644694 + * sched: Fix sched_exec() balancing + - LP: #644694 + * sched: Fix select_task_rq() vs hotplug issues + - LP: #644694 + * sched: Add pre and post wakeup hooks + - LP: #644694 + * sched: Remove the cfs_rq dependency from set_task_cpu() + - LP: #644694 + * sched: Fix hotplug hang + - LP: #644694 + * sched: Fix fork vs hotplug vs cpuset namespaces + - LP: #644694 + * sched: Fix incorrect sanity check + - LP: #644694 + * sched: Fix race between ttwu() and task_rq_lock() + - LP: #644694 + * sched: Extend enqueue_task to allow head queueing + - LP: #644694 + * sched: Implement head queueing for sched_rt + - LP: #644694 + * sched: Queue a deboosted task to the head of the RT prio queue + - LP: #644694 + * sched: set_cpus_allowed_ptr(): Don't use rq->migration_thread after + unlock + - LP: #644694 + * sched: Kill the broken and deadlockable + cpuset_lock/cpuset_cpus_allowed_locked code + - LP: #644694 + * sched: move_task_off_dead_cpu(): Take rq->lock around + select_fallback_rq() + - LP: #644694 + * sched: move_task_off_dead_cpu(): Remove retry logic + - LP: #644694 + * sched: sched_exec(): Remove the select_fallback_rq() logic + - LP: #644694 + * sched: _cpu_down(): Don't play with current->cpus_allowed + - LP: #644694 + * sched: Make select_fallback_rq() cpuset friendly + - LP: #644694 + * sched: Fix TASK_WAKING vs fork deadlock + - LP: #644694 + * sched: Optimize task_rq_lock() + - LP: #644694 + * sched: Fix nr_uninterruptible count + - LP: #644694 + * sched: Fix rq->clock synchronization when migrating tasks + - LP: #644694 + * sched: Remove unnecessary RCU exclusion + - LP: #644694 + * sched: apply RCU protection to wake_affine() + - LP: #644694 + * sched: Cleanup select_task_rq_fair() + - LP: #644694 + * sched: More generic WAKE_AFFINE vs select_idle_sibling() + - LP: #644694 + * sched: Fix vmark regression on big machines + - LP: #644694 + * sched: Fix select_idle_sibling() + - LP: #644694 + * sched: Pre-compute cpumask_weight(sched_domain_span(sd)) + - LP: #644694 + * sched: Fix select_idle_sibling() logic in select_task_rq_fair() + - LP: #644694 + * sched: cpuacct: Use bigger percpu counter batch values for stats + counters + - LP: #644694 + * ALSA: hda - Handle missing NID 0x1b on ALC259 codec + - LP: #644694 + * ALSA: hda - Handle pin NID 0x1a on ALC259/269 + - LP: #644694 + * arm: fix really nasty sigreturn bug + - LP: #644694 + * hwmon: (f75375s) Shift control mode to the correct bit position + - LP: #644694 + * hwmon: (f75375s) Do not overwrite values read from registers + - LP: #644694 + * apm_power: Add missing break statement + - LP: #644694 + * NFS: Fix a typo in nfs_sockaddr_match_ipaddr6 + - LP: #644694 + * SUNRPC: Fix race corrupting rpc upcall + - LP: #644694 + * Linux 2.6.32.22 + - LP: #644694 + * drm/i915: don't access FW_BLC_SELF on 965G + - 
LP: #645444 + * drm/i915: gen3 page flipping fixes + - LP: #645444 + * drm/i915: don't queue flips during a flip pending event + - LP: #645444 + * drm/i915: Hold the spinlock whilst resetting unpin_work along error + path + - LP: #645444 + * drm/i915: handle shared framebuffers when flipping + - LP: #645444 + * drm/i915: add PANEL_UNLOCK_REGS definition + - LP: #645444 + * drm/i915: make sure eDP panel is turned on + - LP: #645444 + * drm/i915: make sure we shut off the panel in eDP configs + - LP: #645444 + * Linux 2.6.32.22+drm33.9 + - LP: #645444 + * drm/radeon/kms/igp: sideport is AMD only + - LP: #645371 + * drm/radeon/kms: flush HDP cache on GART table updates. + - LP: #645371 + * drm/radeon/kms/r7xx: add workaround for hw issue with HDP flush + - LP: #645371 + * drm/i915: Check overlay stride errata for i830 and i845 + - LP: #645371 + * i915: fix ironlake edp panel setup (v4) + - LP: #645371 + * drm/radeon/kms: add additional quirk for Acer rv620 laptop + - LP: #645371 + * drm/i915: fixup pageflip ringbuffer commands for i8xx + - LP: #645371 + * drm/i915: i8xx also doesn't like multiple oustanding pageflips + - LP: #645371 + * drm/i915/edp: Flush the write before waiting for PLLs + - LP: #645371 + * drm/radeon/kms: disable MSI on IGP chips + - LP: #645371 + * drm/radeon/kms: don't enable MSIs on AGP boards + - LP: #645371 + * drm/radeon/kms: fix typo in radeon_compute_pll_gain + - LP: #645371 + * drm/radeon/kms/DCE3+: switch pads to ddc mode when going i2c + - LP: #645371 + * drm/radeon/kms: fix sideport detection on newer rs880 boards + - LP: #645371 + * drm/i915: Don't touch PORT_HOTPLUG_EN in intel_dp_detect() + - LP: #645371 + * drm/i915: Kill dangerous pending-flip debugging + - LP: #645371 + * drm/radeon/kms: release AGP bridge at suspend + - LP: #645371 + * drm/radeon/kms: initialize set_surface_reg reg for rs600 asic + - LP: #645371 + * drm/radeon/kms: fix a regression on r7xx AGP due to the HDP flush fix + - LP: #645371 + * Linux 2.6.32.22+drm33.10 + - LP: #645371 + * USB: serial/mos*: prevent reading uninitialized stack memory + - LP: #649483 + * sparc: Provide io{read,write}{16,32}be(). + - LP: #649483 + * gro: fix different skb headrooms + - LP: #649483 + * gro: Re-fix different skb headrooms + - LP: #649483 + * irda: Correctly clean up self->ias_obj on irda_bind() failure. + - LP: #649483 + * tcp: select(writefds) don't hang up when a peer close connection + - LP: #649483 + * tcp: Combat per-cpu skew in orphan tests. + - LP: #649483 + * tcp: fix three tcp sysctls tuning + - LP: #649483 + * bridge: Clear IPCB before possible entry into IP stack + - LP: #649483 + * bridge: Clear INET control block of SKBs passed into ip_fragment(). + - LP: #649483 + * net: Fix oops from tcp_collapse() when using splice() + - LP: #649483 + * rds: fix a leak of kernel memory + - LP: #649483 + * tcp: Prevent overzealous packetization by SWS logic. + - LP: #649483 + * UNIX: Do not loop forever at unix_autobind(). + - LP: #649483 + * r8169: fix random mdio_write failures + - LP: #649483 + * r8169: fix mdio_read and update mdio_write according to hw specs + - LP: #649483 + * sparc64: Get rid of indirect p1275 PROM call buffer. 
+ - LP: #649483 + * drivers/net/usb/hso.c: prevent reading uninitialized memory + - LP: #649483 + * drivers/net/cxgb3/cxgb3_main.c: prevent reading uninitialized stack + memory + - LP: #649483 + * drivers/net/eql.c: prevent reading uninitialized stack memory + - LP: #649483 + * bonding: correctly process non-linear skbs + - LP: #649483 + * Staging: vt6655: fix buffer overflow + - LP: #649483 + * net/llc: make opt unsigned in llc_ui_setsockopt() + - LP: #649483 + * pid: make setpgid() system call use RCU read-side critical section + - LP: #649483 + * sched: Fix user time incorrectly accounted as system time on 32-bit + - LP: #649483 + * oprofile: Add Support for Intel CPU Family 6 / Model 22 (Intel Celeron + 540) + - LP: #649483 + * char: Mark /dev/zero and /dev/kmem as not capable of writeback + - LP: #649483 + * drivers/pci/intel-iommu.c: fix build with older gcc's + - LP: #649483 + * drivers/video/sis/sis_main.c: prevent reading uninitialized stack + memory + - LP: #649483 + * percpu: fix pcpu_last_unit_cpu + - LP: #649483 + * aio: check for multiplication overflow in do_io_submit + - LP: #649483 + * inotify: send IN_UNMOUNT events + - LP: #649483 + * SCSI: mptsas: fix hangs caused by ATA pass-through + - LP: #649483 + * ext4: Fix remaining racy updates of EXT4_I(inode)->i_flags + - LP: #649483 + * IA64: fix siglock + - LP: #649483 + * IA64: Optimize ticket spinlocks in fsys_rt_sigprocmask + - LP: #649483 + * KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() + - LP: #649483 + * KEYS: Fix bug in keyctl_session_to_parent() if parent has no session + keyring + - LP: #649483 + * xfs: prevent reading uninitialized stack memory + - LP: #649483 + * drivers/video/via/ioctl.c: prevent reading uninitialized stack memory + - LP: #649483 + * ACPI: disable _OSI(Windows 2009) on Asus K50IJ + - LP: #649483 + * bnx2: Fix netpoll crash. + - LP: #649483 + * bnx2: Fix hang during rmmod bnx2. + - LP: #649483 + * AT91: change dma resource index + - LP: #649483 + * cxgb3: fix hot plug removal crash + - LP: #649483 + * mm: page allocator: drain per-cpu lists after direct reclaim allocation + fails + - LP: #649483 + * mm: page allocator: calculate a better estimate of NR_FREE_PAGES when + memory is low and kswapd is awake + - LP: #649483 + * mm: page allocator: update free page counters after pages are placed on + the free list + - LP: #649483 + * guard page for stacks that grow upwards + - LP: #649483 + * Fix unprotected access to task credentials in waitid() + - LP: #649483 + * sctp: Do not reset the packet during sctp_packet_config(). + - LP: #649483 + * 3c503: Fix IRQ probing + - LP: #649483 + * asix: fix setting mac address for AX88772 + - LP: #649483 + * dasd: use correct label location for diag fba disks + - LP: #649483 + * clocksource: sh_tmu: compute mult and shift before registration + - LP: #649483 + * gro: Fix bogus gso_size on the first fraglist entry + - LP: #649483 + * hostap_pci: set dev->base_addr during probe + - LP: #649483 + * inotify: fix inotify oneshot support + - LP: #649483 + * Input: add compat support for sysfs and /proc capabilities output + - LP: #649483 + * MIPS: Quit using undefined behavior of ADDU in 64-bit atomic + operations. + - LP: #649483 + * MIPS: Set io_map_base for several PCI bridges lacking it + - LP: #649483 + * MIPS: uasm: Add OR instruction. 
+ - LP: #649483 + * pata_pdc202xx_old: fix UDMA mode for Promise UDMA33 cards + - LP: #649483 + * pata_pdc202xx_old: fix UDMA mode for PDC2026x chipsets + - LP: #649483 + * MIPS: Sibyte: Fix M3 TLB exception handler workaround. + - LP: #649483 + * sis-agp: Remove SIS 760, handled by amd64-agp + - LP: #649483 + * alpha: Fix printk format errors + - LP: #649483 + * x86: Add memory modify constraints to xchg() and cmpxchg() + - LP: #649483 + * Linux 2.6.32.23 + - LP: #649483 + * (pre-stable) [SCSI] megaraid_sas: Add new megaraid SAS 2 controller + support to the driver + - LP: #546091 + * (pre-stable) [SCSI] megaraid_sas: allocate the application cmds to sas2 + controller + - LP: #546091 + * Xen: fix typo in previous patch + - LP: #655456 + * Linux 2.6.32.24 + - LP: #655456 + * (ore-stable) ALSA: hda - Apply ALC269 VAIO fix-up to all Sony laptops + with ALC269 + - See: #546769, #598938, #637291, #642892, #648871, #655386 + * (pre-stable) ALSA: HDA: Correctly apply position_fix quirks for ATI and + VIA controllers + - See: #465942, #580749, #587546 + * (pre-stable) ACPI: enable repeated PCIEXP wakeup by clearing + PCIEXP_WAKE_STS on resume + - LP: #613381 + * i915: return -EFAULT if copy_to_user fails + - LP: #663176 + * i915_gem: return -EFAULT if copy_to_user fails + - LP: #663176 + * drm/i915: Prevent double dpms on + - LP: #663176 + * drm: Only decouple the old_fb from the crtc is we call mode_set* + - LP: #663176 + * drm/radeon/kms: fix potential segfault in r600_ioctl_wait_idle + - LP: #663176 + * drm/i915: Unset cursor if out-of-bounds upon mode change (v4) + - LP: #586325, #663176 + * drm/i915: disable FBC when more than one pipe is active + - LP: #663176 + * drm/radeon/kms: fix macbookpro connector quirk + - LP: #663176 + * drm/nouveau: use ALIGN instead of open coding it + - LP: #663176 + * drm/nouveau: Fix fbcon corruption with font width not divisible by 8 + - LP: #663176 + * drm/i915,agp/intel: Add second set of PCI-IDs for B43 + - LP: #640214, #663176 + * Linux 2.6.32.24+drm33.11 + - LP: #663176 + + -- Stefan Bader <stefan.bader@canonical.com> Tue, 26 Oct 2010 15:16:00 -0400 + +linux-ec2 (2.6.32-309.18) lucid-security; urgency=low + + [ Stefan Bader ] + + * Rebased to 2.6.32-25.45 + + [ Ubuntu: 2.6.32-25.45 ] + + * v4l: disable dangerous buggy compat function + - CVE-2010-2963 + * Local privilege escalation vulnerability in RDS sockets + - CVE-2010-3904 + * mm: (pre-stable) Move vma_stack_continue into mm.h + - LP: #646114 + * net sched: fix some kernel memory leaks + - CVE-2010-2942 + * irda: Correctly clean up self->ias_obj on irda_bind() failure. + - CVE-2010-2954 + * wireless extensions: fix kernel heap content leak + - CVE-2010-2955 + * KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() + - CVE-2010-2960 + * KEYS: Fix bug in keyctl_session_to_parent() if parent has no session + keyring + - CVE-2010-2960 + * aio: check for multiplication overflow in do_io_submit + - CVE-2010-3067 + * xfs: prevent reading uninitialized stack memory + - CVE-2010-3078 + * ALSA: seq/oss - Fix double-free at error path of snd_seq_oss_open() + - CVE-2010-3080 + * niu: Fix kernel buffer overflow for ETHTOOL_GRXCLSRLALL + - CVE-2010-3084 + * rose: Fix signedness issues wrt. digi count. + - CVE-2010-3310 + * sctp: Do not reset the packet during sctp_packet_config(). 
+ - CVE-2010-3432 + * Fix pktcdvd ioctl dev_minor range check + - CVE-2010-3437 + * ALSA: prevent heap corruption in snd_ctl_new() + - CVE-2010-3442 + * net sched: fix kernel leak in act_police + - CVE-2010-3477 + * Fix out-of-bounds reading in sctp_asoc_get_hmac() + - CVE-2010-3705 + * ocfs2: Don't walk off the end of fast symlinks. + - CVE-2010-NNN2 + + -- Stefan Bader <stefan.bader@canonical.com> Mon, 18 Oct 2010 10:01:36 +0200 + +linux-ec2 (2.6.32-309.17) lucid-proposed; urgency=low + + [ Brad Figg ] + + * Rebased to 2.6.32-25.44 + + [ John Johansen ] + + * SAUCE: Revert "sched: update load count only once per cpu in 10 tick + update window" + - LP: #574910 + + [ Ubuntu: 2.6.32-25.44 ] + + * SAUCE: (no-up) Modularize vesafb -- fix initialization + - LP: #611471 + * Revert "SAUCE: sync before umount to reduce time taken by ext4 umount" + - LP: #543617, #585092 + * Revert "SAUCE: tulip: Let dmfe handle davicom on non-sparc" + - LP: #607824 + * [Config] Added ums-cypress to udeb + - LP: #576066 + * Revert "PCI quirk: Disable MSI on VIA K8T890 systems" + - LP: #607824 + * Revert "PCI quirks: disable msi on AMD rs4xx internal gfx bridges" + - LP: #607824 + * Revert "(pre-stable) Input: psmouse - reset all types of mice before + reconnecting" + - LP: #607824 + * Revert "jbd: jbd-debug and jbd2-debug should be writable" + - LP: #607824 + * Revert "ext4: Make fsync sync new parent directories in no-journal + mode" + - LP: #615548 + * Revert "ext4: Fix compat EXT4_IOC_ADD_GROUP" + - LP: #615548 + * Revert "ext4: Conditionally define compat ioctl numbers" + - LP: #615548 + * Revert "ext4: restart ext4_ext_remove_space() after transaction + restart" + - LP: #615548 + * Revert "ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted" + - LP: #615548 + * Revert "ext4: Avoid crashing on NULL ptr dereference on a filesystem + error" + - LP: #615548 + * Revert "ext4: Use bitops to read/modify i_flags in struct + ext4_inode_info" + - LP: #615548 + * Revert "ext4: Show journal_checksum option" + - LP: #615548 + * Revert "ext4: check for a good block group before loading buddy pages" + - LP: #615548 + * Revert "ext4: Prevent creation of files larger than RLIMIT_FSIZE using + fallocate" + - LP: #615548 + * Revert "ext4: Remove extraneous newlines in ext4_msg() calls" + - LP: #615548 + * Revert "ext4: init statistics after journal recovery" + - LP: #615548 + * Revert "ext4: clean up inode bitmaps manipulation in ext4_free_inode" + - LP: #615548 + * Revert "ext4: Do not zero out uninitialized extents beyond i_size" + - LP: #615548 + * Revert "ext4: don't scan/accumulate more pages than mballoc will + allocate" + - LP: #615548 + * Revert "ext4: stop issuing discards if not supported by device" + - LP: #615548 + * Revert "ext4: check s_log_groups_per_flex in online resize code" + - LP: #615548 + * Revert "ext4: fix quota accounting in case of fallocate" + - LP: #615548 + * Revert "ext4: allow defrag (EXT4_IOC_MOVE_EXT) in 32bit compat mode" + - LP: #615548 + * Revert "ext4: rename ext4_mb_release_desc() to ext4_mb_unload_buddy()" + - LP: #615548 + * Revert "ext4: Remove unnecessary call to ext4_get_group_desc() in + mballoc" + - LP: #615548 + * Revert "ext4: fix memory leaks in error path handling of + ext4_ext_zeroout()" + - LP: #615548 + * Revert "ext4: check missed return value in ext4_sync_file()" + - LP: #615548 + * Revert "ext4: Issue the discard operation *before* releasing the blocks + to be reused" + - LP: #615548 + * Revert "ext4: Fix buffer head leaks after calls to + ext4_get_inode_loc()" + - LP: #615548 + * Revert "ext4: 
Fix possible lost inode write in no journal mode" + - LP: #615548 + * Revert "ext4: Fixed inode allocator to correctly track a flex_bg's + used_dirs" + - LP: #615548 + * Revert "ext4: Fix estimate of # of blocks needed to write + indirect-mapped files" + - LP: #615548 + * Revert "ext4: Code cleanup for EXT4_IOC_MOVE_EXT ioctl" + - LP: #615548 + * Revert "ext4: Fix the NULL reference in double_down_write_data_sem()" + - LP: #615548 + * Revert "ext4: Fix insertion point of extent in + mext_insert_across_blocks()" + - LP: #615548 + * Revert "ext4: make "offset" consistent in ext4_check_dir_entry()" + - LP: #615548 + * Revert "ext4: Handle non empty on-disk orphan link" + - LP: #615548 + * Revert "ext4: explicitly remove inode from orphan list after failed + direct io" + - LP: #615548 + * Revert "ext4: fix error handling in migrate" + - LP: #615548 + * Revert "ext4: Fix fencepost error in chosing choosing group vs file + preallocation." + - LP: #615548 + * Revert "ext4: Add flag to files with blocks intentionally past EOF" + - LP: #615548 + * Revert "ext4: Fix BUG_ON at fs/buffer.c:652 in no journal mode" + - LP: #615548 + * Revert "ext4: Use bitops to read/modify EXT4_I(inode)->i_state" + - LP: #615548 + * Revert "ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag" + - LP: #615548 + * Revert "ext4: Fix quota accounting error with fallocate" + - LP: #615548 + * Revert "ext4: Handle -EDQUOT error on write" + - LP: #615548 + * Revert "ext4: Calculate metadata requirements more accurately" + - LP: #615548 + * Revert "ext4: Fix accounting of reserved metadata blocks" + - LP: #615548 + * Revert "ext4: Patch up how we claim metadata blocks for quota purposes" + - LP: #615548 + * Revert "ext4: Ensure zeroout blocks have no dirty metadata" + - LP: #615548 + * Revert "ext4: return correct wbc.nr_to_write in ext4_da_writepages" + - LP: #615548 + * Revert "ext4: Eliminate potential double free on error path" + - LP: #615548 + * Revert "ext4, jbd2: Add barriers for file systems with exernal + journals" + - LP: #615548 + * Revert "ext4: replace BUG() with return -EIO in ext4_ext_get_blocks" + - LP: #615548 + * Revert "ext4: Fix potential quota deadlock" + - LP: #615548 + * Revert "ext4: don't return to userspace after freezing the fs with a + mutex held" + - LP: #615548 + * ixgbe: Fix return of invalid txq + - LP: #607824 + * oprofile/x86: fix uninitialized counter usage during cpu hotplug + - LP: #607824 + * oprofile: remove double ring buffering + - LP: #607824 + * cpumask: fix compat getaffinity + - LP: #607824 + * NFSD: don't report compiled-out versions as present + - LP: #607824 + * sata_nv: use ata_pci_sff_activate_host() instead of ata_host_activate() + - LP: #607824 + * ARCNET: Limit com20020 PCI ID matches for SOHARD cards + - LP: #607824 + * rtl8180: fix tx status reporting + - LP: #607824 + * Staging: add Add Sitecom WL-349 to rtl8192su + - LP: #607824 + * staging: vt6655: Fix kernel BUG on driver wpa initialization + - LP: #607824 + * Fix racy use of anon_inode_getfd() in perf_event.c + - LP: #607824 + * posix_timer: Fix error path in timer_create + - LP: #607824 + * libata: disable ATAPI AN by default + - LP: #607824 + * libata: don't flush dcache on slab pages + - LP: #607824 + * mutex: Fix optimistic spinning vs. 
BKL + - LP: #607824 + * ALSA: hda: Fix model quirk for Dell M1730 + - LP: #576160, #607824 + * ALSA: hda: Use LPIB for Toshiba A100-259 + - LP: #549560, #607824 + * ALSA: hda: Use LPIB for Acer Aspire 5110 + - LP: #583983, #607824 + * ALSA: hda: Use LPIB for Sony VPCS11V9E + - LP: #586347, #607824 + * ALSA: hda: Use LPIB for a Shuttle device + - LP: #551949, #607824 + * ACPI: video: fix acpi_backlight=video + - LP: #573120, #607824 + * V4L/DVB: gspca - stv06xx: Remove the 046d:08da from the stv06xx driver + - LP: #607824 + * HID: Add the GYR4101US USB ID to hid-gyration + - LP: #607824 + * ar9170usb: add a couple more USB IDs + - LP: #607824 + * ar9170usb: fix panic triggered by undersized rxstream buffer + - LP: #607824 + * USB: visor: fix memory leak + - LP: #607824 + * USB: CP210x New Device IDs 11 New device IDs + - LP: #607824 + * USB: kobil: fix memory leak + - LP: #607824 + * USB: option: add PID for ZTE product + - LP: #607824 + * USB: option.c: Add Pirelli VID/PID and indicate Pirelli's modem + interface is 0xff + - LP: #607824 + * USB: serial: option: add cinterion device id + - LP: #607824 + * USB: option.c: OLIVETTI OLICARD100 support + - LP: #607824 + * USB: ir-usb: fix double free + - LP: #607824 + * USB: kl5usb105: fix memory leak + - LP: #607824 + * USB: mxc: gadget: Fix bitfield for calculating maximum packet size + - LP: #607824 + * USB: unusual-dev: Add bad sense flag for Appotech ax203 based picture + frames + - LP: #607824 + * USB: EHCI: clear PHCD before resuming + - LP: #607824 + * USB: xhci: Fix issue with set interface after stall. + - LP: #607824 + * USB: xhci: Fix check for room on the ring. + - LP: #607824 + * USB: xHCI: Fix wrong usage of macro TRB_TYPE + - LP: #607824 + * mac80211: give warning if building w/out rate ctrl algorithm + - LP: #607824 + * mac80211: Fix robust management frame handling (MFP) + - LP: #607824 + * mac80211: fix rts threshold check + - LP: #607824 + * drivers/base/cpu.c: fix the output from /sys/devices/system/cpu/offline + - LP: #607824 + * can: Fix SJA1000 command register writes on SMP systems + - LP: #607824 + * PCI quirk: Disable MSI on VIA K8T890 systems + - LP: #607824 + * PCI quirks: disable msi on AMD rs4xx internal gfx bridges + - LP: #607824 + * PCI: Disable MSI for MCP55 on P5N32-E SLI + - LP: #607824 + * virtio_net: Make delayed refill more reliable + - LP: #607824 + * mm: hugetlb: fix clear_huge_page() + - LP: #607824 + * powerpc: Fix handling of strncmp with zero len + - LP: #607824 + * powerpc/pseries: Only call start-cpu when a CPU is stopped + - LP: #607824 + * powerpc/pseries: Make query_cpu_stopped callable outside hotplug cpu + - LP: #607824 + * powerpc/oprofile: fix potential buffer overrun in op_model_cell.c + - LP: #607824 + * writeback: disable periodic old data writeback for + !dirty_writeback_centisecs + - LP: #607824 + * md/raid1: fix counting of write targets. 
+ - LP: #607824 + * md: Fix read balancing in RAID1 and RAID10 on drives > 2TB + - LP: #607824 + * md: set mddev readonly flag on blkdev BLKROSET ioctl + - LP: #607824 + * x86/amd-iommu: Fix suspend/resume with IOMMU + - LP: #607824 + * exofs: confusion between kmap() and kmap_atomic() api + - LP: #607824 + * mn10300: set ARCH_KMALLOC_MINALIGN + - LP: #607824 + * m68k: set ARCH_KMALLOC_MINALIGN + - LP: #607824 + * rtc-cmos: do dev_set_drvdata() earlier in the initialization + - LP: #607824 + * rtc: s3c: initialize driver data before using it + - LP: #607824 + * frv: set ARCH_KMALLOC_MINALIGN + - LP: #607824 + * xtensa: set ARCH_KMALLOC_MINALIGN + - LP: #607824 + * Blackfin: set ARCH_KMALLOC_MINALIGN + - LP: #607824 + * tmpfs: insert tmpfs cache pages to inactive list at first + - LP: #607824 + * mlx4_core: Fix possible chunk sg list overflow in mlx4_alloc_icm() + - LP: #607824 + * ARM: 6166/1: Proper prefetch abort handling on pre-ARMv6 + - LP: #607824 + * ARM: 6164/1: Add kto and kfrom to input operands list. + - LP: #607824 + * ARM: 6146/1: sa1111: Prevent deadlock in resume path + - LP: #607824 + * ARM: 6144/1: TCM memory bug freeing bug + - LP: #607824 + * ARM: VFP: Fix vfp_put_double() for d16-d31 + - LP: #607824 + * ASoC: Fix dB scales for WM835x + - LP: #607824 + * ASoC: Fix dB scales for WM8400 + - LP: #607824 + * ASoC: Fix dB scales for WM8990 + - LP: #607824 + * hwmon: (ltc4245) Read only one GPIO pin + - LP: #607824 + * signals: check_kill_permission(): don't check creds if + same_thread_group() + - LP: #607824 + * do_generic_file_read: clear page errors when issuing a fresh read of + the page + - LP: #607824 + * ipmi: handle run_to_completion properly in deliver_recv_msg() + - LP: #607824 + * x86, setup: Phoenix BIOS fixup is needed on Dell Inspiron Mini 1012 + - LP: #607824 + * xen: ensure timer tick is resumed even on CPU driving the resume + - LP: #607824 + * xen: avoid allocation causing potential swap activity on the resume + path + - LP: #607824 + * ALSA: hda: Use LPIB for an ASUS device + - See: #465942, #607824 + * ALSA: hda: Use mb31 quirk for an iMac model + - LP: #542550, #607824 + * ALSA: hda: Use LPIB for another mainboard + - See: #580749, #607824 + * ALSA: hda: Use LPIB for ASUS M2V + - See: #587546, #607824 + * Staging: comedi - correct parameter gainlkup for DAQCard-6024E in + driver ni_mio_cs.c + - LP: #607824 + * clocksource: sh_cmt: compute mult and shift before registration + - LP: #607824 + * ath5k: retain promiscuous setting + - LP: #607824 + * ahci: add pci quirk for JMB362 + - LP: #607824 + * firewire: core: check for 1394a compliant IRM, fix inaccessibility of + Sony camcorder + - LP: #607824 + * perf_events: Fix resource leak in x86 __hw_perf_event_init() + - LP: #607824 + * sata_nv: don't diddle with nIEN on mcp55 + - LP: #607824 + * sata_via: magic vt6421 fix for transmission problems w/ WD drives + - LP: #422994, #607824 + * USB: mos7840: fix null-pointer dereference + - LP: #607824 + * USB: xhci: Wait for host to start running. + - LP: #607824 + * USB: xhci: Wait for controller to be ready after reset. 
+ - LP: #607824 + * USB: ftdi_sio: fix DTR/RTS line modes + - LP: #607824 + * USB: cdc-acm: fix resource reclaim in error path of acm_probe + - LP: #607824 + * p54usb: Add device ID for Dell WLA3310 USB + - LP: #607824 + * atl1e: Allow TX checksum offload and TSO to be disabled and reenabled + - LP: #607824 + * via-velocity: Give RX descriptors to the NIC later on open or MTU + change + - LP: #607824 + * dmfe/tulip: Let dmfe handle DM910x except for SPARC on-board chips + - LP: #607824 + * Documentation/3c509: document ethtool support + - LP: #607824 + * wireless: report reasonable bitrate for MCS rates through wext + - LP: #607824 + * ath9k: add support for 802.11n bonded out AR2427 + - LP: #607824 + * wrong type for 'magic' argument in simple_fill_super() + - LP: #607824 + * iwlwifi: check for aggregation frame and queue + - LP: #607824 + * iwlwifi: recalculate average tpt if not current + - LP: #607824 + * iwlwifi: update supported PCI_ID list for 5xx0 series + - LP: #607824 + * wl1251: fix a memory leak in probe + - LP: #607824 + * vfs: add NOFOLLOW flag to umount(2) + - LP: #607824 + * l2tp: Fix oops in pppol2tp_xmit + - LP: #607824 + * ucc_geth: Fix empty TX queue processing + - LP: #607824 + * ucc_geth: Fix netdev watchdog triggering on link changes + - LP: #607824 + * ucc_geth: Fix full TX queue processing + - LP: #607824 + * Input: psmouse - reset all types of mice before reconnecting + - LP: #607824 + * KVM: s390: Fix possible memory leak of in kvm_arch_vcpu_create() + - LP: #607824 + * KVM: PPC: Do not create debugfs if fail to create vcpu + - LP: #607824 + * x86, paravirt: Add a global synchronization point for pvclock + - LP: #607824 + * KVM: Don't allow lmsw to clear cr0.pe + - LP: #607824 + * KVM: x86: Check LMA bit before set_efer + - LP: #607824 + * KVM: MMU: Segregate shadow pages with different cr0.wp + - LP: #607824 + * KVM: VMX: enable VMXON check with SMX enabled (Intel TXT) + - LP: #607824 + * KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots + - LP: #607824 + * KVM: Fix wallclock version writing race + - LP: #607824 + * KVM: x86: Add missing locking to arch specific vcpu ioctls + - LP: #607824 + * KVM: x86: Inject #GP with the right rip on efer writes + - LP: #607824 + * jbd: jbd-debug and jbd2-debug should be writable + - LP: #607824 + * parisc: clear floating point exception flag on SIGFPE signal + - LP: #607824 + * dm snapshot: simplify sector_to_chunk expression + - LP: #607824 + * KEYS: Return more accurate error codes + - LP: #607824 + * qla2xxx: Disable MSI on qla24xx chips other than QLA2432. + - LP: #607824 + * Linux 2.6.32.16 + - LP: #607824 + * drm/i915: Fix 82854 PCI ID, and treat it like other 85X + - LP: #607824 + * drm/i915: Reject bind_to_gtt() early if object > aperture + - LP: #607824 + * drm/edid: Fix 1024x768@85Hz + - LP: #607824 + * drm/radeon/kms: reset ddc_bus in object header parsing + - LP: #607824 + * drm/radeon/kms/atom: fix typo in LVDS panel info parsing + - LP: #607824 + * drm/radeon: r100/r200 ums: block ability for userspace app to trash 0 + page and beyond + - LP: #607824 + * drm/radeon: fix the r100/r200 ums block 0 page fix + - LP: #607824 + * drm/i915: Rebind bo if currently bound with incorrect alignment. 
+ - LP: #607824 + * Linux 2.6.32.16+drm33.6 + - LP: #607824 + * virtio-pci: disable msi at startup + - LP: #615548 + * hwmon: (k8temp) Bypass core swapping on single-core processors + - LP: #615548 + * hwmon: (k8temp) Fix temperature reporting for ASB1 processor revisions + - LP: #615548 + * hwmon: (coretemp) Properly label the sensors + - LP: #615548 + * hwmon: (coretemp) Skip duplicate CPU entries + - LP: #615548 + * hwmon: (it87) Fix in7 on IT8720F + - LP: #615548 + * cifs: remove bogus first_time check in NTLMv2 session setup code + - LP: #615548 + * cifs: don't attempt busy-file rename unless it's in same directory + - LP: #615548 + * CIFS: Fix a malicious redirect problem in the DNS lookup code + - LP: #615548 + * ALSA: hda - Add Macbook 5,2 quirk + - LP: #463178, #615548 + * cpmac: do not leak struct net_device on phy_connect errors + - LP: #615548 + * sky2: enable rx/tx in sky2_phy_reinit() + - LP: #615548 + * sparc: Fix use of uid16_t and gid16_t in asm/stat.h + - LP: #615548 + * math-emu: correct test for downshifting fraction in _FP_FROM_INT() + - LP: #615548 + * NFSv4: Fix an embarassing typo in encode_attrs() + - LP: #615548 + * NFSv4: Ensure that /proc/self/mountinfo displays the minor version + number + - LP: #615548 + * SUNRPC: Fix a re-entrancy bug in xs_tcp_read_calldir() + - LP: #615548 + * ath5k: drop warning on jumbo frames + - LP: #615548 + * ath9k: re-enable ps by default for new single chip families + - LP: #615548 + * ath9k: Avoid corrupt frames being forwarded to mac80211. + - LP: #615548 + * hostap: Protect against initialization interrupt + - LP: #615548 + * TPM: ReadPubEK output struct fix + - LP: #615548 + * fb: fix colliding defines for fb flags. + - LP: #615548 + * iwlwifi: cancel scan watchdog in iwl_bg_abort_scan + - LP: #615548 + * mac80211: do not wip out old supported rates + - LP: #615548 + * mac80211: Handle mesh action frames in ieee80211_rx_h_action + - LP: #615548 + * Btrfs: fix checks in BTRFS_IOC_CLONE_RANGE + - LP: #615548 + * p54pci: add Symbol AP-300 minipci adapters pciid + - LP: #615548 + * dynamic debug: move ddebug_remove_module() down into free_module() + - LP: #615548 + * tpm_tis: fix subsequent suspend failures + - LP: #615548 + * ipvs: Add missing locking during connection table hashing and unhashing + - LP: #615548 + * netfilter: ip6t_REJECT: fix a dst leak in ipv6 REJECT + - LP: #615548 + * SCSI: aacraid: Eliminate use after free + - LP: #615548 + * amd64-agp: Probe unknown AGP devices the right way + - LP: #615548 + * perf: Resurrect flat callchains + - LP: #615548 + * x86: Fix vsyscall on gcc 4.5 with -Os + - LP: #615548 + * x86, Calgary: Increase max PHB number + - LP: #615548 + * x86, Calgary: Limit the max PHB number to 256 + - LP: #615548 + * sched: Prevent compiler from optimising the sched_avg_update() loop + - LP: #615548 + * sched: Fix over-scheduling bug + - LP: #615548 + * genirq: Deal with desc->set_type() changing desc->chip + - LP: #615548 + * serial: cpm_uart: implement the cpm_uart_early_write() function for + console poll + - LP: #615548 + * cmd640: fix kernel oops in test_irq() method + - LP: #615548 + * ide: Fix IDE taskfile with cfq scheduler + - LP: #615548 + * net/core: neighbour update Oops + - LP: #615548 + * tcp: fix crash in tcp_xmit_retransmit_queue + - LP: #615548 + * rtc: fix ds1388 time corruption + - LP: #615548 + * ethtool: Fix potential kernel buffer overflow in ETHTOOL_GRXCLSRLALL + - LP: #615548 + * sdhci-s3c: add missing remove function + - LP: #615548 + * ASoC: Remove duplicate AUX definition 
from WM8776 + - LP: #615548 + * x86: Fix x2apic preenabled system with kexec + - LP: #615548 + * IPoIB: Fix world-writable child interface control sysfs attributes + - LP: #615548 + * Input: i8042 - add Gigabyte Spring Peak to dmi_noloop_table + - LP: #580664, #615548 + * Input: twl40300-keypad - fix handling of "all ground" rows + - LP: #615548 + * ARM: 6201/1: RealView: Do not use outer_sync() on ARM11MPCore boards + with L220 + - LP: #615548 + * ARM: 6226/1: fix kprobe bug in ldr instruction emulation + - LP: #615548 + * x86: Do not try to disable hpet if it hasn't been initialized before + - LP: #615548 + * staging: rtl8192su: add USB VID/PID for HWNUm-300 + - LP: #615548 + * Staging: rtl8192su: add USB ID for 0bda:8171 + - LP: #615548 + * USB: obey the sysfs power/wakeup setting + - LP: #615548 + * USB: g_serial: don't set low_latency flag + - LP: #615548 + * USB: g_serial: fix tty cleanup on unload + - LP: #615548 + * USB: option: add support for 1da5:4518 + - LP: #615548 + * USB: Add PID for Sierra 250U to drivers/usb/serial/sierra.c + - LP: #599569, #615548 + * USB: ftdi_sio: support for Signalyzer tools based on FTDI chips + - LP: #615548 + * USB: option: Add support for AMOI Skypephone S2 + - LP: #615548 + * USB: adds Artisman USB dongle to list of quirky devices + - LP: #615548 + * USB: sisusbvga: Fix for USB 3.0 + - LP: #615548 + * USB: add quirk for Broadcom BT dongle + - LP: #615548 + * USB: FTDI: Add support for the RT System VX-7 radio programming cable + - LP: #615548 + * ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH + - LP: #615548 + * ext4: Fix potential quota deadlock + - LP: #615548 + * ext4: replace BUG() with return -EIO in ext4_ext_get_blocks + - LP: #615548 + * ext4, jbd2: Add barriers for file systems with exernal journals + - LP: #615548 + * ext4: Eliminate potential double free on error path + - LP: #615548 + * ext4: return correct wbc.nr_to_write in ext4_da_writepages + - LP: #615548 + * ext4: Ensure zeroout blocks have no dirty metadata + - LP: #615548 + * ext4: Patch up how we claim metadata blocks for quota purposes + - LP: #615548 + * ext4: Fix accounting of reserved metadata blocks + - LP: #615548 + * ext4: Calculate metadata requirements more accurately + - LP: #615548 + * ext4: Handle -EDQUOT error on write + - LP: #615548 + * ext4: Fix quota accounting error with fallocate + - LP: #615548 + * ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag + - LP: #615548 + * ext4: Use bitops to read/modify EXT4_I(inode)->i_state + - LP: #615548 + * ext4: Fix BUG_ON at fs/buffer.c:652 in no journal mode + - LP: #615548 + * ext4: Add flag to files with blocks intentionally past EOF + - LP: #615548 + * ext4: Fix fencepost error in chosing choosing group vs file + preallocation. 
+ - LP: #615548 + * ext4: fix error handling in migrate + - LP: #615548 + * ext4: explicitly remove inode from orphan list after failed direct io + - LP: #615548 + * ext4: Handle non empty on-disk orphan link + - LP: #615548 + * ext4: make "offset" consistent in ext4_check_dir_entry() + - LP: #615548 + * ext4: Fix insertion point of extent in mext_insert_across_blocks() + - LP: #615548 + * ext4: Fix the NULL reference in double_down_write_data_sem() + - LP: #615548 + * ext4: Code cleanup for EXT4_IOC_MOVE_EXT ioctl + - LP: #615548 + * ext4: Fix estimate of # of blocks needed to write indirect-mapped files + - LP: #615548 + * ext4: Fixed inode allocator to correctly track a flex_bg's used_dirs + - LP: #615548 + * ext4: Fix possible lost inode write in no journal mode + - LP: #615548 + * ext4: Fix buffer head leaks after calls to ext4_get_inode_loc() + - LP: #615548 + * ext4: Issue the discard operation *before* releasing the blocks to be + reused + - LP: #615548 + * ext4: check missed return value in ext4_sync_file() + - LP: #615548 + * ext4: fix memory leaks in error path handling of ext4_ext_zeroout() + - LP: #615548 + * ext4: Remove unnecessary call to ext4_get_group_desc() in mballoc + - LP: #615548 + * ext4: rename ext4_mb_release_desc() to ext4_mb_unload_buddy() + - LP: #615548 + * ext4: allow defrag (EXT4_IOC_MOVE_EXT) in 32bit compat mode + - LP: #615548 + * ext4: fix quota accounting in case of fallocate + - LP: #615548 + * ext4: check s_log_groups_per_flex in online resize code + - LP: #615548 + * ext4: don't return to userspace after freezing the fs with a mutex held + - LP: #615548 + * ext4: stop issuing discards if not supported by device + - LP: #615548 + * ext4: don't scan/accumulate more pages than mballoc will allocate + - LP: #615548 + * ext4: Do not zero out uninitialized extents beyond i_size + - LP: #615548 + * ext4: clean up inode bitmaps manipulation in ext4_free_inode + - LP: #615548 + * ext4: init statistics after journal recovery + - LP: #615548 + * ext4: Remove extraneous newlines in ext4_msg() calls + - LP: #615548 + * ext4: Prevent creation of files larger than RLIMIT_FSIZE using + fallocate + - LP: #615548 + * ext4: check for a good block group before loading buddy pages + - LP: #615548 + * ext4: Show journal_checksum option + - LP: #615548 + * ext4: Use bitops to read/modify i_flags in struct ext4_inode_info + - LP: #615548 + * ext4: Avoid crashing on NULL ptr dereference on a filesystem error + - LP: #615548 + * ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted + - LP: #615548 + * ext4: restart ext4_ext_remove_space() after transaction restart + - LP: #615548 + * ext4: Conditionally define compat ioctl numbers + - LP: #615548 + * ext4: Fix compat EXT4_IOC_ADD_GROUP + - LP: #615548 + * ext4: Make fsync sync new parent directories in no-journal mode + - LP: #615548 + * KVM: MMU: Remove user access when allowing kernel access to gpte.w=0 + page + - LP: #615548 + * KVM: SVM: Handle MCEs early in the vmexit process + - LP: #615548 + * KVM: SVM: Implement workaround for Erratum 383 + - LP: #615548 + * KVM: MMU: invalidate and flush on spte small->large page size change + - LP: #615548 + * futex: futex_find_get_task remove credentails check + - LP: #615548 + * GFS2: Fix up system xattrs + - LP: #615548 + * PM / x86: Save/restore MISC_ENABLE register + - LP: #615548 + * ACPI: skip checking BM_STS if the BIOS doesn't ask for it + - LP: #615548 + * ACPI: Unconditionally set SCI_EN on resume + - LP: #615548 + * libertas/sdio: 8686: set ECSI bit for 1-bit 
transfers + - LP: #615548 + * dm9000: fix "BUG: spinlock recursion" + - LP: #615548 + * firmware_class: fix memory leak - free allocated pages + - LP: #615548 + * revert "[CPUFREQ] remove rwsem lock from CPUFREQ_GOV_STOP call (second + call site)" + - LP: #615548 + * ALSA: Echoaudio, fix Guru Meditation #00000005.48454C50 + - LP: #615548 + * V4L/DVB: dvb-core: Fix ULE decapsulation bug + - LP: #615548 + * V4L/DVB: FusionHDTV: Use quick reads for I2C IR device probing + - LP: #615548 + * forcedeth: fix tx limit2 flag check + - LP: #615548 + * staging: rtl8192su: add Support for Belkin F5D8053 v6 + - LP: #615548 + * MIPS FPU emulator: allow Cause bits of FCSR to be writeable by ctc1 + - LP: #615548 + * V4L/DVB: budget: Select correct frontends + - LP: #615548 + * cxgb3: fix linkup issue + - LP: #615548 + * mac80211: fix supported rates IE if AP doesn't give us it's rates + - LP: #615548 + * V4L/DVB: uvcvideo: Add support for unbranded Arkmicro 18ec:3290 webcams + - LP: #615548 + * V4L/DVB: uvcvideo: Add support for Packard Bell EasyNote MX52 + integrated webcam + - LP: #615548 + * V4L/DVB: uvcvideo: Add support for V4L2_PIX_FMT_Y16 + - LP: #615548 + * iwlagn: verify flow id in compressed BA packet + - LP: #615548 + * kbuild: Fix modpost segfault + - LP: #615548 + * eeepc-laptop: check wireless hotplug events + - LP: #615548 + * Fix spinaphore down_spin() + - LP: #615548 + * ath5k: initialize ah->ah_current_channel + - LP: #615548 + * Input: RX51 keymap - fix recent compile breakage + - LP: #615548 + * V4L/DVB (13830): uvcvideo: add another YUYV format GUID for iSight + cameras + - LP: #615548 + * Linux 2.6.32.17 + - LP: #615548 + * drm/i915: fix hibernation since i915 self-reclaim fixes + - LP: #615548 + * drm/i915: add 'reclaimable' to i915 self-reclaimable page allocations + - LP: #615548 + * i915: fix lock imbalance on error path... + - LP: #615548 + * drm/i915: Define MI_ARB_STATE bits + - LP: #615548 + * drm/i915: enable low power render writes on GEN3 hardware. 
+ - LP: #615548 + * drm/i915: Make G4X-style PLL search more permissive + - LP: #615548 + * drm/radeon/r200: handle more hw tex coord types + - LP: #615548 + * drm/radeon/r100/r200: fix calculation of compressed cube maps + - LP: #615548 + * drm/radeon/kms: CS checker texture fixes for r1xx/r2xx/r3xx + - LP: #615548 + * drm/radeon/kms: fix shared ddc handling + - LP: #615548 + * drm/radeon/kms: fix shared ddc harder + - LP: #615548 + * drm/radeon/kms: add quirk for ASUS HD 3600 board + - LP: #615548 + * drm/radeon/kms: fix possible mis-detection of sideport on rs690/rs740 + - LP: #615548 + * drm/radeon/kms: fix legacy LVDS dpms sequence + - LP: #615548 + * drm/radeon/kms: fix legacy tv-out pal mode + - LP: #615548 + * Linux 2.6.32.17+drm33.7 + - LP: #615548 + * (pre-stable) writeback: remove the always false + bdi_cap_writeback_dirty() test + - LP: #543617, #585092 + * (pre-stable) writeback: remove unused nonblocking and congestion checks + - LP: #543617, #585092 + * (pre-stable) vfs: improve writeback_inodes_wb() + - LP: #543617, #585092 + * (pre-stable) writeback: add missing kernel-doc notation + - LP: #543617, #585092 + * (pre-stable) writeback: fix writeback completion notifications + - LP: #543617, #585092 + * (pre-stable) writeback: queue work on stack in writeback_inodes_sb + - LP: #543617, #585092 + * (pre-stable) writeback: enforce s_umount locking in writeback_inodes_sb + - LP: #543617, #585092 + * (pre-stable) writeback: fix writeback_inodes_wb from + writeback_inodes_sb + - LP: #543617, #585092 + * (pre-stable) writeback: simplify wakeup_flusher_threads + - LP: #543617, #585092 + * (pre-stable) writeback: simplify and split bdi_start_writeback + - LP: #543617, #585092 + * (pre-stable) writeback: add missing requeue_io in writeback_inodes_wb + - LP: #543617, #585092 + * (pre-stable) writeback: fix pin_sb_for_writeback + - LP: #543617, #585092 + * (pre-stable) writeback: remove writeback_inodes_wbc + - LP: #543617, #585092 + * (pre-stable) writeback: split writeback_inodes_wb + - LP: #543617, #585092 + * (pre-stable) writeback: simplify the write back thread queue + - LP: #543617, #585092 + * (pre-stable) Fix compiling NFS when backporting writeback + - LP: #543617, #585092 + * sched: cgroup: Implement different treatment for idle shares + - LP: #620755 + * mm: fix ia64 crash when gcore reads gate area + - LP: #620755 + * acl trouble after upgrading ubuntu + - LP: #620755 + * comedi: Uncripple 8255-based DIO subdevices + - LP: #620755 + * NFS: kswapd must not block in nfs_release_page + - LP: #620755 + * PARISC: led.c - fix potential stack overflow in led_proc_write() + - LP: #620755 + * arm/imx/gpio: add spinlock protection + - LP: #620755 + * parisc: pass through '\t' to early (iodc) console + - LP: #620755 + * amd64_edac: Fix DCT base address selector + - LP: #620755 + * amd64_edac: Correct scrub rate setting + - LP: #620755 + * e1000e: don't inadvertently re-set INTX_DISABLE + - LP: #620755 + * e1000e: 82577/82578 PHY register access issues + - LP: #620755 + * 9p: strlen() doesn't count the terminator + - LP: #620755 + * ath9k: enable serialize_regmode for non-PCIE AR9160 + - LP: #620755 + * ath9k_hw: fix an off-by-one error in the PDADC boundaries calculation + - LP: #620755 + * ath9k: fix TSF after reset on AR913x + - LP: #620755 + * ath9k: fix yet another buffer leak in the tx aggregation code + - LP: #620755 + * iwlwifi: fix scan abort + - LP: #620755 + * cfg80211: ignore spurious deauth + - LP: #620755 + * cfg80211: don't get expired BSSes + - LP: #620755 + * xfs: 
prevent swapext from operating on write-only files + - LP: #620755 + * SCSI: enclosure: fix error path - actually return ERR_PTR() on error + - LP: #620755 + * GFS2: rename causes kernel Oops + - LP: #620755 + * slow-work: use get_ref wrapper instead of directly calling get_ref + - LP: #620755 + * CIFS: Remove __exit mark from cifs_exit_dns_resolver() + - LP: #620755 + * CIFS: Fix compile error with __init in cifs_init_dns_resolver() + definition + - LP: #620755 + * xen: drop xen_sched_clock in favour of using plain wallclock time + - LP: #620755 + * ssb: do not read SPROM if it does not exist + - LP: #620755 + * ssb: Look for SPROM at different offset on higher rev CC + - LP: #620755 + * ssb: fix NULL ptr deref when pcihost_wrapper is used + - LP: #620755 + * ssb: Handle alternate SSPROM location + - LP: #620755 + * Linux 2.6.32.18 + - LP: #620755 + * ata_piix: fix locking around SIDPR access + - LP: #622877 + * powerpc: fix build with make 3.82 + - LP: #622877 + * nvram: Fix write beyond end condition; prove to gcc copy is safe + - LP: #622877 + * x86: Add memory modify constraints to xchg() and cmpxchg() + - LP: #622877 + * x86, vmware: Preset lpj values when on VMware. + - LP: #622877 + * Staging: line6: needs to select SND_PCM + - LP: #622877 + * Staging: panel: Prevent double-calling of parport_release - fix oops. + - LP: #622877 + * PCI: Do not run NVidia quirks related to MSI with MSI disabled + - LP: #622877 + * PCI: disable MSI on VIA K8M800 + - LP: #622877 + * solos-pci: Fix race condition in tasklet RX handling + - LP: #622877 + * splice: fix misuse of SPLICE_F_NONBLOCK + - LP: #622877 + * drivers/video/w100fb.c: ignore void return value / fix build failure + - LP: #622877 + * ide-cd: Do not access completed requests in the irq handler + - LP: #622877 + * md/raid10: fix deadlock with unaligned read during resync + - LP: #622877 + * blkdev: cgroup whitelist permission fix + - LP: #622877 + * eCryptfs: Handle ioctl calls with unlocked and compat functions + - LP: #622877 + * ecryptfs: release reference to lower mount if interpose fails + - LP: #622877 + * fs/ecryptfs/file.c: introduce missing free + - LP: #622877 + * bio, fs: update RWA_MASK, READA and SWRITE to match the corresponding + BIO_RW_* bits + - LP: #622877 + * signalfd: fill in ssi_int for posix timers and message queues + - LP: #622877 + * smsc911x: Add spinlocks around registers access + - LP: #622877 + * ARM: 6299/1: errata: TLBIASIDIS and TLBIMVAIS operations can broadcast + a faulty ASID + - LP: #622877 + * ARM: 6280/1: imx: Fix build failure when including <mach/gpio.h> + without <linux/spinlock.h> + - LP: #622877 + * USB: resizing usbmon binary interface buffer causes protection faults + - LP: #622877 + * USB delay init quirk for logitech Harmony 700-series devices + - LP: #622877 + * USB: serial: enabling support for Segway RMP in ftdi_sio + - LP: #622877 + * USB: option: Huawei ETS 1220 support added + - LP: #622877 + * USB: option: add huawei k3765 k4505 devices to work properly + - LP: #622877 + * USB: ftdi_sio: device id for Navitator + - LP: #622877 + * USB: cp210x: Add four new device IDs + - LP: #622877 + * USB: usbtest: avoid to free coherent buffer in atomic context + - LP: #622877 + * USB: fix thread-unsafe anchor utiliy routines + - LP: #622877 + * drm/edid: Fix the HDTV hack sync adjustment + - LP: #622877 + * Bluetooth: Added support for controller shipped with iMac i5 + - LP: #622877 + * jfs: don't allow os2 xattr namespace overlap with others + - LP: #622877 + * arp_notify: allow drivers to explicitly request a notification 
event. + - LP: #622877 + * xen: netfront: explicitly generate arp_notify event after migration. + - LP: #622877 + * net: Fix NETDEV_NOTIFY_PEERS to not conflict with + NETDEV_BONDING_DESLAVE. + - LP: #622877 + * irq: Add new IRQ flag IRQF_NO_SUSPEND + - LP: #622877 + * xen: Do not suspend IPI IRQs. + - LP: #622877 + * drm/i915: Use RSEN instead of HTPLG for tfp410 monitor detection. + - LP: #622877 + * Btrfs: Avoid superfluous tree-log writeout + - LP: #622877 + * Btrfs: Add btrfs_duplicate_item + - LP: #622877 + * Btrfs: Rewrite btrfs_drop_extents + - LP: #622877 + * Btrfs: Fix disk_i_size update corner case + - LP: #622877 + * Btrfs: Avoid orphan inodes cleanup while replaying log + - LP: #622877 + * Btrfs: Avoid orphan inodes cleanup during committing transaction + - LP: #622877 + * Btrfs: Make fallocate(2) more ENOSPC friendly + - LP: #622877 + * Btrfs: Make truncate(2) more ENOSPC friendly + - LP: #622877 + * Btrfs: Pass transaction handle to security and ACL initialization + functions + - LP: #622877 + * Btrfs: Add delayed iput + - LP: #622877 + * Btrfs: Fix btrfs_drop_extent_cache for skip pinned case + - LP: #622877 + * Btrfs: Fix per root used space accounting + - LP: #622877 + * Btrfs: don't add extent 0 to the free space cache v2 + - LP: #622877 + * Btrfs: fail mount on bad mount options + - LP: #622877 + * Btrfs: deny sys_link across subvolumes. + - LP: #622877 + * Btrfs: Show discard option in /proc/mounts + - LP: #622877 + * Btrfs: make metadata chunks smaller + - LP: #622877 + * Btrfs: make sure fallocate properly starts a transaction + - LP: #622877 + * btrfs: fix missing last-entry in readdir(3) + - LP: #622877 + * Btrfs: align offsets for btrfs_ordered_update_i_size + - LP: #622877 + * Btrfs, fix memory leaks in error paths + - LP: #622877 + * Btrfs: Fix race in btrfs_mark_extent_written + - LP: #622877 + * Btrfs: fix regression in orphan cleanup + - LP: #622877 + * Btrfs: deal with NULL acl sent to btrfs_set_acl + - LP: #622877 + * Btrfs: fix possible panic on unmount + - LP: #622877 + * Btrfs: Use correct values when updating inode i_size on fallocate + - LP: #622877 + * Btrfs: fix a memory leak in btrfs_init_acl + - LP: #622877 + * Btrfs: run orphan cleanup on default fs root + - LP: #622877 + * Btrfs: do not mark the chunk as readonly if in degraded mode + - LP: #622877 + * Btrfs: check return value of open_bdev_exclusive properly + - LP: #622877 + * Btrfs: check total number of devices when removing missing + - LP: #622877 + * Btrfs: fix race between allocate and release extent buffer. + - LP: #622877 + * Btrfs: make error return negative in btrfs_sync_file() + - LP: #622877 + * Btrfs: remove BUG_ON() due to mounting bad filesystem + - LP: #622877 + * Btrfs: Fix oopsen when dropping empty tree. + - LP: #622877 + * Btrfs: do not try and lookup the file extent when finishing ordered io + - LP: #622877 + * Btrfs: apply updated fallocate i_size fix + - LP: #622877 + * Btrfs: btrfs_mark_extent_written uses the wrong slot + - LP: #622877 + * Btrfs: kfree correct pointer during mount option parsing + - LP: #622877 + * nohz: Introduce arch_needs_cpu + - LP: #622877 + * nohz: Reuse ktime in sub-functions of tick_check_idle. 
+    - LP: #622877
+  * timekeeping: Fix clock_gettime vsyscall time warp
+    - LP: #622877
+  * sched: Fix granularity of task_u/stime()
+    - LP: #622877
+  * sched, cputime: Introduce thread_group_times()
+    - LP: #622877
+  * mutex: Don't spin when the owner CPU is offline or other weird cases
+    - LP: #622877
+  * fix SBA IOMMU to handle allocation failure properly
+    - LP: #622877
+  * crypto: testmgr - Fix complain about lack test for internal used
+    algorithm
+    - LP: #622877
+  * memory hotplug: fix a bug on /dev/mem for 64-bit kernels
+    - LP: #622877
+  * x86: Fix out of order of gsi
+    - LP: #622877
+  * HWPOISON: remove the anonymous entry
+    - LP: #622877
+  * HWPOISON: abort on failed unmap
+    - LP: #622877
+  * powerpc/eeh: Fix a bug when pci structure is null
+    - LP: #622877
+  * ACPI: Fix regression where _PPC is not read at boot even when
+    ignore_ppc=0
+    - LP: #622877
+  * ext4: Make sure the MOVE_EXT ioctl can't overwrite append-only files
+    - LP: #622877
+  * ext4: Fix optional-arg mount options
+    - LP: #622877
+  * reiserfs: properly honor read-only devices
+    - LP: #622877
+  * reiserfs: fix oops while creating privroot with selinux enabled
+    - LP: #622877
+  * dlm: always use GFP_NOFS
+    - LP: #622877
+  * dlm: fix ordering of bast and cast
+    - LP: #622877
+  * dlm: send reply before bast
+    - LP: #622877
+  * ocfs2: Find proper end cpos for a leaf refcount block.
+    - LP: #622877
+  * ocfs2: Set MS_POSIXACL on remount
+    - LP: #622877
+  * Skip check for mandatory locks when unlocking
+    - LP: #622877
+  * loop: Update mtime when writing using aops
+    - LP: #622877
+  * aic79xx: check for non-NULL scb in ahd_handle_nonpkt_busfree
+    - LP: #622877
+  * ibmvfc: Fix command completion handling
+    - LP: #622877
+  * ibmvfc: Reduce error recovery timeout
+    - LP: #622877
+  * md/raid1: delay reads that could overtake behind-writes.
+    - LP: #622877
+  * mm: fix corruption of hibernation caused by reusing swap during image
+    saving
+    - LP: #622877
+  * Linux 2.6.32.19
+    - LP: #622877
+  * Linux 2.6.32.20
+    - LP: #622882
+  * memstick: fix hangs on unexpected device removal in mspro_blk
+    - LP: #625392
+  * ASoC: Fix inverted mute controls for WM8580
+    - LP: #625392
+  * ASoC: Remove DSP mode support for WM8776
+    - LP: #625392
+  * ALSA: riptide - Fix detection / load of firmware files
+    - LP: #625392
+  * ALSA: emu10k1 - delay the PCM interrupts (add pcm_irq_delay parameter)
+    - LP: #625392
+  * ALSA: hda - Fix missing stream for second ADC on Realtek ALC260 HDA
+    codec
+    - LP: #625392
+  * ocfs2: do not overwrite error codes in ocfs2_init_acl
+    - LP: #625392
+  * ocfs2/dlm: fix a dead lock
+    - LP: #625392
+  * ocfs2 fix o2dlm dlm run purgelist (rev 3)
+    - LP: #625392
+  * ocfs2: Count more refcount records in file system fragmentation.
+    - LP: #625392
+  * ocfs2/dlm: avoid incorrect bit set in refmap on recovery master
+    - LP: #625392
+  * ocfs2/dlm: remove potential deadlock -V3
+    - LP: #625392
+  * x86, hotplug: Serialize CPU hotplug to avoid bringup concurrency issues
+    - LP: #625392
+  * x86, apic: Fix apic=debug boot crash
+    - LP: #625392
+  * Fix the nested PR lock calling issue in ACL
+    - LP: #625392
+  * hwmon: (pc87360) Fix device resource declaration
+    - LP: #625392
+  * ARM: Tighten check for allowable CPSR values
+    - LP: #625392
+  * nfs: Add "lookupcache" to displayed mount options
+    - LP: #625392
+  * ath5k: disable ASPM L0s for all cards
+    - LP: #625392
+  * pxa3xx: fix ns2cycle equation
+    - LP: #625392
+  * dm mpath: fix NULL pointer dereference when path parameters missing
+    - LP: #625392
+  * dm ioctl: release _hash_lock between devices in remove_all
+    - LP: #625392
+  * mm: make the vma list be doubly linked
+    - LP: #625392
+  * mm: make the mlock() stack guard page checks stricter
+    - LP: #625392
+  * mm: make stack guard page logic use vm_prev pointer
+    - LP: #625392
+  * slab: fix object alignment
+    - LP: #625392
+  * sunxvr500: Ignore secondary output PCI devices.
+    - LP: #625392
+  * sparc64: Add missing ID to parport probing code.
+    - LP: #625392
+  * sparc64: Fix rwsem constant bug leading to hangs.
+    - LP: #625392
+  * sparc64: Fix atomic64_t routine return values.
+    - LP: #625392
+  * net: Fix a memmove bug in dev_gro_receive()
+    - LP: #625392
+  * isdn: fix information leak
+    - LP: #625392
+  * act_nat: the checksum of ICMP doesn't have pseudo header
+    - LP: #625392
+  * vmscan: raise the bar to PAGEOUT_IO_SYNC stalls
+    - LP: #625392
+  * pcmcia: avoid buffer overflow in pcmcia_setup_isa_irq
+    - LP: #625392
+  * ext4: consolidate in_range() definitions
+    - LP: #625392
+  * Oprofile: Change CPUIDS from decimal to hex, and add some comments
+    - LP: #625392
+  * oprofile: add support for Intel processor model 30
+    - LP: #625392
+  * fixes for using make 3.82
+    - LP: #625392
+  * ALSA: intel8x0: Mute External Amplifier by default for ThinkPad X31
+    - LP: #619439, #625392
+  * netlink: fix compat recvmsg
+    - LP: #625392
+  * powerpc: Fix typo in uImage target
+    - LP: #625392
+  * USB: option: add Celot CT-650
+    - LP: #625392
+  * USB: add device IDs for igotu to navman
+    - LP: #625392
+  * USB: pl2303: New vendor and product id
+    - LP: #625392
+  * USB: CP210x Fix Break On/Off
+    - LP: #625392
+  * USB: ftdi_sio: fix endianess of max packet size
+    - LP: #625392
+  * USB: io_ti: check firmware version before updating
+    - LP: #625392
+  * USB: xhci: Remove buggy assignment in next_trb()
+    - LP: #625392
+  * USB: ftdi_sio: Add ID for Ionics PlugComputer
+    - LP: #625392
+  * USB: ftdi_sio: add product ID for Lenz LI-USB
+    - LP: #625392
+  * x86, apic: ack all pending irqs when crashed/on kexec
+    - LP: #625392
+  * Linux 2.6.32.21
+    - LP: #625392
+
+  [ Ubuntu: 2.6.32-24.43 ]
+
+  * x86-64, compat: Test %rax for the syscall number, not %eax
+    - CVE-2010-3301
+  * x86-64, compat: Retruncate rax after ia32 syscall entry tracing
+    - CVE-2010-3301
+  * compat: Make compat_alloc_user_space() incorporate the access_ok()
+    - CVE-2010-3081
+
+  [ Ubuntu: 2.6.32-24.42 ]
+
+  * (pre-stable) drm/i915: add PANEL_UNLOCK_REGS definition
+    - LP: #561802, #578673
+  * (pre-stable) drm/i915: make sure eDP panel is turned on
+    - LP: #578673
+  * (pre-stable) drm/i915: make sure we shut off the panel in eDP configs
+    - LP: #578673
+
+ -- Brad Figg  Fri, 01 Oct 2010 10:39:03 -0700
+
+linux-ec2 (2.6.32-308.16) lucid-security; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebased to 2.6.32-24.43
+
+  [ Ubuntu: 2.6.32-24.43 ]
+
+  * x86-64, compat: Test %rax for the syscall number, not %eax
+    - CVE-2010-3301
+  * x86-64, compat: Retruncate rax after ia32 syscall entry tracing
+    - CVE-2010-3301
+  * compat: Make compat_alloc_user_space() incorporate the access_ok()
+    - CVE-2010-3081
+
+  [ Ubuntu: 2.6.32-24.42 ]
+
+  * (pre-stable) drm/i915: add PANEL_UNLOCK_REGS definition
+    - LP: #561802, #578673
+  * (pre-stable) drm/i915: make sure eDP panel is turned on
+    - LP: #578673
+  * (pre-stable) drm/i915: make sure we shut off the panel in eDP configs
+    - LP: #578673
+
+ -- Stefan Bader  Thu, 16 Sep 2010 10:39:18 +0200
+
+linux-ec2 (2.6.32-308.15) lucid-security; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebased to 2.6.32-24.41
+
+  [ Ubuntu: 2.6.32-24.41 ]
+
+  * (pre-stable) ext4: fix freeze deadlock under IO
+    - LP: #595489
+  * drm: Initialize ioctl struct when no user data is present
+    - CVE-2010-2803
+  * can: add limit for nframes and clean up signed/unsigned variables
+    - CVE-2010-2959
+  * mm: keep a guard page below a grow-down stack segment
+    - CVE-2010-2240
+  * mm: fix missing page table unmap for stack guard page failure case
+    - CVE-2010-2240
+  * mm: fix page table unmap for stack guard page properly
+    - CVE-2010-2240
+  * mm: fix up some user-visible effects of the stack guard page
+    - CVE-2010-2240
+  * x86: don't send SIGBUS for kernel page faults
+    - CVE-2010-2240
+
+ -- Stefan Bader  Wed, 18 Aug 2010 14:33:06 +0200
+
+linux-ec2 (2.6.32-308.14) lucid-security; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebased to 2.6.32-24.39
+
+  [ Ubuntu: 2.6.32-24.39 ]
+
+  * sctp: Fix skb_over_panic resulting from multiple invalid parameter
+    errors (CVE-2010-1173) (v4)
+    - CVE-2010-1173
+  * sctp: fix append error cause to ERROR chunk correctly
+    - CVE-2010-1173
+  * GFS2: Fix writing to non-page aligned gfs2_quota structures
+    - CVE-2010-1436
+  * KEYS: find_keyring_by_name() can gain access to a freed keyring
+    - CVE-2010-1437
+  * GFS2: Fix permissions checking for setflags ioctl()
+    - CVE-2010-1641
+  * Btrfs: should add a permission check for setfacl
+    - CVE-2010-2071
+  * ecryptfs: Bugfix for error related to ecryptfs_hash_buckets
+    - CVE-2010-2492
+
+ -- Stefan Bader  Wed, 21 Jul 2010 10:58:47 +0200
+
+linux-ec2 (2.6.32-308.13) lucid-proposed; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebased to 2.6.32-24.38
+
+  [ Ubuntu: 2.6.32-24.38 ]
+
+  * SAUCE: dell-laptop: fire SMI when toggling hardware killswitch
+    (revised)
+    - LP: #590607
+  * sfc: Wait at most 10ms for the MC to finish reading out MAC statistics
+    - LP: #590783
+  * sfc: Always close net device at the end of a disabling reset
+    - LP: #590783
+  * sfc: Change falcon_probe_board() to fail for unsupported boards
+    - LP: #590783
+  * ext4: Fix potential quota deadlock
+    - LP: #588069
+  * jbd: jbd-debug and jbd2-debug should be writable
+    - LP: #588069
+  * ext4: replace BUG() with return -EIO in ext4_ext_get_blocks
+    - LP: #588069
+  * ext4, jbd2: Add barriers for file systems with exernal journals
+    - LP: #588069
+  * ext4: Eliminate potential double free on error path
+    - LP: #588069
+  * ext4: return correct wbc.nr_to_write in ext4_da_writepages
+    - LP: #588069
+  * ext4: Ensure zeroout blocks have no dirty metadata
+    - LP: #588069
+  * ext4: Patch up how we claim metadata blocks for quota purposes
+    - LP: #588069
+  * ext4: Fix accounting of reserved metadata blocks
+    - LP: #588069
+  * ext4: Calculate metadata requirements more accurately
+    - LP: #588069
+  * ext4: Handle -EDQUOT error on write
+    - LP: #588069
+  * ext4: Fix quota accounting error with fallocate
+    - LP: #588069
+  * ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag
+    - LP: #588069
+  * ext4: Use bitops to read/modify EXT4_I(inode)->i_state
+    - LP: #588069
+  * ext4: Fix BUG_ON at fs/buffer.c:652 in no journal mode
+    - LP: #588069
+  * ext4: Add flag to files with blocks intentionally past EOF
+    - LP: #588069
+  * ext4: Fix fencepost error in chosing choosing group vs file
+    preallocation.
+    - LP: #588069
+  * ext4: fix error handling in migrate
+    - LP: #588069
+  * ext4: explicitly remove inode from orphan list after failed direct io
+    - LP: #588069
+  * ext4: Handle non empty on-disk orphan link
+    - LP: #588069
+  * ext4: make "offset" consistent in ext4_check_dir_entry()
+    - LP: #588069
+  * ext4: Fix insertion point of extent in mext_insert_across_blocks()
+    - LP: #588069
+  * ext4: Fix the NULL reference in double_down_write_data_sem()
+    - LP: #588069
+  * ext4: Code cleanup for EXT4_IOC_MOVE_EXT ioctl
+    - LP: #588069
+  * ext4: Fix estimate of # of blocks needed to write indirect-mapped files
+    - LP: #588069
+  * ext4: Fixed inode allocator to correctly track a flex_bg's used_dirs
+    - LP: #588069
+  * ext4: Fix possible lost inode write in no journal mode
+    - LP: #588069
+  * ext4: Fix buffer head leaks after calls to ext4_get_inode_loc()
+    - LP: #588069
+  * ext4: Issue the discard operation *before* releasing the blocks to be
+    reused
+    - LP: #588069
+  * ext4: check missed return value in ext4_sync_file()
+    - LP: #588069
+  * ext4: fix memory leaks in error path handling of ext4_ext_zeroout()
+    - LP: #588069
+  * ext4: Remove unnecessary call to ext4_get_group_desc() in mballoc
+    - LP: #588069
+  * ext4: rename ext4_mb_release_desc() to ext4_mb_unload_buddy()
+    - LP: #588069
+  * ext4: allow defrag (EXT4_IOC_MOVE_EXT) in 32bit compat mode
+    - LP: #588069
+  * ext4: fix quota accounting in case of fallocate
+    - LP: #588069
+  * ext4: check s_log_groups_per_flex in online resize code
+    - LP: #588069
+  * ext4: don't return to userspace after freezing the fs with a mutex held
+    - LP: #588069
+  * ext4: stop issuing discards if not supported by device
+    - LP: #588069
+  * ext4: don't scan/accumulate more pages than mballoc will allocate
+    - LP: #588069
+  * ext4: Do not zero out uninitialized extents beyond i_size
+    - LP: #588069
+  * ext4: clean up inode bitmaps manipulation in ext4_free_inode
+    - LP: #588069
+  * ext4: init statistics after journal recovery
+    - LP: #588069
+  * ext4: Remove extraneous newlines in ext4_msg() calls
+    - LP: #588069
+  * ext4: Prevent creation of files larger than RLIMIT_FSIZE using
+    fallocate
+    - LP: #588069
+  * ext4: check for a good block group before loading buddy pages
+    - LP: #588069
+  * ext4: Show journal_checksum option
+    - LP: #588069
+  * ext4: Use bitops to read/modify i_flags in struct ext4_inode_info
+    - LP: #588069
+  * ext4: Avoid crashing on NULL ptr dereference on a filesystem error
+    - LP: #588069
+  * ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted
+    - LP: #588069
+  * ext4: restart ext4_ext_remove_space() after transaction restart
+    - LP: #588069
+  * ext4: Conditionally define compat ioctl numbers
+    - LP: #588069
+  * ext4: Fix compat EXT4_IOC_ADD_GROUP
+    - LP: #588069
+  * ext4: Make fsync sync new parent directories in no-journal mode
+    - LP: #588069
+  * (pre-stable) ahci,ata_generic: let ata_generic handle new MBP w/ MCP89
+    - LP: #576601
+  * (pre-stable) ata_generic: implement ATA_GEN_* flags and force enable
+    DMA on MBP 7,1
+    - LP: #576601
+
+ -- Stefan Bader  Thu, 08 Jul 2010 10:53:59 +0200
+
+linux-ec2 (2.6.32-307.12) lucid-proposed; urgency=low
+
+  [ Andy Whitcroft ]
+
+  * rebase to Ubuntu-2.6.32-23.37
+  * update to ubuntu-debian:7e708d33054c373faf41da23b73e8b48c342d958
+    - LP: #570500, #576274
+  * xen: follow changes to smp headers adding wbinvd
+    - LP: #575853
+
+  [ Ubuntu: 2.6.32-23.37 ]
+
+  * SAUCE: drm/radeon/kms/atom: fix dual-link DVI on DCE3.2/4.0
+    - LP: #564559
+  * [Config] ports -- build in dm-mod to enable LVM boot
+    - LP: #560717
+  * tools -- fix perf version extraction for multi-part flavours
+    - LP: #555130
+  * [Config] enforce -- ensure dm_mod is built-in for LVM
+    - LP: #560717
+  * update to ubuntu-debian:7e708d33054c373faf41da23b73e8b48c342d958
+    - LP: #570500, #576274
+  * Revert "(pre-stable): input: ALPS - Add signature for HP Pavilion dm3
+    laptops"
+    - LP: #550625
+  * Enable ftrace function profiler
+    - LP: #570389
+  * enforce CONFIG_TMPFS_POSIX_ACL=y
+    - LP: #575940
+  * Revert "staging/comdi -- disable"
+    - LP: #563436
+  * [Config] Enable multicast routing for sparc
+    - LP: #416266
+  * [Config] Add ahci.ko to virtual sub-flavour
+    - LP: #570542
+  * Revert "SAUCE: drm/i915: Disable FBC on 915GM and 945GM"
+    - LP: #588832
+  * [Config] Add atl1c to nic-modules udeb
+    - LP: #557130
+  * Revert "(pre-stable) iwlwifi: fix nfreed--"
+    - LP: #575853
+  * Revert "backlight: mbp_nvidia_bl - add five more MacBook variants"
+    - LP: #575853
+  * Revert "(pre-stable) pata_via: Add VIA VX900 support"
+    - LP: #575853
+  * Revert "(pre-stable) x86-32, resume: do a global tlb flush in S4
+    resume"
+    - LP: #575853
+  * Revert "x86: disable IOMMUs on kernel crash"
+    - LP: #575853
+  * Revert "sunrpc: fix peername failed on closed listener"
+    - LP: #575853
+  * Revert "sunrpc: move the close processing after do recvfrom method"
+    - LP: #575853
+  * Revert "(pre-stable) drm/edid: allow certain bogus edids to hit a fixup
+    path rather than fail"
+    - LP: #575853
+  * Revert "drm/radeon/kms: don't print error on -ERESTARTSYS."
+    - LP: #575853
+  * Revert "ath9k: fix lockdep warning when unloading module" on stable
+    kernels
+    - LP: #588832
+  * Staging: comedi: removed "depricated" from COMEDI_CB_BLOCK
+    - LP: #483343
+  * fat: fix buffer overflow in vfat_create_shortname()
+    - LP: #575853
+  * xfs: simplify inode teardown
+    - LP: #575853
+  * xfs: fix mmap_sem/iolock inversion in xfs_free_eofblocks
+    - LP: #575853
+  * xfs: I/O completion handlers must use NOFS allocations
+    - LP: #575853
+  * xfs: Wrapped journal record corruption on read at recovery
+    - LP: #575853
+  * xfs: Fix error return for fallocate() on XFS
+    - LP: #575853
+  * xfs: check for not fully initialized inodes in xfs_ireclaim
+    - LP: #575853
+  * xfs: fix timestamp handling in xfs_setattr
+    - LP: #575853
+  * xfs: Don't flush stale inodes
+    - LP: #575853
+  * xfs: Ensure we force all busy extents in range to disk
+    - LP: #575853
+  * xfs: reclaim inodes under a write lock
+    - LP: #575853
+  * xfs: Avoid inodes in reclaim when flushing from inode cache
+    - LP: #575853
+  * xfs: reclaim all inodes by background tree walks
+    - LP: #575853
+  * xfs: fix stale inode flush avoidance
+    - LP: #575853
+  * xfs: xfs_swap_extents needs to handle dynamic fork offsets
+    - LP: #575853
+  * xfs: quota limit statvfs available blocks
+    - LP: #575853
+  * xfs: don't hold onto reserved blocks on remount, ro
+    - LP: #575853
+  * xfs: remove invalid barrier optimization from xfs_fsync
+    - LP: #575853
+  * xfs: Non-blocking inode locking in IO completion
+    - LP: #575853
+  * xfs: fix locking for inode cache radix tree tag updates
+    - LP: #575853
+  * sh: Enable the mmu in start_secondary()
+    - LP: #575853
+  * sh: Fix FDPIC binary loader
+    - LP: #575853
+  * libiscsi: Fix recovery slowdown regression
+    - LP: #575853
+  * Freezer: Fix buggy resume test for tasks frozen with cgroup freezer
+    - LP: #575853
+  * iwlwifi: counting number of tfds can be free for 4965
+    - LP: #575853
+  * iwlwifi: fix nfreed--
+    - LP: #575853
+  * iwlwifi: range checking issue
+    - LP: #575853
+  * setup correct int pipe type in ar9170_usb_exec_cmd
+    - LP: #575853
+  * mac80211: move netdev queue enabling to correct spot
+    - LP: #575853
+  * mac80211: tear down all agg queues when restart/reconfig hw
+    - LP: #575853
+  * WATCHDOG: hpwdt - fix lower timeout limit
+    - LP: #575853
+  * WATCHDOG: iTCO_wdt: TCO Watchdog patch for additional Intel Cougar
+    Point DeviceIDs
+    - LP: #575853
+  * genirq: Force MSI irq handlers to run with interrupts disabled
+    - LP: #575853
+  * lis3: fix show rate for 8 bits chips
+    - LP: #575853
+  * pata_ali: Fix regression with old devices
+    - LP: #575853
+  * HID: fix oops in gyration_event()
+    - LP: #575853
+  * raw: fsync method is now required
+    - LP: #575853
+  * readahead: fix NULL filp dereference
+    - LP: #575853
+  * ALSA: mixart: range checking proc file
+    - LP: #575853
+  * ALSA: hda: Fix 0 dB offset for Lenovo Thinkpad models using AD1981
+    - LP: #551606, #575853
+  * x86, amd: Get multi-node CPU info from NodeId MSR instead of PCI config
+    space
+    - LP: #575853
+  * resource: move kernel function inside __KERNEL__
+    - LP: #575853
+  * backlight: mbp_nvidia_bl - add five more MacBook variants
+    - LP: #575853
+  * pata_via: Add VIA VX900 support
+    - LP: #575853
+  * ext3: Don't update the superblock in ext3_statfs()
+    - LP: #575853
+  * ext3: journal all modifications in ext3_xattr_set_handle
+    - LP: #575853
+  * eeepc-laptop: disable cpu speed control on EeePC 701
+    - LP: #575853
+  * eeepc-laptop: dmi blacklist to disable pci hotplug code
+    - LP: #575853
+  * eeepc-laptop: add hotplug_disable parameter
+    - LP: #575853
+  * eeepc-laptop: disable wireless hotplug for 1201N
+    - LP: #575853
+  * eeepc-laptop: disable wireless hotplug for 1005PE
+    - LP: #575853
+  * libata: disable NCQ on Crucial C300 SSD
+    - LP: #575853
+  * cifs: Fix a kernel BUG with remote OS/2 server (try #3)
+    - LP: #575853
+  * CIFS: initialize nbytes at the beginning of CIFSSMBWrite()
+    - LP: #575853
+  * iwlwifi: need check for valid qos packet before free
+    - LP: #575853
+  * ARM: 6031/1: fix Thumb-2 decompressor
+    - LP: #575853
+  * x86-32, resume: do a global tlb flush in S4 resume
+    - LP: #575853
+  * x86: hpet: Make WARN_ON understandable
+    - LP: #575853
+  * x86, hpet: Erratum workaround for read after write of HPET comparator
+    - LP: #575853
+  * x86: Fix double enable_IR_x2apic() call on SMP kernel on !SMP boards
+    - LP: #575853
+  * sched: sched_getaffinity(): Allow less than NR_CPUS length
+    - LP: #575853
+  * sched: Fix sched_getaffinity()
+    - LP: #575853
+  * NFSv4: Fall back to ordinary lookup if nfs4_atomic_open() returns
+    EISDIR
+    - LP: #575853
+  * NFSv4: fix delegated locking
+    - LP: #575853
+  * ALSA: hda - add a quirk for Clevo M570U laptop
+    - LP: #575853
+  * ALSA: usb - Fix Oops after usb-midi disconnection
+    - LP: #575853
+  * hwmon: (sht15) Fix sht15_calc_temp interpolation function
+    - LP: #575853
+  * hwmon: (sht15) Properly handle the case CONFIG_REGULATOR=n
+    - LP: #575853
+  * x86/amd-iommu: Use helper function to destroy domain
+    - LP: #575853
+  * x86/amd-iommu: enable iommu before attaching devices
+    - LP: #575853
+  * x86, lib: Add wbinvd smp helpers
+    - LP: #575853
+  * x86, cacheinfo: Fix disabling of L3 cache indices
+    - LP: #575853
+  * intel-agp: Switch to wbinvd_on_all_cpus
+    - LP: #575853
+  * x86, cacheinfo: Add cache index disable sysfs attrs only to L3 caches
+    - LP: #575853
+  * x86, cacheinfo: Calculate L3 indices
+    - LP: #575853
+  * x86, cacheinfo: Remove NUMA dependency, fix for AMD Fam10h rev D1
+    - LP: #575853
+  * x86, cacheinfo: Enable L3 CID only on AMD
+    - LP: #575853
+  * vgaarb: fix "target=default" passing
+    - LP: #575853
+  * x86-32: clean up rwsem inline asm statements
+    - LP: #575853
+  * x86: clean up rwsem type system
+    - LP: #575853
+  * x86-64, rwsem: 64-bit xadd rwsem implementation
+    - LP: #575853
+  * x86-64: support native xadd rwsem implementation
+    - LP: #575853
+  * x86: Fix breakage of UML from the changes in the rwsem system
+    - LP: #575853
+  * x86-64, rwsem: Avoid store forwarding hazard in __downgrade_write
+    - LP: #575853
+  * fix NFS4 handling of mountpoint stat
+    - LP: #575853
+  * dm mpath: fix stall when requeueing io
+    - LP: #575853
+  * quota: Fix possible dq_flags corruption
+    - LP: #575853
+  * Staging: comedi: fix usbdux timeout bug
+    - LP: #483343, #575853
+  * Staging: comedi: usbdux.c: fix locking up of the driver when the comedi
+    ringbuffer runs empty
+    - LP: #483343, #575853
+  * ocfs2: set i_mode on disk during acl operations
+    - LP: #575853
+  * ocfs2: Change bg_chain check for ocfs2_validate_gd_parent.
+    - LP: #575853
+  * 9p: Skip check for mandatory locks when unlocking
+    - LP: #575853
+  * fc class: fail fast bsg requests
+    - LP: #575853
+  * SCSI: add scsi target reset support to scsi ioctl
+    - LP: #575853
+  * PCIe AER: prevent AER injection if hardware masks error reporting
+    - LP: #575853
+  * vgaarb: Fix VGA arbiter to accept PCI domains other than 0
+    - LP: #575853
+  * SCSI: fc-transport: Use packed modifier for fc_bsg_request structure.
+    - LP: #575853
+  * pci: Update pci_set_vga_state() to call arch functions
+    - LP: #575853
+  * PCI: kill off pci_register_set_vga_state() symbol export.
+    - LP: #575853
+  * PCI: fix nested spinlock hang in aer_inject
+    - LP: #575853
+  * IPoIB: Fix TX queue lockup with mixed UD/CM traffic
+    - LP: #575853
+  * x86/PCI: irq and pci_ids patch for Intel Cougar Point DeviceIDs
+    - LP: #575853
+  * ALSA: hda_intel: ALSA HD Audio patch for Intel Cougar Point DeviceIDs
+    - LP: #575853
+  * ALSA: hda - enable snoop for Intel Cougar Point
+    - LP: #575853
+  * ata_piix: IDE Mode SATA patch for Intel Cougar Point DeviceIDs
+    - LP: #575853
+  * ahci: AHCI and RAID mode SATA patch for Intel Cougar Point DeviceIDs
+    - LP: #575853
+  * i2c-i801: Add Intel Cougar Point device IDs
+    - LP: #575853
+  * b43: Remove reset after fatal DMA error
+    - LP: #575853
+  * b43: Allow PIO mode to be selected at module load
+    - LP: #575853
+  * b43: fall back gracefully to PIO mode after fatal DMA errors
+    - LP: #575853
+  * ALSA: hda - Add position_fix quirk for Biostar mobo
+    - LP: #575853
+  * agp/hp: fixup hp agp after ACPI changes
+    - LP: #575853
+  * b43: Optimize PIO scratchbuffer usage
+    - LP: #575853
+  * ecryptfs: fix use with tmpfs by removing d_drop from
+    ecryptfs_destroy_inode
+    - LP: #575853
+  * eCryptfs: Decrypt symlink target for stat size
+    - LP: #575853
+  * ecryptfs: fix error code for missing xattrs in lower fs
+    - LP: #575853
+  * sched: Fix a race between ttwu() and migrate_task()
+    - LP: #575853
+  * USB: cdc-acm: Update to new autopm API
+    - LP: #575853
+  * USB: cdc-acm: Fix stupid NULL pointer in resume()
+    - LP: #575853
+  * iwlwifi: clear all tx queues when firmware ready
+    - LP: #575853
+  * iwlwifi: fix scan race
+    - LP: #575853
+  * e1000e: stop cleaning when we reach tx_ring->next_to_use
+    - LP: #575853
+  * tcp: fix ICMP-RTO war
+    - LP: #575853
+  * perf_events, x86: Implement Intel Westmere/Nehalem-EX support
+    - LP: #575853
+  * Input: wacom - switch mode upon system resume
+    - LP: #575853
+  * md: deal with merge_bvec_fn in component devices better.
+    - LP: #575853
+  * nfsd4: don't try to map gid's in generic rpc code
+    - LP: #575853
+  * nfsd: ensure sockets are closed on error
+    - LP: #575853
+  * ALSA: hda: Set Front Mic to input vref 50% for Lenovo 3000 Y410
+    - LP: #479373, #575853
+  * mac80211: fix deferred hardware scan requests
+    - LP: #575853
+  * fs-writeback: Add helper function to start writeback if idle
+    - LP: #575853
+  * ext4: flush delalloc blocks when space is low
+    - LP: #575853
+  * ext4: fix async i/o writes beyond 4GB to a sparse file
+    - LP: #575853
+  * tpm: autoload tpm_tis based on system PnP IDs
+    - LP: #575853
+  * IB/iser: Rewrite SG handling for RDMA logic
+    - LP: #575853
+  * mptctl : Remove printk which floods unnecessary messages to
+    var/log/message
+    - LP: #575853
+  * mptspi: Fix for incorrect data underrun errata
+    - LP: #575853
+  * sched: Use proper type in sched_getaffinity()
+    - LP: #575853
+  * KVM: SVM: Fix memory leaks that happen when svm_create_vcpu() fails
+    - LP: #575853
+  * KVM: Don't spam kernel log when injecting exceptions due to bad cr
+    writes
+    - LP: #575853
+  * KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL
+    - LP: #575853
+  * KVM: VMX: Save/restore rflags.vm correctly in real mode
+    - LP: #575853
+  * KVM: MMU: fix kvm_mmu_zap_page() and its calling path
+    - LP: #575853
+  * KVM: fix the handling of dirty bitmaps to avoid overflows
+    - LP: #575853
+  * KVM: Increase NR_IOBUS_DEVS limit to 200
+    - LP: #575853
+  * KVM: x86: Fix TSS size check for 16-bit tasks
+    - LP: #575853
+  * x86/gart: Disable GART explicitly before initialization
+    - LP: #575853
+  * r8169: clean up my printk uglyness
+    - LP: #562742, #575853
+  * Linux 2.6.32.12
+    - LP: #575853
+  * drm/edid: allow certain bogus edids to hit a fixup path rather than
+    fail
+    - LP: #575853
+  * drm/radeon: add new RS880 pci id
+    - LP: #575853
+  * drm: remove the EDID blob stored in the EDID property when it is
+    disconnected
+    - LP: #575853
+  * drm/radeon/kms: never treat rs4xx as AGP
+    - LP: #575853
+  * drm/radeon/kms: Fix NULL pointer dereference if memory allocation
+    failed in a simple way
+    - LP: #575853
+  * drm/radeon/kms: don't print error on -ERESTARTSYS.
+    - LP: #575853
+  * drm/radeon/kms: fix pal tv-out support on legacy IGP chips
+    - LP: #575853
+  * drm: Return ENODEV if the inode mapping changes
+    - LP: #575853
+  * drm/edid/quirks: Envision EN2028
+    - LP: #575853
+  * drm/radeon: R300 AD only has one quad pipe.
+    - LP: #575853
+  * drm/radeon/kms: fix washed out image on legacy tv dac
+    - LP: #575853
+  * drm/radeon/kms/combios: verify dac_adj values are valid
+    - LP: #575853
+  * drm/i915: Add no_lvds entry for the Clientron U800
+    - LP: #544671, #575853
+  * drm/radeon/kms: more atom parser fixes (v2)
+    - LP: #575853
+  * drm/radeon/kms: disable the tv encoder when tv/cv is not in use
+    - LP: #575853
+  * drm/radeon/kms: fix tv dac conflict resolver
+    - LP: #575853
+  * drm/radeon/kms: fix rs600 tlb flush
+    - LP: #575853
+  * drm/radeon/kms: add FireMV 2400 PCI ID.
+    - LP: #575853
+  * Linux 2.6.32.12+drm33.3
+    - LP: #575853
+  * USB: EHCI: defer reclamation of siTDs
+    - LP: #583414
+  * p54usb: Add usbid for Corega CG-WLUSB2GT.
+    - LP: #583414
+  * md/raid5: allow for more than 2^31 chunks.
+    - LP: #583414
+  * md/raid5: fix previous patch.
+    - LP: #583414
+  * libata: fix locking around blk_abort_request()
+    - LP: #583414
+  * libata: ensure NCQ error result taskfile is fully initialized before
+    returning it via qc->result_tf.
+    - LP: #583414
+  * w1: w1 temp: fix negative termperature calculation
+    - LP: #583414
+  * memcg: fix prepare migration
+    - LP: #583414
+  * mac80211: remove bogus TX agg state assignment
+    - LP: #583414
+  * flex_array: fix the panic when calling flex_array_alloc() without
+    __GFP_ZERO
+    - LP: #583414
+  * core, x86: make LIST_POISON less deadly
+    - LP: #583414
+  * hugetlb: fix infinite loop in get_futex_key() when backed by huge pages
+    - LP: #583414
+  * reiserfs: fix corruption during shrinking of xattrs
+    - LP: #583414
+  * nfsd4: bug in read_buf
+    - LP: #583414
+  * keys: the request_key() syscall should link an existing key to the dest
+    keyring
+    - LP: #583414
+  * staging: usbip: Fix deadlock
+    - LP: #583414
+  * USB: fix remote wakeup settings during system sleep
+    - LP: #583414
+  * USB: Add id for HP ev2210 a.k.a Sierra MC5725 miniPCI-e Cell Modem.
+    - LP: #511066, #583414
+  * USB: fix testing the wrong variable in fs_create_by_name()
+    - LP: #583414
+  * USB: don't choose configs with no interfaces
+    - LP: #583414
+  * USB: OHCI: don't look at the root hub to get the number of ports
+    - LP: #583414
+  * USB: xhci: properly set the "Mult" field of the endpoint context.
+    - LP: #583414
+  * USB: xhci: properly set endpoint context fields for periodic eps.
+    - LP: #583414
+  * procfs: fix tid fdinfo
+    - LP: #583414
+  * ocfs2: Update VFS inode's id info after reflink.
+    - LP: #583414
+  * ocfs2: potential ERR_PTR dereference on error paths
+    - LP: #583414
+  * ocfs2: Compute metaecc for superblocks during online resize.
+    - LP: #583414
+  * ocfs2_dlmfs: Fix math error when reading LVB.
+    - LP: #583414
+  * powernow-k8: Fix frequency reporting
+    - LP: #572348, #583414
+  * nfs d_revalidate() is too trigger-happy with d_drop()
+    - LP: #583414
+  * NFS: rsize and wsize settings ignored on v4 mounts
+    - LP: #583414
+  * Staging: hv: Fix a bug affecting IPv6
+    - LP: #583414
+  * Staging: hv: Fix up memory leak on HvCleanup
+    - LP: #583414
+  * Staging: hv: name network device ethX rather than sethX
+    - LP: #583414
+  * i2c: Fix probing of FSC hardware monitoring chips
+    - LP: #583414
+  * perf: Fix resource leak in failure path of perf_event_open()
+    - LP: #583414
+  * raid6: fix recovery performance regression
+    - LP: #583414
+  * serial: 8250_pnp - add Fujitsu Wacom device
+    - LP: #583414
+  * block: ensure jiffies wrap is handled correctly in
+    blk_rq_timed_out_timer
+    - LP: #583414
+  * dm9601: fix phy/eeprom write routine
+    - LP: #583414
+  * p54pci: fix bugs in p54p_check_tx_ring
+    - LP: #583414
+  * edac, mce: Fix wrong mask and macro usage
+    - LP: #583414
+  * x86-64: Clear a 64-bit FS/GS base on fork if selector is nonzero
+    - LP: #583414
+  * x86: Disable large pages on CPUs with Atom erratum AAE44
+    - LP: #583414
+  * x86, k8 nb: Fix boot crash: enable k8_northbridges unconditionally on
+    AMD systems
+    - LP: #583414
+  * x86, AMD: Fix stale cpuid4_info shared_map data in shared_cpu_map
+    cpumasks
+    - LP: #583414
+  * ALSA: hda: Use LPIB quirk for DG965OT board version AAD63733-203
+    - LP: #459083, #583414
+  * ALSA: hda - Add PCI quirk for HP dv6-1110ax.
+    - LP: #583414
+  * ALSA: hda: Use STAC_DELL_M6_BOTH quirk for Dell Studio XPS 1645
+    - LP: #553002, #583414
+  * ALSA: hda: Use STAC_DELL_M6_BOTH quirk for Dell Studio 1558
+    - LP: #568600, #583414
+  * ALSA: hda: Use ALC880_F1734 quirk for Fujitsu Siemens AMILO Xi 1526
+    - LP: #567494, #583414
+  * ALSA: snd-meastro3: Add amp_gpio quirk for Compaq EVO N600C
+    - LP: #583414
+  * ALSA: snd-meastro3: Ignore spurious HV interrupts during suspend /
+    resume
+    - LP: #583414
+  * ALSA: hda: Fix max PCM level to 0 dB for Fujitsu-Siemens laptops using
+    CX20549 (Venice)
+    - LP: #583414
+  * ALSA: hda: Fix 0 dB for Packard Bell models using Conexant CX20549
+    (Venice)
+    - LP: #541802, #583414
+  * ALSA: hda: Use olpc-xo-1_5 quirk for Toshiba Satellite Pro T130-15F
+    - LP: #573284, #583414
+  * ALSA: hda: Use olpc-xo-1_5 quirk for Toshiba Satellite
+    P500-PSPGSC-01800T
+    - LP: #549267, #583414
+  * libata: Fix accesses at LBA28 boundary (old bug, but nasty) (v2)
+    - LP: #583414
+  * ext4: correctly calculate number of blocks for fiemap
+    - LP: #474597, #583414
+  * initramfs: handle unrecognised decompressor when unpacking
+    - LP: #583414
+  * CRED: Fix a race in creds_are_invalid() in credentials debugging
+    - LP: #583414
+  * jfs: fix diAllocExt error in resizing filesystem
+    - LP: #583414
+  * ACPI: introduce kernel parameter acpi_sleep=sci_force_enable
+    - LP: #553498, #583414
+  * p54pci: rx frame length check
+    - LP: #583414
+  * drivers/net/wireless/p54/txrx.c Fix off by one error
+    - LP: #583414
+  * dccp_probe: Fix module load dependencies between dccp and dccp_probe
+    - LP: #583414
+  * KVM: remove unused load_segment_descriptor_to_kvm_desct
+    - LP: #583414
+  * kgdb: don't needlessly skip PAGE_USER test for Fsl booke
+    - LP: #583414
+  * r8169: use correct barrier between cacheable and non-cacheable memory
+    - LP: #562742, #583414
+  * r8169: fix broken register writes
+    - LP: #562742, #583414
+  * r8169: more broken register writes workaround
+    - LP: #562742, #583414
+  * PCI: Ensure we re-enable devices on resume
+    - LP: #566149, #583414
+  * skip sense logging for some ATA PASS-THROUGH cdbs
+    - LP: #583128, #583414
+  * tg3: Fix INTx fallback when MSI fails
+    - LP: #583414
+  * xfs: add a shrinker to background inode reclaim
+    - LP: #583414
+  * qla2xxx: Properly handle UNDERRUN completion statuses.
+    - LP: #583414
+  * bnx2: Fix lost MSI-X problem on 5709 NICs.
+    - LP: #583414
+  * tracing: Fix ftrace_event_call alignment for use with gcc 4.5
+    - LP: #583414
+  * security: testing the wrong variable in create_by_name()
+    - LP: #583414
+  * md: restore ability of spare drives to spin down.
+    - LP: #583414
+  * virtio: initialize earlier
+    - LP: #583414
+  * md/raid6: Fix raid-6 read-error correction in degraded state
+    - LP: #583414
+  * V4L/DVB: budget: Oops: "BUG: unable to handle kernel NULL pointer
+    dereference"
+    - LP: #583414
+  * ACPI: DMI init_set_sci_en_on_resume for multiple Lenovo ThinkPads
+    - LP: #583414
+  * power_meter: acpi_device_class "power_meter_resource" too long
+    - LP: #583414
+  * ACPI: sleep: init_set_sci_en_on_resume for Dell Studio 155x
+    - LP: #553498, #583414
+  * cpuidle: Fix incorrect optimization
+    - LP: #583414
+  * pxa/colibri: fix missing #include in colibri.h
+    - LP: #583414
+  * SCSI: fix locking around blk_abort_request()
+    - LP: #583414
+  * SCSI: libiscsi: regression: fix header digest errors
+    - LP: #583414
+  * scsi_debug: virtual_gb ignores sector_size
+    - LP: #583414
+  * Enable retries for SYNCRONIZE_CACHE commands to fix I/O error
+    - LP: #583414
+  * SCSI: Retry commands with UNIT_ATTENTION sense codes to fix ext3/ext4
+    I/O error
+    - LP: #583414
+  * MIPS: Sibyte: Apply M3 workaround only on affected chip types and
+    versions.
+    - LP: #583414
+  * Linux 2.6.32.13
+    - LP: #583414
+  * drm/i915: Add initial bits for VGA modesetting bringup on Sandybridge.
+    - LP: #583414
+  * drm/i915: fix tiling limits for i915 class hw v2
+    - LP: #583414
+  * Linux 2.6.32.13+drm33.4
+    - LP: #583414
+  * (pre-stable) Input: psmouse - reset all types of mice before
+    reconnecting
+    - LP: #551234
+  * ipv4: udp: fix short packet and bad checksum logging
+    - LP: #588832
+  * hp_accel: fix race in device removal
+    - LP: #588832
+  * fbdev: bfin-t350mcqb-fb: fix fbmem allocation with blanking lines
+    - LP: #588832
+  * hugetlbfs: kill applications that use MAP_NORESERVE with SIGBUS instead
+    of OOM-killer
+    - LP: #588832
+  * dma-mapping: fix dma_sync_single_range_*
+    - LP: #588832
+  * ACPI: sleep: eliminate duplicate entries in acpisleep_dmi_table[]
+    - LP: #588832
+  * mmc: atmel-mci: fix two parameters swapped
+    - LP: #588832
+  * mmc: atmel-mci: prevent kernel oops while removing card
+    - LP: #588832
+  * mmc: atmel-mci: remove data error interrupt after xfer
+    - LP: #588832
+  * ptrace: fix return value of do_syscall_trace_enter()
+    - LP: #588832
+  * powerpc/perf_event: Fix oops due to perf_event_do_pending call
+    - LP: #588832
+  * cifs: guard against hardlinking directories
+    - LP: #588832
+  * serial: imx.c: fix CTS trigger level lower to avoid lost chars
+    - LP: #588832
+  * ALSA: ice1724 - Fix ESI Maya44 capture source control
+    - LP: #588832
+  * ALSA: hda: Fix 0 dB for Lenovo models using Conexant CX20549 (Venice)
+    - LP: #588832
+  * inotify: race use after free/double free in inotify inode marks
+    - LP: #588832
+  * inotify: don't leak user struct on inotify release
+    - LP: #588832
+  * profile: fix stats and data leakage
+    - LP: #588832
+  * x86, k8: Fix build error when K8_NB is disabled
+    - LP: #588832
+  * x86, cacheinfo: Turn off L3 cache index disable feature in virtualized
+    environments
+    - LP: #588832
+  * x86, amd: Check X86_FEATURE_OSVW bit before accessing OSVW MSRs
+    - LP: #588832
+  * Btrfs: check for read permission on src file in the clone ioctl
+    - LP: #588832
+  * ALSA: hda - New Intel HDA controller
+    - LP: #588832
+  * proc: partially revert "procfs: provide stack information for threads"
+    - LP: #588832
+  * revert "procfs: provide stack information for threads" and its fixup
+    commits
+    - LP: #588832
+  * iwlwifi: clear all the stop_queue flag after load firmware
+    - LP: #588832
+  * p54: disable channels with incomplete calibration data sets
+    - LP: #588832
+  * CacheFiles: Fix error handling in cachefiles_determine_cache_security()
+    - LP: #588832
+  * megaraid_sas: fix for 32bit apps
+    - LP: #588832
+  * mmap_min_addr check CAP_SYS_RAWIO only for write
+    - LP: #588832
+  * nilfs2: fix sync silent failure
+    - LP: #588832
+  * crypto: authenc - Add EINPROGRESS check
+    - LP: #588832
+  * Linux 2.6.32.14
+    - LP: #588832
+  * drm/i915: use PIPE_CONTROL instruction on Ironlake and Sandy Bridge
+    - LP: #588832
+  * drm/i915: fix non-Ironlake 965 class crashes
+    - LP: #588832
+  * drm/i915: Disable FBC on 915GM and 945GM.
+    - LP: #492392, #588832
+  * Linux 2.6.32.14+drm33.5
+    - LP: #588832
+  * Linux 2.6.32.15+drm33.5
+    - LP: #588832
+  * HID: remove MODULE_VERSION from new drivers
+    - LP: #583531
+  * HID: fix N-trig touch panel with recent firmware
+    - LP: #583531
+  * HID: ntrig: explain firmware quirk
+    - LP: #583531
+  * HID: ntrig: Emit TOUCH with DOUBLETAP for single touch
+    - LP: #583531
+  * HID: ntrig: TipSwitch for single touch mode touch.
+    - LP: #583531
+  * HID: ntrig: Remove unused macro, TripleTap and QuadTap
+    - LP: #583531
+  * (pre-stable) drm/radeon/kms: initialize set_surface_reg reg for rs600
+    asic
+    - LP: #544590
+
+  [ Ubuntu: 2.6.32-22.36 ]
+
+  * Revert "kvm: restrict writing of segment selectors to segment
+    registers"
+    - LP: #589223
+
+ -- Stefan Bader  Sat, 12 Jun 2010 16:24:14 +0200
+
+linux-ec2 (2.6.32-306.11) lucid-security; urgency=low
+
+  [ Stefan Bader ]
+
+  * Rebase to 2.6.32-22.35
+  * [Config] EC2: Set CONFIG_BLK_DEV_DM=y to match enforcer settings
+
+  [ Ubuntu: 2.6.32-22.35 ]
+
+  * kvm: restrict writing of segment selectors to segment registers
+    - CVE-2010-0419
+  * tty: release_one_tty() forgets to put pids
+    - CVE-2010-1162
+  * oom: fix the unsafe usage of badness() in proc_oom_score()
+    - CVE-2010-1488
+  * Attempt #2 to handle null nameidata
+    - CVE-2010-1148
+  * reiserfs: fix permissions on .reiserfs_priv
+    - CVE-2010-1146
+  * r8169: offical fix for CVE-2009-4537 (overlength frame DMAs)
+    - CVE-2009-4537
+
+  [ Ubuntu: 2.6.32-22.33 ]
+
+  * SAUCE: ACPI: EC: Allow multibyte access to EC (v3)
+    - LP: #526354
+  * ubuntu: rtl8192se -- update to version 0015.0127.2010
+    - LP: #567016
+
+  [ Ubuntu: 2.6.32-21.32 ]
+
+  * SAUCE: i915 KMS -- support disabling KMS for known broken devices
+    - LP: #563277
+  * SAUCE: i915 KMS -- blacklist i830
+    - LP: #542208, #563277
+  * SAUCE: i915 KMS -- blacklist i845g
+    - LP: #541492, #563277
+  * SAUCE: i915 KMS -- blacklist i855
+    - LP: #511001, #541511, #563277
+  * SAUCE: radeon KMS -- support disabling KMS for known broken devices
+    - LP: #546743
+  * SAUCE: radeon KMS -- blacklist ES1000
+    - LP: #546743
+
+ -- Stefan Bader  Tue, 01 Jun 2010 12:14:05 +0200
+
+linux-ec2 (2.6.32-305.9) lucid; urgency=low
+
+  [ Andy Whitcroft ]
+
+  * rebase to Ubuntu-2.6.32-21.31
+  * SAUCE: xen -- TSC is not available under XEN
+
+  [ John Johansen ]
+
+  * [Config] enable NETFILTER_XT_MATCH_RECENT for ec2 kernels
+    - LP: #532553
+
+  [ Ubuntu: 2.6.32-21.31 ]
+
+  * allow modules.builtin to be optional
+  * d-i: add mpt2sas to the message-modules udeb
+    - LP: #530361
+  * SAUCE: Nouveau: Add quirk framework to disable acceleration
+    - LP: #544088, #546393
+  * SAUCE: Nouveau: Disable acceleration on MacBook Pros
+    - LP: #546393
+  * SAUCE: Nouveau: Disable acceleration on GeForce3 cards
+    - LP: #544088
+  * SAUCE: Nouveau: Disable acceleration on 6100 cards
+    - LP: #542950
+  * SAUCE: dma-mapping: Remove WARN_ON in dma_free_coherent
+    - LP: #458201
+  * SAUCE: sync before umount to reduce time taken by ext4 umount
+    - LP: #543617
+  * tipc: Fix oops on send prior to entering networked mode (v3)
+    - CVE-2010-1187
+  * KVM: x86 emulator: Add Virtual-8086 mode of emulation
+    - LP: #561425
+  * KVM: x86 emulator: fix memory access during x86 emulation
+    - LP: #561425
+  * KVM: x86 emulator: Check IOPL level during io instruction emulation
+    - LP: #561425
+  * KVM: x86 emulator: Fix popf emulation
+    - LP: #561425
+  * KVM: Fix segment descriptor loading
+    - LP: #561425
+  * KVM: VMX: Update instruction length on intercepted BP
+    - LP: #561425
+  * KVM: VMX: Use macros instead of hex value on cr0 initialization
+    - LP: #561425
+  * KVM: SVM: Reset cr0 properly on vcpu reset
+    - LP: #561425
+  * KVM: VMX: Disable unrestricted guest when EPT disabled
+    - LP: #561425
+  * KVM: x86: disable paravirt mmu reporting
+    - LP: #561425
+  * AppArmor: Fix put of unassigned ns if aa_unpack fails
+  * AppArmor: Fix refcount bug when exec fails
+    - LP: #562063
+  * AppArmor: Take refcount on cxt->profile to ensure it remains a valid
+    reference
+    - LP: #367499
+  * AppArmor: fix typo in scrubbing environment variable warning
+    - LP: #562060
+  * AppArmor: fix regression by setting default to mediate deleted files
+    - LP: #562056
+  * AppArmor: fix refcount order bug that can trigger during replacement
+    - LP: #367499
+  * AppArmor: Make sure to unmap aliases for vmalloced dfas before they are
+    live
+    - LP: #529288
+  * AppArmor: address performance regression of replaced profile
+    - LP: #549428
+  * AppArmor: make the global side the correct type
+    - LP: #562047
+  * AppArmor: use the kernel shared workqueue to free vmalloc'ed dfas
+  * sky2: add register definitions for new chips
+    - LP: #537168
+  * sky2: 88E8059 support
+    - LP: #537168
+  * net: Fix Yukon-2 Optima TCP offload setup
+    - LP: #537168
+  * net: Add missing TST_CFG_WRITE bits around sky2_pci_write
+    - LP: #537168
+  * sky2: print Optima chip name
+    - LP: #537168
+  * (Upstream) dell-laptop: defer dell_rfkill_update to worker thread
+    - LP: #555261
+  * drm/nv40: add LVDS table quirk for Dell Latitude D620
+    - LP: #539730
+
+  [ Ubuntu: 2.6.32-20.30 ]
+
+  * Revert "(pre-stable) ACPI: EC: Allow multibyte access to EC"
+    - LP: #561151
+
+  [ Ubuntu: 2.6.32-20.29 ]
+
+  * Revert "SAUCE: Use MODULE_IMPORT macro to tie intel_agp to i915"
+    - LP: #542251
+  * add Breaks: against hardy lvm2
+    - LP: #528155
+  * d-i -- enable udebs for generic-pae
+    - LP: #160366
+  * [Config] Add xen netboot support
+    - LP: #160366
+  * (pre-stable): input: Support Clickpad devices in ClickZone mode
+    - LP: #516329
+  * Revert "(pre-stable) Bluetooth: Fix sleeping function in RFCOMM within
+    invalid context"
+    - LP: #553837
+  * Revert "(pre-stable) USB: fix usbfs regression"
+    - LP: #553837
+  * Revert "(pre-stable) softlockup: Stop spurious softlockup messages due
+    to overflow"
+    - LP: #553837
+  * Revert "(pre-stable) drm/nouveau: report unknown connector state if lid
+    closed"
+    - LP: #553837
+  * drivers/scsi/ses.c: eliminate double free
+    - LP: #553837
+  * decompress: fix new decompressor for PIC
+    - LP: #553837
+  * ARM: Fix decompressor's kernel size estimation for ROM=y
+    - LP: #553837
+  * MIPS: Cleanup forgotten label_module_alloc in tlbex.c
+    - LP: #553837
+  * tg3: Fix tg3_poll_controller() passing wrong pointer to tg3_interrupt()
+    - LP: #553837
+  * tg3: Fix 5906 transmit hangs
+    - LP: #553837
+  * ALSA: hda - Fix input source elements of secondary ADCs on Realtek
+    - LP: #553837
+  * ALSA: hda: enable MSI for Gateway M-6866
+    - LP: #538918, #553837
+  * timekeeping: Prevent oops when GENERIC_TIME=n
+    - LP: #553837
+  * Input: alps - add support for the touchpad on Toshiba Tecra A11-11L
+    - LP: #553837
+  * Input: i8042 - add ALDI/MEDION netbook E1222 to qurik reset table
+    - LP: #553837
+  * i2c-i801: Don't use the block buffer for I2C block writes
+    - LP: #553837
+  * ath5k: dont use external sleep clock in AP mode
+    - LP: #553837
+  * ath5k: fix setup for CAB queue
+    - LP: #553837
+  * ring-buffer: Move disabled check into preempt disable section
+    - LP: #553837
+  * function-graph: Init curr_ret_stack with ret_stack
+    - LP: #553837
+  * Bluetooth: Fix sleeping function in RFCOMM within invalid context
+    - LP: #553837
+  * tracing: Use same local variable when resetting the ring buffer
+    - LP: #553837
+  * tracing: Disable buffer switching when starting or stopping trace
+    - LP: #553837
+  * tracing: Do not record user stack trace from NMI context
+    - LP: #553837
+  * PCI: unconditionally clear AER uncorr status register during cleanup
+    - LP: #553837
+  * efifb: fix framebuffer handoff
+    - LP: #553837
+  * coredump: suppress uid comparison test if core output files are pipes
+    - LP: #553837
+  * V4L/DVB (13961): em28xx-dvb: fix memleak in dvb_fini()
+    - LP: #553837
+  * hrtimer: Tune hrtimer_interrupt hang logic
+    - LP: #553837
+  * x86, apic: Don't use logical-flat mode when CPU hotplug may exceed 8
+    CPUs
+    - LP: #553837
+  * mvsas: add support for Adaptec ASC-1045/1405 SAS/SATA HBA
+    - LP: #553837
+  * pci: add support for 82576NS serdes to existing SR-IOV quirk
+    - LP: #553837
+  * sched: Mark boot-cpu active before smp_init()
+    - LP: #553837
+  * sparc64: Make prom entry spinlock NMI safe.
+    - LP: #553837
+  * sysctl: require CAP_SYS_RAWIO to set mmap_min_addr
+    - LP: #553837
+  * e1000e: enable new 82567V-3 device
+    - LP: #553837
+  * ixgbe: add support for 82599 KR device 0x1517
+    - LP: #553837
+  * ath9k: fix lockdep warning when unloading module
+    - LP: #553837
+  * mqueue: fix mq_open() file descriptor leak on user-space processes
+    - LP: #553837
+  * virtio: fix out of range array access
+    - LP: #553837
+  * sched: Fix SCHED_MC regression caused by change in sched cpu_power
+    - LP: #553837
+  * readahead: add blk_run_backing_dev
+    - LP: #553837
+  * ALSA: hda: Use LPIB and 6stack-dig for eMachines T5212
+    - LP: #538895, #553837
+  * ALSA: hda - Disable MSI for Nvidia controller
+    - LP: #553837
+  * ALSA: hda - Fix secondary ADC of ALC260 basic model
+    - LP: #553837
+  * ALSA: hda: Fix 0 dB offset for HP laptops using CX20551 (Waikiki)
+    - LP: #420578, #553837
+  * ALSA: cmipci: work around invalid PCM pointer
+    - LP: #553837
+  * gigaset: correct clearing of at_state strings on RING
+    - LP: #553837
+  * gigaset: prune use of tty_buffer_request_room
+    - LP: #553837
+  * perf: Make the install relative to DESTDIR if specified
+    - LP: #553837
+  * perf_event: Fix oops triggered by cpu offline/online
+    - LP: #553837
+  * tmpfs: fix oops on mounts with mpol=default
+    - LP: #553837
+  * tmpfs: mpol=bind:0 don't cause mount error.
+    - LP: #553837
+  * tmpfs: handle MPOL_LOCAL mount option properly
+    - LP: #553837
+  * tmpfs: cleanup mpol_parse_str()
+    - LP: #553837
+  * doc: add the documentation for mpol=local
+    - LP: #553837
+  * SCSI: scsi_transport_fc: Fix synchronization issue while deleting vport
+    - LP: #553837
+  * NFSv4: Don't ignore the NFS_INO_REVAL_FORCED flag in
+    nfs_revalidate_inode()
+    - LP: #553837
+  * NFS: Avoid a deadlock in nfs_release_page
+    - LP: #553837
+  * NFS: Prevent another deadlock in nfs_release_page()
+    - LP: #553837
+  * tty: Keep the default buffering to sub-page units
+    - LP: #553837
+  * tty: Take a 256 byte padding into account when buffering below sub-page
+    units
+    - LP: #553837
+  * USB: fix usbfs regression
+    - LP: #553837
+  * USB: EHCI: fix ITD list order
+    - LP: #553837
+  * USB: EHCI: adjust ehci_iso_stream for changes in ehci_qh
+    - LP: #553837
+  * USB: qcserial: add new device ids
+    - LP: #553837
+  * USB: xHCI: re-initialize cmd_completion
+    - LP: #553837
+  * USB: serial: ftdi: add CONTEC vendor and product id
+    - LP: #553837
+  * USB: option: fix incorrect manufacturer name in usb/serial/option:
+    MAXON->CMOTECH
+    - LP: #553837
+  * USB: option: move hardcoded PID to a macro in usb/serial/option
+    - LP: #553837
+  * USB: option: add support for a new CMOTECH device to usb/serial/option
+    - LP: #553837
+  * usb: r8a66597-hcd: fix removed from an attached hub
+    - LP: #553837
+  * wl1251: fix potential crash
+    - LP: #553837
+  * jme: Fix VLAN memory leak
+    - LP: #553837
+  * jme: Protect vlgrp structure by pause RX actions.
+    - LP: #553837
+  * edac, mce: Filter out invalid values
+    - LP: #553837
+  * iwlwifi: use dma_alloc_coherent
+    - LP: #553837
+  * iwlwifi: Silence tfds_in_queue message
+    - LP: #553837
+  * SUNRPC: Fix a potential memory leak in auth_gss
+    - LP: #553837
+  * sunrpc: handle allocation errors from __rpc_lookup_create()
+    - LP: #553837
+  * if_tunnel.h: add missing ams/byteorder.h include
+    - LP: #553837
+  * fs/partitions/msdos: add support for large disks
+    - LP: #553837
+  * fs/partition/msdos: fix unusable extended partition for > 512B sector
+    - LP: #553837
+  * PCI: fix return value from pcix_get_max_mmrbc()
+    - LP: #553837
+  * PCI: fix access of PCI_X_CMD by pcix get and set mmrbc functions
+    - LP: #553837
+  * PCI: cleanup error return for pcix get and set mmrbc functions
+    - LP: #553837
+  * rt2860sta: Fix argument to linux_pci_unmap_single()
+    - LP: #553837
+  * ath9k: fix BUG_ON triggered by PAE frames
+    - LP: #553837
+  * cpuset: fix the problem that cpuset_mem_spread_node() returns an
+    offline node
+    - LP: #553837
+  * softlockup: Stop spurious softlockup messages due to overflow
+    - LP: #553837
+  * netfilter: xt_recent: fix regression in rules using a zero hit_count
+    - LP: #553837
+  * x86: Fix placement of FIX_OHCI1394_BASE
+    - LP: #553837
+  * x86, amd: Restrict usage of c1e_idle()
+    - LP: #553837
+  * hwmon: (coretemp) Add missing newline to dev_warn() message
+    - LP: #553837
+  * ALSA: hda: Use LPIB for ga-ma770-ud3 board
+    - LP: #553837
+  * ALSA: ac97: Add Toshiba P500 to ac97 jack sense blacklist
+    - LP: #481058, #553837
+  * ALSA: ac97: Add IBM ThinkPad R40e to Headphone/Line Jack Sense
+    blacklist
+    - LP: #303789, #553837
+  * ALSA: hda: Use ALC260_WILL quirk for another Acer model (0x1025007f)
+    - LP: #418627, #553837
+  * ath9k: Enable TIM timer interrupt only when needed.
+    - LP: #553837
+  * mac80211: Retry null data frame for power save
+    - LP: #553837
+  * ath9k: Enable IEEE80211_HW_REPORTS_TX_ACK_STATUS flag for ath9k
+    - LP: #553837
+  * mac80211: Reset dynamic ps timer in Rx path.
+    - LP: #553837
+  * leds-gpio: fix default state handling on OF platforms
+    - LP: #553837
+  * quota: manage reserved space when quota is not active [v2]
+    - LP: #553837
+  * quota: Fix warning when a delayed write happens before quota is enabled
+    - LP: #553837
+  * ahci: use BIOS date in broken_suspend list
+    - LP: #553837
+  * Bluetooth: Fix potential bad memory access with sysfs files
+    - LP: #553837
+  * Bluetooth: Fix kernel crash on L2CAP stress tests
+    - LP: #553837
+  * sh: Fix zImage boot using fixed PMB.
+    - LP: #553837
+  * b43: Workaround circular locking in hw-tkip key update callback
+    - LP: #553837
+  * block: Backport of various I/O topology fixes from 2.6.33 and 2.6.34
+    - LP: #553837
+  * s3cmci: initialize default platform data no_wprotect and no_detect with
+    1
+    - LP: #553837
+  * x86: Fix sched_clock_cpu for systems with unsynchronized TSC
+    - LP: #553837
+  * GFS2: Skip check for mandatory locks when unlocking
+    - LP: #553837
+  * Linux 2.6.32.11
+    - LP: #553837
+  * drm/i915: fix small leak on overlay error path
+    - LP: #553837
+  * drm/i915: Avoid NULL deref in get_pages() unwind after error.
+    - LP: #553837
+  * drm/nouveau: report unknown connector state if lid closed
+    - LP: #553837
+  * Linux-2.6.32.11+drm33.2
+    - LP: #553837
+  * mmc: add module parameter to set whether cards are assumed removable
+    - LP: #477106
+  * (pre-stable) ACPI: EC: Allow multibyte access to EC
+    - LP: #526354
+  * PCI quirks: disable msi on AMD rs4xx internal gfx bridges
+    - LP: #509273
+  * drm/i915: Add dependency on the intel agp module
+    - LP: #542251
+  * (pre-stable) drm/edid: allow certain bogus edids to hit a fixup path
+    rather than fail
+    - LP: #540632
+  * drm/radeon/kms: rework pll algo selection
+    - LP: #538377
+  * drm/radeon/kms: update new pll algo
+    - LP: #538377
+  * PCI quirk: Disable MSI on VIA K8T890 systems
+    - LP: #544741
+  * sched: update load count only once per cpu in 10 tick update window
+    - LP: #513848
+
+ -- Andy Whitcroft  Wed, 14 Apr 2010 13:09:10 +0100
+
+linux-ec2 (2.6.32-304.8) lucid; urgency=low
+
+  [ Andy Whitcroft ]
+
+  * rebase to Ubuntu-2.6.32-19.28
+  * updateconfigs after rebase to Ubuntu-2.6.32-19.28
+
+  [ John Johansen ]
+
+  * [Config] fix EC2 config to remove soft lockup issue
+    - LP: #527208, #540378
+
+  [ Ubuntu: 2.6.32-19.28 ]
+
+  * [Config] enable various multitouch devices
+    - LP: #541453
+  * (pre-stable): input: ALPS - Add signature for HP Pavilion dm3 laptops
+    - LP: #545307
+  * SAUCE: Disable function tracing after hitting __schedule_bug
+  * SAUCE: Reduce ACPI resource conflict message to KERN_INFO, printf
+    cleanup
+    - LP: #440470
+  * SAUCE: drm/i915: don't change DRM configuration when releasing load
+    detect pipe
+    - LP: #488328
+  * SAUCE: AppArmor: Remove null_profile's use of PFLAG_NO_LIST_REF
+    - LP: #539437
+  * SAUCE: AppArmor: Stop page allocation warnings that can occur on policy
+    load
+    - LP: #458299
+  * SAUCE: AppArmor: Return string len rather than the allocation size
+    - LP: #551844
+  * SAUCE: AppArmor: Fix oops in profile verification if profile unpack
+    fails.
+  * [Config] Enable Nouveau DRM module on powerpc
+  * SAUCE: Pull in thinkpad-acpi from v2.6.34-rc1
+    - LP: #357673
+  * [Config] Enable thinkpad-acpi ALSA volume control
+    - LP: #357673
+  * SAUCE: drm/i915: Disable FBC on 915GM and 945GM
+    - LP: #492392, #539609
+  * Revert "(pre-stable) drm/i915: blacklist lid status: Sony VGN-BX196VP,
+    Dell Inspiron 700m"
+    - LP: #515246
+  * (pre-stable) softlockup: Stop spurious softlockup messages due to
+    overflow
+    - LP: #551068
+  * backlight: mbp_nvidia_bl - add five more MacBook variants
+    - LP: #511965
+  * drm/nv04-nv40: Fix up the programmed horizontal sync pulse delay.
+    - LP: #529130
+  * drm/nouveau: Fix fbcon corruption with font width not divisible by 8
+    - LP: #544739
+  * (pre-stable) USB: fix usbfs regression
+  * drm/radeon/bo: add some fallback placements for VRAM only objects.
+    - LP: #507148
+  * drm/radeon/kms: don't print error on -ERESTARTSYS.
+    - LP: #507148
+  * Input: add the ABS_MT_PRESSURE event
+    - LP: #541453
+  * HID: Support for 3M multitouch panel
+    - LP: #541453
+  * HID: make 3M PCT touchscreen driver standalone config option
+    - LP: #541453
+  * HID: add support for Stantum multitouch panel
+    - LP: #541453
+  * HID: make Stantum driver standalone config option
+    - LP: #541453
+  * HID: add support for Acer T230H multitouch
+    - LP: #541453
+  * HID: add support for Pixart Imaging Optical Touch Screen
+    - LP: #541453
+  * HID: fixed bug in single-touch emulation on the stantum panel
+    - LP: #541453
+  * HID: add pressure support for the Stantum multitouch panel
+    - LP: #541453
+  * HID: Support for MosArt multitouch panel
+    - LP: #541453
+  * HID: hid-ntrig add multi input quirk and clean up
+    - LP: #541453
+  * HID: n-trig: remove unnecessary tool switching
+    - LP: #541453
+  * HID: hid-ntrig: multitouch cleanup and fix
+    - LP: #541453
+  * HID: hid-ntrig: Single touch mode tap
+    - LP: #541453
+  * hid: ntrig touch events
+    - LP: #541453
+  * (pre-stable) x86-32, resume: do a global tlb flush in S4 resume
+    - LP: #531309
+  * drm/i915: Part of: Add initial bits for VGA modesetting bringup on
+    Sandybridge.
+    - LP: #515246
+  * drm/i915: Stop trying to use ACPI lid status to determine LVDS
+    connection.
+    - LP: #515246
+
+  [ Ubuntu: 2.6.32-18.27 ]
+
+  * SAUCE: Don't register vga16fb framebuffer if other framebuffers are
+    present
+    - LP: #527369
+  * [Config] armel/versatile: Set CRAMFS=m
+    - LP: #524893
+  * [Config] armel: Reset default command-line
+    - LP: #524893
+  * build/modules: Update d-i to reflect recent config changes
+    - LP: #546929
+  * (pre-stable) drm/nouveau: report unknown connector state if lid closed
+    - LP: #523072
+  * (pre-stable) Staging: rt2870: Add USB ID for Buffalo Airstation
+    WLI-UC-GN
+    - LP: #441990
+  * (pre-stable) iwlwifi: fix nfreed--
+    - LP: #545585
+  * (pre-stable) pata_via: Add VIA VX900 support
+    - LP: #548675
+
+  [ Ubuntu: 2.6.32-17.26 ]
+
+  * [Config] SECURITY_FILE_CAPABILITIES dissapeared in 2.6.33
+  * rules -- allow architecture configurations to be missing
+  * SAUCE: cdrom -- default to not locking the tray when in use
+    - LP: #397734
+  * expose the kernel EXTRAVERSION in dmesg and /proc/version_signature
+  * record the drm version in EXTRAVERSION
+  * linux-tools -- pull out the perf binary into a binary package
+  * [Config] enable MMIOTRACE for graphics debugging
+  * [Config] enable BLK_DEV_BSG
+  * debian -- fix builds when tools are disabled
+  * allow us to build default configs for automated builds
+  * config -- allow locally specified configuration overrides
+  * [Config] de-modularise PATA disk controllers
+  * [Config] de-modularise SATA disk controllers
+  * Revert "SAUCE: (pre-stable) netfilter: xt_recent: fix buffer overflow"
+    - LP: #540231
+  * Revert "SAUCE: (pre-stable) netfilter: xt_recent: fix false match"
+    - LP: #540231
+  * [Config] Update configs for 2.6.32.10
+    - LP: #540231
+  * [Config] Add vmw_pvscsi and vmxnet3 to -virtual flavour
+    - LP: #531017
+  * SAUCE: igb: Supress an upstream compiler complaint
+  * [Config] Fix sub-flavours package conflicts
+    - LP: #454827
+  * Revert "tpm_tis: TPM_STS_DATA_EXPECT workaround"
+    - LP: #540231
+  * Revert "(pre-stable) sched: Fix SMT scheduler regression in
+    find_busiest_queue()"
+    - LP: #540231
+  * (pre-stable) Bluetooth: Fix sleeping function in RFCOMM within invalid
+    context
+    - LP: #534549
+  * igb: remove unused temp variable from stats clearing path
+  * igb: update comments for serdes config and update to handle duplex
+  * igb: update the approach taken to acquiring and releasing the phy lock
+  * igb: add locking to reads of the i2c interface
+  * igb: add combined function for setting rar and pool bits
+  * igb: make use of the uta to allow for promiscous mode filter
+  * igb: add support for 82576NS SerDes adapter
+  * igb: add function to handle mailbox lock
+  * igb: fix a few items where weren't correctly setup for mbx timeout
+  * igb: change how we handle alternate mac addresses
+  * igb: remove microwire support from igb
+  * igb: move the generic copper link setup code into e1000_phy.c
+  * igb: add code to retry a phy read in the event of failure on link check
+  * igb: add additional error handling to the phy code
+  * igb: add flushes between RAR writes when setting mac address
+  * igb: Use the instance of net_device_stats from net_device.
+  * igb: Fix erroneous display of stats by ethtool -S
+  * igb: add new data structure for handling interrupts and NAPI
+  * igb: remove rx checksum good counter
+  * igb: increase minimum rx buffer size to 1K
+  * igb: move the tx and rx ring specific config into seperate functions
+  * igb: remove rx_ps_hdr_len
+  * igb: move SRRCTL register configuration into ring specific config
+  * igb: change the head and tail offsets into pointers
+  * igb: add pci device pointer to ring structure
+  * igb: move rx_buffer_len into the ring structure
+  * igb: move alloc_failed and csum_err stats into per rx-ring stat
+  * igb: add a flags value to the ring
+  * igb: place a pointer to the netdev struct in the ring itself
+  * igb: move the multiple receive queue configuration into seperate
+    function
+  * igb: delay VF reset notification until after interrupts are enabed
+  * igb: setup vlan tag replication stripping in igb_vmm_control
+  * igb: re-use ring configuration code in ethtool testing
+  * igb: make tx ring map and free functionality non-static
+  * igb: make ethtool use core xmit map and free functionality
+  * igb: add single vector msi-x testing to interrupt test
+  * igb: cleanup "todo" code found in igb_ethtool.c
+  * igb: add support for seperate tx-usecs setting in ethtool
+  * igb: cleanup some of the code related to hw timestamping
+  * igb: misc cleanups within igb_ethtool.c
+  * igb: use packet buffer sizes from RXPBS register
+  * igb: replace the VF clear_to_send with a flags value
+  * igb: rework use of VMOLR in regards to PF and VFs
+  * igb: rework handling of the vfta and vlvf registers in relation to
+    mng_vlan
+  * igb: move vf init into a seperate function
+  * igb: only process global stats in igb_update_stats
+  * igb: move global_quad_port_a from global into local static define
+  * igb: make tx hang check multiqueue, check eop descriptor
+  * igb: cleanup code related to ring resource allocation and free
+  * igb: change queue ordering for 82576 based adapters
+  * igb: cleanup interrupt enablement in regards to msix_other
+  * igb: Remove invalid stats counters
+  * igb: cleanup igb.h header whitespace and some structure formatting
+  * igb: cleanup igb xmit frame path
+  * igb: cleanup clean_rx_irq_adv and alloc_rx_buffers_adv
+  * igb: replace unecessary &adapter->hw with just hw where applicable
+  * igb: add pci_dev in few spots to clean up use of dev_err/info/warn
+  * igb: limit minimum mtu to 68 to keep ip bound to interface
+  * igb: open up SCTP checksum offloads to all MACs 82576 and newer
+  * igb: cleanup whitespace issues in igb_main.c
+  * igb: Fix warnings in igb_set_ringparam()
+  * igb: change type for ring sizes to u16 in igb_set_ring_param
+  * igb: move timesync init into a seperate function
+  * igb: when number of CPUs > 4 combine tx/rx queues to allow more queues
+  * igb: Rework how netdev->stats is handled
+  * igb: removed unused tx/rx total bytes/packets from adapter struct
+  * igb: check for packets on all tx rings when link is down
+  * igb: only recycle page if it is on our numa node
+  * igb: add support for the 82580 phy
+  * igb: add support for 82580 MAC
+  * igb: Add full support for 82580 devices
+  * igb: remove use of skb_dma_map from driver
+  * igb: fix handling of mailbox collisions between PF/VF
+  * igb: do not force pcs link when in KX mode
+  * igb: do not force retry count to 1 on 82580 phy
+  * igb: correctly offset 82575 flow control watermarks by 16 bytes
+  * igb: check both function bits in status register in wol exception
+  * igb: make certain to reassign legacy interrupt vectors after reset
+  * igb/igbvf: cleanup exception handling in tx_map_adv
+  * fix LOOKUP_FOLLOW on automount "symlinks"
+    - LP: #540231
+  * ARM: 5944/1: scsi: fix timer setup in fas216.c
+    - LP: #540231
+  * V4L/DVB: dvb: l64781.ko broken with gcc 4.5
+    - LP: #540231
+  * bfin: fix max timeout calculation
+    - LP: #540231
+  * V4L/DVB: Video : pwc : Fix regression in pwc_set_shutter_speed caused
+    by bad constant => sizeof conversion.
+    - LP: #540231
+  * V4L/DVB: bttv: Move I2C IR initialization
+    - LP: #540231
+  * V4L/DVB: cxusb: Select all required frontend and tuner modules
+    - LP: #540231
+  * memcg: fix oom killing a child process in an other cgroup
+    - LP: #540231
+  * fs/exec.c: fix initial stack reservation
+    - LP: #540231
+  * iwlwifi: error checking for number of tfds in queue
+    - LP: #540231
+  * iwlwifi: set HT flags after channel in rxon
+    - LP: #540231
+  * iwlwifi: sanity check before counting number of tfds can be free
+    - LP: #540231
+  * netlabel: fix export of SELinux categories > 127
+    - LP: #540231
+  * ahci: disable FPDMA auto-activate optimization on NVIDIA AHCI
+    - LP: #540231
+  * PCI hotplug: ibmphp: read the length of ebda and map entire ebda region
+    - LP: #540231
+  * PCI hotplug: check ioremap() return value in ibmphp_ebda.c
+    - LP: #540231
+  * ACPI: remove Asus P2B-DS from acpi=ht blacklist
+    - LP: #540231
+  * ACPI: fix "acpi=ht" boot option
+    - LP: #540231
+  * thinkpad-acpi: wrong thermal attribute_group removed in thermal_exit()
+    - LP: #540231
+  * ACPI: Be in TS_POLLING state during mwait based C-state entry
+    - LP: #540231
+  * mpt2sas: Delete volume before HBA detach.
+    - LP: #540231
+  * slab: initialize unused alien cache entry as NULL at
+    alloc_alien_cache().
+    - LP: #540231
+  * mac80211: quit addba_resp_timer if Tx BA session is torn down
+    - LP: #540231
+  * V4L/DVB (13991): gspca_mr973010a: Fix cif type 1 cameras not streaming
+    on UHCI controllers
+    - LP: #540231
+  * vfs: take f_lock on modifying f_mode after open time
+    - LP: #540231
+  * readahead: introduce FMODE_RANDOM for POSIX_FADV_RANDOM
+    - LP: #540231
+  * HID: remove TENX iBuddy from blacklist
+    - LP: #540231
+  * HID: add multi-input quirk for NextWindow Touchscreen.
+    - LP: #540231
+  * HID: usbhid: introduce timeout for stuck ctrl/out URBs
+    - LP: #540231
+  * airo: fix setting zero length WEP key
+    - LP: #540231
+  * idr: fix a critical misallocation bug, take#2
+    - LP: #540231
+  * Switch proc/self to nd_set_link()
+    - LP: #540231
+  * sparc: Align clone and signal stacks to 16 bytes.
+    - LP: #540231
+  * sparc32: Fix page_to_phys().
+    - LP: #540231
+  * sparc32: Fix struct stat uid/gid types.
+    - LP: #540231
+  * sparc: leds_resource.end assigned to itself in clock_board_probe()
+    - LP: #540231
+  * sparc64: Fix sun4u execute bit check in TSB I-TLB load.
+    - LP: #540231
+  * net: Fix sysctl restarts...
+    - LP: #540231
+  * net-sysfs: Use rtnl_trylock in wireless sysfs methods.
+    - LP: #540231
+  * net: bug fix for vlan + gro issue
+    - LP: #540231
+  * inet: Remove bogus IGMPv3 report handling
+    - LP: #540231
+  * ipv6: conntrack: Add member of user to nf_ct_frag6_queue structure
+    - LP: #540231
+  * drivers/net: ks8851_mll ethernet network driver
+    - LP: #540231
+  * sky2: fix transmit DMA map leakage
+    - LP: #540231
+  * SCSI: qla2xxx: Obtain proper host structure during response-queue
+    processing.
+    - LP: #540231
+  * rtc-core: fix memory leak
+    - LP: #540231
+  * offb: Add support for framebuffer handoff to offb.
+    - LP: #540231
+  * tpm_tis: TPM_STS_DATA_EXPECT workaround
+    - LP: #540231
+  * rndis_wlan: handle NL80211_AUTHTYPE_AUTOMATIC
+    - LP: #540231
+  * rndis_wlan: fix buffer overflow in rndis_query_oid
+    - LP: #540231
+  * rndis_wlan: disable stall workaround
+    - LP: #540231
+  * net/via-rhine: Fix scheduling while atomic bugs
+    - LP: #540231
+  * clocksource: Fix up a registration/IRQ race in the sh drivers.
+    - LP: #540231
+  * SCSI: qla1280: Drop host_lock while requesting firmware
+    - LP: #540231
+  * Staging: hv: add a pci device table
+    - LP: #540231
+  * Staging: hv: match on DMI values to know if we should run.
+    - LP: #540231
+  * Staging: mimio: remove the mimio driver
+    - LP: #540231
+  * dvb-core: Fix DoS bug in ULE decapsulation code that can be triggered
+    by an invalid Payload Pointer
+    - LP: #540231
+  * V4L/DVB (13148): uvcvideo: Handle V4L2_CTRL_TYPE_BUTTON control type in
+    VIDIOC_QUERYCTRL
+    - LP: #540231
+  * PM / Hibernate: Fix preallocating of memory
+    - LP: #540231
+  * macintosh/therm_adt746x: Fix sysfs attributes lifetime
+    - LP: #540231
+  * macintosh/hwmon/ams: Fix device removal sequence
+    - LP: #540231
+  * oprofile/x86: fix perfctr nmi reservation for mulitplexing
+    - LP: #540231
+  * oprofile: remove tracing build dependency
+    - LP: #540231
+  * oprofile/x86: remove node check in AMD IBS initialization
+    - LP: #540231
+  * oprofile/x86: use kzalloc() instead of kmalloc()
+    - LP: #540231
+  * oprofile/x86: fix msr access to reserved counters
+    - LP: #540231
+  * ALSA: hda: Use 3stack quirk for Toshiba Satellite L40-10Q
+    - LP: #524948, #540231
+  * ALSA: via82xx: add quirk for D1289 motherboard
+    - LP: #540231
+  * ALSA: pcm core - fix fifo_size channels interval check
+    - LP: #540231
+  * ALSA: USB MIDI support for Access Music VirusTI
+    - LP: #540231
+  * ALSA: hda: Use LPIB for Dell Latitude 131L
+    - LP: #530346, #540231
+  * ALSA: hda: Use LPIB for a Biostar Microtech board
+    - LP: #523953, #540231
+  * ALSA: hda - Add a position_fix quirk for MSI Wind U115
+    - LP: #540231
+  * ALSA: hda - Add position_fix quirk for HP dv3
+    - LP: #540231
+  * ALSA: hda-intel: Add position_fix quirk for ASUS M2V-MX SE.
+    - LP: #540231
+  * ASoC: fix ak4104 register array access
+    - LP: #540231
+  * driver-core: fix race condition in get_device_parent()
+    - LP: #540231
+  * Driver-Core: devtmpfs - reset inode permissions before unlinking
+    - LP: #540231
+  * tty: Fix the ldisc hangup race
+    - LP: #540231
+  * serial: imx: fix NULL dereference Oops when pdata == NULL
+    - LP: #540231
+  * USB: serial: sierra driver indat_callback fix
+    - LP: #511157, #540231
+  * USB: SIS USB2VGA DRIVER: support KAIREN's USB VGA adaptor
+    USB20SVGA-MB-PLUS
+    - LP: #540231
+  * USB: fix I2C API usage in ohci-pnx4008.
+    - LP: #540231
+  * p54usb: Add the USB ID for Belkin (Accton) FD7050E ver 1010ec
+    - LP: #540231
+  * p54pci: handle dma mapping errors
+    - LP: #540231
+  * gpiolib: Actually set output state in wm831x_gpio_direction_output()
+    - LP: #540231
+  * hwmon: (tmp421) Fix temperature conversions
+    - LP: #540231
+  * hwmon: (tmp421) Restore missing inputs
+    - LP: #540231
+  * pata_hpt3x2n: always stretch UltraDMA timing
+    - LP: #540231
+  * scm: Only support SCM_RIGHTS on unix domain sockets.
+    - LP: #540231
+  * ath9k: fix beacon timer restart after a card reset
+    - LP: #540231
+  * ath9k: fix rate control fallback rate selection
+    - LP: #540231
+  * ath9k: disable RIFS search for AR91xx based chips
+    - LP: #540231
+  * ath5k: use correct packet type when transmitting
+    - LP: #540231
+  * b43/b43legacy: Wake queues in wireless_core_start
+    - LP: #540231
+  * netfilter: xt_recent: fix buffer overflow
+    - LP: #540231
+  * netfilter: xt_recent: fix false match
+    - LP: #540231
+  * sunxvr500: Additional PCI id for sunxvr500 driver
+    - LP: #540231
+  * thinkpad-acpi: fix poll thread auto-start
+    - LP: #540231
+  * thinkpad-acpi: R52 brightness_mode has been confirmed
+    - LP: #540231
+  * thinkpad-acpi: document HKEY event 3006
+    - LP: #540231
+  * thinkpad-acpi: make driver events work in NVRAM poll mode
+    - LP: #540231
+  * thinkpad-acpi: fix bluetooth/wwan resume
+    - LP: #540231
+  * ocfs2: Only bug out in direct io write for reflinked extent.
+    - LP: #540231
+  * x86, ia32_aout: do not kill argument mapping
+    - LP: #540231
+  * x86: Add iMac9,1 to pci_reboot_dmi_table
+    - LP: #540231
+  * x86, xen: Disable highmem PTE allocation even when CONFIG_HIGHPTE=y
+    - LP: #540231
+  * x86: Avoid race condition in pci_enable_msix()
+    - LP: #540231
+  * x86: Fix SCI on IOAPIC != 0
+    - LP: #540231
+  * USB: xhci: Fix finding extended capabilities registers
+    - LP: #540231
+  * USB: fix the idProduct value for USB-3.0 root hubs
+    - LP: #540231
+  * USB: fix crash in uhci_scan_schedule
+    - LP: #540231
+  * USB: remove debugging message for uevent constructions
+    - LP: #540231
+  * USB: Move hcd free_dev call into usb_disconnect to fix oops
+    - LP: #540231
+  * USB: ftdi_sio: isolate all device IDs to new ftdi_sio_ids.h header
+    - LP: #540231
+  * USB: ftdi_sio: sort PID/VID entries in new ftdi_sio_ids.h header
+    - LP: #540231
+  * USB: ftdi_sio: new device id for papouch AD4USB
+    - LP: #540231
+  * USB: ftdi_sio: add device IDs (several ELV, one Mindstorms NXT)
+    - LP: #540231
+  * USB: add new ftdi_sio device ids
+    - LP: #540231
+  * USB: serial: ftdi: add CONTEC vendor and product id
+    - LP: #540231
+  * USB: cp210x: Add 81E8 (Zephyr Bioharness)
+    - LP: #540231
+  * USB: unusual_devs: Add support for multiple Option 3G sticks
+    - LP: #540231
+  * sunrpc: remove unnecessary svc_xprt_put
+    - LP: #540231
+  * SUNRPC: Handle EINVAL error returns from the TCP connect operation
+    - LP: #540231
+  * s3cmci: s3cmci_card_present: Use no_detect to decide whether there is a
+    card detect pin
+    - LP: #540231
+  * rtc-coh901331: fix braces in resume code
+    - LP: #540231
+  * NFS: Fix an allocation-under-spinlock bug
+    - LP: #540231
+  * dm: free dm_io before bio_endio not after
+    - LP: #540231
+  * KVM: x86 emulator: Add group8 instruction decoding
+    - LP: #540231
+  * KVM: x86 emulator: Forbid modifying CS segment register by mov
+    instruction
+    - LP: #540231
+  * KVM: x86 emulator: Add group9 instruction decoding
+    - LP: #540231
+  * KVM: x86 emulator: Check CPL level during privilege instruction
+    emulation
+    - LP: #540231
+  * sched: Fix sched_mv_power_savings for !SMT
+    - LP: #540231
+  * sched: Fix SMT scheduler regression in find_busiest_queue()
+    - LP: #540231
+  * sched: Don't use possibly stale sched_class
+    - LP: #540231
+  * x86, mm: Allow highmem user page tables to be disabled at boot time
+    - LP: #540231
+  * Linux 2.6.32.10
+    - LP: #540231
+  * drm/i915: give up on 8xx lid status
+    - LP: #540231
+  * drm/i915: Use a dmi quirk to skip a broken SDVO TV output.
+    - LP: #540231
+  * drm/ttm: handle OOM in ttm_tt_swapout
+    - LP: #540231
+  * drm/radeon/kms/atom: fix shr/shl ops
+    - LP: #540231
+  * Linux 2.6.32.10+drm33.1
+    - LP: #540231
+
+ -- Andy Whitcroft Wed, 31 Mar 2010 20:09:01 +0100
+
+linux-ec2 (2.6.32-303.7) lucid; urgency=low
+
+  [ Andy Whitcroft ]
+
+  * sync with master abstraction cleanup
+  * rebase to Ubuntu-2.6.32-12.17
+  * [Config] fix MMAP_MIN_ADDR/DEV_KMEM/USB_DEVICEFS
+  * rebase to Ubuntu-2.6.32-15.22
+  * [Config] update configs following rebase to Ubuntu-2.6.32-15.22
+  * SAUCE: XEN -- drop references to TIF_ABI_PENDING
+  * [Config] updateconfigs following rebase to Ubuntu-2.6.32-15.22
+  * rename the debug packages to match archive standard
+    - LP: #527837
+  * [Config] cpu_debug module is no longer built
+  * updateconfigs following rebase to Ubuntu-2.6.32-16.24
+  * lintian -- update debhelper package version dependancy
+  * lintian -- fix ghostscript dependancy
+  * lintian -- add required misc:Depends
+  * update to standards version 3.8.4.0
+  * linux-tools -- disable linux-tools generation
+  * rebase to Ubuntu-2.6.32-16.25
+  * d-i -- do not generate any udebs
+  * disable linux-libc-dev for ec2
+
+  [ John Johansen ]
+
+  * rebase to Ubuntu-2.6.32-16.24
+
+  [ Upstream Kernel Changes ]
+
+  * Linux 2.6.32.4
+
+  [ Ubuntu: 2.6.32-16.25 ]
+
+  * linux-tools -- move to Suggests: with explicit seeding
+    - LP: #534635
+  * [Config] CONFIG_HID=m
+  * (pre-stable) sched: Fix SMT scheduler regression in
+    find_busiest_queue()
+  * KVM: introduce kvm_vcpu_on_spin
+  * KVM: VMX: Add support for Pause-Loop Exiting
+
+  [ Ubuntu: 2.6.32-16.24 ]
+
+  * armel -- perf userspace does not support arm
+  * ia64 -- libelf-dev/binutils-dev to not provide necessary libraries
+
+  [ Ubuntu: 2.6.32-16.23 ]
+
+  * SAUCE: PM report driver and device suspend/resume times -- move config
+  * update to standards version 3.8.4.0
+  * printenv -- expose all of the package selectors
+  * source package -- cleanup source content control
+  * doc package -- ensure we do build package content on buildd
+  * lintian -- correct the address in the debian/copyright
+  * lintian -- update debhelper package version dependancy
+  * lintian -- fix ghostscript dependancy
+  * lintian -- add required misc:Depends
+  * lintian -- move our debhelper compat level to debian/compat
+  * perf -- build the kernel carried tools
+  * perf -- add linux-tools carrying the version switches and manuals
+  * SAUCE: fix up Kconfig for staging drivers
+  * [Config] enable NOUVEAU etc following drm backport
+  * update DRM to mainline v2.6.33
+  * [Config] Remove AppArmor config options that no longer exist (ports)
+  * [Config] updateportsconfigs following drm update
+  * ubuntu: AppArmor -- update to mainline 2010-03-04
+  * SAUCE: AppArmor: Reintroduce AppArmor 2.4 compatibility
+  * SAUCE: AppArmor: replace strim with strstrip for 2.6.32 kernels
+  * [Config] Remove AppArmor config options that no longer exist
+  * ubuntu: rtl8192se -- version 2010-0115,0014
+    - LP: #530275
+  * [Config] added CONFIG_RTL8192SE module.
+    - LP: #530275
+  * [Config] Added vmw_pvscsi to d-i/scsi-modules
+    - LP: #531017
+  * [Upstream] netfilter: xt_recent: Add an entry reaper
+  * Revert "KVM: x86 emulator: Check CPL level during privilege instruction
+    emulation"
+  * Revert "KVM: x86 emulator: Fix popf emulation"
+  * Revert "KVM: x86 emulator: Check IOPL level during io instruction
+    emulation"
+  * Revert "KVM: x86 emulator: Add Virtual-8086 mode of emulation"
+  * Revert "KVM: fix memory access during x86 emulation."
+  * Add vlan (8021.Q) module package for d-i.
+  * (pre-stable) drm/i915: blacklist lid status: Sony VGN-BX196VP, Dell
+    Inspiron 700m
+    - LP: #515246
+  * [Upstream] docbook: need xmldoclinks for all doc types
+  * x86: set_personality_ia32() misses force_personality32
+  * lib: Introduce generic list_sort function
+  * drm/nv50: Implement ctxprog/state generation.
+  * drm/nv50: Remove redundant/incorrect ctxvals initialisation.
+  * (pre-stable) drm/i915: blacklist lid status: Sony VGN-BX196VP, Dell
+    Inspiron 700m
+    - LP: #515246
+
+  [ Ubuntu: 2.6.32-15.22 ]
+
+  * Revert "[Config] added new config option CONFIG_SR_REPORT_TIME_LIMIT"
+  * Revert "SAUCE: PM report driver and device suspend/resume times."
+  * [Config] set CONFIG_SR_REPORT_TIME_LIMIT
+  * SAUCE: PM report driver and device suspend/resume times.
+
+  [ Ubuntu: 2.6.32-15.21 ]
+
+  * Revert "(pre-stable) drm/i915: Increase fb alignment to 64k"
+  * Revert "[Config] lenovo-sl-laptop -- enable"
+  * Revert "ubuntu: lenovo-sl-laptop -- git tip (b19a08f81f)"
+  * armel -- cramfs module will no longer be built
+  * d-i -- make all modules optional
+  * rename the debug packages to match archive standard
+    - LP: #527837
+  * lenovo-sl-laptop is no longer built
+  * Disable 4MB page tables for Atom, work around errata AAE44
+    - LP: #523112
+  * ubuntu: dm-raid4-5: Depend on XOR_BLOCKS
+  * ubuntu: fsam7400: Depend on CHECK_SIGNATURE
+  * SAUCE: drm/i915: don't change DRM configuration when releasing load
+    detect pipe
+    - LP: #488328
+  * [Config] armel Update versatile initrd configs
+    - LP: #524893
+  * SAUCE: [um] Don't use nx_enabled under UML
+    - LP: #524849
+  * [Config] added new config option CONFIG_SR_REPORT_TIME_LIMIT
+  * SAUCE: v3 - Add Dell Business Class Netbook LED driver
+  * SAUCE: PM report driver and device suspend/resume times.
+  * Revert "[Upstream] e1000e: enhance frame fragment detection"
+    - CVE-2009-4538
+  * Revert "[Upstream] e1000: enhance frame fragment detection"
+    - CVE-2009-4536
+  * [Config] Enabled CONFIG_LEDS_DELL_NETBOOKS=m
+  * SAUCE: (pre-stable) netfilter: xt_recent: fix buffer overflow
+  * SAUCE: (pre-stable) netfilter: xt_recent: fix false match
+  * Revert "(pre-stable) eCryptfs: Add getattr function"
+  * Fix potential crash with sys_move_pages
+  * futex_lock_pi() key refcnt fix
+  * futex: Handle user space corruption gracefully
+  * futex: Handle futex value corruption gracefully
+  * Fix race in tty_fasync() properly
+  * hwmon: (w83781d) Request I/O ports individually for probing
+  * hwmon: (lm78) Request I/O ports individually for probing
+  * hwmon: (adt7462) Wrong ADT7462_VOLT_COUNT
+  * ALSA: ctxfi - fix PTP address initialization
+  * drm/i915: disable hotplug detect before Ironlake CRT detect
+  * drm/i915: enable self-refresh on 965
+  * drm/i915: Disable SR when more than one pipe is enabled
+  * drm/i915: Fix DDC on some systems by clearing BIOS GMBUS setup.
+  * drm/i915: Add HP nx9020/SamsungSX20S to ACPI LID quirk list
+  * drm/i915: Fix the incorrect DMI string for Samsung SX20S laptop
+  * drm/i915: Add MALATA PC-81005 to ACPI LID quirk list
+  * usb: r8a66597-hcd: Flush the D-cache for the pipe-in transfer buffers.
+  * i2c-tiny-usb: Fix on big-endian systems
+  * drm/i915: handle FBC and self-refresh better
+  * drm/i915: Increase fb alignment to 64k
+  * drm/i915: Update write_domains on active list after flush.
+  * regulator: Fix display of null constraints for regulators
+  * ALSA: hda-intel: Avoid divide by zero crash
+  * CPUFREQ: Fix use after free of struct powernow_k8_data
+  * freeze_bdev: don't deactivate successfully frozen MS_RDONLY sb
+  * cciss: Make cciss_seq_show handle holes in the h->drv[] array
+  * ioat: fix infinite timeout checking in ioat2_quiesce
+  * resource: add helpers for fetching rlimits
+  * fs/exec.c: restrict initial stack space expansion to rlimit
+  * cifs: fix length calculation for converted unicode readdir names
+  * NFS: Fix a reference leak in nfs_wb_cancel_page()
+  * NFS: Try to commit unstable writes in nfs_release_page()
+  * NFSv4: Don't allow posix locking against servers that don't support it
+  * NFSv4: Ensure that the NFSv4 locking can recover from stateid errors
+  * NFS: Fix an Oops when truncating a file
+  * NFS: Fix a umount race
+  * NFS: Fix a bug in nfs_fscache_release_page()
+  * NFS: Fix the mapping of the NFSERR_SERVERFAULT error
+  * md: fix 'degraded' calculation when starting a reshape.
+  * V4L/DVB: dvb-core: fix initialization of feeds list in demux filter
+  * Export the symbol of getboottime and mmonotonic_to_bootbased
+  * kvmclock: count total_sleep_time when updating guest clock
+  * KVM: PIT: control word is write-only
+  * tpm_infineon: fix suspend/resume handler for pnp_driver
+  * amd64_edac: Do not falsely trigger kerneloops
+  * netfilter: nf_conntrack: fix memory corruption with multiple namespaces
+  * netfilter: nf_conntrack: per netns nf_conntrack_cachep
+  * netfilter: nf_conntrack: restrict runtime expect hashsize modifications
+  * netfilter: xtables: compat out of scope fix
+  * netfilter: nf_conntrack: fix hash resizing with namespaces
+  * drm/i915: remove full registers dump debug
+  * drm/i915: add i915_lp_ring_sync helper
+  * drm/i915: Don't wait interruptible for possible plane buffer flush
+  * dasd: remove strings from s390dbf
+  * crypto: padlock-sha - Add import/export support
+  * wmi: Free the allocated acpi objects through wmi_get_event_data
+  * dell-wmi, hp-wmi, msi-wmi: check wmi_get_event_data() return value
+  * /dev/mem: introduce size_inside_page()
+  * devmem: check vmalloc address on kmem read/write
+  * devmem: fix kmem write bug on memory holes
+  * SCSI: mptfusion : mptscsih_abort return value should be SUCCESS instead
+    of value 0.
+  * sh: Couple kernel and user write page perm bits for CONFIG_X2TLB
+  * ALSA: hda - use WARN_ON_ONCE() for zero-division detection
+  * dst: call cond_resched() in dst_gc_task()
+  * ALSA: hda - Improved MacBook (Pro) 5,1 / 5,2 support
+  * befs: fix leak
+  * rtc-fm3130: add missing braces
+  * Call flush_dcache_page after PIO data transfers in libata-sff.c
+  * ahci: add Acer G725 to broken suspend list
+  * pktgen: Fix freezing problem
+  * x86/amd-iommu: Fix IOMMU-API initialization for iommu=pt
+  * x86/amd-iommu: Fix deassignment of a device from the pt_domain
+  * x86: Re-get cfg_new in case reuse/move irq_desc
+  * Staging: fix rtl8187se compilation errors with mac80211
+  * ALSA: usb-audio - Avoid Oops after disconnect
+  * serial: 8250: add serial transmitter fully empty test
+  * sysfs: sysfs_sd_setattr set iattrs unconditionally
+  * class: Free the class private data in class_release
+  * USB: usbfs: only copy the actual data received
+  * USB: usbfs: properly clean up the as structure on error paths
+  * rtl8187: Add new device ID
+  * ACPI: Add NULL pointer check in acpi_bus_start
+  * ACPI: fix High cpu temperature with 2.6.32
+  * drm/radeon/kms: use udelay for short delays
+  * NFS: Too many GETATTR and ACCESS calls after direct I/O
+  * eCryptfs: Add getattr function
+  * b43: Fix throughput regression
+  * ath9k: Fix sequence numbers for PAE frames
+  * mac80211: Fix probe request filtering in IBSS mode
+  * iwlwifi: Fix to set correct ht configuration
+  * dm stripe: avoid divide by zero with invalid stripe count
+  * dm log: userspace fix overhead_size calcuations
+  * Linux 2.6.32.9
+  * sfc: Fix SFE4002 initialisation
+  * sfc: Fix sign of efx_mcdi_poll_reboot() error in efx_mcdi_poll()
+  * sfc: SFE4002/SFN4112F: Widen temperature and voltage tolerances
+  * (pre-stable) HID: handle joysticks with large number of buttons
+    - LP: #492056
+  * (pre-stable) HID: extend mask for BUTTON usage page
+    - LP: #492056
+  * PM: Measure device suspend and resume times
+  * e1000: enhance frame fragment detection
+    - CVE-2009-4536
+  * e1000e: enhance frame fragment detection
+    - CVE-2009-4538
+  * KVM: fix memory access during x86 emulation.
+    - CVE-2010-0306
+  * KVM: x86 emulator: Add Virtual-8086 mode of emulation
+    - CVE-2010-0306
+  * KVM: x86 emulator: Check IOPL level during io instruction emulation
+    - CVE-2010-0306
+  * KVM: x86 emulator: Fix popf emulation
+    - CVE-2010-0306
+  * KVM: x86 emulator: Check CPL level during privilege instruction
+    emulation
+    - CVE-2010-0306
+  * Input: wacom - ensure the device is initialized properly upon resume
+  * Input: wacom - add defines for packet lengths of various devices
+  * Input: wacom - add support for new LCD tablets
+    - LP: #516777
+
+  [ Ubuntu: 2.6.32-14.20 ]
+
+  * rebuild following the GCC update to match compiler for out of tree modules
+  * Revert "[Config] drbd -- enable"
+  * Revert "ubuntu: drbd -- version 8.3.1"
+  * SAUCE: khubd -- switch USB product/manufacturer/serial handling to RCU
+    - LP: #510937
+
+  [ Ubuntu: 2.6.32-14.19 ]
+
+  * ensure we build the source package contents when enabled
+    - LP: #522308
+  * [Config] enable CONFIG_X86_MCE_XEON75XX
+  * SAUCE: AppArmor -- add linux/kref.h for struct kref
+  * [Config] enable CONFIG_HID_ORTEK
+  * enable udeb generation for arm versatile flavour
+    - LP: #522515
+  * ubuntu: AppArmor -- update to mainline 2010-02-18
+    - LP: #439560, #496110, #507069
+  * SAUCE: HID: add support for Ortek WKB-2000
+    - LP: #405390
+  * tpm_tis: TPM_STS_DATA_EXPECT workaround
+    - LP: #490487
+  * x86, mce: Xeon75xx specific interface to get corrected memory error
+    information
+  * x86, mce: Rename cpu_specific_poll to mce_cpu_specific_poll
+  * x86, mce: Make xeon75xx memory driver dependent on PCI
+  * drm/edid: Unify detailed block parsing between base and extension
+    blocks
+    - LP: #500999
+  * (pre-stable) eCryptfs: Add getattr function
+    - LP: #390833
+
+  [ Ubuntu: 2.6.32-13.18 ]
+
+  * Revert "enforcer -- make the enforcement configuration common"
+  * Revert "(pre-stable) Input: ALPS - add interleaved protocol support
+    (Dell E6x00 series)"
+  * Revert "(pre-stable) driver-core: fix devtmpfs crash on s390"
+  * Revert "(pre-stable) Driver-Core: devtmpfs - set root directory mode to
+    0755"
+  * Revert "SAUCE: Adds support for COMPAL JHL90 webcam"
+  * Revert "SAUCE: fix kernel oops in VirtualBox during paravirt patching"
+  * Revert "SAUCE: make fc transport removal of target configurable"
+  * enforcer -- make the enforcement configuration common
+  * getabis -- add preempt flavour to the list
+  * [Config] enforce DEVTMPFS options
+  * [Config] armel -- cleanup to-be builtin modules
+  * [Config] cleanup ports configs
+  * [Config] enable CRYPTO_GHASH_CLMUL_NI_INTEL
+    - LP: #485536
+  * add printdebian target to find branch target
+  * distclean -- do not remove debian.env
+  * [Config] generic-pae switch to M586TSC
+    - LP: #519448
+  * git-ubuntu-log -- commonise duplicated log handling
+  * git-ubuntu-log -- tighten up Bug: NNNN matching
+  * git-ubuntu-log -- sort the bug numbers
+  * (pre-stable) drm/i915: Increase fb alignment to 64k
+    - LP: #404064
+  * arm -- enable ubuntu/ directory
+  * SAUCE: crypto: ghash - Add PCLMULQDQ accelerated implementation
+  * SAUCE: crypto: ghash-intel - Fix building failure on x86_32
+  * [Config] cleanup preempt configuration
+  * [Config] versatile: Fix video output
+    - LP: #517594
+  * [Config] armel DEFAULT_MMAP_MIN_ADDR=32768
+  * [Config] Large update to armel/versatile
+  * [Config] versatile: Add RTC support
+  * [Config] armel: Enable NEON
+  * [Config] versatile: Builtin MMC support
+  * [Config] versatile Builtin SCSI controller
+  * [Config] armel Disable dma_cache_sync callers
+  * [Config] armel Disable asm/time.h users
+  * [Config] armel Disable out of range udelay()
+  * [Config] armel Disable flush_cache_range() users
+  * [Config] armel -- Enable ubuntu/ drivers
+  * SAUCE: drm/i915: Add display hotplug event on Ironlake
+  * SAUCE: drm/i915: Add ACPI OpRegion support for Ironlake
+  * Revert "[Upstream]: oprofile/x86: add Xeon 7500 series support"
+  * Revert "Revert "[Bluetooth] Eliminate checks for impossible conditions
+    in IRQ handler""
+  * clockevent: Don't remove broadcast device when cpu is dead
+  * clockevents: Add missing include to pacify sparse
+  * ACPI: don't cond_resched if irq is disabled
+  * be2net: Add support for next generation of BladeEngine device.
+  * be2net: Add the new PCI IDs to PCI_DEVICE_TABLE.
+  * mpt2sas: New device SAS2208 support is added
+  * ar9170: Add support for D-Link DWA 160 A2
+  * powerpc/fsl: Add PCI device ids for new QoirQ chips
+  * davinci: dm646x: Add support for 3.x silicon revision
+  * Input: ALPS - add interleaved protocol support (Dell E6x00 series)
+  * Driver-Core: devtmpfs - set root directory mode to 0755
+  * driver-core: fix devtmpfs crash on s390
+  * vfs: get_sb_single() - do not pass options twice
+  * ALSA: hda - Add PCI IDs for Nvidia G2xx-series
+  * V4L/DVB (13569): smsusb: add autodetection support for five additional
+    Hauppauge USB IDs
+  * USB: mos7840: add device IDs for B&B electronics devices
+  * USB: ftdi_sio: add USB device ID's for B&B Electronics line
+  * V4L/DVB (13168): Add support for Asus Europa Hybrid DVB-T card (SAA7134
+    SubVendor ID: 0x1043 Device ID: 0x4847)
+  * iTCO_wdt: Add support for Intel Ibex Peak
+  * atl1c:use common_task instead of reset_task and link_chg_task
+  * atl1e:disable NETIF_F_TSO6 for hardware limit
+  * V4L/DVB (13680a): DocBook/media: copy images after building HTML
+  * V4L/DVB (13680b): DocBook/media: create links for included sources
+  * netfilter: xtables: fix conntrack match v1 ipt-save output
+  * partitions: read whole sector with EFI GPT header
+  * partitions: use sector size for EFI GPT
+  * ALSA: ice1724 - Patch for suspend/resume for ESI Juli@
+  * sched: Fix isolcpus boot option
+  * sched: Fix missing sched tunable recalculation on cpu add/remove
+  * nohz: Prevent clocksource wrapping during idle
+  * nfsd: Fix sort_pacl in fs/nfsd/nf4acl.c to actually sort groups
+  * timers, init: Limit the number of per cpu calibration bootup messages
+  * PCI: Always set prefetchable base/limit upper32 registers
+  * iscsi class: modify handling of replacement timeout
+  * NFS: Revert default r/wsize behavior
+  * HID: fixup quirk for NCR devices
+  * scsi_devinfo: update Hitachi entries (v2)
+  * scsi_dh: create sysfs file, dh_state for all SCSI disk devices
+  * scsi_transport_fc: remove invalid BUG_ON
+  * lpfc: fix hang on SGI ia64 platform
+  * libfc: fix typo in retry check on received PRLI
+  * libfc: fix ddp in fc_fcp for 0 xid
+  * fcoe: remove redundant checking of netdev->netdev_ops
+  * libfc: Fix wrong scsi return status under FC_DATA_UNDRUN
+  * libfc: lport: fix minor documentation errors
+  * libfc: don't WARN_ON in lport_timeout for RESET state
+  * fcoe: initialize return value in fcoe_destroy
+  * libfc: Fix frags in frame exceeding SKB_MAX_FRAGS in fc_fcp_send_data
+  * libfc: fix memory corruption caused by double frees and bad error
+    handling
+  * libfc: fix free of fc_rport_priv with timer pending
+  * libfc: remote port gets stuck in restart state without really
+    restarting
+  * fcoe, libfc: fix an libfc issue with queue ramp down in libfc
+  * fcoe: Fix checking san mac address
+  * fcoe: Fix getting san mac for VLAN interface
+  * qlge: Remove explicit setting of PCI Dev CTL reg.
+  * qlge: Set PCIE max read request size.
+  * qlge: Don't fail open when port is not initialized.
+  * qlge: Add handler for DCBX firmware event.
+  * qlge: Bonding fix for mode 6.
+  * PCI: AER: fix aer inject result in kernel oops
+  * DMI: allow omitting ident strings in DMI tables
+  * Input: i8042 - remove identification strings from DMI tables
+  * Input: i8042 - add Gigabyte M1022M to the noloop list
+  * Input: i8042 - add Dritek quirk for Acer Aspire 5610.
+  * ALSA: hda - select IbexPeak handler for Calpella
+  * ALSA: hda - Fix quirk for Maxdata obook4-1
+  * ALSA: hda - Add missing Line-Out and PCM switches as slave
+  * iTCO_wdt.c - cleanup chipset documentation
+  * iTCO_wdt: add PCI ID for the Intel EP80579 (Tolapai) SoC
+  * iTCO_wdt: Add Intel Cougar Point and PCH DeviceIDs
+  * ahci: disable SNotification capability for ich8
+  * ata_piix: fix MWDMA handling on PIIX3
+  * md: fix small irregularity with start_ro module parameter
+  * V4L/DVB (13826): uvcvideo: Fix controls blacklisting
+  * cio: fix double free in case of probe failure
+  * cio: dont panic in non-fatal conditions
+  * netiucv: displayed TX bytes value much too high
+  * ipc ns: fix memory leak (idr)
+  * ALSA: hda - Fix HP T5735 automute
+  * hwmon: (fschmd) Fix a memleak on multiple opens of /dev/watchdog
+  * UBI: fix memory leak in update path
+  * UBI: initialise update marker
+  * ASoC: fix a memory-leak in wm8903
+  * mac80211: check that ieee80211_set_power_mgmt only handles STA
+    interfaces.
+  * cfg80211: fix channel setting for wext
+  * KVM: S390: fix potential array overrun in intercept handling
+  * KVM: only allow one gsi per fd
+  * KVM: Fix race between APIC TMR and IRR
+  * KVM: MMU: bail out pagewalk on kvm_read_guest error
+  * KVM: x86: Fix host_mapping_level()
+  * KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks
+  * KVM: x86: Fix leak of free lapic date in kvm_arch_vcpu_init()
+  * KVM: fix lock imbalance in kvm_*_irq_source_id()
+  * KVM: only clear irq_source_id if irqchip is present
+  * IPoIB: Clear ipoib_neigh.dgid in ipoib_neigh_alloc()
+  * x86: Reenable TSC sync check at boot, even with NONSTOP_TSC
+  * ACPI: enable C2 and Turbo-mode on Nehalem notebooks on A/C
+    - LP: #516325
+  * iwlwifi: Fix throughput stall issue in HT mode for 5000
+  * fnctl: f_modown should call write_lock_irqsave/restore
+  * x86, msr/cpuid: Pass the number of minors when unregistering MSR and
+    CPUID drivers.
+  * Linux 2.6.32.7
+  * scsi_lib: Fix bug in completion of bidi commands
+  * mptsas: Fix issue with chain pools allocation on katmai
+  * mm: add new 'read_cache_page_gfp()' helper function
+  * drm/i915: Selectively enable self-reclaim
+  * firewire: ohci: fix crashes with TSB43AB23 on 64bit systems
+  * S390: fix single stepped svcs with TRACE_IRQFLAGS=y
+  * x86: Set hotpluggable nodes in nodes_possible_map
+  * x86: Remove "x86 CPU features in debugfs" (CONFIG_X86_CPU_DEBUG)
+  * libata: retry FS IOs even if it has failed with AC_ERR_INVALID
+  * zcrypt: Do not remove coprocessor for error 8/72
+  * dasd: fix possible NULL pointer errors
+  * ACPI: Add a generic API for _OSC -v2
+  * ACPI: Add platform-wide _OSC support.
+  * ACPI: fix OSC regression that caused aer and pciehp not to load
+  * ACPI: Advertise to BIOS in _OSC: _OST on _PPC changes
+  * UBI: fix volume creation input checking
+  * e1000/e1000e: don't use small hardware rx buffers
+  * drm/i915: Reload hangcheck timer too for Ironlake
+  * Fix a leak in affs_fill_super()
+  * Fix failure exits in bfs_fill_super()
+  * fix oops in fs/9p late mount failure
+  * fix leak in romfs_fill_super()
+  * Fix remount races with symlink handling in affs
+  * fix affs parse_options()
+  * Fix failure exit in ipathfs
+  * mm: fix migratetype bug which slowed swapping
+  * FDPIC: Respect PT_GNU_STACK exec protection markings when creating
+    NOMMU stack
+  * Split 'flush_old_exec' into two functions
+  * sparc: TIF_ABI_PENDING bit removal
+  * x86: get rid of the insane TIF_ABI_PENDING bit
+  * Input: winbond-cir - remove dmesg spam
+  * x86: Disable HPET MSI on ATI SB700/SB800
+  * iwlwifi: set default aggregation frame count limit to 31
+  * drm/i915: only enable hotplug for detected outputs
+  * firewire: core: add_descriptor size check
+  * SECURITY: selinux, fix update_rlimit_cpu parameter
+  * regulator: Specify REGULATOR_CHANGE_STATUS for WM835x LED constraints
+  * x86: Add Dell OptiPlex 760 reboot quirk
+    - LP: #488319
+  * x86: Add quirk for Intel DG45FC board to avoid low memory corruption
+  * x86/amd-iommu: Fix possible integer overflow
+  * clocksource: fix compilation if no GENERIC_TIME
+  * tcp: update the netstamp_needed counter when cloning sockets
+  * sky2: Fix oops in sky2_xmit_frame() after TX timeout
+  * net: restore ip source validation
+  * af_packet: Don't use skb after dev_queue_xmit()
+  * ax25: netrom: rose: Fix timer oopses
+  * KVM: allow userspace to adjust kvmclock offset
+  * oprofile/x86: add Xeon 7500 series support
+  * oprofile/x86: fix crash when profiling more than 28 events
+  * libata: retry link resume if necessary
+  * mm: percpu-vmap fix RCU list walking
+  * mm: purge fragmented percpu vmap blocks
+  * block: fix bio_add_page for non trivial merge_bvec_fn case
+  * Fix 'flush_old_exec()/setup_new_exec()' split
+  * random: drop weird m_time/a_time manipulation
+  * random: Remove unused inode variable
+  * block: fix bugs in bio-integrity mempool usage
+  * usb: r8a66597-hdc disable interrupts fix
+  * connector: Delete buggy notification code.
+  * be2net: Bug fix to support newer generation of BE ASIC
+  * be2net: Fix memset() arg ordering.
+  * mm: flush dcache before writing into page to avoid alias
+  * mac80211: fix NULL pointer dereference when ftrace is enabled
+  * imxfb: correct location of callbacks in suspend and resume
+  * mx3fb: some debug and initialisation fixes
+  * starfire: clean up properly if firmware loading fails
+  * kernel/cred.c: use kmem_cache_free
+  * uartlite: fix crash when using as console
+  * pktcdvd: removing device does not remove its sysfs dir
+  * ath9k: fix eeprom INI values override for 2GHz-only cards
+  * ath9k: fix beacon slot/buffer leak
+  * powerpc: TIF_ABI_PENDING bit removal
+  * NET: fix oops at bootime in sysctl code
+  * Linux 2.6.32.8
+
+  [ Ubuntu: 2.6.32-12.17 ]
+
+  * restore linux-image prefix -- master
+  * enforce -- we require SELINUX enabled -- master
+  * enforce -- ensure APPARMOR is our default LSM -- master
+  * make doc package completely optional -- master
+  * make source package completely optional -- master
+  * make linux-libc-dev completly optional -- master
+  * convert package disable to a deps list -- master
+  * allow common headers to switch from indep to arch -- master
+  * convert binary package disable to a deps list -- master
+  * add configuration option for a full source build tree -- master
+  * add support for uImage kernels in package control scripts
+  * getabis -- cleanup and parameterise repository list -- master
+  * getabis -- move configuration to etc/getabi -- master
+  * kernelconfig -- move configuration to etc -- master
+  * rules -- make debian/debian.env master for branch name
+  * set the current branch name -- master
+  * pull back common debian.master files into debian -- master
+  * enforcer -- make the enforcement configuration common
+  * insert-changes -- correctly link to debian/rules in DROOT
+  * future-proof ddeb handling against buildd changes
+  * SAUCE: Make CONFIG_{OMNIBOOK, AVERATEC_5100P, PACKARDBELL_E5} depend on
+    X86
+  * Add modules.builtin.bin to prerm rm list
+    - LP: #516584
+  * [Config] Implement the amd64 preempt flavour
+  * syslog: distinguish between /proc/kmsg and syscalls
+    - LP: #515623
+  * sfc: Fix polling for slow MCDI operations
+  * sfc: Fix conditions for MDIO self-test
+  * sfc: QT202x: Remove unreliable MMD check at initialisation
+  * sfc: Add workspace for GMAC bug workaround to MCDI MAC_STATS buffer
+  * sfc: Use fixed-size buffers for MCDI NVRAM requests
+
+ -- Andy Whitcroft Tue, 09 Mar 2010 16:09:44 +0000
+
+linux-ec2 (2.6.32-302.6) lucid; urgency=low
+
+  [ John Johansen ]
+
+  * rebase to Ubuntu-2.6.32-11.15
+  * import raw xen patchset
+  * import series file for the xen patchset
+  * rebase to Ubuntu-2.6.32-12.16
+  * [Config] update ec2 configs to make them closer to -server flavor
+    - LP: #510130
+
+  [ Ubuntu: 2.6.32-12.16 ]
+
+  * Revert "SAUCE: acpi battery -- delay first lookup of the battery until
+    first use"
+  * SAUCE: acpi battery -- move first lookup asynchronous
+    - LP: #507211
+  * [Config] update configs to cleanup generic configs
+  * [Config] disable CONFIG_X86_CPU_DEBUG for amd64
+  * [Config] enable USER_NS
+    - LP: #480739, #509808
+  * (pre-stable) driver-core: fix devtmpfs crash on s390
+    - LP: #512370
+  * [Config] for server and virtual flavours make CONFIG_SCSI_SYM53C8XX_2=y
+    - LP: #494565
+  * [Config] VIRTIO=y for server/virtual flavours
+    - LP: #494565
+  * (pre-stable) Driver-Core: devtmpfs - set root directory mode to 0755
+    - LP: #512370
+  * SAUCE: x86: brk away from exec rand area
+    - LP: #452175
+  * [Upstream] e1000: enhance frame fragment detection
+    - CVE-2009-4536
+  * [Upstream] e1000e: enhance frame fragment detection
+    - CVE-2009-4538
+  * (pre-stable) Input: ALPS - add interleaved protocol support (Dell E6x00
+    series)
+    - LP: #296610
+  * inotify: do not reuse watch descriptors
+    - LP: #485556
+  * inotify: only warn once for inotify problems
+  * revert "drivers/video/s3c-fb.c: fix clock setting for Samsung SoC
+    Framebuffer"
+  * memcg: ensure list is empty at rmdir
+  * drm/i915: remove loop in Ironlake interrupt handler
+  * block: Fix incorrect reporting of partition alignment
+  * x86, mce: Thermal monitoring depends on APIC being enabled
+  * futexes: Remove rw parameter from get_futex_key()
+  * page allocator: update NR_FREE_PAGES only when necessary
+  * x86, apic: use physical mode for IBM summit platforms
+  * edac: i5000_edac critical fix panic out of bounds
+  * x86: SGI UV: Fix mapping of MMIO registers
+  * mfd: WM835x GPIO direction register is not locked
+  * mfd: Correct WM835x ISINK ramp time defines
+  * ALSA: hda - Fix missing capture mixer for ALC861/660 codecs
+  * V4L/DVB (13868): gspca - sn9c20x: Fix test of unsigned.
+  * reiserfs: truncate blocks not used by a write
+  * HID: add device IDs for new model of Apple Wireless Keyboard
+  * PCI/cardbus: Add a fixup hook and fix powerpc
+  * Input: pmouse - move Sentelic probe down the list
+  * asus-laptop: add Lenovo SL hotkey support
+  * sched: Fix cpu_clock() in NMIs, on !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+  * sparc64: Fix NMI programming when perf events are active.
+  * sparc64: Fix Niagara2 perf event handling.
+  * i2c: Do not use device name after device_unregister
+  * i2c/pca: Don't use *_interruptible
+  * serial/8250_pnp: add a new Fujitsu Wacom Tablet PC device
+  * sched: Fix task priority bug
+  * vfs: Fix vmtruncate() regression
+  * Linux 2.6.32.5
+  * x86, msr/cpuid: Register enough minors for the MSR and CPUID drivers
+  * V4L/DVB (13900): gspca - sunplus: Fix bridge exchanges.
+  * Staging: asus_oled: fix oops in 2.6.32.2
+  * Staging: hv: fix smp problems in the hyperv core code
+  * tty: fix race in tty_fasync
+  * ecryptfs: use after free
+  * ecryptfs: initialize private persistent file before dereferencing
+    pointer
+  * nozomi: quick fix for the close/close bug
+  * serial: 8250_pnp: use wildcard for serial Wacom tablets
+  * usb: serial: fix memory leak in generic driver
+  * USB: fix bitmask merge error
+  * USB: Don't use GFP_KERNEL while we cannot reset a storage device
+  * USB: EHCI: fix handling of unusual interrupt intervals
+  * USB: EHCI & UHCI: fix race between root-hub suspend and port resume
+  * USB: add missing delay during remote wakeup
+  * USB: add speed values for USB 3.0 and wireless controllers
+  * ACPI: EC: Accelerate query execution
+  * ACPI: EC: Add wait for irq storm
+  * SCSI: enclosure: fix oops while iterating enclosure_status array
+  * drm/i915: Read the response after issuing DDC bus switch command
+  * drm/i915: try another possible DDC bus for the SDVO device with
+    multiple outputs
+  * block: bdev_stack_limits wrapper
+  * DM: Fix device mapper topology stacking
+  * x86/PCI/PAT: return EINVAL for pci mmap WC request for !pat_enabled
+  * USB: fix usbstorage for 2770:915d delivers no FAT
+  * vmalloc: remove BUG_ON due to racy counting of VM_LAZY_FREE
+  * perf timechart: Use tid not pid for COMM change
+  * perf events: Dont report side-band events on each cpu for
+    per-task-per-cpu events
+  * perf: Honour event state for aux stream data
+  * Linux 2.6.32.6
+
+  [ Ubuntu: 2.6.32-11.15 ]
+
+  * Revert "(pre-stable) drm/radeon/kms: fix crtc vblank update for r600"
+  * Revert "(pre-stable) sched: Fix balance vs hotplug race"
+  * Revert "[Upstream] acerhdf: Limit modalias matching to supported
+    boards"
+  * Revert "[Upstream] mmc: prevent dangling block device from accessing
+    stale queues"
+  * Revert "SAUCE: Fix nx_enable reporting"
+  * Revert "SAUCE: [x86] fix report of cs-limit nx-emulation"
+  * Revert "SAUCE: [x86] implement cs-limit nx-emulation for ia32"
+  * SAUCE: i915 -- disable powersave by default
+    - LP: #492392
+  * SAUCE: [x86] implement cs-limit nx-emulation for ia32
+    - LP: #369978
+  * SAUCE: [x86] fix report of cs-limit nx-emulation
+    - LP: #454285
+  * SAUCE: Fix nx_enable reporting
+    - LP: #454285
+  * [Upstream] b43: Declare all possible firmware files.
+    - LP: #488636
+  * [Config] updateconfigs after adding pvscsi
+    - LP: #497156
+  * [Config] CONFIG_BT=m
+  * Revert "x86: Side-step lguest problem by only building cmpxchg8b_emu
+    for pre-Pentium"
+  * SCSI: ipr: fix EEH recovery
+  * SCSI: qla2xxx: dpc thread can execute before scsi host has been added
+  * SCSI: st: fix mdata->page_order handling
+  * SCSI: fc class: fix fc_transport_init error handling
+  * sched: Fix task_hot() test order
+  * x86, cpuid: Add "volatile" to asm in native_cpuid()
+  * sched: Select_task_rq_fair() must honour SD_LOAD_BALANCE
+  * clockevents: Prevent clockevent_devices list corruption on cpu hotplug
+  * pata_hpt3x2n: fix clock turnaround
+  * pata_cmd64x: fix overclocking of UDMA0-2 modes
+  * ASoC: wm8974: fix a wrong bit definition
+  * sound: sgio2audio/pdaudiocf/usb-audio: initialize PCM buffer
+  * ALSA: hda - Fix missing capsrc_nids for ALC88x
+  * acerhdf: limit modalias matching to supported
+    - LP: #435958
+  * ACPI: EC: Fix MSI DMI detection
+  * ACPI: Use the return result of ACPI lid notifier chain correctly
+  * powerpc: Handle VSX alignment faults correctly in little-endian mode
+  * ASoC: Do not write to invalid registers on the wm9712.
+  * drm/radeon: fix build on 64-bit with some compilers.
+  * USB: emi62: fix crash when trying to load EMI 6|2 firmware
+  * USB: option: support hi speed for modem Haier CE100
+  * USB: Fix a bug on appledisplay.c regarding signedness
+  * USB: musb: gadget_ep0: avoid SetupEnd interrupt
+  * Bluetooth: Prevent ill-timed autosuspend in USB driver
+  * USB: rename usb_configure_device
+  * USB: fix bugs in usb_(de)authorize_device
+  * drivers/net/usb: Correct code taking the size of a pointer
+  * x86: SGI UV: Fix writes to led registers on remote uv hubs
+  * md: Fix unfortunate interaction with evms
+  * dma: at_hdmac: correct incompatible type for argument 1 of
+    'spin_lock_bh'
+  * dma-debug: Do not add notifier when dma debugging is disabled.
+  * dma-debug: Fix bug causing build warning
+  * cifs: NULL out tcon, pSesInfo, and srvTcp pointers when chasing DFS
+    referrals
+  * x86/amd-iommu: Fix initialization failure panic
+  * ioat3: fix p-disabled q-continuation
+  * ioat2,3: put channel hardware in known state at init
+  * KVM: MMU: remove prefault from invlpg handler
+  * KVM: LAPIC: make sure IRR bitmap is scanned after vm load
+  * Libertas: fix buffer overflow in lbs_get_essid()
+  * iwmc3200wifi: fix array out-of-boundary access
+  * mac80211: fix propagation of failed hardware reconfigurations
+  * mac80211: fix WMM AP settings application
+  * mac80211: Fix IBSS merge
+  * cfg80211: fix race between deauth and assoc response
+  * ath5k: fix SWI calibration interrupt storm
+  * ath9k: wake hardware for interface IBSS/AP/Mesh removal
+  * ath9k: Fix TX queue draining
+  * ath9k: fix missed error codes in the tx status check
+  * ath9k: wake hardware during AMPDU TX actions
+  * ath9k: fix suspend by waking device prior to stop
+  * ath9k_hw: Fix possible OOB array indexing in gen_timer_index[] on
+    64-bit
+  * ath9k_hw: Fix AR_GPIO_INPUT_EN_VAL_BT_PRIORITY_BB and its shift value
+    in 0x4054
+  * iwl3945: disable power save
+  * iwl3945: fix panic in iwl3945 driver
+  * iwlwifi: fix EEPROM/OTP reading endian annotations and a bug
+  * iwlwifi: fix more eeprom endian bugs
+  * iwlwifi: fix 40MHz operation setting on cards that do not allow it
+  * mac80211: fix race with suspend and dynamic_ps_disable_work
+  * NOMMU: Optimise away the {dac_,}mmap_min_addr tests
+  * 'sysctl_max_map_count' should be non-negative
+  * kernel/sysctl.c: fix the incomplete part of
+    sysctl_max_map_count-should-be-non-negative.patch
+  * V4L/DVB (13596): ov511.c typo: lock => unlock
+  * x86/ptrace: make genregs[32]_get/set more robust
+  * memcg: avoid oom-killing innocent task in case of use_hierarchy
+  * e100: Fix broken cbs accounting due to missing memset.
+  * ipv6: reassembly: use seperate reassembly queues for conntrack and
+    local delivery
+  * netfilter: fix crashes in bridge netfilter caused by fragment jumps
+  * hwmon: (sht15) Off-by-one error in array index + incorrect constants
+  * b43: avoid PPC fault during resume
+  * Keys: KEYCTL_SESSION_TO_PARENT needs TIF_NOTIFY_RESUME architecture
+    support
+  * sched: Fix balance vs hotplug race
+  * drm/radeon/kms: fix crtc vblank update for r600
+  * drm: disable all the possible outputs/crtcs before entering KMS mode
+  * S390: dasd: support DIAG access for read-only devices
+  * xen: fix is_disconnected_device/exists_disconnected_device
+  * xen: improvement to wait_for_devices()
+  * xen: wait up to 5 minutes for device connetion
+  * orinoco: fix GFP_KERNEL in orinoco_set_key with interrupts disabled
+  * udf: Try harder when looking for VAT inode
+  * Add unlocked version of inode_add_bytes() function
+  * quota: decouple fs reserved space from quota reservation
+  * ext4: Convert to generic reserved quota's space management.
+  * ext4: fix sleep inside spinlock issue with quota and dealloc (#14739)
+  * x86, msr: Unify rdmsr_on_cpus/wrmsr_on_cpus
+  * cpumask: use modern cpumask style in drivers/edac/amd64_edac.c
+  * amd64_edac: unify MCGCTL ECC switching
+  * x86, msr: Add support for non-contiguous cpumasks
+  * x86, msr: msrs_alloc/free for CONFIG_SMP=n
+  * amd64_edac: fix driver instance freeing
+  * amd64_edac: make driver loading more robust
+  * amd64_edac: fix forcing module load/unload
+  * sched: Sched_rt_periodic_timer vs cpu hotplug
+  * ext4: Update documentation to correct the inode_readahead_blks option
+    name
+  * lguest: fix bug in setting guest GDT entry
+  * vmscan: do not evict inactive pages when skipping an active list scan
+  * ksm: fix mlockfreed to munlocked
+  * rt2x00: Disable powersaving for rt61pci and rt2800pci.
+  * generic_permission: MAY_OPEN is not write access
+  * Linux 2.6.32.3
+  * untangle the do_mremap() mess
+  * fasync: split 'fasync_helper()' into separate add/remove functions
+  * ASoC: fix params_rate() macro use in several codecs
+  * modules: Skip empty sections when exporting section notes
+  * exofs: simple_write_end does not mark_inode_dirty
+  * nfsd: make sure data is on disk before calling ->fsync
+  * sunrpc: fix peername failed on closed listener
+  * SUNRPC: Fix up an error return value in
+    gss_import_sec_context_kerberos()
+  * SUNRPC: Fix the return value in gss_import_sec_context()
+  * sunrpc: on successful gss error pipe write, don't return error
+  * drm/i915: Update LVDS connector status when receiving ACPI LID event
+  * drm/i915: fix order of fence release wrt flushing
+  * drm/i915: Permit pinning whilst the device is 'suspended'
+  * drm: remove address mask param for drm_pci_alloc()
+  * drm/i915: Enable/disable the dithering for LVDS based on VBT setting
+  * drm/i915: Make the BPC in FDI rx/transcoder be consistent with that in
+    pipeconf on Ironlake
+  * drm/i915: Select the correct BPC for LVDS on Ironlake
+  * drm/i915: fix unused var
+  * rtc_cmos: convert shutdown to new pnp_driver->shutdown
+  * drivers/cpuidle/governors/menu.c: fix undefined reference to
+    `__udivdi3'
+  * cgroups: fix 2.6.32 regression causing BUG_ON() in cgroup_diput()
+  * lib/rational.c needs module.h
+  * dma-debug: allow DMA_BIDIRECTIONAL mappings to be synced with
+    DMA_FROM_DEVICE and
+  * kernel/signal.c: fix kernel information leak with print-fatal-signals=1
+  * mmc_block: add dev_t initialization check
+  * mmc_block: fix probe error cleanup bug
+  * mmc_block: fix queue cleanup
+  * ALSA: hda - Fix ALC861-VD capture source mixer
+  * ALSA: ac97: Add Dell Dimension 2400 to Headphone/Line Jack Sense
+    blacklist
+  * ALSA: atiixp: Specify codec for Foxconn RC4107MA-RS2
+    - LP: #498863
+  * ASoC: Fix WM8350 DSP mode B configuration
+  * netfilter: ebtables: enforce CAP_NET_ADMIN
+  * netfilter: nf_ct_ftp: fix out of bounds read in update_nl_seq()
+  * hwmon: (coretemp) Fix TjMax for Atom N450/D410/D510 CPUs
+  * hwmon: (adt7462) Fix pin 28 monitoring
+  * quota: Fix dquot_transfer for filesystems different from ext4
+  * xen: fix hang on suspend.
+  * iwlwifi: fix iwl_queue_used bug when read_ptr == write_ptr
+  * ath5k: Fix eeprom checksum check for custom sized eeproms
+  * cfg80211: fix syntax error on user regulatory hints
+  * iwl: off by one bug
+  * mac80211: add missing sanity checks for action frames
+  * drm/i915: remove render reclock support
+  * libertas: Remove carrier signaling from the scan code
+  * kernel/sysctl.c: fix stable merge error in NOMMU mmap_min_addr
+  * mac80211: fix skb buffering issue (and fixes to that)
+  * fix braindamage in audit_tree.c untag_chunk()
+  * fix more leaks in audit_tree.c tag_chunk()
+  * module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y
+  * ipv6: skb_dst() can be NULL in ipv6_hop_jumbo().
+  * agp/intel-agp: Clear entire GTT on startup
+  * Linux 2.6.32.4
+  * ethtool: Add reset operation
+  * gro: Name the GRO result enumeration type
+  * gro: Change all receive functions to return GRO result codes
+  * sfc: 10Xpress: Initialise pause advertising flags
+  * sfc: 10Xpress: Report support for pause frames
+  * sfc: Remove redundant header gmii.h
+  * sfc: Remove redundant hardware initialisation
+  * sfc: Rename Falcon-specific board code and types
+  * sfc: Remove boards.h, moving last remaining declaration to falcon.h
+  * sfc: Remove versioned bitfield macros
+  * sfc: Move RX data FIFO thresholds out of struct efx_nic_type
+  * sfc: Update hardware definitions for Siena
+  * sfc: Rename register I/O header and functions used by both Falcon and
+    Siena
+  * sfc: Eliminate indirect lookups of queue size constants
+  * sfc: Define DMA address mask explicitly in terms of descriptor field
+    width
+  * sfc: Move all TX DMA length limiting into tx.c
+  * sfc: Change order of device removal to reverse of probe order
+  * sfc: Remove declarations of nonexistent functions
+  * sfc: Move efx_xmit_done() declaration into correct stanza
+  * sfc: Move shared members of struct falcon_nic_data into struct efx_nic
+  * sfc: Maintain interrupt moderation values in ticks, not microseconds
+  * sfc: Removed kernel-doc for nonexistent member of efx_phy_operations
+  * sfc: Remove pointless abstraction of memory BAR number
+  * sfc: Remove incorrect assertion from efx_pci_remove_main()
+  * sfc: Remove unnecessary tests of efx->membase
+  * sfc: Move MTD probe after netdev registration and name allocation
+  * sfc: Remove unused code for non-autoneg speed/duplex switching
+  * sfc: Rename 'xfp' file and functions to reflect reality
+  * sfc: Really allow RX checksum offload to be disabled
+  * sfc: Feed GRO result into RX allocation policy and interrupt moderation
+  * sfc: Enable heuristic selection between page and skb RX buffers
+  * sfc: Remove pointless abstraction of memory BAR number (2)
+  * sfc: Remove redundant gotos from __efx_rx_packet()
+  * sfc: Remove ridiculously paranoid assertions
+  * sfc: Move assertions and buffer cleanup earlier in efx_rx_packet_lro()
+  * sfc: Record RX queue number on GRO path
+  * sfc: SFT9001: Reset LED configuration correctly after blinking
+  * sfc: Use a single blink implementation
+  * sfc: Rename efx_board::init_leds to init_phy and use for SFN4111T
+  * sfc: Make board information explicitly Falcon-specific
+  * sfc: Move definition of struct falcon_nic_data into falcon.h
+  * sfc: Move struct falcon_board into struct falcon_nic_data
+  * sfc: Move all I2C stuff into struct falcon_board
+  * sfc: Gather link state fields in struct efx_nic into new struct
+    efx_link_state
+  * sfc: Remove unnecessary casts to struct sk_buff *
+  * sfc: Remove redundant efx_xmit() function
+  * sfc: Combine high-level header files
+  * sfc: Log interrupt and reset type names, not numbers
+  * sfc: Fix descriptor cache sizes
+  * sfc: Treat all MAC registers as 128-bit
+  * sfc: Strengthen EFX_ASSERT_RESET_SERIALISED
+  * sfc: Comment corrections
+  * sfc: Remove unused constant
+  * sfc: Clean up struct falcon_board and struct falcon_board_data
+  * sfc: Fix bugs in RX queue flushing
+  * sfc: Remove unused function efx_flush_queues()
+  * sfc: Only switch Falcon MAC clocks as necessary
+  * sfc: Hold MAC lock for longer in efx_init_port()
+  * sfc: Split MAC stats DMA initiation and completion
+  * sfc: Move Falcon board/PHY/MAC monitoring code to falcon.c
+  * sfc: Simplify XMAC link polling
+  * sfc: Change MAC promiscuity and multicast hash at the same time
+  * sfc: Move inline comment into kernel-doc
+  * sfc: Do not set net_device::trans_start in self-test
+  * sfc: Simplify PHY polling
+  * sfc: QT202x: Reset before reading PHY id
+  * sfc: Replace MDIO spinlock with mutex
+  * sfc: Always start Falcon using the XMAC
+  * sfc: Limit some hardware workarounds to Falcon
+  * sfc: Remove EFX_WORKAROUND_9141 macro
+  * sfc: Remove another unused workaround macro
+  * sfc: Remove some redundant whitespace
+  * sfc: Decouple NIC revision number from Falcon PCI revision number
+  * sfc: Move descriptor cache base addresses to struct efx_nic_type
+  * sfc: Clean up RX event handling
+  * sfc: Remove redundant writes to INT_ADR_KER
+  * sfc: Remove duplicate hardware structure definitions
+  * sfc: Turn pause frame generation on and off at the MAC, not the RX FIFO
+  * sfc: Move Falcon NIC operations to efx_nic_type
+  * sfc: Refactor link configuration
+  * sfc: Generalise link state monitoring
+  * sfc: Add power-management and wake-on-LAN support
+  * sfc: Implement ethtool reset operation
+  * sfc: Add efx_nic_type operation for register self-test
+  * sfc: Add efx_nic_type operation for NVRAM self-test
+  * sfc: Add efx_nic_type operation for identity LED control
+  * sfc: Separate shared NIC code from Falcon-specific and rename
+    accordingly
+  * sfc: Fold falcon_probe_nic_variant() into falcon_probe_nic()
+  * sfc: Extend loopback mode enumeration
+  * sfc: Remove static PHY data and enumerations
+  * sfc: Extend MTD driver for use with new NICs
+  * sfc: Allow for additional checksum offload features
+  * sfc: Rename falcon.h to nic.h
+  * sfc: Move shared NIC code from falcon.c to new source file nic.c
+  * sfc: Add firmware protocol definitions (MCDI)
+  * sfc: Add support for SFC9000 family (1)
+  * sfc: Add support for SFC9000 family (2)
+  * sfc: Implement TSO for TCP/IPv6
+  * sfc: Update version, copyright dates, authors
+  * drivers/net/sfc: Correct code taking the size of a pointer
+  * sfc: Move PHY software state initialisation from init() into probe()
+  * sfc: Include XGXS in XMAC link status check except in XGMII loopback
+  * sfc: Fix DMA mapping cleanup in case of an error in TSO
+  * sfc: QT2025C: Work around PHY bug
+  * sfc: QT2025C: Switch into self-configure mode when not in loopback
+  * sfc: QT2025C: Work around PHY firmware initialisation bug
+  * sfc: QT2025C: Add error message for suspected bad SFP+ cables
+  * sfc: Disable TX descriptor prefetch watchdog
+  * [SCSI] vmw_pvscsi: SCSI driver for VMware's virtual HBA.
+    - LP: #497156
+
+  [ Ubuntu: 2.6.32-10.14 ]
+
+  * SAUCE: drm/radeon/kms: fix LVDS setup on r4xx
+    - LP: #493795
+  * Revert "(pre-stable) acpi: Use the ARB_DISABLE for the CPU which model
+    id is less than 0x0f."
+ * config-check -- ensure the checks get run at build time + * config-check -- check the processed config during updateconfigs + * config-check -- CONFIG_SECCOMP may not be present + * TUN is now built in ignore + * SAUCE: acpi battery -- delay first lookup of the battery until first + use + * SAUCE: async_populate_rootfs: move rootfs init earlier + * ubuntu: AppArmor -- update to mainline 2010-01-06 + * SAUCE: move RLIMIT_CORE pipe dumper marker to 1 + - LP: #498525 + * (pre-stable) drm/radeon/kms: fix crtc vblank update for r600 + * Add asix to nic-usb-modules file + - LP: #499785 + * (pre-stable) sched: Fix balance vs hotplug race + * [Config] Enable CONFIG_FUNCTION_TRACER + - LP: #497989 + * [Config] Drop lpia from getabis + * [Config] Build in TUN/TAP driver + - LP: #499491 + * [Config] DH_COMPAT=5 + * Revert "(pre-stable) drm/i915: Avoid NULL dereference with + component_only tv_modes" + * Revert "(pre-stable) drm/i915: Fix sync to vblank when VGA output is + turned off" + * USB: usb-storage: fix bug in fill_inquiry + * USB: option: add pid for ZTE + * firewire: ohci: handle receive packets with a data length of zero + * rcu: Prepare for synchronization fixes: clean up for non-NO_HZ handling + of ->completed counter + * rcu: Fix synchronization for rcu_process_gp_end() uses of ->completed + counter + * rcu: Fix note_new_gpnum() uses of ->gpnum + * rcu: Remove inline from forward-referenced functions + * perf_event: Fix invalid type in ioctl definition + * perf_event: Initialize data.period in perf_swevent_hrtimer() + * perf: Don't free perf_mmap_data until work has been done + * PM / Runtime: Fix lockdep warning in __pm_runtime_set_status() + * sched: Check for an idle shared cache in select_task_rq_fair() + * sched: Fix affinity logic in select_task_rq_fair() + * sched: Rate-limit newidle + * sched: Fix and clean up rate-limit newidle code + * x86/amd-iommu: attach devices to pre-allocated domains early + * x86/amd-iommu: un__init iommu_setup_msi + * x86, Calgary IOMMU quirk: Find nearest matching Calgary while walking + up the PCI tree + * x86: Fix iommu=nodac parameter handling + * x86: GART: pci-gart_64.c: Use correct length in strncmp + * x86: ASUS P4S800 reboot=bios quirk + - LP: #366682 + * x86, apic: Enable lapic nmi watchdog on AMD Family 11h + * ssb: Fix range check in sprom write + * ath5k: allow setting txpower to 0 + * ath5k: enable EEPROM checksum check + * hrtimer: Fix /proc/timer_list regression + * ALSA: hrtimer - Fix lock-up + * ALSA: hda - Terradici HDA controllers does not support 64-bit mode + * KVM: x86 emulator: limit instructions to 15 bytes + * KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c + * KVM: s390: Make psw available on all exits, not just a subset + * KVM: fix irq_source_id size verification + * KVM: x86: include pvclock MSRs in msrs_to_save + * x86: Prevent GCC 4.4.x (pentium-mmx et al) function prologue wreckage + * x86: Use -maccumulate-outgoing-args for sane mcount prologues + * x86, mce: don't restart timer if disabled + * x86/mce: Set up timer unconditionally + * x86: SGI UV: Fix BAU initialization + * x86: Fix duplicated UV BAU interrupt vector + * x86: Add new Intel CPU cache size descriptors + * x86: Fix typo in Intel CPU cache size descriptor + * pata_hpt{37x|3x2n}: fix timing register masks (take 2) + * s390: clear high-order bits of registers after sam64 + * V4L/DVB: Fix test in copy_reg_bits() + * bsdacct: fix uid/gid misreporting + * UBI: flush wl before clearing update marker + * jbd2: don't wipe the journal on a 
failed journal checksum + * USB: xhci: Add correct email and files to MAINTAINERS entry. + * USB: musb_gadget_ep0: fix unhandled endpoint 0 IRQs, again + * USB: option.c: add support for D-Link DWM-162-U5 + * USB: usbtmc: repeat usb_bulk_msg until whole message is transfered + * USB: usb-storage: add BAD_SENSE flag + * USB: Close usb_find_interface race v3 + * pxa/em-x270: fix usb hub power up/reset sequence + * hfs: fix a potential buffer overflow + * SUNRPC: IS_ERR/PTR_ERR confusion + * NFS: Fix nfs_migrate_page() + * md/bitmap: protect against bitmap removal while being updated. + * futex: Take mmap_sem for get_user_pages in fault_in_user_writeable + * devpts_get_tty() should validate inode + * debugfs: fix create mutex racy fops and private data + * Driver core: fix race in dev_driver_string + * Serial: Do not read IIR in serial8250_start_tx when UART_BUG_TXEN + * mac80211: Fix bug in computing crc over dynamic IEs in beacon + * mac80211: Fixed bug in mesh portal paths + * mac80211: Revert 'Use correct sign for mesh active path refresh' + * mac80211: fix scan abort sanity checks + * wireless: correctly report signal value for IEEE80211_HW_SIGNAL_UNSPEC + * rtl8187: Fix wrong rfkill switch mask for some models + * x86: Fix bogus warning in apic_noop.apic_write() + * mm: hugetlb: fix hugepage memory leak in mincore() + * mm: hugetlb: fix hugepage memory leak in walk_page_range() + * powerpc/windfarm: Add detection for second cpu pump + * powerpc/therm_adt746x: Record pwm invert bit at module load time] + * powerpc: Fix usage of 64-bit instruction in 32-bit altivec code + * drm/radeon/kms: Add quirk for HIS X1300 board + * drm/radeon/kms: handle vblanks properly with dpms on + * drm/radeon/kms: fix legacy crtc2 dpms + * drm/radeon/kms: fix vram setup on rs600 + * drm/radeon/kms: rs6xx/rs740: clamp vram to aperture size + * drm/ttm: Fix build failure due to missing struct page + * drm/i915: Set the error code after failing to insert new offset into mm + ht. + * drm/i915: Add the missing clonemask for display port on Ironlake + * xen/xenbus: make DEVICE_ATTR()s static + * xen: re-register runstate area earlier on resume. + * xen: restore runstate_info even if !have_vcpu_info_placement + * xen: correctly restore pfn_to_mfn_list_list after resume + * xen: register timer interrupt with IRQF_TIMER + * xen: register runstate on secondary CPUs + * xen: don't call dpm_resume_noirq() with interrupts disabled. + * xen: register runstate info for boot CPU early + * xen: call clock resume notifier on all CPUs + * xen: improve error handling in do_suspend. + * xen: don't leak IRQs over suspend/resume. + * xen: use iret for return from 64b kernel to 32b usermode + * xen: explicitly create/destroy stop_machine workqueues outside + suspend/resume region. + * Xen balloon: fix totalram_pages counting. + * xen: try harder to balloon up under memory pressure. 
+ * dm exception store: free tmp_store on persistent flag error + * dm snapshot: only take lock for statustype info not table + * dm crypt: move private iv fields to structs + * dm crypt: restructure essiv error path + * dm: avoid _hash_lock deadlock + * dm snapshot: cope with chunk size larger than origin + * dm crypt: separate essiv allocation from initialisation + * dm crypt: make wipe message also wipe essiv key + * slc90e66: fix UDMA handling + * tcp: Stalling connections: Fix timeout calculation routine + * ip_fragment: also adjust skb->truesize for packets not owned by a + socket + * b44 WOL setup: one-bit-off stack corruption kernel panic fix + * sparc64: Don't specify IRQF_SHARED for LDC interrupts. + * sparc64: Fix overly strict range type matching for PCI devices. + * sparc64: Fix stack debugging IRQ stack regression. + * sparc: Set UTS_MACHINE correctly. + * b43legacy: avoid PPC fault during resume + * tracing: Fix event format export + * ath9k: Fix TX hang poll routine + * ath9k: fix processing of TX PS null data frames + * ath9k: Fix maximum tx fifo settings for single stream devices + * ath9k: fix tx status reporting + * mac80211: Fix dynamic power save for scanning. + * drm/i915: Fix sync to vblank when VGA output is turned off + * memcg: fix memory.memsw.usage_in_bytes for root cgroup + * thinkpad-acpi: fix default brightness_mode for R50e/R51 + * thinkpad-acpi: preserve rfkill state across suspend/resume + * ipw2100: fix rebooting hang with driver loaded + * matroxfb: fix problems with display stability + * acerhdf: add new BIOS versions + * asus-laptop: change light sens default values. + * vmalloc: conditionalize build of pcpu_get_vm_areas() + * ACPI: Use the ARB_DISABLE for the CPU which model id is less than 0x0f. + * net: Fix userspace RTM_NEWLINK notifications. + * ext3: Fix data / filesystem corruption when write fails to copy data + * V4L/DVB (13116): gspca - ov519: Webcam 041e:4067 added. + * bcm63xx_enet: fix compilation failure after get_stats_count removal + * x86: Under BIOS control, restore AP's APIC_LVTTHMR to the BSP value + * drm/i915: Avoid NULL dereference with component_only tv_modes + * drm/i915: PineView only has LVDS and CRT ports + * drm/i915: Fix LVDS stability issue on Ironlake + * mm: sigbus instead of abusing oom + * ipvs: zero usvc and udest + * jffs2: Fix long-standing bug with symlink garbage collection. + * intel-iommu: Detect DMAR in hyperspace at probe time. + * intel-iommu: Apply BIOS sanity checks for interrupt remapping too. + * intel-iommu: Check for an RMRR which ends before it starts. 
+ * intel-iommu: Fix oops with intel_iommu=igfx_off + * intel-iommu: ignore page table validation in pass through mode + * netfilter: xtables: document minimal required version + * perf_event: Fix incorrect range check on cpu number + * implement early_io{re,un}map for ia64 + * Linux 2.6.32.2 + + -- Andy Whitcroft Sat, 06 Feb 2010 18:04:10 +0000 + +linux-ec2 (2.6.32-301.5) lucid; urgency=low + + [ Andy Whitcroft ] + + * rebase to Ubuntu-2.6.32-9.13 + * [Config] update configs following rebase to Ubuntu-2.6.32-9.13 + + [ Ubuntu: 2.6.32-9.13 ] + + * [Config] enable CONFIG_B43_PHY_LP + - LP: #493059 + * include modules.builtin in the binary debs + * config-check -- add a configuration enforcer + * config-check -- add a unit-test suite to the checker + * [Config] Enable CONFIG_SYN_COOKIES for versatile + * [Config] Enable CONFIG_SECURITY_SMACK for ports + * [Config] Enable CONFIG_SECURITY_FILE_CAPABILITIES for ports + * [Config] Disable CONFIG_COMPAT_BRK for ports + * getabis -- add armel versatile to the list + * SAUCE: Increase the default prealloc buffer for HDA audio devices + (non-modem) + * ubuntu: omnibook -- Added missing BOM file + * ubuntu: fsam7400 -- Cleanup Makefile + * Revert "ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT" + * signal: Fix alternate signal stack check + * SCSI: scsi_lib_dma: fix bug with dma maps on nested scsi objects + * SCSI: osd_protocol.h: Add missing #include + * SCSI: megaraid_sas: fix 64 bit sense pointer truncation + * ext4: fix potential buffer head leak when add_dirent_to_buf() returns + ENOSPC + * ext4: avoid divide by zero when trying to mount a corrupted file system + * ext4: fix the returned block count if EXT4_IOC_MOVE_EXT fails + * ext4: fix lock order problem in ext4_move_extents() + * ext4: fix possible recursive locking warning in EXT4_IOC_MOVE_EXT + * ext4: plug a buffer_head leak in an error path of ext4_iget() + * ext4: make sure directory and symlink blocks are revoked + * ext4: fix i_flags access in ext4_da_writepages_trans_blocks() + * ext4: journal all modifications in ext4_xattr_set_handle + * ext4: don't update the superblock in ext4_statfs() + * ext4: fix uninit block bitmap initialization when s_meta_first_bg is + non-zero + * ext4: fix block validity checks so they work correctly with meta_bg + * ext4: avoid issuing unnecessary barriers + * ext4: fix error handling in ext4_ind_get_blocks() + * ext4: make trim/discard optional (and off by default) + * ext4: make "norecovery" an alias for "noload" + * ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT + * ext4: initialize moved_len before calling ext4_move_extents() + * ext4: move_extent_per_page() cleanup + * jbd2: Add ENOMEM checking in and for + jbd2_journal_write_metadata_buffer() + * ext4: Return the PTR_ERR of the correct pointer in + setup_new_group_blocks() + * ext4: Avoid data / filesystem corruption when write fails to copy data + * ext4: wait for log to commit when umounting + * ext4: remove blocks from inode prealloc list on failure + * ext4: ext4_get_reserved_space() must return bytes instead of blocks + * ext4: quota macros cleanup + * ext4: fix incorrect block reservation on quota transfer. + * ext4: Wait for proper transaction commit on fsync + * ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT + * ext4: Fix potential fiemap deadlock (mmap_sem vs.
i_data_sem) + * Linux 2.6.32.1 + * kbuild: generate modules.builtin + * (pre-stable) drm/i915: Fix sync to vblank when VGA output is turned off + - LP: #494461 + * (pre-stable) drm/i915: Avoid NULL dereference with component_only + tv_modes + - LP: #494045 + * (pre-stable) acpi: Use the ARB_DISABLE for the CPU which model id is + less than 0x0f. + - LP: #481765 + + [ Ubuntu: 2.6.32-8.12 ] + + * SAUCE: AppArmor -- add linux/err.h for ERR_PTR + + [ Ubuntu: 2.6.32-8.11 ] + + * Revert "SAUCE: default ATI Radeon KMS to off until userspace catches + up" + * Revert "SAUCE: AppArmor: Fix oops there is no tracer and doing unsafe + transition." + * Revert "SAUCE: AppArmor: Fix refcounting bug causing leak of creds" + * Revert "SAUCE: AppArmor: Fix cap audit_caching preemption disabling" + * Revert "SAUCE: AppArmor: Fix Oops when in apparmor_bprm_set_creds" + * Revert "SAUCE: AppArmor: Fix oops after profile removal" + * Revert "SAUCE: AppArmor: AppArmor disallows truncate of deleted files." + * Revert "SAUCE: AppArmor: AppArmor fails to audit change_hat correctly" + * Revert "SAUCE: AppArmor: Policy load and replacement can fail to alloc + mem" + * Revert "SAUCE: AppArmor: AppArmor wrongly reports allow perms as + denied" + * Revert "SAUCE: AppArmor: Fix mediation of "deleted" paths" + * Revert "SAUCE: AppArmor: Fix off by 2 error in getprocattr mem + allocation" + * Revert "SAUCE: AppArmor: Set error code after structure + initialization." + * Revert "AppArmor -- fix pstrace_may_access rename" + * Revert "ubuntu: AppArmor security module" + * Revert "SAUCE: Add config option to set a default LSM" + * Revert "ubuntu: fsam7400 -- sw kill switch driver" + * Revert "[Config] fsam7400 -- enable" + * Revert "[Config] AUFS -- enable" + * Revert "ubuntu: AUFS -- aufs2-30 20090727" + * Revert "ubuntu: AUFS -- export various core functions -- fixes" + * Revert "ubuntu: AUFS -- export various core functions" + * Revert "[Config] ubuntu/iscsitarget -- disable" + * Revert "[Config] iscsitarget -- enable" + * Revert "ubuntu: iscsitarget -- SVN revision r214" + * update Vcs-Git to point to the correct repository + - LP: #493589 + * update build environment overrides to lucid + - LP: #493589 + * [Config] enable CONFIG_DEVTMPFS + * [Config] update all configs following AppArmor 2009-12-08 update + * SAUCE: isapnp_init: make isa PNP scans occur async + * [Config] fsam7400 -- enable + * [Config] omnibook -- enable + * [Config] cleanup CONFIG_AUDIT + * ubuntu: AUFS -- export various core functions (aufs2-base.patch) + * ubuntu: AUFS -- export various core functions (aufs2-standalone.patch) + * ubuntu: AUFS -- aufs2 20091209 + * [Config] AUFS -- enable + * [Config] iscsitarget -- enable + * SAUCE: KMS: cache the EDID information of the LVDS + * bnx2: update d-i firmware filenames + - LP: #494052 + * add cdc_ether to nic-usb-modules udeb + - LP: #495060 + * ubuntu: AppArmor -- mainline 2009-10-08 + * ubuntu: fsam7400 -- kill switch for Fujitsu Siemens Amilo M 7400 + * ubuntu: omnibook -- support Toshiba (HP) netbooks + * ubuntu: iscsitarget --- version 1.4.19 + - LP: #494693 + * SAUCE: Make populate_rootfs asynchronous + * Parallelize flavour builds and packaging + * [Config] Enable CONFIG_KSM + * Config option to set a default LSM + * LSM: Add security_path_chroot(). + * LSM: Add security_path_chroot(). + * LSM: Move security_path_chmod()/security_path_chown() to after + mutex_lock(). 
+ * ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT + + -- Andy Whitcroft Tue, 22 Dec 2009 19:04:09 +0000 + +linux-ec2 (2.6.32-301.4) lucid; urgency=low + + [ Andy Whitcroft ] + + * sort out previous ABI files to fix FTBFS + + -- Andy Whitcroft Tue, 22 Dec 2009 15:00:05 +0000 + +linux-ec2 (2.6.32-301.3) lucid; urgency=low + + [ Andy Whitcroft ] + + * drop generated files to fix FTBFS + + -- Andy Whitcroft Tue, 22 Dec 2009 13:36:44 +0000 + +linux-ec2 (2.6.32-301.2) lucid; urgency=low + + [ Andy Whitcroft ] + + * rebase to Ubuntu-2.6.32-7.9 + * [Config] update configs following rebase to Ubuntu-2.6.32-7.9 + * rebase to Ubuntu-2.6.32-7.10 + + [ Ubuntu: 2.6.32-7.10 ] + + * [Config] disable CONFIG_THUMB2_KERNEL to fix arm FTBFS + + [ Ubuntu: 2.6.32-7.9 ] + + * SAUCE: set /proc/acpi/video/*/DOS to 4 by default + - LP: #458982 + * SAUCE: ensure vga16fb loads if no other driver claims the VGA device + * [Config] update configs following versatile switch to V7 + * rebased to v2.6.32 + * [Config] update configs following rebase to v2.6.32 + * [Config] update ports configs following rebase to v2.6.32 + * SAUCE: vfs: Add a trace point in the mark_inode_dirty function + * [SCSI] megaraid_sas: remove sysfs poll_mode_io world writeable + permissions + - CVE-2009-3939 + * SAUCE: select a v7 CPU for versatile + * SAUCE: ALSA: hda - Add power on/off counter + * rebased to v2.6.32 + + [ Ubuntu: 2.6.32-6.8 ] + + * [Config] disable SSB devices for armel + + [ Ubuntu: 2.6.32-6.7 ] + + * Revert "SAUCE: default ATI Radeon KMS to off until userspace catches up" + * Revert "SAUCE: Dell XPS710 reboot quirk" + * Revert "SAUCE: Link acpi-cpufreq.o first" + * Revert "SAUCE: LPIA Logical reset of USB port on resume" + * Revert "SAUCE: LPIA Reboot fix for Intel Crownbeach development boards" + * Revert "SAUCE: Enable HDMI audio codec on Studio XPS 1340" + * Revert "SAUCE: Dell laptop digital mic does not work, PCI 1028:0271" + * Revert "Add Dell Dimension 9200 reboot quirk" + * Revert "SAUCE: Correctly blacklist Thinkpad r40e in ACPI" + * Revert "SAUCE: tulip: Define ULI PCI ID's" + * Revert "SAUCE: Lower warning level of some PCI messages" + * Revert "mac80211: fix two issues in debugfs" + Drop a number of known redundant commits as identified in the Ubuntu + delta review blueprint. + * reenable armel versatile flavour + * [Config] disable CONFIG_USB_DEVICEFS + * [Config] udeb: Add squashfs to fs-core-modules + - LP: #352615 + * [Config] Create a real squashfs udeb + - LP: #352615 + + -- Andy Whitcroft Mon, 07 Dec 2009 21:10:30 +0000 + +linux-ec2 (2.6.32-300.1) lucid; urgency=low + + [ Andy Whitcroft ] + + * SAUCE: fix nx-emulation when disabled by XEN + * SAUCE: fix disable of KERNEL_IMAGE_SIZE for i386 XEN builds + + [ John Johansen ] + + * Fix recursive include + * SAUCE: Fix build ASSERT for 3.0.2 compatibility + * SAUCE: ec2: Default domU console to tty. 
+ - LP: #431103 + * [Config] Setup EC2 commit + * update configs for 2.6.32 + * rebase to Ubuntu-2.6.32-5.6 + + [ Ubuntu: 2.6.32-5.6 ] + + * rebase to v2.6.32-rc8 + * update configs following rebase to v2.6.32-rc8 + * update ports configs since rebase to v2.6.32-rc8 + * [Config] enable cgroup options + - LP: #480739 + * rebase to v2.6.32-rc8 + + [ Ubuntu: 2.6.32-4.5 ] + + * [Config] SERIO_LIBPS2 and SERIO_I8042 must match + * rebase to v2.6.32-rc7 + * resync with Karmic proposed + * SAUCE: AppArmor: Fix oops after profile removal + - LP: #475619 + * SAUCE: AppArmor: Fix Oops when in apparmor_bprm_set_creds + - LP: #437258 + * SAUCE: AppArmor: Fix cap audit_caching preemption disabling + - LP: #479102 + * SAUCE: AppArmor: Fix refcounting bug causing leak of creds + - LP: #479115 + * SAUCE: AppArmor: Fix oops there is no tracer and doing unsafe + transition. + - LP: #480112 + * resync with Karmic proposed (ddbc670a86a3dee18541a3734149f250ff307adf) + * rebase to v2.6.32-rc7 + + [ Ubuntu: 2.6.32-3.4 ] + + * [Config] SERIO_LIBPS2 and SERIO_I8042 must match + * [Upstream] add local prefix to oss local change_bits + * mtd/maps: gpio-addr-flash: pull in linux/ headers rather than asm/ + * mtd/maps: gpio-addr-flash: depend on GPIO arch support + + [ Ubuntu: 2.6.32-3.3 ] + + * rebase to v2.6.32-rc6 + * [Config] update configs following rebase to v2.6.32-rc6 + * [Config] update ports configs following rebase to v2.6.32-rc6 + * resync with Karmic Ubuntu-2.6.31-15.49 + * [Config] add module ignores for broken drivers + * SAUCE: AppArmor: AppArmor wrongly reports allow perms as denied + - LP: #453335 + * SAUCE: AppArmor: Policy load and replacement can fail to alloc mem + - LP: #458299 + * SAUCE: AppArmor: AppArmor fails to audit change_hat correctly + - LP: #462824 + * SAUCE: AppArmor: AppArmor disallows truncate of deleted files. + - LP: #451375 + * SAUCE: Fix nx_enable reporting + - LP: #454285 + * Revert "SAUCE: trace: add trace_event for the open() syscall" + * SAUCE: trace: add trace events for open(), exec() and uselib() + - LP: #462111 + * SAUCE: Fix sub-flavour script to not stop on missing directories + - LP: #453073 + * resync with Karmic Ubuntu-2.6.31-15.49 + * rebase to v2.6.32-rc6 + - LP: #464552 + + [ Ubuntu: 2.6.32-2.2 ] + + * install the full changelog with the binary package + * changelog -- explicitly note rebases and clean history + * reinstate armel.mk with no flavours + - LP: #449637 + * [Upstream] block: silently error unsupported empty barriers too + - LP: #420423 + * [Config] update configs following karmic resync + * [Config] update ports configs following karmic resync + * [Upstream] lirc -- follow removal of .id element + * Use section 'admin' rather than 'base' + * Add more e100 firmware to nic-modules + - LP: #451872 + * Add qla1280 firmware to scsi-modules + - LP: #381037 + * SAUCE: AppArmor: Set error code after structure + initialization.
+ - LP: #427948 + * SAUCE: AppArmor: Fix off by 2 error in getprocattr mem allocation + - LP: #446595 + * SAUCE: AppArmor: Fix mediation of "deleted" paths + * SAUCE: [x86] fix report of cs-limit nx-emulation + - LP: #454285 + * SAUCE: (drop after 2.6.31) input: Add support for filtering input + events + - LP: #430809 + * SAUCE: (drop after 2.6.31) dell-laptop: Trigger rfkill updates on wifi + toggle switch press + - LP: #430809 + * SAUCE: Add sr_mod to the scsi-modules udeb for powerpc + * [Config] Add sd_mod to scsi-modules udeb for powerpc + * SAUCE: Update to LIRC 0.8.6 + - LP: #432678 + * SAUCE: dell-laptop: Store the HW switch status internally rather than + requerying every time + - LP: #430809 + * SAUCE: dell-laptop: Blacklist machines not supporting dell-laptop + - LP: #430809 + * [Upstream] acerhdf: Limit modalias matching to supported boards + - LP: #435958 + * [Upstream] i915: Fix i2c init message + - LP: #409361 + * [Config] Add sym53c8xx.ko to virtual sub-flavour + - LP: #439415 + * [Config] Add d101m_ucode.bin to d-i/firmware/nic-modules + - LP: #439456 + * [Config] Set default I/O scheduler back to CFQ for desktop flavours + - LP: #381300 + * SAUCE: Created MODULE_EXPORT/MODULE_IMPORT macros + - LP: #430694 + * SAUCE: Use MODULE_IMPORT macro to tie intel_agp to i915 + - LP: #430694 + * [Config] CONFIG_GFS2_FS_LOCKING_DLM=y + - LP: #416325 + * SAUCE: Fix MODULE_IMPORT/MODULE_EXPORT + - LP: #430694 + * SAUCE: Raise the default console 'quiet' level to 2 + * [Config] CONFIG_X86_PAT=y + * [Config] Add armel arch to linux-libc-dev arches. + - LP: #449637 + * [Config] CONFIG_X86_MCE + * [Upstream] (drop after 2.6.31) Input: synaptics - add another Protege + M300 to rate blacklist + - LP: #433801 + * sgi-gru: Fix kernel stack buffer overrun, CVE-2009-2584 + * drm/i915: Fix FDI M/N setting according with correct color depth + - LP: #416792 + + [ Ubuntu: 2.6.32-1.1 ] + + * rebase to v2.6.32-rc3 + * [Config] update configs following rebase to 2.6.32-rc3 + * [Config] update ports configs following rebase to 2.6.32-rc3 + * AppArmor -- fix pstrace_may_access rename + * staging/android -- disable + * ubuntu: dm-raid-45 -- update to compile with 2.6.32 + * ubuntu: drbd -- disable + * staging/comedi -- disable + * staging/go7007 -- disable + * [Config] staging/winbond -- disable + * [Config] ubuntu/iscsitarget -- disable + * [d-i] cbc and ecb are builtin make them optional in udebs + * rebase to v2.6.32-rc5 + * [Config] update configs following rebase to v2.6.32-rc5 + * [Config] update ports configs following rebase to v2.6.32-rc5 + * [Config] Add cpio as a build dependency. + * rebase to v2.6.32-rc3 + * rebase to v2.6.32-rc5 + + -- Andy Whitcroft Mon, 30 Nov 2009 12:48:46 +0000 + +linux-ec2 (2.6.31-302.7) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] Updateconfigs after rebase to 2.6.31-14.46 + + * Rebased against linux (2.6.31-14.46) + + [ Andy Whitcroft ] + + * reinstate armel.mk with no flavours + - LP: #449637 + * [Upstream] elevator: fix fastfail checks to allow merge of readahead + requests + - LP: #444915 + * [Upstream] block: silently error unsupported empty barriers too + - LP: #420423 + + [ John Johansen ] + + * SAUCE: AppArmor: Fix mediation of "deleted" paths + - LP: #415632 + + [ Tim Gardner ] + + * [Config] CONFIG_X86_MCE + https://lists.ubuntu.com/archives/kernel-team/2009-October/007584.html + * Revert "appletalk: Fix skb leak when ipddp interface is not loaded, + CVE-2009-2903" - Use patch from 2.6.31.4 which is slightly different.
+ + [ Upstream Kernel Changes ] + + * x86: fix csum_ipv6_magic asm memory clobber + * tty: Avoid dropping ldisc_mutex over hangup tty re-initialization + * x86: Don't leak 64-bit kernel register values to 32-bit processes + * tracing: correct module boundaries for ftrace_release + * ftrace: check for failure for all conversions + * futex: fix requeue_pi key imbalance + * futex: Move exit_pi_state() call to release_mm() + * futex: Nullify robust lists after cleanup + * futex: Fix locking imbalance + * NOHZ: update idle state also when NOHZ is inactive + * ima: ecryptfs fix imbalance message + * libata: fix incorrect link online check during probe + * sound: via82xx: move DXS volume controls to PCM interface + * ASoC: WM8350 capture PGA mutes are inverted + * KVM: Prevent overflow in KVM_GET_SUPPORTED_CPUID + * KVM: VMX: flush TLB with INVEPT on cpu migration + * KVM: fix LAPIC timer period overflow + * KVM: SVM: Fix tsc offset adjustment when running nested + * KVM: SVM: Handle tsc in svm_get_msr/svm_set_msr correctly + * net: Fix wrong sizeof + * mm: add_to_swap_cache() must not sleep + * sis5513: fix PIO setup for ATAPI devices + * PIT fixes to unbreak suspend/resume (bug #14222) + * IMA: open new file for read + * ACPI: Clarify resource conflict message + * ACPI: fix Compaq Evo N800c (Pentium 4m) boot hang regression + * net: restore tx timestamping for accelerated vlans + * net: unix: fix sending fds in multiple buffers + * tun: Return -EINVAL if neither IFF_TUN nor IFF_TAP is set. + * tcp: fix CONFIG_TCP_MD5SIG + CONFIG_PREEMPT timer BUG() + * net: Fix sock_wfree() race + * smsc95xx: fix transmission where ZLP is expected + * sky2: Set SKY2_HW_RAM_BUFFER in sky2_init + * appletalk: Fix skb leak when ipddp interface is not loaded + * ax25: Fix possible oops in ax25_make_new + * ax25: Fix SIOCAX25GETINFO ioctl + * sit: fix off-by-one in ipip6_tunnel_get_prl + * Linux 2.6.31.4 + * drm/i915: Fix FDI M/N setting according with correct color depth + - LP: #416792 + + * linux (2.6.31-13.45) + + [ Luke Yelavich ] + + * [Config] Add sd_mod to scsi-modules udeb for powerpc + + [ Mario Limonciello ] + + * SAUCE: Update to LIRC 0.8.6 + - LP: #432678 + + [ Tim Gardner ] + + * [Config] CONFIG_X86_PAT=y + https://lists.ubuntu.com/archives/kernel-team/2009-October/007477.html + https://lists.ubuntu.com/archives/kernel-team/2009-October/007534.html + + * [Config] Add armel arch to linux-libc-dev arches. + - LP: #449637 + + [ Upstream Kernel Changes ] + + * e1000e: swap max hw supported frame size between 82574 and 82583 + * drm/i915: Initialize HDMI outputs as HDMI connectors, not DVI. + - LP: #392017 + * ALSA: hda - Add quirks for some HP laptops + - LP: #449742 + * ALSA: hda - Add support for HP dv6 + - LP: #449742 + + -- Tim Gardner Mon, 12 Oct 2009 11:27:10 -0600 + + * linux (2.6.31-13.44) + + [ Upstream Kernel Changes ] + + * sgi-gru: Fix kernel stack buffer overrun, CVE-2009-2584 + * appletalk: Fix skb leak when ipddp interface is not loaded, + CVE-2009-2903 + Note - This patch causes an ABI change in the symbol aarp_send_ddp + which I'm ignoring. + + -- Tim Gardner Tue, 13 Oct 2009 12:00:22 -0600 + +linux-ec2 (2.6.31-301.6) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] Added cpio as a build dependency. 
+ * [Config] CONFIG_GFS2_FS_LOCKING_DLM=y + - LP: #416325 + * [Config] Disable CONFIG_UEVENT_HELPER_PATH + * [Config] Enable CONFIG_USB_GADGET_DUMMY_HCD + * [Config] Disable Intel AGP/DRM + + * Rebased against linux (2.6.31-13.43) karmic; urgency=low + + [ Andy Whitcroft ] + + * Revert "[Upstream] acerhdf: Limit modalias matching to supported + boards" + + [ Colin Watson ] + + * Use section 'admin' rather than 'base' + + [ John Johansen ] + + * SAUCE: AppArmor: Set error code after structure initialization. + - LP: #427948 + * SAUCE: AppArmor: Fix off by 2 error in getprocattr mem allocation + - LP: #446595 + + [ Luke Yelavich ] + + * SAUCE: Add sr_mod to the scsi-modules udeb for powerpc + + [ Stefan Bader ] + + * [Upstream] acerhdf: Limit modalias matching to supported boards + (supersedes previous revert made by Andy Whitcroft) + - LP: #435958 + + * Rebased against linux (2.6.31-13.42) karmic; urgency=low + + [ Leann Ogasawara ] + + * SAUCE: (drop after 2.6.31) input: Add support for filtering input + events + - LP: #430809 + * SAUCE: (drop after 2.6.31) dell-laptop: Trigger rfkill updates on wifi + toggle switch press + - LP: #430809 + + [ Tim Gardner ] + + * SAUCE: Raise the default console 'quiet' level to 2 + This suppresses all but critical and emergency level messages. + https://lists.ubuntu.com/archives/kernel-team/2009-October/007476.html + + [ Upstream Kernel Changes ] + + * TTY: fix typos + * Linux 2.6.31.3 + * V4L/DVB (12439): cx88: add support for WinFast DTV2000H rev. J + - LP: #433904 + + * Rebased against linux (2.6.31-12.41) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] CONFIG_GFS2_FS_LOCKING_DLM=y + - LP: #416325 + * SAUCE: Fix MODULE_IMPORT/MODULE_EXPORT + The original patch failed to work for amd64. + - LP: #430694 + + [ Upstream Kernel Changes ] + + * ALSA: hda - Add a white-list for MSI option + Upstream cherry-pick: Infrastructure support for #445580 + * ALSA: hda - Add HP Pavilion dv4t-1300 to MSI whitelist + - LP: #445580 + * ALSA: intel8x0 - Mute External Amplifier by default for Sony VAIO + VGN-T350P + - LP: #410933 + * ALSA: intel8x0 - Mute External Amplifier by default for Sony VAIO + VGN-B1VP + - LP: #410933 + + * Rebased against linux (2.6.31-12.40) karmic; urgency=low + + [ Tim Gardner ] + + * SAUCE: Created MODULE_EXPORT/MODULE_IMPORT macros + - LP: #430694 + * SAUCE: Use MODULE_IMPORT macro to tie intel_agp to i915 + - LP: #430694 + + [ Upstream Kernel Changes ] + + * V4L/DVB (12352): gspca - vc032x: Fix mi1310_soc preview and LED + - LP: #310760 + + * Rebased against linux (2.6.31-12.39) karmic; urgency=low + + [ Andy Whitcroft ] + + * install the full changelog with the binary package + * changelog -- explicitly note rebases and clean history + + [ Tim Gardner ] + + * Revert "SAUCE: (drop after 2.6.31) Disable iwl-1000 series wireless + adapters" + * Revert "SAUCE: (drop after 2.6.31) eCryptfs: Prevent lower dentry from + going negative during unlink" + * Revert "SAUCE: (drop after 2.6.31) em28xx: ir-kbd-i2c init data needs a + persistent object" + * Revert "SAUCE: (drop after 2.6.31) saa7134: ir-kbd-i2c init data needs + a persistent object" + * Revert "drm/i915: Check that the relocation points to within the + target" + * Revert "drm/i915: fix tiling on IGDNG" + * Revert "Driver core: add new device to bus's list before probing" + * Revert "HID: completely remove apple mightymouse from blacklist" + * Revert "KVM: x86: check for cr3 validity in ioctl_set_sregs" + + All reverts superseded by corresponding v2.6.31.2 patches + + * [Config] Add
d101m_ucode.bin to d-i/firmware/nic-modules + - LP: #439456 + * [Config] Enabled CONFIG_SND_HDA_HWDEP + - LP: #443117 + * [Config] Add cpio as a build dependency. + * [Config] Set default I/O scheduler back to CFQ for desktop flavours + - LP: #381300 + + [ Upstream Kernel Changes ] + + * KVM: VMX: Fix EPT with WP bit change during paging + * pata_amd: do not filter out valid modes in nv_mode_filter + * p54usb: add Zcomax XG-705A usbid + * x86: Increase MIN_GAP to include randomized stack + * serial: bfin_5xx: fix building as module when early printk is enabled + * USB: option.c Add support for ZTE AC2726 EVDO modem + * USB: option: TELIT UC864G support + * video: s3c_fb.c: fix build with CONFIG_HOTPLUG=n + * kbuild: fix cc1 options check to ensure we do not use -fPIC when + compiling + * drivers/mfd/ab3100-core.c: fix powerpc build error + * thinkpad-acpi: don't ask about brightness_mode for fw. 1V and 1R + * ACPI: pci_slot.ko wants a 64-bit _SUN + * fbcon: only unbind from console if successfully registered + * kallsyms: fix segfault in prefix_underscores_count() + * sisfb: change SiS_DDC_Port type to SISIOADDRESS + * mmc_spi: fail gracefully if host or card do not support the switch + command + * alpha: AGP update (fixes compile failure) + * fs: make sure data stored into inode is properly seen before unlocking + new inode + * eCryptfs: Handle unrecognized tag 3 cipher codes + * eCryptfs: Check for O_RDONLY lower inodes when opening lower files + * eCryptfs: Filename encryption only supports password auth tokens + * eCryptfs: Validate global auth tok keys + * eCryptfs: Prevent lower dentry from going negative during unlink + * Re-enable Lanman security + * xen: make -fstack-protector work under Xen + * xen: only enable interrupts while actually blocking for spinlock + * xen: use stronger barrier after unlocking lock + * xen: check EFER for NX before setting up GDT mapping + * perf_counter: Fix perf_copy_attr() pointer arithmetic + * perf tools: Fix buffer allocation + * tty: serial/pcmcia: add ID for Advantech card + * PM / PCMCIA: Drop second argument of pcmcia_socket_dev_suspend() + * PM / yenta: Fix cardbus suspend/resume regression + * sony-laptop: check for rfkill hard block at load time + * nilfs2: fix missing zero-fill initialization of btree node cache + * ar9170usb: add usbid for TP-Link TL-WN821N v2 + * mtd: nand: fix ECC Correction bug for SMC ordering for NDFC driver + * mtd: ofpart: Check availability of reg property instead of name + property + * mtd: cfi_cmdset_0002: add 0xFF intolerance for M29W128G + * USB: serial: ftdi_sio: new hardware support - hameg power supply + * USB: add PIDs for FTDI based OpenDCC hardware + * USB: serial: ftdi: handle gnICE+ JTAG adaptors + * USB: CDC WDM driver doesn't support non-blocking reads + * USB: fix cdc-acm regression in open + * cdc_acm: Fix to use modern speed interfaces + * tty: remove dtr/rts use from the driver open methods + * tty: gigaset: really fix chars_in_buffer + * kaweth: Fix memory leak in kaweth_control() + * x86: SGI UV: Fix IPI macros + * USB: serial: pl2303: new hardware support - sanwa multimeter + * USB: storage: fix a resume path GFP_NOIO must be used + * USB: usb-storage fails to attach to Huawei Datacard cdrom device + * USB: usbtmc: sanity checks for DEV_DEP_MSG_IN urbs + * USB: sl811-hcd: Fix device disconnect: + * drm/i915: remove restore in resume + * drm/i915: Only destroy a constructed mmap offset + * drm/i915: prevent FIFO calculation overflows on 32 bits with high + dotclocks + * drm/i915: Add buffer to 
inactive list immediately during fault + * drm/i915: Check that the relocation points to within the target + * drm/i915: Fix typo for wrong LVDS clock setting on IGDNG + * drm/i915: Fix SSC frequence for IGDNG + * drm/i915: Remove DAC disable in CRT force detect on IGDNG + * drm/i915: Fix LVDS panel fitting on Arrandale + * drm/I915: Use the CRT DDC to get the EDID for DVI-connector on Mac + * drm/i915: fix tiling on IGDNG + * agp/intel: Fix the pre-9xx chipset flush. + * nfsd4: fix null dereference creating nfsv4 callback client + * can: fix NOHZ local_softirq_pending 08 warning + * ahci: restore pci_intx() handling + * pxa/sharpsl_pm: zaurus c3000 aka spitz: fix resume + * net ax25: Fix signed comparison in the sockopt handler + * net: Make the copy length in af_packet sockopt handler unsigned + * pty_write: don't do a tty_wakeup() when the buffers are full + * KVM: fix cpuid E2BIG handling for extended request types + * KVM: MMU: fix missing locking in alloc_mmu_pages + * KVM: MMU: fix bogus alloc_mmu_pages assignment + * KVM: Protect update_cr8_intercept() when running without an apic + * Fix NULL ptr regression in powernow-k8 + * perf tools: do not complain if root is owning perf.data + * netfilter: nf_nat: fix inverted logic for persistent NAT mappings + * netfilter: nf_conntrack: netns fix re reliable conntrack event delivery + * netfilter: bridge: refcount fix + * netfilter: ebt_ulog: fix checkentry return value + * ath5k: Wakeup fixes + * ath5k: do not release irq across suspend/resume + * Driver core: add new device to bus's list before probing + * tty: Add a full port_close function + * tty: USB hangup is racy + * tty: USB can now use the shutdown method for kref based freeing of + ports + * hwmon: (asus_atk0110) Add maintainer information + * tty: USB serial termios bits + * usb-serial: change referencing of port and serial structures + * usb-serial: put subroutines in logical order + * usb-serial: change logic of serial lookups + * usb-serial: acquire references when a new tty is installed + * usb-serial: fix termios initialization logic + * usb-serial: rename subroutines + * usb-serial: add missing tests and debug lines + * usb-serial: straighten out serial_open + * USB serial: update the console driver + * USB: xhci: Work around for chain bit in link TRBs. + * USB: xhci: Fix slot and endpoint context debugging. + * USB: xhci: Configure endpoint code refactoring. + * USB: xhci: Set correct max packet size for HS/FS control endpoints. + * USB: xhci: Support full speed devices. + * USB: xhci: Handle stalled control endpoints. + * USB: xhci: Add quirk for Fresco Logic xHCI hardware. + * USB: xhci: Make TRB completion code comparison readable. + * USB: xhci: Handle babbling endpoints correctly. + * USB: xhci: Don't touch xhci_td after it's freed. + * USB: xhci: Check URB's actual transfer buffer size. + * USB: xhci: Check URB_SHORT_NOT_OK before setting short packet status. + * USB: xhci: Set -EREMOTEIO when xHC gives bad transfer length. + * USB: xhci: Support interrupt transfers. + * USB: Fix SS endpoint companion descriptor parsing. 
+ * /proc/kcore: work around a BUG() + * hugetlb: restore interleaving of bootmem huge pages (2.6.31) + * page-allocator: limit the number of MIGRATE_RESERVE pageblocks per zone + * mm: munlock use follow_page + * mm: fix anonymous dirtying + * mmap: avoid unnecessary anon_vma lock acquisition in vma_adjust() + * Fix idle time field in /proc/uptime + * drm/i915: Handle ERESTARTSYS during page fault + * em28xx: ir-kbd-i2c init data needs a persistent object + * saa7134: ir-kbd-i2c init data needs a persistent object + * powerpc/8xx: Fix regression introduced by cache coherency rewrite + * powerpc: Fix incorrect setting of __HAVE_ARCH_PTE_SPECIAL + * HID: completely remove apple mightymouse from blacklist + * mptsas : PAE Kernel more than 4 GB kernel panic + * NOMMU: Fix MAP_PRIVATE mmap() of objects where the data can be mapped + directly + * iwlwifi: Handle new firmware file with ucode build number in header + * iwlwifi: update 1000 series API version to match firmware + * iwlagn: modify digital SVR for 1000 + * iwlwifi: traverse linklist to find the valid OTP block + * iwlwifi: fix unloading driver while scanning + * Linux 2.6.31.2 + + * Rebased against linux (2.6.31-11.38) karmic; urgency=low + + [ Stefan Bader ] + + * [Upstream] acerhdf: Limit modalias matching to supported boards + - LP: #435958 + + [ Tim Gardner ] + + * [Config] Added sata_via to d-i/modules/sata-modules + - LP: #434827 + * SAUCE: (drop after 2.6.31) Disable iwl-1000 series wireless adapters + - LP: #439285 + * [Upstream] i915: Fix i2c init message + - LP: #409361 + * [Config] Add sym53c8xx.ko to virtual sub-flavour + - LP: #439415 + + [ Upstream Kernel Changes ] + + * sched: Disable NEW_FAIR_SLEEPERS for now + - LP: #436342 + + * ACPI video: ignore buggy _BQC + * ACPI video: work-around BIOS AML bug in _BQC + - LP: #428910 + * ALSA: hda - Added quirk to enable sound on Toshiba NB200 + - LP: #438318 + + -- Tim Gardner Fri, 09 Oct 2009 10:42:15 -0600 + +linux-ec2 (2.6.31-300.5) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] Fix header package names and dependencies + - LP: #437784 + + -- Tim Gardner Wed, 30 Sep 2009 08:07:32 -0600 + +linux-ec2 (2.6.31-300.4) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] Disable Lenovo laptop config + * Rebase against master Ubuntu-2.6.31-11.37 + + [ Upstream Kernel Changes ] + + * n_tty: honor opost flag for echoes + * n_tty: move echoctl check and clean up logic + - LP: #438310 + + -- Tim Gardner Mon, 28 Sep 2009 22:15:20 -0600 + +linux-ec2 (2.6.31-300.3) karmic; urgency=low + + [ Chuck Short ] + + * SAUCE: ec2: Default domU console to tty. 
+ - LP: #431103 + + [ Tim Gardner ] + + * [Config] Remove dependency on wireless-crda + - LP: #434755 + * [Config] Drop bootloaders as a recommendation + - LP: #434755 + * [Config] Drop virtual sub flavours + + -- Tim Gardner Fri, 25 Sep 2009 15:18:34 -0600 + +linux-ec2 (2.6.31-300.2) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] Use gawk in xen makefiles + * [Config] CONFIG_DMI=n for XEN_UNPRIVILEGED_GUEST + + -- Tim Gardner Tue, 08 Sep 2009 22:54:48 -0600 + +linux-ec2 (2.6.31-300.1) karmic; urgency=low + + [ Tim Gardner ] + + * Revert "[Config] Let lenovo-sl-laptop build" + * Revert "SAUCE: [x86] implement cs-limit nx-emulation for ia32" + * [Config] Initial EC2 commit + * [Config] Removed ports + * [Config] Removed lpia + * [Config] Settled on the single EC2 flavour + * [Config] Disable udebs + * SAUCE: Prepare swap.h for Xen patches + * SAUCE: Xen 3.02 + * [Config] Target vmlinuz + * [Config] Build with correct arch + + [ Upstream Kernel Changes ] + + * Revert "PCI SR-IOV: correct broken resource alignment calculations" + + -- Tim Gardner Tue, 08 Sep 2009 20:31:57 -0600 + +linux-ec2 (2.6.31-300.0) karmic; urgency=low + + * Dummy entry + + -- Tim Gardner Tue, 08 Sep 2009 20:31:57 -0600 + +linux (2.6.31-10.30) karmic; urgency=low + + [ Amit Kucheria ] + + * [Config] Enable CONFIG_USB_DEVICEFS + - LP: #417748 + * [Config] Populate the config-update template a bit more + + [ Andy Whitcroft ] + + * rebase to v2.6.31-rc9 + * [Config] update configs following rebase to v2.6.31-rc9 + * [Config] update ports configs following rebase to v2.6.31-rc9 + + [ Colin Ian King ] + + * SAUCE: wireless: hostap, fix oops due to early probing interrupt + - LP: #254837 + + [ Jerone Young ] + + * [Upstream] ACPI: Add Thinkpad T400 & Thinkpad T500 to OSI(Linux) + white-list + - LP: #281732 + * [Upstream] ACPI: Add Thinkpad X200, X200s, X200t to OSI(Linux) + white-list + - LP: #281732 + * [Upstream] ACPI: Add Thinkpad X300 & Thinkpad X301 to OSI(Linux) + white-list + - LP: #281732 + * [Upstream] ACPI: Add Thinkpad R400 & Thinkpad R500 to OSI(Linux) + white-list + - LP: #281732 + * [Upstream] ACPI: Add Thinkpad W500, W700, & W700ds to OSI(Linux) + white-list + - LP: #281732 + + [ John Johansen ] + + * SAUCE: AppArmor: Fix profile attachment for regexp based profile names + - LP: #419308 + * SAUCE: AppArmor: Return the correct error codes on profile + addition/removal + - LP: #408473 + * SAUCE: AppArmor: Fix OOPS in profile listing, and display full list + - LP: #408454 + * SAUCE: AppArmor: Fix mapping of pux to new internal permission format + - LP: #419222 + * SAUCE: AppArmor: Fix change_profile failure + - LP: #401931 + * SAUCE: AppArmor: Tell git to ignore generated include files + - LP: #419505 + + [ Stefan Bader ] + + * [Upstream] acpi: video: Loosen strictness of video bus detection code + - LP: #333386 + * SAUCE: Remove ov511 driver from ubuntu subdirectory + + [ Tim Gardner ] + + * [Config] Exclude char-modules from non-x86 udeb creation + * SAUCE: Notify the ACPI call chain of AC events + * [Config] CONFIG_SATA_VIA=m + - LP: #403385 + * [Config] Build in all phylib support modules. 
+ * [Config] Don't fail when sub-flavour files are missing + - LP: #423426 + * [Config] Set CONFIG_LSM_MMAP_MIN_ADDR=0 + - LP: #423513 + + [ Upstream ] + + * Rebased against v2.6.31-rc9 + + -- Andy Whitcroft Mon, 07 Sep 2009 11:33:45 +0100 + +linux (2.6.31-9.29) karmic; urgency=low + + [ Leann Ogasawara ] + + * [Upstream] agp/intel: support for new chip variant of IGDNG mobile + - LP: #419993 + * [Config] d-i/modules: Add new char-modules file, initialize with + intel-agp + - LP: #420605 + + [ Upstream ] + + * Rebased against 2.6.31-rc8 plus some inotify regression patches: + up through git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + adda766193ea1cf3137484a9521972d080d0b7af. + + -- Tim Gardner Fri, 28 Aug 2009 06:31:30 -0600 + +linux (2.6.31-8.28) karmic; urgency=low + + [ Ike Panhc ] + + * [Config] Let nic-shared-modules depends on crypto-modules + - LP: #360966 + + [ Leann Ogasawara ] + + * [Upstream] (drop after 2.6.31) drm/i915: increase default latency + constant + - LP: #412492 + + [ Mario Limonciello ] + + * [Upstream]: (drop after 2.6.31) dell-laptop: don't change softblock + status if HW switch is disabled + - LP: #418721 + * [Upstream]: (drop after 2.6.31) compal-laptop: Add support for known + Compal made Dell laptops + * [Upstream]: (drop after 2.6.31) compal-laptop: Replace sysfs support + with rfkill support + + [ Tim Gardner ] + + * [Config] Add acpiphp to virtual sub-flavour + - LP: #364916 + * Drop KSM patch set for now because of instabilities with encrypted swap. + - LP: #418781 + + -- Tim Gardner Wed, 26 Aug 2009 08:14:26 -0600 + +linux (2.6.31-7.27) karmic; urgency=low + + [ Tim Gardner ] + + * [Config] updateconfigs updateportsconfigs after 2.6.31-rc7 rebase + * SAUCE: (drop after 2.6.31) Added KSM from mmotm-2009-08-20-19-18 + Replaces previous ksm patches from 2.6.31-6.25 + * [Config] KSM=y + + -- Tim Gardner Sat, 22 Aug 2009 20:32:11 -0600 + +linux (2.6.31-6.26) karmic; urgency=low + + [ Andy Whitcroft ] + + * [Config] enable CONFIG_AUFS_BR_RAMFS + - LP: #414738 + * split out debian directory ready for abstraction + * add printdebian target to find branch target + * abstracted debian -- debian/files is not abstracted + * abstracted debian -- packages must be built in debian/ + * abstracted debian -- kernel-wedge needs to work in debian/ + * abstracted debian -- ensure we install the copyright file + * abstracted-debian -- drop the debian directories from headers + * abstracted-debian -- drop the debian directories from headers part 2 + * SAUCE: ubuntu-insert-changes -- follow abstracted debian + * [Upstream] aoe: ensure we initialise the request_queue correctly V2 + - LP: #410198 + + [ Luke Yelavich ] + + * [Config] Ports: Disable CONFIG_CPU_FREQ_DEBUG on powerpc-smp + * [Config] Ports: Re-enable windfarm modules on powerpc64-smp + - LP: #413150 + * [Config] Ports: Build all cpu frequency scaling governors into ports + kernels + * [Config] Ports: Build ext2 and ext3 modules into ports kernels + * [Config] Ports: CONFIG_PACKET=y for all ports kernels + * [Config] Ports: Enable PS3 network driver + + [ Stefan Bader ] + + * abstracted debian -- call $(DEBIAN)/rules using make + + [ Tim Gardner ] + + * [Config] Abstract the debian directory + * SAUCE: Improve error reporting in postinst + - LP: #358564 + + -- Tim Gardner Sun, 16 Aug 2009 20:33:28 -0600 + +linux (2.6.31-6.25) karmic; urgency=low + + [ Andy Whitcroft ] + + * script to generate Ubuntu changes from changelog + * [Config] standardise ANDROID options + * [Config] standardise CONFIG_ATM 
as module + * [Config] standardise CONFIG_LIB80211 as module + * [Config] disable CONFIG_PRINT_QUOTA_WARNING + * [Config] set CONFIG_CRAMFS as module + * [Config] enable CONFIG_DAB and modules + * [Config] set CONFIG_MAC80211_HWSIM as module + * [Config] set CONFIG_NET_CLS_FLOW as module + * [Config] set CONFIG_NF_CONNTRACK_SANE as module + * [Config] set CONFIG_NF_CT_PROTO_DCCP as module + * [Config] set CONFIG_RTC_DRV_DS1511 as module + * [Config] set CONFIG_RTC_DRV_R9701 as module + * [Config] set CONFIG_RTC_DRV_S35390A as module + * [Config] set CONFIG_TOIM3232_DONGLE as module + * [Config] standardise CONFIG_USB_MIDI_GADGET as module + * [Config] standardise CONFIG_USB_G_PRINTER as module + * [Config] standardise CONFIG_USB_SERIAL_IR as module + * [Config] set CONFIG_USB_SERIAL_IUU as module + * [Config] standardise CONFIG_USB_STORAGE_CYPRESS_ATACB as module + * [Config] standardise CONFIG_USB_STORAGE_ONETOUCH as module + * cleanup remains of dm-loop + * drop thinkpad ec and smapi support + * drop appleir + * [Config] update configs following rebase to v2.6.31-rc6 + + [ Hugh Dickins ] + + * SAUCE: ksm patch 1, drop after 2.6.31 + * SAUCE: ksm patch 2, drop after 2.6.31 + * SAUCE: ksm patch 3, drop after 2.6.31 + * SAUCE: ksm patch 4, drop after 2.6.31 + * SAUCE: ksm patch 5, drop after 2.6.31 + * SAUCE: ksm patch 7, drop after 2.6.31 + + [ Izik Eidus ] + + * SAUCE: ksm patch 0, drop after 2.6.31 + * SAUCE: ksm patch 6, drop after 2.6.31 + * SAUCE: ksm patch 8, drop after 2.6.31 + * SAUCE: ksm patch 9, drop after 2.6.31 + + [ Luke Yelavich ] + + * [Config] Ports: Re-add PS3 modules to udebs + + [ Michael Casadevall ] + + * [Config] Update SPARC config and d-i files to reflect what can be built + + [ Tim Gardner ] + + * [Config] Removed armel package support + * [Config] Enabled CONFIG_KSM=y + + [ Upstream Kernel Changes ] + + * ARM: Cleanup: Revert "ARM: Add more cache memory types macros" + * ARM: Cleanup: Revert "Do not use OOB with MLC NAND" + * ARM: Cleanup: Revert "ARM: Make ARM arch aware of ubuntu/ drivers" + * ARM: Cleanup: Revert "ARM: IMX51: Make video capture drivers compile" + * ARM: Cleanup: Revert "ARM: IMX51: Fix isl29003 HWMON driver for i2c + changes" + * ARM: Cleanup: Revert "ARM: IMX51: IPU irq handler deadlock fix" + * ARM: Cleanup: Revert "ARM: IMX51: Babbage 2.5 needs a different system + revision" + * ARM: Cleanup: Revert "ARM: IMX51: Compile-in the IMX51 cpufreq driver + by default" + * ARM: Cleanup: Revert "ARM: IMX51: Enable ZONE_DMA for ARCH_MXC" + * ARM: Cleanup: Revert "ARM: IMX51: Make ARCH_MXC auto-enable + ARCH_MXC_CANONICAL" + * ARM: Cleanup: Revert "ARM: IMX51: Unconditionally disable + CONFIG_GPIOLIB" + * ARM: Cleanup: Revert "ARM: IMX51: Minimal changes for USB to work on + 2.6.31" + * ARM: Cleanup: Revert "ARM: IMX51: Fix plat-mxc/timer.c to handle imx51" + * ARM: Cleanup: Revert "ARM: IMX51: Make it compile." + * ARM: Cleanup: Revert "ARM: IMX51: Clean-up the craziness of including + mxc_uart.h _everywhere_" + * ARM: Cleanup: Revert "ARM: IMX51: Move board-mx51* header files to the + correct location" + * ARM: Cleanup: Revert "ARM: IMX51: Changed from snd_card_new to + snd_card_create" + * ARM: Cleanup: Revert "ARM: IMX51: Fix up merge error in Kconfig" + * ARM: Cleanup: Revert "ARM: IMX51: mxc_timer_init prototype" + * ARM: Cleanup: Revert "ARM: IMX51: Removed the mxc_gpio_port structure." + * ARM: Cleanup: Revert "ARM: IMX51: Added external declaration for + mxc_map_io." + * ARM: Cleanup: Revert "ARM: IMX51: Get to bus_id by calling dev_name." 
+ * ARM: Cleanup: Revert "ARM: IMX51: Get to bus_id by calling dev_name." + * ARM: Cleanup: Revert "ARM: IMX51: snd_soc_machine structure replaced + with snd_soc_card." + * ARM: Cleanup: Revert "ARM: IMX51: codec structure was moved to the card + structure" + * ARM: Cleanup: Revert "ARM: IMX51: Hack to add defines for + DMA_MODE_READ/WRITE/MASK" + * ARM: Cleanup: Revert "ARM: IMX51: Add SoC and board support for + Freescale mx51 platform" + * Driver core: add new device to bus's list before probing + * [Upstream] (drop after 2.6.31) ALSA: hda - Reduce click noise at + power-saving + - LP: #381693, #399750, #380892 + + -- Andy Whitcroft Fri, 14 Aug 2009 11:32:23 +0100 + +linux (2.6.31-5.24) karmic; urgency=low + + [ Amit Kucheria ] + + * ARM: IMX51: Make video capture drivers compile + * [Config] IMX51: Config updates + + [ Andy Whitcroft ] + + * remove leftovers of dm-bbr + + [ Leann Ogasawara ] + + * Add pata_cs5535 to pata-modules + - LP: #318805 + + [ Luke Yelavich ] + + * [Config] CONFIG_PPC64=y for powerpc64-smp + * [Config] Set the maximum number of CPUs to 1024 for powerpc64-smp + * [Config] CONFIG_PPC_PS3=y for powerpc64-smp + * [Config] CONFIG_PPC_MAPLE=y on powerpc64-smp + * [Config] CONFIG_PPC_PASEMI=y on powerpc64-smp + * [Config] CONFIG_CPU_FREQ_PMAC64=y on powerpc64-smp + * [Config] Enable all PS3 drivers in powerpc64-smp + + [ Mario Limonciello ] + + * LIRC -- fix lirc-i2c 2.6.31 compilation + + [ Matthew Garrett ] + + * [Upstream] dell-laptop: Fix rfkill state queries + + [ Tim Gardner ] + + * [Config] Ignore armel ABI and module changes + * [Config] Update configs after rebase against 2.6.31-rc5 + + [ Upstream ] + + * Rebased to 2.6.31-rc5 + + -- Andy Whitcroft Tue, 28 Jul 2009 10:10:09 +0100 + +linux (2.6.31-4.23) karmic; urgency=low + + [ Andy Whitcroft ] + + * AUFS -- update to aufs2-30 20090727 + * [Config] enable AUFS FUSE support + + [ Luke Yelavich ] + + * [Config] CONFIG_JFS_FS=m on sparc + + [ Tim Gardner ] + + * [Upstream] dell-laptop: Fix rfkill state setting. + + -- Andy Whitcroft Mon, 27 Jul 2009 11:11:47 +0100 + +linux (2.6.31-4.22) karmic; urgency=low + + [ Amit Kucheria ] + + * ARM: IMX51: Add SoC and board support for Freescale mx51 platform + * ARM: IMX51: Move board-mx51* header files to the correct location + * ARM: IMX51: Clean-up the craziness of including mxc_uart.h _everywhere_ + * ARM: IMX51: Make it compile. + * ARM: IMX51: Unconditionally disable CONFIG_GPIOLIB + * ARM: IMX51: Make ARCH_MXC auto-enable ARCH_MXC_CANONICAL + * ARM: IMX51: Enable ZONE_DMA for ARCH_MXC + * ARM: IMX51: Compile-in the IMX51 cpufreq driver by default + * ARM: IMX51: Fix isl29003 HWMON driver for i2c changes + * ARM: USB: musb: Refer to musb_otg_timer_func under correct #ifdef + * ARM: staging: udlfb: Add vmalloc.h include + * UBUNTU [Config]: Bring imx51 config up to date with other flavours + + [ Brad Figg ] + + * ARM: IMX51: Hack to add defines for DMA_MODE_READ/WRITE/MASK + * ARM: IMX51: codec structure was moved to the card structure + * ARM: IMX51: snd_soc_machine structure replaced with snd_soc_card. + * ARM: IMX51: Get to bus_id by calling dev_name. + * ARM: IMX51: Get to bus_id by calling dev_name. + * ARM: IMX51: Added external declaration for mxc_map_io. + * ARM: IMX51: Removed the mxc_gpio_port structure.
+ * ARM: IMX51: mxc_timer_init prototype
+ * ARM: IMX51: Fix up merge error in Kconfig
+ * ARM: IMX51: Changed from snd_card_new to snd_card_create
+
+ [ Dinh Nguyen ]
+
+ * ARM: IMX51: Fix plat-mxc/timer.c to handle imx51
+ * ARM: IMX51: Minimal changes for USB to work on 2.6.31
+ * ARM: IMX51: Babbage 2.5 needs a different system revision
+ * ARM: IMX51: IPU irq handler deadlock fix
+
+ [ Tim Gardner ]
+
+ * [Config] Enabled CONFIG_CAN=m
+ - LP: #327243
+ * [Config] Enabled CONFIG_SERIAL=m
+ - LP: #397189
+
+ -- Tim Gardner Fri, 24 Jul 2009 06:19:10 -0600
+
+linux (2.6.31-4.21) karmic; urgency=low
+
+ [ Amit Kucheria ]
+
+ * dm-raid-4-5: Add missing brackets around test_bit()
+
+ [ John Johansen ]
+
+ * AppArmor: Fix change_profile failing lpn401931
+ * AppArmor: Fix determination of forced AUDIT messages.
+ * AppArmor: Fix oops in auditing of the policy interface offset
+
+ -- Andy Whitcroft Thu, 23 Jul 2009 19:18:30 +0100
+
+linux (2.6.31-4.20) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * SAUCE: iscsitarget -- update to SVN revision r214
+ * SAUCE: iscsitarget -- re-enable driver
+ * [Config] consolidate lpia/lpia and i386/generic configs
+ * [Config] enable CRYPTO modules for all architectures
+ * [Config] enable cryptoloop
+ * [Config] enable various filesystems for armel
+ * [Config] sync i386 generic and generic-pae
+ * [Config] add the 386 (486 processors and above) flavour
+ * [Config] re-set DEFAULT_MMAP_MIN_ADDR
+ - LP: #399914
+ * add genconfigs/genportsconfigs to extract the built configs
+ * updateconfigs -- alter concatenation order to allow easier updates
+ * intelfb -- INTELFB now conflicts with DRM_I915
+ * printchanges -- rebase tree does not have stable tags, use changelog
+ * AppArmor: fix argument size mismatch on 64 bit builds
+
+ [ Ike Panhc ]
+
+ * Ship bnx2x firmware in nic-modules udeb
+ - LP: #360966
+
+ [ Jeff Mahoney ]
+
+ * AppArmor: fix build failure on ia64
+
+ [ John Johansen ]
+
+ * AppArmor: ensure apparmor enabled parameter is off if AppArmor fails to
+ initialize.
+ * AppArmor: fix auditing of domain transitions to include target profile
+ information
+ * AppArmor: fix C99 violation
+ * AppArmor: revert reporting of create to write permission.
+ * SAUCE: Add config option to set a default LSM
+ * [Config] enable AppArmor by default
+ * AppArmor: Fix NULL pointer dereference oops in profile attachment.
+
+ [ Keith Packard ]
+
+ * SAUCE: drm/i915: Allow frame buffers up to 4096x4096 on 915/945 class
+ hardware
+ - LP: #351756
+
+ [ Luke Yelavich ]
+
+ * [Config] add .o files found in arch/powerpc/lib to all powerpc kernel
+ header packages
+ - LP: #355344
+
+ [ Michael Casadevall ]
+
+ * [Config] update SPARC config files to allow successful build
+
+ [ Scott James Remnant ]
+
+ * SAUCE: trace: add trace_event for the open() syscall
+
+ [ Stefan Bader ]
+
+ * SAUCE: jfs: Fix early release of acl in jfs_get_acl
+ - LP: #396780
+
+ [ Tim Gardner ]
+
+ * [Upstream] Fix Soltech TA12 volume hotkeys not sending key release
+ - LP: #397499
+ * [Upstream] USB Option driver - Add USB ID for Novatel MC727/U727/USB727
+ refresh
+ - LP: #365291
+ * [Config] SSB/B44 are common across all arches/flavours.
+
+ [ Upstream ]
+
+ * Rebased to 2.6.31-rc4
+
+ -- Andy Whitcroft Thu, 23 Jul 2009 08:41:39 +0100
+
+linux (2.6.31-3.19) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * Revert "[Config] Disabled NDISWRAPPER"
+ * ndiswrapper -- fix i386 compilation failures on cmpxchg8b
+ * AUFS -- export various core functions
+ * AUFS -- export various core functions -- fixes
+ * AUFS -- core filesystem
+ * AUFS -- track changes in v2.6.31
+ * [Config] Enable AUFS
+ * dropped 'iwl3945: do not send scan command if channel count zero' as it
+ is already upstream but failed to auto-drop on rebase.
+
+ [ Eric Paris ]
+
+ * SAUCE: fsnotify: use def_bool in kconfig instead of letting the user
+ choose
+ * SAUCE: inotify: check filename before dropping repeat events
+ * SAUCE: fsnotify: fix inotify tail drop check with path entries
+
+ -- Andy Whitcroft Tue, 14 Jul 2009 12:52:55 +0100
+
+linux (2.6.31-3.18) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * Revert "Add splice-2.6.23.patch from AUFS to export a symbol needed by
+ AUFS"
+ * Revert "Add put_filp.patch from AUFS to export a symbol needed by AUFS"
+ * Revert "Add sec_perm-2.6.24.patch from AUFS - export
+ security_inode_permission"
+ * clear out leftover AUFS files and modifications
+
+ [ Luke Yelavich ]
+
+ * [Config] Enable CONFIG_USB_ISP116X_HCD on sparc
+ * SAUCE: Explicitly include header files to allow apparmor to build on
+ powerpc
+ * [Config] Enable CONFIG_BLK_DEV_IDECD on powerpc
+
+ [ Tim Gardner ]
+
+ * [Config] Dropped ubuntu/misc/wireless/acx
+ * [Config] Disabled NDISWRAPPER until the compile issues are fixed.
+
+ [ Upstream ]
+
+ * Rebased to 2.6.31-rc3
+
+ -- Andy Whitcroft Fri, 10 Jul 2009 18:59:33 +0100
+
+linux (2.6.31-2.17) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * [Config] CONFIG_BLK_DEV_CRYPTOLOOP=m for sparc
+ * compcache -- remove redundant Kconfig entries part 2
+ * compcache -- clean up CCFLAGS declarations
+ * [Config] enable AppArmor
+ * AppArmor: fix operator precedence issue in as_path_link
+
+ [ John Johansen ]
+
+ * AppArmor security module
+ * AppArmor: Correct mapping of file permissions.
+ * AppArmor: Turn auditing of ptrace on
+
+ [ Luke Yelavich ]
+
+ * [Config] disable CONFIG_DM_RAID45 on powerpc
+
+ -- Andy Whitcroft Fri, 10 Jul 2009 15:02:05 +0100
+
+linux (2.6.31-2.16) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * compcache -- remove redundant Kconfig entries
+ added ignore and ignore.modules for all arches since the compcache update
+ changes the module names as well as some compcache ABI values.
+
+ [ Manoj Iyer ]
+
+ * SAUCE: updated dm-raid45 module version to 2009.04.24 (2.6.30-rc3)
+ * SAUCE: update compcache version to 0.5.3
+
+ [ Tim Gardner ]
+
+ * [Config]: Fix sparc FTBS by adding ignore.modules
+
+ -- Tim Gardner Mon, 06 Jul 2009 13:35:29 -0600
+
+linux (2.6.31-2.15) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * SAUCE: default ATI Radeon KMS to off until userspace catches up
+ * [Config] Update configs following rebase to 2.6.31-rc2
+ * [Config] update ports configs following update to 2.6.31-rc2
+
+ [ Luke Yelavich ]
+
+ * [Config] powerpc - Disable CONFIG_RDS
+
+ [ Matt Zimmerman ]
+
+ * Rename linux-doc-PKGVER to linux-doc and clean up its description
+ - LP: #382115
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to mainline 2.6.31-rc2
+
+ -- Andy Whitcroft Sat, 04 Jul 2009 17:39:13 +0100
+
+linux (2.6.31-1.14) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * update ndiswrapper to 1.55
+ * remove leftovers of gfs
+ * [Config] powerpc: enable CONFIG_PPC_DISABLE_WERROR
+
+ [ Luke Yelavich ]
+
+ * [Config] re-enable and build the ide-pmac driver into powerpc kernels
+ * [Config] Build the ServerWorks Frodo / Apple K2 SATA driver into the
+ kernel
+
+ [ Manoj Iyer ]
+
+ * Remove snd-bt-sco ubuntu driver
+
+ [ Michael Casadevall ]
+
+ * [Config] updates ia64 config and d-i folders to allow successful build
+ * [Config] Update powerpc and sparc for 2.6.31
+
+ [ Upstream Kernel Changes ]
+
+ * intel-iommu: fix Identity Mapping to be arch independent
+ - LP: #384695
+ * ACPI: video: prevent NULL deref in acpi_get_pci_dev()
+
+ -- Andy Whitcroft Tue, 30 Jun 2009 17:47:32 +0100
+
+linux (2.6.31-1.13) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * REBASE: rebased to mainline 2.6.31-rc1
+ - "UBUNTU: SAUCE: UHCI USB quirk for resume"
+ no longer applies, using deprecated interfaces, LPIA only, dropped
+ - "UBUNTU: SAUCE: Mask off garbage in Dell WMI scan code data"
+ changes now upstream, dropped
+ * [Config] Update configs following rebase to 2.6.31-rc1
+ * [Config] update ports configs following update to 2.6.31-rc1
+
+ * [Config] disable broken staging driver CONFIG_STLC45XX
+ * SAUCE: fix compcache to use updated accessors
+ * [Config] disable staging driver CONFIG_VT6655
+ * SAUCE: fix DRBD to use updated accessors
+ * [Disable] ndiswrapper needs update
+ * [Disable] LIRC I2C needs update
+ * [Disable] CONFIG_LENOVO_SL_LAPTOP needs update
+ * [Config] disable I2C_DESIGNWARE -- does not compile
+ * [Config] disable CONFIG_TLSUP for lpia
+ * [Config] disable CONFIG_FB_UDL for arm
+ * SAUCE: disable adding scsi headers to linux-libc-dev
+
+ [ Mario Limonciello ]
+
+ * SAUCE: Add LIRC drivers
+
+ -- Andy Whitcroft Thu, 25 Jun 2009 12:06:22 +0100
+
+linux (2.6.30-10.12) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * [Config] split out the ports configs into their own family
+ * [Config] update configs following introduction of ports family
+
+ [ Upstream Kernel Changes ]
+
+ * Revert "Rename linux-doc-PKGVER to linux-doc and clean up its
+ description". Fixes linux-doc package name conflicts for now.
+ - LP: #382115
+
+ -- Tim Gardner Mon, 22 Jun 2009 09:17:14 -0600
+
+linux (2.6.30-10.11) karmic; urgency=low
+
+ [ Amit Kucheria ]
+
+ * [Config] Comment splitconfig.pl and misc cleanup
+ * [Config] Rename all configs to the new naming scheme
+ * [Config] Splitconfig rework
+ * [Config] Rename scripts/misc/oldconfig to kernelconfig
+ * [Config] Fix build system for new config split
+ * [Config] Run updateconfigs after the splitconfig rework
+
+ [ Andy Whitcroft ]
+
+ * Revert "SAUCE: Default to i915.modeset=0 if CONFIG_DRM_I915_KMS=y"
+ * [Config] standardise CONFIG_STAGING=y
+ * [Config] standardise CONFIG_RD_LZMA=y
+ * [Config] CONFIG_PCI_IOV=y
+ * [Config] CONFIG_PCI_STUB=m
+ * [Config] merge kernel configs more aggressively
+
+ [ Colin Watson ]
+
+ * [Config] Run kernel-wedge in $(builddir) rather than at the top level
+ * [Config] Add support for including firmware in udebs
+ * [Config] Ship bnx2 firmware in nic-modules udeb
+ - LP: #384861
+
+ [ Luke Yelavich ]
+
+ * [Config] ports - Import of ports architectures into kernel packaging
+ infrastructure
+ * [Config] ports - Do not update ports kernel configurations by default
+ * [Config] ports - Disable ABI checking for ports architectures
+ * [Config] ports - Build drivers in ubuntu sub-directory on powerpc
+ * [Config] ports - Add control.d/vars.* files for ports architectures
+ * [Config] ports - Add ports architectures for linux-libc-dev
+ * [Config] ports - Create powerpc specific message-modules and
+ block-modules udebs
+ * [Config] ports - Add configuration files for ports architectures
+
+ [ Manoj Iyer ]
+
+ * [Config] Enable CONFIG_BLK_DEV_AEC62XX=m for amd64 and i386
+ - LP: #329864
+
+ [ Michael Casadevall ]
+
+ * [Config] ports - Fix compression of kernels
+
+ [ Stefan Bader ]
+
+ * [Upstream] mmc: prevent dangling block device from accessing stale
+ queues
+ - LP: #383668
+
+ [ Tim Gardner ]
+
+ * [Config] Recommend grub-pc in linux-image
+ - LP: #385741
+ * [Config] Implement i386 generic and generic-pae flavours
+ * [Config] ports - Add control info after integrating ports arches
+ * [Config] Removed auto-generated files from git
+ * [Config] Added netxen_nic to nic-modules
+ - LP: #389603
+
+ [ Matt Zimmerman ]
+
+ * Rename linux-doc-PKGVER to linux-doc and clean up its description
+ - LP: #382115
+
+ -- Tim Gardner Mon, 15 Jun 2009 14:38:26 -0600
+
+linux (2.6.30-9.10) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * [Config] CONFIG_SECURITY_TOMOYO=y (amd64, i386, lpia)
+ * [Config] CONFIG_KEXEC_JUMP=y (amd64, lpia)
+ * [Config] CONFIG_LENOVO_SL_LAPTOP=m (amd64, lpia)
+ * [Config] CONFIG_POHMELFS_CRYPTO=y (i386, amd64)
+ * [Config] CONFIG_SERIAL_MAX3100=m (i386, amd64, lpia)
+ * [Config] CONFIG_VIDEO_GO7007=m (amd64, i386)
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to 2.6.30 final
+
+ -- Andy Whitcroft Fri, 05 Jun 2009 11:42:53 +0100
+
+linux (2.6.30-8.9) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * Config update removed the following options:
+ CONFIG_EDAC_AMD8111=m
+ CONFIG_EDAC_AMD8131=m
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to 2.6.30-rc8
+
+ -- Andy Whitcroft Wed, 03 Jun 2009 09:21:13 +0100
+
+linux (2.6.30-7.8) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * Enabled NEW configuration options:
+ Paravirtualization layer for spinlocks (PARAVIRT_SPINLOCKS) [N/y/?] Y
+ Cisco FNIC Driver (FCOE_FNIC) [N/m/y/?] M
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to 2.6.30-rc7
+
+ -- Andy Whitcroft Sat, 23 May 2009 23:47:24 +0100
+
+linux (2.6.30-6.7) karmic; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * Dropped: UBUNTU: SAUCE: input: Blacklist digitizers from joydev.c (now
+ upstream)
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to 2.6.30-rc6
+
+ -- Andy Whitcroft Mon, 18 May 2009 18:05:54 +0100
+
+linux (2.6.30-5.6) karmic; urgency=low
+
+ [ Tim Gardner ]
+
+ * [Config] Enable Keyspan USB serial device firmware in kernel module
+ - LP: #334285
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to 2.6.30-rc5
+
+ -- Tim Gardner Mon, 11 May 2009 12:02:16 -0600
+
+linux (2.6.30-4.5) karmic; urgency=low
+
+ [ Colin Watson ]
+
+ * Build-Conflict with findutils (= 4.4.1-1ubuntu1), to avoid
+ /usr/include/asm/* going missing
+ - LP: #373214
+
+ -- Stefan Bader Fri, 08 May 2009 11:09:08 +0200
+
+linux (2.6.30-3.4) karmic; urgency=low
+
+ [ Kees Cook ]
+
+ * SAUCE: [x86] implement cs-limit nx-emulation for ia32
+ - LP: #369978
+
+ [ Stefan Bader ]
+
+ * SAUCE: input: Blacklist digitizers from joydev.c
+ - LP: #300143
+
+ -- Tim Gardner Fri, 01 May 2009 14:00:42 -0600
+
+linux (2.6.30-2.3) karmic; urgency=low
+
+ [ Tim Gardner ]
+
+ * [Config] Enabled CC_STACKPROTECTOR=y for all x86en
+ - LP: #369152
+ * SAUCE: Default to i915_modeset=0 if CONFIG_DRM_I915_KMS=y
+ * [Config] CONFIG_DRM_I915_KMS=y
+ * [Config] Set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR to appropriate ARCH
+ minimums
+
+ [ Upstream Kernel Changes ]
+
+ * rebased to 2.6.30-rc4
+
+ -- Tim Gardner Thu, 30 Apr 2009 09:17:05 -0600
+
+linux (2.6.30-1.2) karmic; urgency=low
+
+ [ Tim Gardner ]
+
+ * [Config] armel: disable staging drivers, fixes FTBS
+ * [Config] armel imx51: Disable CONFIG_MTD_NAND_MXC, fixes FTBS
+
+ [ Upstream Kernel Changes ]
+
+ * mpt2sas: Change reset_type enum to avoid namespace collision.
+ Submitted upstream.
+
+ -- Tim Gardner Tue, 28 Apr 2009 16:54:41 -0600
+
+linux (2.6.30-1.1) karmic; urgency=low
+
+ * Initial release after rebasing against v2.6.30-rc3
+
+ -- Tim Gardner Thu, 12 Mar 2009 19:16:07 -0600
--- linux-ec2-2.6.32.orig/debian.ec2/rules
+++ linux-ec2-2.6.32/debian.ec2/rules
@@ -0,0 +1,222 @@
+#!/usr/bin/make -f
+#
+# $(DEBIAN)/rules for Ubuntu linux
+#
+# Use this however you want, just give credit where credit is due.
+#
+# Copyright (c) 2007 Ben Collins
+#
+
+ifeq ($(DEBIAN),)
+DEBIAN=debian.master
+endif
+
+# dpkg-buildpackage passes options that are incompatible
+# with the kernel build.
+unexport CFLAGS
+unexport LDFLAGS
+
+# This is the debhelper compatibility version to use.
+export DH_COMPAT=4
+export LC_ALL=C
+export SHELL=/bin/bash -e
+
+# Common variables for all architectures
+include $(DEBIAN)/rules.d/0-common-vars.mk
+
+# Pull in some arch specific stuff
+include $(DEBIAN)/rules.d/$(arch).mk
+
+# Maintainer targets
+include $(DEBIAN)/rules.d/1-maintainer.mk
+
+# Debian Build System targets
+binary: binary-indep binary-arch
+
+build: build-arch build-indep
+
+clean: debian/control
+ dh_testdir
+ dh_testroot
+ dh_clean
+
+ # d-i stuff
+ rm -rf $(DEBIAN)/d-i-$(arch)
+
+ # normal build junk
+ rm -rf $(DEBIAN)/abi/$(release)-$(revision)
+ rm -rf $(builddir)
+ rm -f $(stampdir)/stamp-*
+ rm -rf $(DEBIAN)/linux-*
+
+ # This gets rid of the d-i packages in control
+ cp -f $(DEBIAN)/control.stub $(DEBIAN)/control
+ cp $(DEBIAN)/changelog debian/changelog
+
+ # Install the copyright information.
+ cp $(DEBIAN)/copyright debian/copyright + +# Builds the image, arch headers and debug packages +include $(DEBIAN)/rules.d/2-binary-arch.mk + +# Rules for building the udebs ($(DEBIAN)-installer) +include $(DEBIAN)/rules.d/5-udebs.mk + +# Builds the source, doc and linux-headers indep packages +include $(DEBIAN)/rules.d/3-binary-indep.mk + +# Various checks to be performed on builds +include $(DEBIAN)/rules.d/4-checks.mk + +# Misc stuff +$(DEBIAN)/control.stub: $(DEBIAN)/d-i/kernel-versions.in \ + $(DEBIAN)/scripts/control-create \ + $(DEBIAN)/control.stub.in \ + $(DEBIAN)/changelog \ + $(wildcard $(DEBIAN)/control.d/* $(DEBIAN)/sub-flavours/*.vars) + for i in $(DEBIAN)/d-i/kernel-versions.in $(DEBIAN)/control.stub.in; do \ + new=`echo $$i | sed 's/\.in$$//'`; \ + cat $$i | sed -e 's/PKGVER/$(release)/g' \ + -e 's/ABINUM/$(abinum)/g' \ + -e 's/SRCPKGNAME/$(src_pkg_name)/g' \ + > $$new; \ + done + flavours="$(wildcard $(DEBIAN)/control.d/vars.* $(DEBIAN)/sub-flavours/*.vars)";\ + for i in $$flavours; do \ + $(SHELL) $(DEBIAN)/scripts/control-create $$i | \ + sed -e 's/PKGVER/$(release)/g' \ + -e 's/ABINUM/$(abinum)/g' \ + -e 's/SRCPKGNAME/$(src_pkg_name)/g' \ + >> $(DEBIAN)/control.stub; \ + done + cp $(DEBIAN)/control.stub $(DEBIAN)/control + +.PHONY: debian/control +debian/control: $(DEBIAN)/control.stub + rm -rf $(builddir)/modules $(builddir)/firmware \ + $(builddir)/kernel-versions $(builddir)/package-list \ + $(builddir)/$(DEBIAN) + mkdir -p $(builddir)/modules/$(arch)/ + cp $(DEBIAN)/d-i/modules/* $(builddir)/modules/$(arch)/ + mkdir -p $(builddir)/firmware/$(arch)/ + cp $(DEBIAN)/d-i/firmware/* $(builddir)/firmware/$(arch)/ + cp $(DEBIAN)/d-i/package-list $(DEBIAN)/d-i/kernel-versions $(builddir)/ + touch $(builddir)/modules/$(arch)/kernel-image + # kernel-wedge needs to poke around in $(DEBIAN)/ + ln -nsf $(CURDIR)/debian $(builddir)/debian + + # Some files may need to differ between architectures + if [ -d $(DEBIAN)/d-i/modules-$(arch) ]; then \ + cp $(DEBIAN)/d-i/modules-$(arch)/* \ + $(builddir)/modules/$(arch)/; \ + fi + if [ -d $(DEBIAN)/d-i/firmware-$(arch) ]; then \ + cp $(DEBIAN)/d-i/firmware-$(arch)/* \ + $(builddir)/firmware/$(arch)/; \ + fi + + # Remove unwanted stuff for this architecture + if [ -r "$(DEBIAN)/d-i/exclude-modules.$(arch)" ]; then \ + (cat $(DEBIAN)/d-i/exclude-modules.$(arch); \ + ls $(builddir)/modules/$(arch)/) | sort | uniq -d | \ + (cd $(builddir)/modules/$(arch)/; xargs rm -f); \ + fi + if [ -r "$(DEBIAN)/d-i/exclude-firmware.$(arch)" ]; then \ + (cat $(DEBIAN)/d-i/exclude-firmware.$(arch); \ + ls $(builddir)/firmware/$(arch)/) | sort | uniq -d | \ + (cd $(builddir)/firmware/$(arch)/; xargs rm -f); \ + fi + + # Per flavour module lists + flavour_modules=`ls $(DEBIAN)/d-i/modules.$(arch)-* 2>/dev/null` \ + || true; \ + if [ "$$flavour_modules" != "" ]; then \ + for flav in $$flavour_modules; do \ + name=`echo $$flav | sed 's/.*\/modules.$(arch)-//'`; \ + mkdir $(builddir)/modules/$(arch)-$$name; \ + (cd $(builddir)/modules/; tar cf - `cat ../$$flav`) | \ + (cd $(builddir)/modules/$(arch)-$$name/; tar xf -); \ + touch $(builddir)/modules/$(arch)-$$name/kernel-image; \ + done; \ + fi + flavour_firmware=`ls $(DEBIAN)/d-i/firmware.$(arch)-* 2>/dev/null` \ + || true; \ + if [ "$$flavour_firmware" != "" ]; then \ + for flav in $$flavour_firmware; do \ + name=`echo $$flav | sed 's/.*\/firmware.$(arch)-//'`; \ + mkdir $(builddir)/firmware/$(arch)-$$name; \ + (cd $(builddir)/firmware/; tar cf - `cat ../$$flav`) | \ + (cd $(builddir)/firmware/$(arch)-$$name/; 
tar xf -);\ + touch $(builddir)/firmware/$(arch)-$$name/kernel-image; \ + done; \ + fi + + # Some files may need to differ between flavours + flavour_module_dirs=`ls -d $(DEBIAN)/d-i/modules-$(arch)-* 2>/dev/null`\ + || true; \ + if [ "$$flavour_module_dirs" ]; then \ + for flav in $$flavour_module_dirs; do \ + name=`echo $$flav | sed 's/.*\/modules-$(arch)-//'`; \ + [ -d $(builddir)/modules/$(arch)-$$name ] || \ + cp -a $(builddir)/modules/$(arch) \ + modules/$(arch)-$$name; \ + cp $$flav/* $(builddir)/modules/$(arch)-$$name/; \ + done; \ + fi + flavour_firmware_dirs=`ls -d $(DEBIAN)/d-i/firmware-$(arch)-* 2>/dev/null`\ + || true; \ + if [ "$$flavour_firmware_dirs" ]; then \ + for flav in $$flavour_firmware_dirs; do \ + name=`echo $$flav | sed 's/.*\/firmware-$(arch)-//'`; \ + [ -d $(builddir)/firmware/$(arch)-$$name ] || \ + cp -a $(builddir)/firmware/$(arch) \ + firmware/$(arch)-$$name; \ + cp $$flav/* $(builddir)/firmware/$(arch)-$$name/; \ + done; \ + fi + + # Remove unwanted stuff for each flavour + flavour_exclude=`ls $(DEBIAN)/d-i/exclude-modules.$(arch)-* 2>/dev/null`\ + || true; \ + if [ "$$flavour_exclude" ]; then \ + for flav in $$flavour_exclude; do \ + name=`echo $$flav | sed 's/.*\/exclude-modules.$(arch)-//'`;\ + [ -d $(builddir)/modules/$(arch)-$$name ] || \ + cp -a $(builddir)/modules/$(arch) \ + $(builddir)/modules/$(arch)-$$name; \ + (cat $$flav; \ + ls $(builddir)/modules/$(arch)-$$name) | \ + sort | uniq -d | \ + (cd $(builddir)/modules/$(arch)-$$name/; \ + xargs rm -f); \ + done; \ + fi + flavour_exclude=`ls $(DEBIAN)/d-i/exclude-firmware.$(arch)-* 2>/dev/null`\ + || true; \ + if [ "$$flavour_exclude" ]; then \ + for flav in $$flavour_exclude; do \ + name=`echo $$flav | sed 's/.*\/exclude-firmware.$(arch)-//'`;\ + [ -d $(builddir)/firmware/$(arch)-$$name ] || \ + cp -a $(builddir)/firmware/$(arch) \ + $(builddir)/firmware/$(arch)-$$name; \ + (cat $$flav; \ + ls $(builddir)/firmware/$(arch)-$$name) | \ + sort | uniq -d | \ + (cd $(builddir)/firmware/$(arch)-$$name/; \ + xargs rm -f); \ + done; \ + fi + + if [ ! -d $(builddir)/modules/$(build_arch) ]; then \ + mkdir -p $(builddir)/modules/$(build_arch); \ + cp $(builddir)/modules/$(arch)/* \ + $(builddir)/modules/$(build_arch); \ + fi + if [ ! -d $(builddir)/firmware/$(build_arch) ]; then \ + mkdir -p $(builddir)/firmware/$(build_arch); \ + cp $(builddir)/firmware/$(arch)/* \ + $(builddir)/firmware/$(build_arch); \ + fi + + cp $(DEBIAN)/control.stub debian/control.stub + cd $(builddir) && kernel-wedge gen-control > $(CURDIR)/debian/control --- linux-ec2-2.6.32.orig/debian.ec2/changelog.historical +++ linux-ec2-2.6.32/debian.ec2/changelog.historical @@ -0,0 +1,4176 @@ +linux (2.6.30-1.1) karmic; urgency=low + + * Initial release after rebasing against v2.6.30-rc3 + + -- Tim Gardner Thu, 12 Mar 2009 19:16:07 -0600 + +linux (2.6.28-9.31) jaunty; urgency=low + + [ Andy Whitcroft ] + + * SAUCE: cpufreq-nforce2: probe failures are not errors + - LP: #332170 + * SAUCE: mmc: add MODALIAS linkage for MMC/SD devices + - LP: #30335 + * remove test-suspend script + - LP: #333856 + + [ Kees Cook ] + + * handle relative paths in modules.dep + Fixes 2.6.28-9.30 FTBS. 
+ + [ Upstream Kernel Changes ] + + * ricoh_mmc: Handle newer models of Ricoh controllers + + -- Tim Gardner Wed, 11 Mar 2009 08:19:24 -0600 + +linux (2.6.28-9.30) jaunty; urgency=low + + [ Amit Kucheria ] + + * ARM:mx51 Add SoC and board support for mx51 platforms + * ARM:mx51 Add CONFIG_ARCH_MXC_CANONICAL to disable parts of Freescale's + code + * MMC: Add support for 8-bit cards + * Add ARM:MX51 SoC support to the build system + * ARM: Make ARM arch aware of ubuntu/ drivers + * ARM: Add imx51 configuration + * Disable d-i modules for imx51 and mv78xx0 + * Disable Apparmor on boot for ARM + * Updating imx51 config + + [ Jason Liu ] + + * Do not use OOB with MLC NAND + + [ Richard Zhu ] + + * Support the eMMC4.3 card + + [ Rob Herring ] + + * ARM: Add more cache memory types macros + + [ Tim Gardner ] + + * Set CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y for i386/amd64/lpia + + [ Manoj Iyer ] + + * Enable CONFIG_RTL8187SE=m + + [ Upstream Kernel Changes ] + + * USB: EHCI: slow down ITD reuse + - LP: #329437 + + -- Tim Gardner Sun, 08 Mar 2009 14:14:15 -0600 + +linux (2.6.28-9.29) jaunty; urgency=low + + [ Andy Whitcroft ] + + * link-headers -- only link directories which do not already exist + - LP: #315252 + + [ Daniel Marjamäki ] + + * SAUCE: (drop after 2.6.28) netxen: fix memory leak in + drivers/net/netxen_nic_init.c + - LP: #330813 + + [ Dhananjay Phadke ] + + * SAUCE: (drop after 2.6.28) netxen: fix endianness in firmware commands + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix ipv6 offload and tx cleanup + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix link speed reporting for some + boards + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: firmware init fix + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: cleanup mac list on driver unload + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: hold tx lock while sending firmware + commands + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: handle dma mapping failures + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: avoid invalid iounmap + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: include ipv6.h (fixes build failure) + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix vlan tso/checksum offload + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: reduce memory footprint + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: revert jumbo ringsize + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix msi-x interrupt handling + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: remove pcie workaround + - LP: #330813 + + [ Hannes Eder ] + + * SAUCE: (drop after 2.6.28) drivers/net/netxen: fix sparse warnings: use + NULL pointer instead of plain integer + - LP: #330813 + + [ Huaxu Wan ] + + * SAUCE: report rfkill changes event if interface is down + - LP: #193970 + + [ Tim Gardner ] + + * MV78XX0 must specify a target in the vars definition. 
+ + [ Upstream Kernel Changes ] + + * Revert "ext4: wait on all pending commits in ext4_sync_fs()" + * jbd2: Fix return value of jbd2_journal_start_commit() + * jbd2: Avoid possible NULL dereference in + jbd2_journal_begin_ordered_truncate() + * ext4: Fix to read empty directory blocks correctly in 64k + * ext4: Fix lockdep warning + * ext4: Initialize preallocation list_head's properly + * ext4: Implement range_cyclic in ext4_da_writepages instead of + write_cache_pages + * ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling + * ext4: Add fallback for find_group_flex + * ext4: Fix deadlock in ext4_write_begin() and ext4_da_write_begin() + * Added mv78xx0 flavor + + -- Tim Gardner Fri, 06 Mar 2009 06:13:31 -0700 + +linux (2.6.28-8.28) jaunty; urgency=low + + [ Alexey Starikovskiy ] + + * SAUCE: ACPI: EC: Limit workaround for ASUS notebooks even more + - LP: #288385 + + [ Scott James Remnant ] + + * SAUCE: Auto-load esp module when device opened. + * SAUCE: Auto-load bridge module when socket opened. + * SAUCE: Auto-load af_netlink module when socket opened. + * SAUCE: Auto-load wanrouter module when socket opened. + * SAUCE: Auto-load ip_queue module when socket opened. + * SAUCE: Auto-load ip6_queue module when socket opened. + * SAUCE: Auto-load cn module when socket opened. + * SAUCE: Auto-load scsi_transport_iscsi module when socket opened. + * SAUCE: Auto-load ftl module when device opened. + * SAUCE: Auto-load pcd module when device opened. + * SAUCE: Auto-load pf module when device opened. + * SAUCE: Auto-load nftl module when device opened. + * SAUCE: Auto-load mousedev module when psaux device opened. + * SAUCE: Auto-load mousedev module when /dev/input/mice opened. + * SAUCE: Auto-load rng-core module when device opened. + * SAUCE: Auto-load openprom module when device opened. + * SAUCE: Auto-load applicom module when device opened. + * SAUCE: Auto-load toshiba module when device opened. + * SAUCE: Auto-load cyclades module when device opened. + * SAUCE: Auto-load riscom8 module when device opened. + * SAUCE: Auto-load specialix module when device opened. + * SAUCE: Auto-load videodev module when device opened. + * SAUCE: Auto-load i2c_dev module when device opened. + * SAUCE: Auto-load mtdchar module when device opened. + * SAUCE: Auto-load pt module when device opened. + * SAUCE: Auto-load pg module when device opened. + * SAUCE: Auto-load cdc_acm module when device opened. + * SAUCE: Auto-load msr module when device opened. + * SAUCE: Auto-load cpuid module when device opened. + * SAUCE: quickcam: Enable double-buffering by default + * SAUCE: libata: Ignore HPA by default. + * SAUCE: hostap: Change initial operation mode to managed (infra) + * SAUCE: floppy: Provide a PnP device table in the module. + - LP: #255651 + * SAUCE: Auto-load mwave module when device opened. + * Build CONFIG_FUSE_FS into kernel, not as module. 
+ + [ Stefan Bader ] + + * Enable build of ext4 as a module on LPIA + - LP: #331848 + + [ Tim Gardner ] + + * Update configs to fix LPIA FTBS + + -- Tim Gardner Thu, 05 Mar 2009 10:43:24 -0700 + +linux (2.6.28-8.27) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating configs (arm:ixp4xx) + + [ Andy Whitcroft ] + + * SAUCE: enable Intel HDMI output + + [ Manoj Iyer ] + + * SAUCE: Added quirk for Linksys WUSB600N USB wifi-n networking adapter + - LP: #323473 + + [ Steve Beattie ] + + * fix apparmor memory leak on unlinked file ops + - LP: #329489 + + [ Tim Gardner ] + + * SAUCE: Dell XPS710 reboot quirk + - LP: #323592 + * SAUCE: (drop after 2.6.28) ieee80211: Add infrastructure to obsolete + scan results + - LP: #336055 + * Add modules.order to the linux-image package. + + [ Upstream Kernel Changes ] + + * iwlwifi: fix time interval misuse in iwl_poll_{direct_}bit + * x86: only scan the root bus in early PCI quirks + - LP: #267295 + * ALSA: hda - Intel HDMI audio support + * ALSA: hda - Fix unused function in patch_intelhdmi.c + * ALSA: handle SiI1392 HDMI codec in patch_intelhdmi.c + * ALSA: hda-intel: reorder HDMI audio enabling sequence + * ALSA: introduce snd_print_pcm_rates() + * ALSA: create hda_eld.c for ELD routines and proc interface + * ALSA: ELD proc interface for HDMI sinks + * ALSA: hda: make standalone hdmi_fill_audio_infoframe() + * ALSA: hda: make global snd_print_channel_allocation() + * ALSA: hda: HDMI channel allocations for audio infoframe + * ALSA: hda: HDMI channel mapping cleanups + * ALSA: hda: minor code cleanups + * ALSA: hda: rename sink_eld to hdmi_eld + * ALSA: hda - Release ELD proc file + * ALSA: hda - minor HDMI code cleanups + * ALSA: hda - report selected CA index for Audio InfoFrame + * ALSA: hda - Add Intel vendor id string + + -- Tim Gardner Wed, 25 Feb 2009 14:23:46 -0700 + +linux (2.6.28-8.26) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating configs (armel:ixp4xx) + - LP: #331510 + + [ Tim Gardner ] + + * Add more missing modules + + -- Tim Gardner Tue, 24 Feb 2009 06:58:53 -0700 + +linux (2.6.28-8.25) jaunty; urgency=low + + [ Scott James Remnant ] + + * SAUCE: Prefer powernow-k8 to acpi-cpufreq + * Change CONFIG_X86_P4_CLOCKMOD to be a module again. 
+ + [ Tim Gardner ] + + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Initialize the new + group descriptor when resizing the filesystem" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add sanity check + to make_indexed_dir" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: only use + i_size_high for regular files" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add sanity checks + for the superblock before mounting the filesystem" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix + s_dirty_blocks_counter if block allocation failed with nodelalloc" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Init the complete + page while building buddy cache" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Don't allow new + groups to be added during block allocation" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: mark the + blocks/inode bitmap beyond end of group as used" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Use new + buffer_head flag to check uninit group bitmaps initialization" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix the race + between read_inode_bitmap() and ext4_new_inode()" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix race between + read_block_bitmap() and mark_diskspace_used()" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: don't use blocks + freed but not yet committed in buddy cache init" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: cleanup mballoc + header files" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Use + EXT4_GROUP_INFO_NEED_INIT_BIT during resize" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add blocks added + during resize to bitmap" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Don't overwrite + allocation_context ac_status" + * Revert "SAUCE: (revert before 2.6.28.y update) jbd2: Add barrier not + supported test to journal_wait_on_commit_record" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Widen type of + ext4_sb_info.s_mb_maxs[]" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: avoid ext4_error + when mounting a fs with a single bg" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix the delalloc + writepages to allocate blocks at the right offset." 
+ * Revert "SAUCE: (revert before 2.6.28.y update) ext4: tone down + ext4_da_writepages warnings" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add support for + non-native signed/unsigned htree hash algorithms" + * Enabled X86_ACPI_CPUFREQ=y + + [ Upstream Kernel Changes ] + + * ath9k: quiet harmless ForceXPAon messages + - LP: #321474 + * [WATCHDOG] iTCO_wdt: fix SMI_EN regression 2 + - LP: #314050 + * pid: implement ns_of_pid + * mqueue: fix si_pid value in mqueue do_notify() + * powerpc/vsx: Fix VSX alignment handler for regs 32-63 + * sata_nv: give up hardreset on nf2 + * Fix Intel IOMMU write-buffer flushing + * SCSI: libiscsi: fix iscsi pool leak + * x86/cpa: make sure cpa is safe to call in lazy mmu mode + * sched: SCHED_OTHER vs SCHED_IDLE isolation + * x86, vm86: fix preemption bug + * Add support for VT6415 PCIE PATA IDE Host Controller + * ext2/xip: refuse to change xip flag during remount with busy inodes + * 3c505: do not set pcb->data.raw beyond its size + * Bluetooth: Fix TX error path in btsdio driver + * ext4: Add support for non-native signed/unsigned htree hash algorithms + * ext4: tone down ext4_da_writepages warnings + * ext4: Fix the delalloc writepages to allocate blocks at the right + offset. + * ext4: avoid ext4_error when mounting a fs with a single bg + * ext4: Widen type of ext4_sb_info.s_mb_maxs[] + * jbd2: Add barrier not supported test to journal_wait_on_commit_record + * ext4: Don't overwrite allocation_context ac_status + * ext4: Add blocks added during resize to bitmap + * ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize + * ext4: cleanup mballoc header files + * ext4: don't use blocks freed but not yet committed in buddy cache init + * ext4: Fix race between read_block_bitmap() and mark_diskspace_used() + * ext4: Fix the race between read_inode_bitmap() and ext4_new_inode() + * ext4: Use new buffer_head flag to check uninit group bitmaps + initialization + * ext4: mark the blocks/inode bitmap beyond end of group as used + * ext4: Don't allow new groups to be added during block allocation + * ext4: Init the complete page while building buddy cache + * ext4: Fix s_dirty_blocks_counter if block allocation failed with + nodelalloc + * ext4: Add sanity checks for the superblock before mounting the + filesystem + * ext4: only use i_size_high for regular files + * ext4: Add sanity check to make_indexed_dir + * ext4: Initialize the new group descriptor when resizing the filesystem + * Fix longstanding "error: storage size of '__mod_dmi_device_table' isn't + known" + * Linux 2.6.28.7 + + -- Tim Gardner Thu, 19 Feb 2009 06:45:55 -0700 + +linux (2.6.28-8.24) jaunty; urgency=low + + [ Scott James Remnant ] + + * Change CPU_FREQ_DEFAULT_GOV_ONDEMAND to y + * SAUCE: Link acpi-cpufreq.o first + + [ Tim Gardner ] + + * Build in CPU Frequency scaling drivers + + -- Tim Gardner Wed, 18 Feb 2009 06:12:24 -0700 + +linux (2.6.28-8.23) jaunty; urgency=low + + [ Andy Whitcroft ] + + * include the kernel configuration in the sub-flavour images + - LP: #328859 + + [ Tim Gardner ] + + * Revert "SAUCE: (drop after 2.6.28) [eCryptfs] Regression in unencrypted + filename symlinks" in favor of upstream commit. 
+ * Fix compile issues with qc-usb + * SAUCE: (remove after 2.6.28) V4L/DVB (10216): saa7127: fix broken + S-Video with saa7129 + - LP: #329267 + + [ Upstream Kernel Changes ] + + * Subject:SAUCE: LP#193970 iwlagn: fix hw-rfkill while the interface is + down + - LP: #193970 + * x86, vmi: put a missing paravirt_release_pmd in pgd_dtor + * nbd: fix I/O hang on disconnected nbds + * mac80211: restrict to AP in outgoing interface heuristic + * w1: w1 temp calculation overflow fix + * zd1211rw: adding 0ace:0xa211 as a ZD1211 device + * zd1211rw: treat MAXIM_NEW_RF(0x08) as UW2453_RF(0x09) for TP-Link + WN322/422G + * parport: parport_serial, don't bind netmos ibm 0299 + * syscall define: fix uml compile bug + * kernel-doc: fix syscall wrapper processing + * Fix page writeback thinko, causing Berkeley DB slowdown + * write-back: fix nr_to_write counter + * writeback: fix break condition + * mm: rearrange exit_mmap() to unlock before arch_exit_mmap + * powerpc/fsl-booke: Fix mapping functions to use phys_addr_t + * lockd: fix regression in lockd's handling of blocked locks + * sctp: Fix crc32c calculations on big-endian arhes. + * sctp: Correctly start rtx timer on new packet transmissions. + * sctp: Properly timestamp outgoing data chunks for rtx purposes + * net: Fix frag_list handling in skb_seq_read + * net: Fix OOPS in skb_seq_read(). + * drivers/net/skfp: if !capable(CAP_NET_ADMIN): inverted logic + * ipv4: fix infinite retry loop in IP-Config + * net: Fix userland breakage wrt. linux/if_tunnel.h + * net: packet socket packet_lookup_frame fix + * packet: Avoid lock_sock in mmap handler + * sungem: Soft lockup in sungem on Netra AC200 when switching interface + up + * udp: Fix UDP short packet false positive + * udp: increments sk_drops in __udp_queue_rcv_skb() + * ipv6: Disallow rediculious flowlabel option sizes. + * ipv6: Copy cork options in ip6_append_data + * net: 4 bytes kernel memory disclosure in SO_BSDCOMPAT gsopt try #2 + * sky2: fix hard hang with netconsoling and iface going up + * tun: Add some missing TUN compat ioctl translations. + * tun: Fix unicast filter overflow + * virtio_net: Fix MAX_PACKET_LEN to support 802.1Q VLANs + * tcp: splice as many packets as possible at once + * tcp: Fix length tcp_splice_data_recv passes to skb_splice_bits. + * sparc: Enable syscall wrappers for 64-bit (CVE-2009-0029) + * sparc64: Annotate sparc64 specific syscalls with SYSCALL_DEFINEx() + * ALSA: hda - Add missing terminator in slave dig-out array + * ALSA: mtpav - Fix initial value for input hwport + * HID: adjust report descriptor fixup for MS 1028 receiver + * ide/libata: fix ata_id_is_cfa() (take 4) + * libata: fix EH device failure handling + * netfilter: fix tuple inversion for Node information request + * netfilter: xt_sctp: sctp chunk mapping doesn't work + * x86: microcode_amd: fix wrong handling of equivalent CPU id + * ide-cd: fix DMA for non bio-backed requests + * net: Fix data corruption when splicing from sockets. 
+ * Linux 2.6.28.6 + * eCryptfs: Regression in unencrypted filename symlinks + + -- Tim Gardner Mon, 16 Feb 2009 06:43:51 -0700 + +linux (2.6.28-8.22) jaunty; urgency=low + + [ Amit Kucheria ] + + * Remove perm-blacklist + + [ Andy Whitcroft ] + + * SAUCE: psmouse/synaptics: ensure we reset the device on resume + - LP: #317270 + + [ Tim Gardner ] + + * Add lpia to getabi script + * SAUCE: tracer for sreadahead + + -- Amit Kucheria Fri, 13 Feb 2009 15:23:21 +0200 + +linux (2.6.28-8.21) jaunty; urgency=low + + [ Andy Whitcroft ] + + * SAUCE: switch the Asus Pundit P1-AH2 to old acpi sleep ordering + - LP: #327267 + + [ Tim Gardner ] + + * Added LPIA arch support + * Added libdrm-dev as a 'Replaces' to linux-libc-dev + * SAUCE: LPIA support for 9202 HDA Sigmatel codec + * SAUCE: Add an X86_LPIA Kconfig option + * SAUCE: UHCI USB quirk for resume + * SAUCE: LPIA Reboot fix for Intel Crownbeach development boards + * SAUCE: LPIA Logical reset of USB port on resume + * Set CONFIG_WIRELESS_OLD_REGULATORY=n, added wireless-crda + as an install dependency. + + [ Upstream Kernel Changes ] + + * Revert "Revert "x86, early_ioremap: fix fencepost error"" + - LP: #312554 + * drm/i915: capture last_vblank count at IRQ uninstall time too + - LP: #320813 + * drm/i915: add get_vblank_counter function for GM45 + - LP: #320813 + * Staging: comedi: fix Kbuild + * Staging: meilhaus: fix Kbuild + * Staging: android: binder: fix arm build errors + * Staging: android: timed_gpio: Fix build to build on kernels after + 2.6.25. + * Staging: android: fix build error on 64bit boxes + * Staging: android: Add lowmemorykiller documentation. + * Staging: android: task_get_unused_fd_flags: fix the wrong usage of + tsk->signal + * staging: agnx: drivers/staging/agnx/agnx.h needs + * Staging: usbip: usbip_start_threads(): handle kernel_thread failure + * Staging: poch: fix verification of memory area + * Documentation: move DMA-mapping.txt to Doc/PCI/ + * sgi-xp: fix writing past the end of kzalloc()'d space + * do_wp_page: fix regression with execute in place + * wait: prevent exclusive waiter starvation + * shm: fix shmctl(SHM_INFO) lockup with !CONFIG_SHMEM + * revert "rlimit: permit setting RLIMIT_NOFILE to RLIM_INFINITY" + * prevent kprobes from catching spurious page faults + * sound: usb-audio: handle wMaxPacketSize for FIXED_ENDPOINT devices + * md: Ensure an md array never has too many devices. + * md: Fix a bug in linear.c causing which_dev() to return the wrong + device. + * ACPI: Enable bit 11 in _PDC to advertise hw coord + * ACPI: dock: Don't eval _STA on every show_docked sysfs read + * ieee1394: ohci1394: increase AT req. retries, fix ack_busy_X from + Panasonic camcorders and others + * firewire: ohci: increase AT req. retries, fix ack_busy_X from Panasonic + camcorders and others + * firewire: sbp2: fix DMA mapping leak on the failure path + * firewire: sbp2: add workarounds for 2nd and 3rd generation iPods + * ieee1394: sbp2: add workarounds for 2nd and 3rd generation iPods + * module: remove over-zealous check in __module_get() + * x86: APIC: enable workaround on AMD Fam10h CPUs + * eeepc-laptop: fix oops when changing backlight brightness during + eeepc-laptop init + * eeepc-laptop: Add support for extended hotkeys + * e1000: fix bug with shared interrupt during reset + * e1000: Fix PCI enable to honor the need_ioport flag + * agp/intel: Fix broken ® symbol in device name. 
+ * ALSA: hda - Add quirk for FSC Amilo Xi2550 + * ALSA: hda - Add missing COEF initialization for ALC887 + * ALSA: hda - Add missing initialization for ALC272 + * asus_acpi: Add R1F support + * panasonic-laptop: fix X[ ARRAY_SIZE(X) ] + * ACPI: Skip the first two elements in the _BCL package + * ACPI: proc_dir_entry 'video/VGA' already registered + * ACPI: disable ACPI cleanly when bad RSDP found + * ACPICA: Fix table entry truncation calculation + * PCI: properly clean up ASPM link state on device remove + * PCI: return error on failure to read PCI ROMs + * seq_file: move traverse so it can be used from seq_read + * seq_file: fix big-enough lseek() + read() + * serial: set correct baud_base for Oxford Semiconductor Ltd EXSYS + EX-41092 Dual 16950 Serial adapter + * Add support for '8-port RS-232 MIC-3620 from advantech' + * mm: fix error case in mlock downgrade reversion + * elf core dump: fix get_user use + * ACPI: video: Fix reversed brightness behavior on ThinkPad SL series + * ipw2200: fix scanning while associated + * XFS: set b_error from bio error in xfs_buf_bio_end_io + * Revert USB: option: add Pantech cards + * USB: option: New mobile broadband modems to be supported + * USB: new id for ti_usb_3410_5052 driver + * USB: two more usb ids for ti_usb_3410_5052 + * USB: usb-storage: add Pentax to the bad-vendor list + * sata_via: Add VT8261 support + * nbd: do not allow two clients at the same time + * sctp: Fix another socket race during accept/peeloff + * Linux 2.6.28.5 + + -- Tim Gardner Mon, 09 Feb 2009 16:11:28 -0700 + +linux (2.6.28-7.20) jaunty; urgency=low + + [ Tim Gardner ] + + * SAUCE: Input: atkbd - Samsung NC10 key repeat fix + + [ Upstream Kernel Changes ] + + * Manually revert "mlock: downgrade mmap sem while populating mlocked + regions" + * xen: make sysfs files behave as their names suggest + * sata_mv: fix 8-port timeouts on 508x/6081 chips + * m68knommu: set NO_DMA + * PCI/MSI: bugfix/utilize for msi_capability_init() + * x86: use early clobbers in usercopy*.c + * netfilter: ctnetlink: fix scheduling while atomic + * orinoco: move kmalloc(..., GFP_KERNEL) outside spinlock in + orinoco_ioctl_set_genie + * fbdev/atyfb: Fix DSP config on some PowerMacs & PowerBooks + * kmalloc: return NULL instead of link failure + * sata_nv: rename nv_nf2_hardreset() + * sata_nv: fix MCP5x reset + * sata_nv: ck804 has borked hardreset too + * Fix memory corruption in console selection + * Add enable_ms to jsm driver + * nfsd: only set file_lock.fl_lmops in nfsd4_lockt if a stateowner is + found + * nfsd: Ensure nfsv4 calls the underlying filesystem on LOCKT + * iwlwifi: fix rs_get_rate WARN_ON() + * p54: fix lm87 checksum endianness + * p54: fix p54_read_eeprom to cope with tx_hdr_len + * p54usb: rewriting rx/tx routines to make use of usb_anchor's facilities + * minstrel: fix warning if lowest supported rate index is not 0 + * PCI: irq and pci_ids patch for Intel Tigerpoint DeviceIDs + * cpuidle: Add decaying history logic to menu idle predictor + * ACPI: Avoid array address overflow when _CST MWAIT hint bits are set + * video: always update the brightness when poking "brightness" + * Newly inserted battery might differ from one just removed, so update of + battery info fields is required. 
+ * ACPI: Do not modify SCI_EN directly + * dlm: initialize file_lock struct in GETLK before copying conflicting + lock + * sata_mv: Fix chip type for Hightpoint RocketRaid 1740/1742 + * ACPICA: Allow multiple backslash prefix in namepaths + * Linux 2.6.28.4 + + -- Tim Gardner Sat, 07 Feb 2009 18:53:42 -0700 + +linux (2.6.28-7.19) jaunty; urgency=low + + * Fix missing modules FTBS + + -- Tim Gardner Thu, 05 Feb 2009 15:28:15 -0700 + +linux (2.6.28-7.18) jaunty; urgency=low + + [ Alok Kataria ] + + * SAUCE: (drop after 2.6.29) x86: add a synthetic TSC_RELIABLE feature + bit + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: add X86_FEATURE_HYPERVISOR feature bit + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: Hypervisor detection and get tsc_freq + from hypervisor + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: Add a synthetic TSC_RELIABLE feature + bit. + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: Skip verification by the watchdog for + TSC clocksource. + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: VMware: Fix vmware_get_tsc code + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: vmware: look for DMI string in the + product serial key + - LP: #319945 + + [ Andy Whitcroft ] + + * SAUCE: toshiba_acpi -- pull in current -dev version of driver + - LP: #269831 + * SAUCE: toshiba_acpi -- add acpi hotkey kernel thread + - LP: #269831 + * move toshiba laptops back from tlsup to toshiba_acpi + - LP: #269831 + + [ Aneesh Kumar K.V ] + + * SAUCE: (revert before 2.6.28.y update) ext4: Fix the delalloc + writepages to allocate blocks at the right offset. + * SAUCE: (revert before 2.6.28.y update) ext4: avoid ext4_error when + mounting a fs with a single bg + * SAUCE: (revert before 2.6.28.y update) ext4: Don't overwrite + allocation_context ac_status + * SAUCE: (revert before 2.6.28.y update) ext4: Add blocks added during + resize to bitmap + * SAUCE: (revert before 2.6.28.y update) ext4: Use + EXT4_GROUP_INFO_NEED_INIT_BIT during resize + * SAUCE: (revert before 2.6.28.y update) ext4: cleanup mballoc header + files + * SAUCE: (revert before 2.6.28.y update) ext4: don't use blocks freed but + not yet committed in buddy cache init + * SAUCE: (revert before 2.6.28.y update) ext4: Fix race between + read_block_bitmap() and mark_diskspace_used() + * SAUCE: (revert before 2.6.28.y update) ext4: Fix the race between + read_inode_bitmap() and ext4_new_inode() + * SAUCE: (revert before 2.6.28.y update) ext4: Use new buffer_head flag + to check uninit group bitmaps initialization + * SAUCE: (revert before 2.6.28.y update) ext4: mark the blocks/inode + bitmap beyond end of group as used + * SAUCE: (revert before 2.6.28.y update) ext4: Don't allow new groups to + be added during block allocation + * SAUCE: (revert before 2.6.28.y update) ext4: Init the complete page + while building buddy cache + * SAUCE: (revert before 2.6.28.y update) ext4: Fix s_dirty_blocks_counter + if block allocation failed with nodelalloc + + [ Hannes Eder ] + + * SAUCE: (drop after 2.6.29) x86: vmware - fix sparse warnings + - LP: #319945 + + [ Luke Yelavich ] + + * hid modules have hyphens instead of underscores in their names + + [ Mark Fasheh ] + + * SAUCE: (revert before 2.6.28.y update) jbd2: Add BH_JBDPrivateStart + + [ Theodore Ts'o ] + + * SAUCE: (revert before 2.6.28.y update) ext4: Add support for non-native + signed/unsigned htree hash algorithms + * SAUCE: (revert before 2.6.28.y update) ext4: tone down + ext4_da_writepages warnings + * SAUCE: (revert before 2.6.28.y update) jbd2: Add barrier not 
supported + test to journal_wait_on_commit_record + * SAUCE: (revert before 2.6.28.y update) ext4: Add sanity checks for the + superblock before mounting the filesystem + * SAUCE: (revert before 2.6.28.y update) ext4: only use i_size_high for + regular files + * SAUCE: (revert before 2.6.28.y update) ext4: Add sanity check to + make_indexed_dir + * SAUCE: (revert before 2.6.28.y update) jbd2: On a __journal_expect() + assertion failure printk "JBD2", not "EXT3-fs" + * SAUCE: (revert before 2.6.28.y update) ext4: Initialize the new group + descriptor when resizing the filesystem + + [ Tyler Hicks ] + + * SAUCE: (drop after 2.6.28) [eCryptfs] Regression in unencrypted + filename symlinks + - LP: #322532 + + [ Upstream Kernel Changes ] + + * Input: atkbd - broaden the Dell DMI signatures + - LP: #261721 + * ti_usb_3410_5052: support alternate firmware + * ath5k: fix mesh point operation + * mac80211: decrement ref count to netdev after launching mesh discovery + * inotify: clean up inotify_read and fix locking problems + * fuse: destroy bdi on umount + * fuse: fix missing fput on error + * fuse: fix NULL deref in fuse_file_alloc() + * x86, mm: fix pte_free() + * klist.c: bit 0 in pointer can't be used as flag + * sysfs: fix problems with binary files + * x86: fix page attribute corruption with cpa() + * USB: fix toggle mismatch in disable_endpoint paths + * sound: virtuoso: enable UART on Xonar HDAV1.3 + * USB: usbmon: Implement compat_ioctl + * USB: fix char-device disconnect handling + * USB: storage: add unusual devs entry + * alpha: nautilus - fix compile failure with gcc-4.3 + * alpha: fix vmalloc breakage + * resources: skip sanity check of busy resources + * rtl8187: Add termination packet to prevent stall + * it821x: Add ultra_mask quirk for Vortex86SX + * libata: pata_via: support VX855, future chips whose IDE controller use + 0x0571 + * serial_8250: support for Sealevel Systems Model 7803 COMM+8 + * drm: stash AGP include under the do-we-have-AGP ifdef + * Fix OOPS in mmap_region() when merging adjacent VM_LOCKED file segments + * bnx2x: Block nvram access when the device is inactive + * ext3: Add sanity check to make_indexed_dir + * rtl8187: Fix error in setting OFDM power settings for RTL8187L + * epoll: drop max_user_instances and rely only on max_user_watches + * gpiolib: fix request related issue + * sgi-xpc: Remove NULL pointer dereference. 
+ * sgi-xpc: ensure flags are updated before bte_copy + * include/linux: Add bsg.h to the Kernel exported headers + * ALSA: hda - Fix PCM reference NID for STAC/IDT analog outputs + * ALSA: hda - add another MacBook Pro 4, 1 subsystem ID + * ALSA: hda - Add quirk for HP DV6700 laptop + * crypto: authenc - Fix zero-length IV crash + * crypto: ccm - Fix handling of null assoc data + * x86, pat: fix reserve_memtype() for legacy 1MB range + * x86, pat: fix PTE corruption issue while mapping RAM using /dev/mem + * PCI hotplug: fix lock imbalance in pciehp + * dmaengine: fix dependency chaining + * NET: net_namespace, fix lock imbalance + * relay: fix lock imbalance in relay_late_setup_files + * Linux 2.6.28.3 + * ALSA: Enable SPDIF output on ALC655 + * ALSA: hda - Add ASUS V1Sn support + * ALSA: hda - support detecting HD Audio devices with PCI class code + * ALSA: hda: alc883 model for ASUS P5Q-EM boards + * ALSA: hda - Add quirk for MSI 7260 mobo + * ALSA: hda - Add quirk for Sony VAIO VGN-SR19XN + * ALSA: oxygen: add Claro halo support + * ALSA: hda - Add a new function to seek for a codec ID + * ALSA: patch_sigmatel: Add missing Gateway entries and autodetection + * ALSA: hda - More fixes on Gateway entries + * ALSA: hda - Add MCP67 HDMI support + * ALSA: hda - fix name for ALC1200 + * LSA: hda - Add HP Acacia detection + * ALSA: hda - Add quirk for HP 2230s + * ALSA: hda - Add quirk for Dell Inspiron Mini9 + * ALSA: hda - add support for Intel DX58SO board + * ALSA: hda - Fix silent headphone output on Panasonic CF-74 + * ALSA: USB quirk for Logitech Quickcam Pro 9000 name + * ALSA: hda - add quirks for some 82801H variants to use ALC883_MITAC + + [ Yasunori Goto ] + + * SAUCE: (revert before 2.6.28.y update) ext4: Widen type of + ext4_sb_info.s_mb_maxs[] + + -- Tim Gardner Mon, 02 Feb 2009 23:07:13 -0700 + +linux (2.6.28-6.17) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating configs: ARMEL/versatile + + -- Amit Kucheria Fri, 30 Jan 2009 13:36:59 +0200 + +linux (2.6.28-6.16) jaunty; urgency=low + + [ Luke Yelavich ] + + * Add hid quirks to input-modules udeb + + [ Tim Gardner ] + + * Revert "[arm] Fix kexec on ARM by properly calling the relocation + function". This patch was deemed 'bogus' by Russell King on the + ARM mailing list. + + [ Upstream Kernel Changes ] + + * PCI: keep ASPM link state consistent throughout PCIe hierarchy + * security: introduce missing kfree + * rt2x00: add USB ID for the Linksys WUSB200. 
+ * p54usb: Add USB ID for Thomson Speedtouch 121g + * lib/idr.c: use kmem_cache_zalloc() for the idr_layer cache + * sgi-xp: eliminate false detection of no heartbeat + * sched: fix update_min_vruntime + * IA64: Turn on CONFIG_HAVE_UNSTABLE_CLOCK + * sound: virtuoso: do not overwrite EEPROM on Xonar D2/D2X + * ALSA: hda - Add quirk for another HP dv5 + * ALSA: hda - Fix HP dv5 mic input + * ALSA: hda - Don't reset HP pinctl in patch_sigmatel.c + * ALSA: hda - make laptop-eapd model back for AD1986A + * drivers/net/irda/irda-usb.c: fix buffer overflow + * usb-storage: add last-sector hacks + * usb-storage: set CAPACITY_HEURISTICS flag for bad vendors + * pkt_sched: sch_htb: Fix deadlock in hrtimers triggered by HTB + * ipv6: Fix fib6_dump_table walker leak + * sctp: Avoid memory overflow while FWD-TSN chunk is received with bad + stream ID + * pkt_sched: cls_u32: Fix locking in u32_change() + * r6040: fix wrong logic in mdio code + * r6040: save and restore MIER correctly in the interrupt routine + * r6040: bump release number to 0.19 + * tcp: don't mask EOF and socket errors on nonblocking splice receive + * p54usb: fix traffic stalls / packet drop + * netfilter: x_tables: fix match/target revision lookup + * netfilter: ebtables: fix inversion in match code + * netfilter: nf_conntrack: fix ICMP/ICMPv6 timeout sysctls on big-endian + * dell_rbu: use scnprintf() instead of less secure sprintf() + * powerpc: is_hugepage_only_range() must account for both 4kB and 64kB + slices + * hwmon: (abituguru3) Fix CONFIG_DMI=n fallback to probe + * mm: write_cache_pages cyclic fix + * mm: write_cache_pages early loop termination + * mm: write_cache_pages writepage error fix + * mm: write_cache_pages integrity fix + * mm: write_cache_pages cleanups + * mm: write_cache_pages optimise page cleaning + * mm: write_cache_pages terminate quickly + * mm: write_cache_pages more terminate quickly + * mm: do_sync_mapping_range integrity fix + * mm: direct IO starvation improvement + * fs: remove WB_SYNC_HOLD + * fs: sync_sb_inodes fix + * fs: sys_sync fix + * Linux 2.6.28.2 + + -- Tim Gardner Sun, 25 Jan 2009 13:36:16 -0700 + +linux (2.6.28-5.15) jaunty; urgency=low + + [ Tim Gardner ] + + * Revert "Enabled CONFIG_PID_NS=y for i386/amd64" + Somehow this commit also reverted the 7 prior commits (which is bad). + * Enabled CONFIG_PID_NS=y for i386/amd64 (version 2) + + -- Tim Gardner Thu, 22 Jan 2009 13:48:34 -0700 + +linux (2.6.28-5.14) jaunty; urgency=low + + [ Ben Collins ] + + * lirc_gpio: Forward ported to current kernel (jaunty) + * configs: Enable LIRC_GPIO on 64-bit/32-bit x86 + - LP: #298791 + + [ Jeff Layton ] + + * SAUCE: cifs: make sure we allocate enough storage for socket address + - LP: #318565 + + [ Tim Gardner ] + + * check-abi: Return success when ABI skip is requested and no ABI files exist. + This ought to fix the armel FTBS. + + -- Tim Gardner Thu, 22 Jan 2009 06:42:49 -0700 + +linux (2.6.28-5.13) jaunty; urgency=low + + [ Andy Whitcroft ] + + * Revert "SAUCE: don't use buggy _BCL/_BCM/_BQC for backlight control" + + [ Tim Gardner ] + + * Fix udeb generation breakage caused by the previous armel versatile + flavour config update. 
+
+ -- Tim Gardner Wed, 21 Jan 2009 12:38:35 -0700
+
+linux (2.6.28-5.12) jaunty; urgency=low
+
+ [ Ante ]
+
+ * Update drbd to 8.3.0
+
+ [ Dave Airlie ]
+
+ * i915/drm: provide compat defines for userspace for certain struct
+
+ [ Eric Anholt ]
+
+ * drm/i915: Don't double-unpin buffers if we take a signal in
+ * drm/i915: Don't complain when interrupted while pinning in execbuffers.
+ * drm/i915: Don't allow objects to get bound while VT switched.
+
+ [ Jani Monoses ]
+
+ * Fix webcam having USB ID 0ac8:303b
+ - LP: #292086
+
+ [ Jesse Barnes ]
+
+ * drm/i915: set vblank enabled flag correctly across IRQ
+ * drm/i915: don't enable vblanks on disabled pipes
+
+ [ Michael Casadevall ]
+
+ * [arm] Fix kexec on ARM by properly calling the relocation function
+
+ [ Tim Gardner ]
+
+ * Enabled CONFIG_PID_NS=y for i386/amd64
+ * SAUCE: Increase ATA_TMOUT_PMP_SRST_WAIT to 5 seconds.
+ - LP: #318978
+ * Update armel versatile config
+ - LP: #314789
+ * Enabled CONFIG_RT2860=m for i386/amd64
+ * Enabled CONFIG_RT2870=m for i386/amd64
+
+ [ Upstream Kernel Changes ]
+
+ * Input: atkbd - add keyboard quirk for HP Pavilion ZV6100 laptop
+ - LP: #291878
+ * ALSA: hda - Add quirk for another HP dv7
+ * ALSA: hda - Add quirk for HP6730B laptop
+ * ALSA: caiaq - Fix Oops with MIDI
+ * ALSA: hda - Fix typos for AD1882 codecs
+ * x86: fix intel x86_64 llc_shared_map/cpu_llc_id anomalies
+ * x86: default to SWIOTLB=y on x86_64
+ * CIFS: make sure that DFS pathnames are properly formed
+ * ring-buffer: prevent false positive warning
+ * ring-buffer: fix dangling commit race
+ * iwlwifi: use GFP_KERNEL to allocate Rx SKB memory
+ * tx493[89]ide: Fix length for __ide_flush_dcache_range
+ * tx4939ide: Do not use zero count PRD entry
+ * SCSI: eata: fix the data buffer accessors conversion regression
+ * USB: emi26: fix oops on load
+ * x86, UV: remove erroneous BAU initialization
+ * x86: fix incorrect __read_mostly on _boot_cpu_pda
+ * vmalloc.c: fix flushing in vmap_page_range()
+ * fs: symlink write_begin allocation context fix
+ * cgroups: fix a race between cgroup_clone and umount
+ * dm raid1: fix error count
+ * dm log: fix dm_io_client leak on error paths
+ * minix: fix add link's wrong position calculation
+ * md: fix bitmap-on-external-file bug.
+ * sched_clock: prevent scd->clock from moving backwards, take #2
+ * devices cgroup: allow mkfifo
+ * SCSI: aha152x_cs: Fix regression that keeps driver from using shared
+ interrupts
+ * ioat: fix self test for multi-channel case
+ * USB: isp1760: use a specific PLX bridge instead of any bridge
+ * USB: isp1760: Fix probe in PCI glue code
+ * USB: unusual_devs.h additions for Pentax K10D
+ * inotify: fix type errors in interfaces
+ * Move compat system call declarations to compat header file
+ * Convert all system calls to return a long
+ * Rename old_readdir to sys_old_readdir
+ * Remove __attribute__((weak)) from sys_pipe/sys_pipe2
+ * Make sys_pselect7 static
+ * Make sys_syslog a conditional system call
+ * System call wrapper infrastructure
+ * powerpc: Enable syscall wrappers for 64-bit
+ * s390: enable system call wrappers
+ * System call wrapper special cases
+ * System call wrappers part 01
+ * System call wrappers part 02
+ * System call wrappers part 03
+ * System call wrappers part 04
+ * System call wrappers part 05
+ * System call wrappers part 06
+ * System call wrappers part 07
+ * System call wrappers part 08
+ * System call wrappers part 09
+ * System call wrappers part 10
+ * System call wrappers part 11
+ * System call wrappers part 12
+ * System call wrappers part 13
+ * System call wrappers part 14
+ * System call wrappers part 15
+ * System call wrappers part 16
+ * System call wrappers part 17
+ * System call wrappers part 18
+ * System call wrappers part 19
+ * System call wrappers part 20
+ * System call wrappers part 21
+ * System call wrappers part 22
+ * System call wrappers part 23
+ * System call wrappers part 24
+ * System call wrappers part 25
+ * System call wrappers part 26
+ * System call wrappers part 27
+ * System call wrappers part 28
+ * System call wrappers part 29
+ * System call wrappers part 30
+ * System call wrappers part 31
+ * System call wrappers part 32
+ * System call wrappers part 33
+ * s390 specific system call wrappers
+ * x86: fix RIP printout in early_idt_handler
+ * Fix timeouts in sys_pselect7
+ * USB: another unusual_devs entry for another bad Argosy storage device
+ * USB: storage: extend unusual range for 067b:3507
+ * USB: storage: recognizing and enabling Nokia 5200 cell phones
+ * HID: fix error condition propagation in hid-sony driver
+ * fix switch_names() breakage in short-to-short case
+ * nfs: remove redundant tests on reading new pages
+ * eCryptfs: check readlink result was not an error before using it
+ * mvsas: increase port type detection delay to suit Seagate's 10k6 drive ST3450856SS 0003
+ * x86: avoid theoretical vmalloc fault loop
+ * ath9k: enable RXing of beacons on STA/IBSS
+ * mm lockless pagecache barrier fix
+ * powerpc: Disable Collaborative Memory Manager for kdump
+ * ibmvfc: Delay NPIV login retry and add retries
+ * ibmvfc: Improve async event handling
+ * getrusage: RUSAGE_THREAD should return ru_utime and ru_stime
+ * ath5k: ignore the return value of ath5k_hw_noise_floor_calibration
+ * mm: fix assertion
+ * XFS: truncate readdir offsets to signed 32 bit values
+ * Linux 2.6.28.1
+ * eCryptfs: Filename Encryption: Tag 70 packets
+ * eCryptfs: Filename Encryption: Header updates
+ * eCryptfs: Filename Encryption: Encoding and encryption functions
+ * eCryptfs: Filename Encryption: filldir, lookup, and readlink
+ * eCryptfs: Filename Encryption: mount option
+ * eCryptfs: Replace %Z with %z
+ * eCryptfs: Fix data types (int/size_t)
+ * eCryptfs: kerneldoc for ecryptfs_parse_tag_70_packet()
+ * eCryptfs: Clean up ecryptfs_decode_from_filename()
+ * fs/ecryptfs/inode.c: cleanup kerneldoc
+ * staging-p80211: Kill directly reference of netdev->priv
+ * staging-slicoss: Kill directly reference of netdev->priv
+ * staging-winbond: Kill directly reference of netdev->priv
+ * Staging: go7007: fixes due to video_usercopy api change
+ * Staging: go7007: fixes due v4l2_file_operations api change
+ * staging: correct dubious use of !x & y
+ * Staging: w35und: make wb35_probe() and wb35_disconnect() functions static
+ * Staging: w35und: remove unused wb35_open() and wb35_close() functions
+ * Staging: w35und: use msleep() and udelay()
+ * Staging: w35und: remove the no-op pa_stall_execution macro
+ * Staging: w35und: purb typedef removal
+ * Staging: w35und: reg queue struct typedef removal
+ * Staging: w35und: wb35reg struct typedef removal
+ * Staging: w35und: padapter struct typedef removal
+ * Staging: w35und: merge wblinux struct to adapter
+ * Staging: w35und: wb35_probe() cleanup
+ * Staging: w35und: remove usb_submit_urb wrapper function
+ * Staging: w35und: remove usb_alloc_urb wrapper function
+ * w35und: remove dead code from wbusb_f.h
+ * Staging: w35und: remove true/false boolean macros
+ * Staging: w35und: OS_MEMORY_ALLOC wrapper removal
+ * Staging: w35und: usb_put_dev() is missing from wb35_disconnect()
+ * Staging: w35und: remove macro magic from MLME_GetNextPacket()
+ * Staging: w35und: plug memory leak in wbsoft_tx()
+ * Staging: w35und: move supported band initialization out of wb35_probe()
+ * Staging: w35und: remove timer wrappers
+ * Staging: w35und: remove atomic op wrappers
+ * Staging: w35und: remove memcpy/memcmp wrappers
+ * Staging: w35und: remove abs() and BIT() macros
+ * Staging: w35und: remove unused macros from common.h
+ * Staging: w35und: remove unused link status code
+ * Staging: w35und: #include cleanup
+ * Staging: w35und: remove some dead code
+ * Staging: w35und: move source files to one directory
+ * Staging: w35und: move struct wbsoft_priv to core.h and use it
+ * Staging: w35und: remove ->adapter from struct _HW_DATA_T
+ * Staging: w35und: clean up adapter.h a bit
+ * Staging: w35und: merge struct wb35_adapter to struct wbsoft_priv
+ * Staging: w35und: remove global struct ieee80211_hw
+ * Staging: w35und: inline DRIVER_AUTHOR and DRIVER_DESC macros
+ * Staging: w35und: clean up wblinux.c a bit
+ * Staging: w35und: remove unused ->ShutDowned member from struct
+ LOCAL_PARA
+ * Staging: w35und: move global wbsoft_enabled to struct wbsoft_priv
+ * Staging: w35und: move packet_came() to wb35rx.c
+ * Staging: w35und: remove ->skb_array from struct wbsoft_priv
+ * Staging: w35und: remove ->shutdown from struct wbsoft_priv
+ * Staging: w35und: make functions local to mds.c static
+ * Staging: w35und: make functions local to mlmetxrx.c static
+ * Staging: w35und: remove dead code from mto.c
+ * Staging: w35und: make functions local to wb35rx.c static
+ * Staging: w35und: make functions local to wb35tx.c static
+ * Staging: w35und: remove dead code from wbhal.c
+ * Staging: w35und: remove rxisr.c as dead code
+ * Staging: w35und: fix Kconfig
+ * Staging: w35und: fix config build warnings
+ * Staging: wlan-ng: Remove PCI/PLX/PCMCIA files.
+ * Staging: wlan-ng: Update Help text to mention prism3 devices.
+ * Staging: wlan-ng: Delete PCI/PLX/PCMCIA-specific code.
+ * Staging: wlan-ng: Make wlan-ng use WEXT mode by default.
+ * Staging: wlan-ng: Eliminate more <2.6 kernel support.
+ * Staging: wlan-ng: Eliminate all backwards-compatibility for <2.6.13 kernels.
+ * Staging: wlan-ng: Eliminate a boatload of tertiaryAP-only code. + * Staging: wlan-ng: Remove AP-only code from MLME functions. + * Staging: wlan-ng: Get rid of the MTU tests in the rx conversion path. + * Staging: wlan-ng: Eliminate one more rx mtu test. + * Staging: wlan-ng: Eliminate local 'version.h' + * Staging: wlan-ng: Eliminate usage of procfs. + * Staging: wlan-ng: Use standard kernel integer (u32/s32/etc) types. + * Staging: wlan-ng: Eliminate all backwards-compatible kernel code. + * Staging: wlan-ng: Wireless Extension support is mandatory. + * Staging: wlan-ng: use WIRELESS_EXT, not CONFIG_WIRELESS_EXT + * Staging: wlan-ng: Delete a large pile of now-unused code. + * Staging: wlan-ng: Delete a pile of unused mibs. And fix WEXT SET_TXPOWER. + * Staging: wlan-ng: Consolidate wlan-ng into a single module. + * Staging: wlan-ng: Purge all MIBs not used internally. + * Staging: wlan-ng: p80211netdev.c fix netdev alloc to prevent oops on device start + * Staging: wlan-ng: prism2_usb.c always enable the card in probe_usb + * Staging: wlan-ng: hfa384x_usb.c use newest version of 384x_drvr_start + * Staging: wlan-ng: p80211wext.c add latest changes & remove extra nulls from wext_handlers + * Staging: wlan-ng: p80211wext don't set default key id twice + * Staging: wlan-ng: hfa384x_usbin_callback: check for hardware removed + * Staging: wlan-ng: p80211conv.c copy code from wlan-ng-devel branch to not drop packets + * Staging: wlan-ng: remove unused #include + * Staging: wlan-ng: p80211wext.c: use ARRAY_SIZE + * Staging: wlan-ng: fix compiler warnings + * Staging: wlan-ng: skb_p80211_to_ether() - payload_length is unsigned, check before subtraction + * Staging: at76_usb: update drivers/staging/at76_usb w/ mac80211 port + * Staging: at76_usb: fix build breakage + * Staging: at76_usb: remove compiler warnings + * Staging: at76_usb: fix up all remaining checkpatch.pl warnings + * Staging: at76_usb: cleanup dma on stack issues + * Staging: poch: Block size bug fix + * Staging: poch: Update TODO list + * Staging: poch: Correct pages from bytes. 
+ * Staging: poch: minor fixes
+ * Staging: poch: Fix build warnings
+ * Staging: poch: Rx control register init
+ * Staging: poch: Fix user space protocol syncing
+ * Staging: poch: Fine grained locking
+ * Staging: sxg: remove typedefs
+ * Staging: sxg: break the build in a cleaner way when !x86
+ * Staging: sxg: update README
+ * staging: struct device - replace bus_id with dev_name(), dev_set_name()
+ * Staging: echo: remove typedefs
+ * Staging: echo: Lindent drivers/staging/echo
+ * Staging: go7007: saa7134 updates
+ * Staging: go7007: add sensoray 2250/2251 support
+ * Staging: go7007: Convert driver to use video_ioctl2
+ * Staging: go7007: annotate code pointers
+ * Staging: go7007: fix minor build warnings
+ * Staging: go7007: small cleanup
+ * Staging: go7007: add some more v4l2 ioctls
+ * Staging: et131x: Cleanup et131x_debug.h defines
+ * Staging: et131x: fix build failure
+ * Staging: et131x: remove unused variable in et1310_tx.c
+ * Staging: usbip: cleanup kerneldoc
+ * Staging: slicoss: use kzalloc
+ * Staging: slicoss: use correct type for memory allocations
+ * Staging: slicoss: use request_firmware
+ * Staging: add agnx wireless driver
+ * Staging: agnx: fix build errors due to ssid removal
+ * Staging: agnx: fix build errors due to rate control API changes
+ * Staging: agnx: fix build warnings
+ * Staging: add otus Atheros wireless network driver
+ * Staging: otus: fix netdev->priv usage
+ * Staging: otus: fix name clash
+ * Staging: otus: fix urb callback function type
+ * Staging: otus: remove dependence on kernel version
+ * Staging: add rt2860 wireless driver
+ * Staging: rt2860: disable root hack for reading files
+ * Staging: rt2860: fix up netdev->priv usage
+ * Staging: rt2860: use standard bit-reverse function
+ * Staging: rt2860: Fix minor compiler warnings
+ * Staging: rt2860: enable WPA_SUPPLICANT support
+ * Staging: Add ServerEngines benet 10Gb ethernet driver
+ * Staging: benet: fix netif api breakage
+ * Staging: benet: fix up netdev->priv change
+ * Staging: benet: build is broken unless CONFIG_NETPOLL is enabled
+ * Staging: benet: patch to remove subdirectories
+ * Staging: benet: fix build errors when CONFIG_NETPOLL is off
+ * Staging: benet: fix build error.
+ * Staging: benet: patch to use offsetof() instead of AMAP_BYTE_OFFSET() + * Staging: benet: fix problems reported by checkpatch + * Staging: benet: cleanup a check while posting rx buffers + * Staging: add comedi core + * Staging: comedi: fix up a lot of checkpatch.pl warnings + * Staging: comedi: fix checkpatch.pl errors in comedi_fops.c + * Staging: comedi: fix build error in comedilib.h + * Staging: comedi: add kcomedilib to the tree + * Staging: comedi: set up infrastructure for individual drivers + * Staging: comedi: add local copy of interrupt.h + * Staging: comedi: add pci and usb wrapper header files + * Staging: comedi: comedi driver common function module + * Staging: comedi: add mite comedi pci driver + * Staging: comedi: add usb usbdux driver + * Staging: comedi: add usb usbduxfast driver + * Staging: comedi: add usb dt9812 driver + * Staging: comedi: add comedi_bond driver + * Staging: comedi: add comedi_test driver + * Staging: comedi: add comedi_parport driver + * Staging: comedi: dt9812: fix up a lot of coding style issues + * Staging: comedi: dt9812: remove dt9812.h + * Staging: comedi: dt9812: remove typedefs + * Staging: comedi: dt9812: fix sparse warnings + * Staging: comedi: usbdux: remove kernel version checks + * Staging: comedi: usbdux: code style cleanups + * Staging: comedi: usbdux: remove // comments + * Staging: comedi: usbdux: fix up printk calls + * Staging: comedi: usbdux: remove checkpatch.pl warnings + * Staging: comedi: usbdux: remove typedef + * Staging: comedi: usbdux: remove comedi usb wrappers + * Staging: comedi: usbduxfast: remove comedi usb wrappers + * Staging: comedi: dt9812: remove #ifdef that is not needed + * Staging: comedi: remove usb wrappers + * Staging: comedi: remove PCI wrappers + * Staging: comedi: add icp_multi driver + * Staging: comedi: add me4000 driver + * Staging: comedi: fix checkpatch.pl issues in comedi_bond.c + * Staging: comedi: fix checkpatch.pl issues in comedi_fc.c + * Staging: comedi: remove typedefs from comedi_bond.c + * Staging: comedi: fix sparse issues in comedi_bond.c + * Staging: comedi: fix checkpatch.pl issues in comedi_test.c + * Staging: comedi: fix sparse issues in comedi_test.c + * Staging: comedi: remove typedefs from comedi_test.c + * Staging: comedi: fix comedi_parport.c checkpatch.pl issues. + * Staging: comedi: fix comedi_fc.h checkpatch.pl issues. + * Staging: comedi: fix comedi_pci.h checkpatch.pl issues. 
+ * Staging: comedi: comedi_pci.h: remove unneeded wrapper
+ * Staging: comedi: comedi_pci.h: remove comedi_pci_enable_no_regions
+ * Staging: comedi: comedi_pci.h: remove comedi_pci_disable_no_regions
+ * Staging: comedi: add s626 driver
+ * Staging: comedi: add rtd520 driver
+ * Staging: comedi: add me_daq driver
+ * Staging: comedi: me_daq: fix checkpatch.pl issues
+ * Staging: comedi: me_daq: remove typedefs
+ * Staging: comedi: me_daq: fix sparse issues
+ * Staging: comedi: fix checkpatch.pl warning in interrupt.h
+ * Staging: comedi: fix build if CONFIG_PROC_FS is not set
+ * Staging: add asus_oled driver
+ * Staging: asus_oled: fix build dependency
+ * Staging: Add the Meilhaus ME-IDS driver package
+ * Staging: meilhaus: fix __symbol_get problems
+ * Staging: add lcd-panel driver
+ * Staging: panel: major checkpatch cleanup
+ * Staging: panel: remove ifdefs and code for pre-2.6 kernels
+ * Staging: panel: remove support for smartcards
+ * Staging: add Driver for Altera PCI Express Chaining DMA reference design
+ * Staging: add rtl8187se driver
+ * Staging: rtl8187se: remove unneeded files
+ * Staging: rtl8187se: make the built module be the proper name
+ * Staging: rtl8187se: remove duplicate pci ids
+ * Staging: me4000: switch to list_for_each*()
+ * Staging: usbip: switch to list_for_each_entry()
+ * Staging: add princeton instruments usb camera driver
+ * Staging: add mimio xi driver
+ * Staging: add rt2870 wireless driver
+ * Staging: rt2870: disable root hack for reading files
+ * Staging: rt2870: fix up netdev->priv usage
+ * Staging: add frontier tranzport and alphatrack drivers
+ * Staging: frontier: remove unused alphatrack_sysfs.c file
+ * Staging: frontier: fix compiler warnings
+ * Staging: add epl stack
+ * Staging: epl: run Lindent on all kernel/*.h files
+ * Staging: epl: run Lindent on all user/*.h files
+ * Staging: epl: run Lindent on *.h files
+ * Staging: epl: run Lindent on *.c files
+ * Staging: epl: hr timers all run in hard irq context now
+ * Staging: epl: fix netdev->priv b0rkage
+ * Staging: add android framework
+ * Staging: android: add binder driver
+ * Staging: android: binder: Fix gcc warnings about improper format specifiers for size_t in printk
+ * staging: android: binder: Fix use of euid
+ * Staging: android: add logging driver
+ * Staging: android: add ram_console driver
+ * Staging: android: add timed_gpio driver
+ * Staging: android: timed_gpio: Rename android_timed_gpio to timed_gpio
+ * Staging: android: remove dummy android.c driver
+ * Staging: android: add lowmemorykiller driver
+ * Staging: android: binder: fix build errors
+ * staging: __FUNCTION__ is gcc-specific, use __func__
+ * V4L/DVB (10176a): Switch remaining clear_user_page users over to
+ clear_user_highpage
+
+ [ Zhenyu Wang ]
+
+ * agp/intel: add support for G41 chipset
+
+ -- Tim Gardner Sun, 18 Jan 2009 20:22:54 -0700
+
+linux (2.6.28-4.11) jaunty; urgency=low
+
+ [ Mario Limonciello ]
+
+ * SAUCE: Enable HDMI audio codec on Studio XPS 1340
+ - LP: #309508
+
+ [ Tim Gardner ]
+
+ * Fix armel d-i FTBSs
+
+ [ Upstream Kernel Changes ]
+
+ * USB: re-enable interface after driver unbinds
+
+ -- Tim Gardner Tue, 13 Jan 2009 16:33:08 -0700
+
+linux (2.6.28-4.10) jaunty; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * update kernel bootloader recommends: to prefer grub
+ - LP: #314004
+ * SAUCE: don't use buggy _BCL/_BCM/_BQC for backlight control
+ - LP: #311716
+ * SAUCE: test-suspend -- add the suspend test scripts
+ - LP: #316419
+
+ [ Colin Watson ]
+
+ * Enable udebs for armel
+
+ [ Tim Gardner ]
+
+ * SAUCE: Dell laptop digital mic does not work, PCI 1028:0271
+ - LP: #309508
+ * Enable CIFS_XATTR=y and CONFIG_CIFS_POSIX=y
+ - LP: #220658
+
+ -- Tim Gardner Thu, 08 Jan 2009 10:38:22 -0700
+
+linux (2.6.28-4.9) jaunty; urgency=low
+
+ [ Tim Gardner ]
+
+ * Restore DM_CRYPT, AES, ECB, and CBC as modules. This fixes
+ some installer issues with encrypted /home and Private directories.
+ * Take one more stab at building armel without module or ABI errors.
+
+ -- Tim Gardner Tue, 06 Jan 2009 08:38:23 -0700
+
+linux (2.6.28-4.8) jaunty; urgency=low
+
+ * Fix i386/amd64 FTBS by ignoring all module and ABI changes,
+ not something you would normally do, but I'm sure the ABI
+ has not changed. This will probably also allow the ARM builds to complete.
+
+ -- Tim Gardner Mon, 05 Jan 2009 14:42:58 -0700
+
+linux (2.6.28-4.7) jaunty; urgency=low
+
+ [ Tim Gardner ]
+
+ * Enable CONFIG_ATH5K=m for i386/amd64
+ - LP: #306719
+ * Build all i386/amd64 AGP/DRM components as modules.
+ - LP: #312721
+ * git commands are now installed outside the default $PATH.
+ Use 'git CMD' instead of 'git-CMD'.
+ * Build in most PATA/SATA drivers. This should allow most i386/amd64 systems to boot
+ without an initramfs, though some support work is still required in initramfs-tools
+ and grub.
+ - LP: #311730
+
+ -- Tim Gardner Fri, 02 Jan 2009 07:33:09 -0700
+
+linux (2.6.28-4.6) jaunty; urgency=low
+
+ [ Tim Gardner ]
+
+ * Enable CONFIG_X86_E_POWERSAVER=m for i386 generic
+ - LP: #237405
+ * Build i386 AGP drivers as modules
+ - LP: #312721
+ * Build i386 DRM as a module
+ - LP: #312721
+
+ [ Upstream Kernel Changes ]
+
+ * drm/i915: Add missing userland definitions for gem init/execbuffer.
+ - LP: #308387
+
+ -- Tim Gardner Mon, 29 Dec 2008 09:16:47 -0700
+
+linux (2.6.28-4.5) jaunty; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * clean up module dependency information on package removal/purge
+ - LP: #300773
+
+ [ Tim Gardner ]
+
+ * Update iscsitarget to 0.4.17
+ * Build in ext{234}
+ * Build in Crypto modules AES, CBC, ECB
+ * Build in ACPI AC,BATTERY,BUTTON,FAN,PCI_SLOT,PROCESSOR,SBS,THERMAL,WMI
+ * Build in AGP intel,via,sis,ali,amd,amd64,efficeon,nvidia,sworks
+ * Build in ata,dev_dm,dev_loop,dev_md,dev_sd,dev_sr
+ * Build in BT l2cap,rfcomm,sco
+ * Reduce CONFIG_LEGACY_PTY_COUNT to 0
+ * Build in CDROM_PKTCDVD and CHR_DEV_SG
+ * Build in CPU_FREQ
+ GOV_CONSERVATIVE,GOV_ONDEMAND,GOV_POWERSAVE,GOV_USERSPACE,STAT,TABLE
+ * Build in DM CRYPT,MIRROR,MULTIPATH,SNAPSHOT
+ * Build in DRM
+ * Build in HID
+ * Build in HOTPLUG PCI,PCIE
+ * Build in I2C
+ * Build in IEEE1394 OHCI1394
+ * Build in INPUT EVDEV
+ * Build in IPV6
+ * Build in MMC
+ * Build in PACKET
+ * Enable both IEEE1394 (Firewire) stacks as modules
+ - LP: #276463
+ * Disable SUNRPC_REGISTER_V4
+ - LP: #306016
+ * Enable dm-raid4-5
+ - LP: #309378
+ * Build in PPP
+ * Build in RFKILL
+ * Build in USB SERIAL
+
+ [ Upstream Kernel Changes ]
+
+ * Rebased to v2.6.28
+
+ -- Tim Gardner Thu, 18 Dec 2008 21:18:44 -0700
+
+linux (2.6.28-3.4) jaunty; urgency=low
+
+ [ Tim Gardner ]
+
+ * Build ecryptfs into the kernel
+ - LP: #302870
+ * Deprecated gnbd
+
+ [ Upstream Kernel Changes ]
+
+ * Rebased to v2.6.28-rc8
+
+ -- Tim Gardner Wed, 10 Dec 2008 22:45:13 -0700
+
+linux (2.6.28-2.3) jaunty; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * update the templates so that we have spaces following the title line
+
+ [ Tim Gardner ]
+
+ * Add upload number to kernel version signature. This has the side effect
+ of renaming kernel packages back to the original way, e.g., without '-ub'
+ in the name.
+
+ -- Tim Gardner Thu, 04 Dec 2008 12:18:31 -0700
+
+linux (2.6.28-2.2) jaunty; urgency=low
+
+ [ Andy Whitcroft ]
+
+ * Revert "SAUCE: (no-up) version: Implement version_signature proc file."
+ * SAUCE: (no-up) version: Implement version_signature proc file.
+ * SAUCE: serial: RS485 ioctl structure uses __u32 include linux/types.h
+ - LP: #303711
+
+ [ Tim Gardner ]
+
+ * UBUNTU: Removed CONFIG_DRM_VIA_CHROME9 since it is upstream.
+ * UBUNTU: Removed ubuntu/via_chrome9
+
+ [ Upstream Kernel Changes ]
+
+ * Rebased to v2.6.28-rc7
+
+ -- Tim Gardner Tue, 02 Dec 2008 07:33:32 -0700
+
+linux (2.6.28-1.1) jaunty; urgency=low
+
+ [ Amit Kucheria ]
+
+ * SAUCE: make fc transport removal of target configurable
+ * SAUCE: pm: Config option to disable handling of console during
+ suspend/resume
+ * SAUCE: Adds support for COMPAL JHL90 webcam
+ * Map armel to arm to all editconfigs to work correctly
+ * Add armel to getabis for completeness sake
+ * Add -ub to our versioning to allow kerneloops.org to identify us
+
+ [ Andy Whitcroft ]
+
+ * Fix Vcs-Git path for the kernel repository.
+ - LP: #296915
+
+ [ Ben Collins ]
+
+ * SAUCE: Lower warning level of some PCI messages
+ - LP: #159241
+ * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps
+ * SAUCE: tulip: Let dmfe handle davicom on non-sparc
+ * SAUCE: tulip: Define ULI PCI ID's
+ * SAUCE: (no-up) version: Implement version_signature proc file.
+ * SAUCE: (no-up) connector.h: Add idx/val for drbd
+ * SAUCE: (no-up) swap: Add notify_swap_entry_free callback for compcache
+ * SAUCE: drivers: Remove some duplicate device entries in various modules
+ * SAUCE: (no-up) [AppArmor] merge with upstream subversion r1291
+ * SAUCE: (no-up) Enable ubuntu extra subdirectory
+ * SAUCE: (no-up) ACPI: initramfs DSDT override support
+ * ubuntu: Add drbd module
+ * ubuntu: Add iscsitarget module
+ * ubuntu: Add BOM for iscsitarget
+ * ubuntu: Add squashfs driver
+ * SAUCE: (no-up) Check for squashfs superblock in initramfs mounting.
+ * ubuntu: Add aufs module
+ * ubuntu: Added atl2 driver
+ * ubuntu: Added et131x driver
+ * ubuntu: Add dm-raid4-5 driver
+ * ubuntu: Add ndiswrapper driver
+ * ubuntu: Added ram backed compressed swap module (compcache)
+ * ubuntu: Add misc drivers from hardy lum
+ * ubuntu: Add heci driver 3.2.0.24
+ * ubuntu: Add ov511 and bt-sco drivers
+ * ubuntu: Add acx, prism2_usb wireless drivers
+ * ubuntu: Add at76 driver to build
+ * ubuntu: Add fsam7400 sw kill switch driver
+ * ubuntu: Added qc-usb driver
+ * ubuntu: e1000e: Upgraded module to 0.4.1.7
+ * ubuntu: Added rfkill drivers
+ * ubuntu: VIA - Add VIA DRM Chrome9 3D engine
+ * ubuntu: unionfs: Added v1.4 module from hardy
+ * ubuntu: Add LIRC driver
+ * ubuntu: Add GFS driver
+ * ubuntu: New tlsup driver for toshiba laptops
+ * SAUCE: (no-up) Export lookup_hash for aufs
+ * SAUCE: (no-up) Modularize vesafb
+ * ubuntu: Config files
+ * Disable some modules that need porting to 2.6.28
+ * ubuntu: Fixup headers creation to include arch/*/include
+ * ubuntu/module-check: Ignore comment lines
+
+ [ Chuck Short ]
+
+ * SAUCE: ata: blacklist FUJITSU MHW2160BH PL
+
+ [ cking ]
+
+ * SAUCE: Enable speedstep for sonoma processors.
+
+ [ Colin Ian King ]
+
+ * ubuntu: Add dm-loop
+ * SAUCE: cx88: Support Leadtek WinFast DTV2000 H version J.
+ * SAUCE: fix kernel oops in VirtualBox during paravirt patching
+ * SAUCE: qc-usb: Enable Logitech QuickCam Messenger
+ * SAUCE: appleir: Enable driver for new MacBook Pro
+
+ [ Colin Watson ]
+
+ * Enable configfs, fuse, jfs, reiserfs, and xfs for armel
+ * Extend debian/d-i/ modules handling to make armel easier to support
+ * Create udebs for armel
+
+ [ Fabio M. Di Nitto ]
+
+ * ubuntu: update GFS Cluster File System
+
+ [ Kees Cook ]
+
+ * SAUCE: AppArmor: update to upstream subversion r1302
+
+ [ Leann Ogasawara ]
+
+ * Add automatic model setting for Samsung Q45
+ * Add Dell Dimension 9200 reboot quirk
+
+ [ Mackenzie Morgan ]
+
+ * SAUCE: Add quirk for ASUS Z37E to make sound audible after resume
+
+ [ Matthew Garrett ]
+
+ * SAUCE: hostap: send events on data interface as well as master
+ interface
+
+ [ Michael Frey (Senior Manager, MID) ]
+
+ * SAUCE: Send HCI_RESET for Broadcomm 2046
+
+ [ Michael Haas ]
+
+ * add proper aufs source tree from 20080922
+ * Fix AUFS compilation in vfsub.c
+ * Add splice-2.6.23.patch from AUFS to export a symbol needed by AUFS
+ * Add put_filp.patch from AUFS to export a symbol needed by AUFS
+ * Add deny_write_access.patch from AUFS - export deny_write_access
+ * Add sec_perm-2.6.24.patch from AUFS - export security_inode_permission
+ * make sure TMPFS_MAGIC is defined in AUFS Makefile
+ * SAUCE: Revert aufs changes from AppArmor merge
+
+ [ Mohamed Abbas ]
+
+ * SAUCE: iwlagn -- fix rfkill when on when driver loaded
+
+ [ Phillip Lougher ]
+
+ * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets.
+
+ [ Stefan Bader ]
+
+ * SAUCE: (no-up) Export dm_disk function of device-mapper
+ * SAUCE: Restore VT fonts on switch
+ * SAUCE: mmc: Increase power_up delay to fix TI readers
+ * gfs1: GFS1 can't create more than 4kb file
+ * uvcvideo: Commit streaming parameters when enabling the video stream.
+
+ [ Tim Gardner ]
+
+ * SAUCE: Add extra headers to linux-libc-dev
+ * SAUCE: Catch nonsense keycodes and silently ignore
+ * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM
+ * SAUCE: Guest OS does not recognize a lun with non zero target id on
+ Vmware ESX Server
+ * SAUCE: (no-up) Take care of orinoco_cs overlap with hostap_cs
+ * ubuntu: Add GNBD driver
+ * SAUCE: e1000e: Map NV RAM dynamically only when needed.
+ * SAUCE: Correctly blacklist Thinkpad r40e in ACPI
+ * SAUCE: Update Wacom tablet driver to 1.49
+ * SAUCE: Fix Wacom tablet 1.49 porting errors
+ * SAUCE: Enable an e1000e Intel Corporation 82567 Gigabit controller
+ * SAUCE: Fix Oops in wlan_setup
+ * SAUCE: ipw2200: change default policy for auto-associate
+ * Dell Wireless 365 needs BTUSB_RESET quirk.
+ * ndiswrapper remote buffer overflows on long ESSIDs (CVE-2008-4395)
+ * Disabled ubuntu/e1000e config
+
+ [ Upstream Kernel Changes ]
+
+ * Revert "[Bluetooth] Eliminate checks for impossible conditions in IRQ
+ handler"
+ * Revert "x86, early_ioremap: fix fencepost error"
+ * mac80211: fix two issues in debugfs
+ * iwl3945: do not send scan command if channel count zero
+
+ -- Ben Collins Fri, 07 Nov 2008 09:37:42 -0700
+
+linux (2.6.27-8.17) intrepid-proposed; urgency=low
+
+ [ John W. Linville ]
+
+ * SAUCE: iwlagn: avoid sleep in softirq context
+ - LP: #286285
+
+ [ Tim Gardner ]
+
+ * Dell Wireless 365 needs BTUSB_RESET quirk.
+ - LP: #293670
+ * SAUCE: ALSA: hda: make a STAC_DELL_EQ option (version 2)
+ - LP: #293271
+
+ [ Upstream Kernel Changes ]
+
+ * iwlagn: downgrade BUG_ON in interrupt
+ * Input: atkbd - expand Latitude's force release quirk to other Dells
+ * fbcon_set_all_vcs: fix kernel crash when switching the rotated consoles
+ * modules: fix module "notes" kobject leak
+ * Driver core: Fix cleanup in device_create_vargs().
+ * Driver core: Clarify device cleanup.
+ * ath9k/mac80211: disallow fragmentation in ath9k, report to userspace
+ * md: Fix rdev_size_store with size == 0
+ * xfs: fix remount rw with unrecognized options
+ * OHCI: Allow broken controllers to auto-stop
+ * USB: OHCI: fix endless polling behavior
+ * USB: Fix s3c2410_udc usb speed handling
+ * USB: EHCI: log a warning if ehci-hcd is not loaded first
+ * usb gadget: cdc ethernet notification bugfix
+ * usb: musb_hdrc build fixes
+ * drm/i915: fix ioremap of a user address for non-root (CVE-2008-3831)
+ * DVB: au0828: add support for another USB id for Hauppauge HVR950Q
+ * DVB: sms1xxx: support two new revisions of the Hauppauge WinTV
+ MiniStick
+ * security: avoid calling a NULL function pointer in
+ drivers/video/tvaudio.c
+ * Linux 2.6.27.3
+ - LP: #294152
+
+ * gpiolib: fix oops in gpio_get_value_cansleep()
+ * edac cell: fix incorrect edac_mode
+ * x86 ACPI: fix breakage of resume on 64-bit UP systems with SMP kernel
+ * sched: fix the wrong mask_len
+ * USB: cdc-wdm: make module autoload work
+ * USB: don't rebind drivers after failed resume or reset
+ * USB: fix memory leak in cdc-acm
+ * USB: Speedtouch: add pre_reset and post_reset routines
+ * dm kcopyd: avoid queue shuffle
+ * dm snapshot: fix primary_pe race
+ * amd_iommu: fix nasty bug that caused ILLEGAL_DEVICE_TABLE_ENTRY errors
+ * CIFS: fix saving of resume key before CIFSFindNext
+ * netfilter: xt_iprange: fix range inversion match
+ * netfilter: snmp nat leaks memory in case of failure
+ * netfilter: restore lost ifdef guarding defrag exception
+ * anon_vma_prepare: properly lock even newly allocated entries
+ * hvc_console: Fix free_irq in spinlocked section
+ * ACPI Suspend: Enable ACPI during resume if SCI_EN is not set
+ * ACPI suspend: Blacklist HP xw4600 Workstation for old code ordering
+ * ACPI suspend: Always use the 32-bit waking vector
+ * proc: fix vma display mismatch between /proc/pid/{maps,smaps}
+ * SCSI: scsi_dh: add Dell product information into rdac device handler
+ * PCI hotplug: cpqphp: fix kernel NULL pointer dereference
+ * V4L/DVB (9300): pvrusb2: Fix deadlock problem
+ * Linux 2.6.27.4
+ - LP: #294155
+
+ -- Tim Gardner Tue, 04 Nov 2008 12:16:07 -0700
+
+linux (2.6.27-7.16) intrepid-security; urgency=low
+
+ [ Tim Gardner ]
+
+ * ndiswrapper remote buffer overflows on long ESSIDs (CVE-2008-4395)
+ - LP: #275860
+
+ [ Upstream Kernel Changes ]
+
+ * ext[234]: Avoid printk floods in the face of directory corruption
+ (CVE-2008-3528)
+
+ -- Tim Gardner Mon, 03 Nov 2008 13:34:42 -0700
+
+linux (2.6.27-7.15) intrepid-security; urgency=low
+
+ [ Upstream Kernel Changes ]
+
+ * tcp: Restore ordering of TCP options for the sake of inter-operability
+ - LP: #264019
+
+ -- Tim Gardner Mon, 27 Oct 2008 19:28:06 -0600
+
+linux (2.6.27-7.14) intrepid; urgency=low
+
+ [ Tim Gardner ]
+
+ * Disable ath5k in 2.6.27
+ - LP: #288148
+
+ -- Tim Gardner Thu, 23 Oct 2008 07:40:43 -0600
+
+linux (2.6.27-7.13) intrepid; urgency=low
+
+ [ Stefan Bader ]
+
+ * gfs1: GFS1 can't create more than 4kb file
+
+ [ Tim Gardner ]
+
+ * Revert "SAUCE: x86: Reserve FIRST_DEVICE_VECTOR in used_vectors
+ bitmap.". Use upstream commit to avoid future conflicts.
+ * Revert "STABLE queue: mac80211: fix two issues in debugfs".
+ Use upstream commit to avoid future conflicts.
+ * Revert "x86, early_ioremap: fix fencepost error"
+ Use upstream commit to avoid future conflicts.
+
+ [ Upstream Kernel Changes ]
+
+ * sched_rt.c: resch needed in rt_rq_enqueue() for the root rt_rq
+ * x86: Reserve FIRST_DEVICE_VECTOR in used_vectors bitmap.
+ * mac80211: fix two issues in debugfs
+ * Fix barrier fail detection in XFS
+ * tty: Termios locking - sort out real_tty confusions and lock reads
+ * CIFS: make sure we have the right resume info before calling
+ CIFSFindNext
+ * rfkill: update LEDs for all state changes
+ * libertas: clear current command on card removal
+ * b43legacy: Fix failure in rate-adjustment mechanism
+ * x86, early_ioremap: fix fencepost error
+ * x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC
+ * x86: improve UP kernel when CPU-hotplug and SMP is enabled
+ * sky2: Fix WOL regression
+ * netdrvr: atl1e: Don't take the mdio_lock in atl1e_probe
+ * Linux 2.6.27.2
+
+ [ Amit Kucheria ]
+
+ * Ubuntu: agp: Fix stolen memory counting on G4X.
+ - LP: #285572
+
+ [ Scott Remnant ]
+
+ * add MODULE_ALIAS to load ipmi_devintf with ipmi_si
+
+ -- Tim Gardner Sun, 19 Oct 2008 10:06:21 -0600
+
+linux (2.6.27-7.12) intrepid; urgency=low
+
+ [ Chuck Short ]
+
+ * xen: Add xen modules to virtual flavours.
+
+ [ Mario Limonciello ]
+
+ * SAUCE: Add back in lost commit for Apple BT Wireless Keyboard
+ - LP: #162083
+
+ [ Tim Gardner ]
+
+ * Remove depmod created files from packages.
+ - LP: #250511
+ * Changed default TCP congestion algorithm to 'cubic' (again)
+ - LP: #278801
+ * Update configs for 'disable CONFIG_DYNAMIC_FTRACE'
+ - LP: #263555
+
+ [ Upstream Kernel Changes ]
+
+ * x86: register a platform RTC device if PNP doesn't describe it
+ * disable CONFIG_DYNAMIC_FTRACE due to possible memory corruption on
+ module unload
+
+ -- Tim Gardner Fri, 17 Oct 2008 11:25:39 -0600
+
+linux (2.6.27-7.11) intrepid; urgency=low
+
+ [ Amit Kucheria ]
+
+ * STABLE queue: mac80211: fix two issues in debugfs
+ - LP: #275227
+ * SAUCE: Adds support for COMPAL JHL90 webcam
+
+ [ Ben Collins ]
+
+ * SAUCE: (no-up) x86: Quiet "Kernel alive" messages
+ - LP: #39985
+ * SAUCE: (no-up) Modularize vesafb
+ * build/config: Enable vesafb module
+ * build: Switch to vesafb as preferred.
+ + [ Leann Ogasawara ] + + * Add Dell Dimension 9200 reboot quirk + - LP: #271370 + + [ Michael Haas ] + + * SAUCE: Revert aufs changes from AppArmor merge + + [ Tim Gardner ] + + * fix virtio udeb layout + - LP: #257739 + * Enabled CONFIG_EXT4DEV_FS=m + * Changed default TCP congestion algorithm to 'cubic' + - LP: #278801 + * SAUCE: ipw2200: change default policy for auto-associate + - LP: #264104 + + [ Upstream Kernel Changes ] + + * x86, early_ioremap: fix fencepost error + - LP: #263543 + + -- Tim Gardner Sat, 11 Oct 2008 08:07:42 -0600 + +linux (2.6.27-7.10) intrepid; urgency=low + + [ Alexey Starikovskiy ] + + * SAUCE: ACPI: EC: do transaction from interrupt context + - LP: #277802 + + [ Ben Collins ] + + * build/d-i: Change virtio-modules udeb to prio standard + + [ Colin Ian King ] + + * SAUCE: Blacklist IBM 2656 in serio/i8042 + - LP: #21558 + + [ Henrik Rydberg ] + + * Revert "SAUCE: applesmc: Add MacBookAir" + * SAUCE: [PATCH 1/5] hwmon: applesmc: Specified number of bytes to read + should match actual + * SAUCE: [PATCH 2/5] hwmon: applesmc: Fix the 'wait status failed: c != + 8' problem + * SAUCE: [PATCH 3/5] hwmon: applesmc: Prolong status wait + * SAUCE: [PATCH 4/5] hwmon: applesmc: Allow for variable ALV0 and ALV1 + package length + * SAUCE: [PATCH 5/5] hwmon: applesmc: Add support for Macbook Air + * SAUCE: hwmon: applesmc: Add support for Macbook Pro 4 + * SAUCE: hwmon: applesmc: Add support for Macbook Pro 3 + * SAUCE: hwmon: applesmc: Lighter wait mechanism, drastic improvement + + [ Leann Ogasawara ] + + * Add automatic model setting for Samsung Q45 + - LP: #200210 + + [ Tim Gardner ] + + * SAUCE: Correctly blacklist Thinkpad r40e in ACPI + - LP: #278794 + * SAUCE: Update Wacom tablet driver to 1.49 + - LP: #260675 + * SAUCE: ALPS touchpad for Dell Latitude E6500/E6400 + - LP: #270643 + * SAUCE: Fix Wacom tablet 1.49 porting errors + * SAUCE: Enable an e1000e Intel Corporation 82567 Gigabit controller + * SAUCE: Fix Oops in wlan_setup + - LP: #263309 + + [ Upstream Kernel Changes ] + + * ath9k: fix oops on trying to hold the wrong spinlock + * [Bluetooth] Fix double frees on error paths of btusb and bpa10x drivers + * [Bluetooth] Add reset quirk for new Targus and Belkin dongles + * [Bluetooth] Add reset quirk for A-Link BlueUSB21 dongle + * Revert "ax25: Fix std timer socket destroy handling." + * ax25: Quick fix for making sure unaccepted sockets get destroyed. + * netrom: Fix sock_orphan() use in nr_release + * Revert "V4L/DVB (8904): cx88: add missing unlock_kernel" + * SLOB: fix bogus ksize calculation + * net: only invoke dev->change_rx_flags when device is UP + * tcp: Fix possible double-ack w/ user dma + * net: Fix netdev_run_todo dead-lock + * tcp: Fix tcp_hybla zero congestion window growth with small rho and large cwnd. 
+ * [MIPS] Sibyte: Register PIO PATA device only for Swarm and Little Sur
+ * eeepc-laptop: Fix hwmon interface
+ * hwmon: (it87) Prevent power-off on Shuttle SN68PT
+ * hwmon: Define sysfs interface for energy consumption register
+ * hwmon: (adt7473) Fix some bogosity in documentation file
+ * hwmon: (abituguru3) Enable reading from AUX3 fan on Abit AT8 32X
+ * hwmon: (abituguru3) Enable DMI probing feature on Abit AT8 32X
+ * [CPUFREQ] correct broken links and email addresses
+ * SLOB: fix bogus ksize calculation fix
+ * Don't allow splice() to files opened with O_APPEND
+ * Linux 2.6.27
+
+ -- Tim Gardner Wed, 08 Oct 2008 21:19:34 -0600
+
+linux (2.6.27-6.9) intrepid; urgency=low
+
+ [ Kees Cook ]
+
+ * SAUCE: AppArmor: update to upstream subversion r1302
+ - LP: #269921
+
+ [ Stefan Bader ]
+
+ * Update configuration files to be compliant to desktop specs
+ - LP: #279019
+
+ [ Tim Gardner ]
+
+ * Add support in e1000e for a couple of ICH10 PCI IDs
+ * Enable CONFIG_INPUT_PCSPKR=m
+ - LP: #275453
+
+ [ Upstream Kernel Changes ]
+
+ * V4L/DVB (8559a): Fix a merge conflict at gspca/sonixb
+ * V4L/DVB (8789): wm8739: remove wrong kfree
+ * V4L/DVB (8883): w9968cf: Fix order of usb_alloc_urb validation
+ * V4L/DVB (8884): em28xx-audio: fix memory leak
+ * V4L/DVB (8885): cpia2_usb: fix memory leak
+ * V4L/DVB (8886): ov511: fix memory leak
+ * V4L/DVB (8887): gspca: fix memory leak
+ * V4L/DVB (8892): pvrusb2: Handle USB ID 2040:2950 same as 2040:2900
+ * V4L/DVB (8904): cx88: add missing unlock_kernel
+ * V4L/DVB (8905): ov511: fix exposure sysfs attribute bug
+ * V4L/DVB (8909): gspca: PAC 7302 webcam 093a:262a added.
+ * hrtimer: migrate pending list on cpu offline
+ * hrtimer: fix migration of CB_IRQSAFE_NO_SOFTIRQ hrtimers
+ * hrtimer: mark migration state
+ * hrtimer: prevent migration of per CPU hrtimers
+ * [IA64] Put the space for cpu0 per-cpu area into .data section
+ * powerpc: Fix PCI in Holly device tree
+ * powerpc: Fix failure to shutdown with CPU hotplug
+ * mfd: Fix Kconfig according to the new gpiolib symbols
+ * mfd: Fix asic3 compilation
+ * x86: fix typo in enable_mtrr_cleanup early parameter
+ * ipsec: Fix pskb_expand_head corruption in xfrm_state_check_space
+ * iucv: Fix mismerge again.
+ * ALSA: ASoC: Fix cs4270 error path
+ * ALSA: hda - Fix model for Dell Inspiron 1525
+ * sctp: Fix kernel panic while process protocol violation parameter
+ * x86: Fix broken LDT access in VMI
+ * x86, vmi: fix broken LDT access
+ * tcp: Fix NULL dereference in tcp_4_send_ack()
+ * ipv6: NULL pointer dereference in tcp_v6_send_ack
+ * XFRM,IPv6: initialize ip6_dst_blackhole_ops.kmem_cachep
+ * af_key: Free dumping state on socket close
+ * dm: always allow one page in dm_merge_bvec
+ * dm: cope with access beyond end of device in dm_merge_bvec
+ * dm mpath: add missing path switching locking
+ * MN10300: Fix IRQ handling
+ * pxa2xx_spi: fix build breakage
+ * e1000e: write protect ICHx NVM to prevent malicious write/erase
+ * powerpc: Fix boot hang regression on MPC8544DS
+ * ASoC: Set correct name for WM8753 rec mixer output
+ * ALSA: snd-powermac: mixers for PowerMac G4 AGP
+ * ALSA: snd-powermac: HP detection for 1st iMac G3 SL
+ * fbcon: fix monochrome color value calculation
+ * inotify: fix lock ordering wrt do_page_fault's mmap_sem
+ * braille_console: only register notifiers when the braille console is used
+ * fix error-path NULL deref in alloc_posix_timer()
+ * memory hotplug: missing zone->lock in test_pages_isolated()
+ * mm: tiny-shmem nommu fix
+ * mm: handle initialising compound pages at orders greater than MAX_ORDER
+ * e1000e: reset swflag after resetting hardware
+ * e1000e: do not ever sleep in interrupt context
+ * e1000e: remove phy read from inside spinlock
+ * e1000e: drop stats lock
+ * e1000e: debug contention on NVM SWFLAG
+ * e1000e: update version from k4 to k6
+ * Check mapped ranges on sysfs resource files
+ * e1000e: Fix incorrect debug warning
+ * [MIPS] Build fix: Fix irq flags type
+ * [MIPS] SMTC: Build fix: Fix filename in Makefile
+ * [MIPS] SMTC: Fix holes in SMTC and FPU affinity support.
+ * [MIPS] SMTC: Close tiny holes in the SMTC IPI replay system.
+ * [MIPS] SMTC: Fix SMTC dyntick support.
+ * [S390] nohz: Fix __udelay.
+ * [S390] qdio: prevent stack clobber
+ * Fix init/main.c to use regular printk with '%pF' for initcall fn
+ * x86 setup: correct segfault in generation of 32-bit reloc kernel
+ * selinux: Fix an uninitialized variable BUG/panic in selinux_secattr_to_sid()
+ * rtc: fix kernel panic on second use of SIGIO notification
+ * fbdev: fix recursive notifier and locking when fbdev console is blanked
+ * orion_spi: fix handling of default transfer speed
+ * include/linux/stacktrace.h: declare struct task_struct
+ * cpusets: remove pj from cpuset maintainers
+ * MAINTAINERS: add mailing list for man-pages
+ * SubmitChecklist: interfaces changes should CC linux-api@
+ * Documentation/HOWTO: info about interface changes should CC linux-api@vger
+ * dw_dmac: fix copy/paste bug in tasklet
+ * leds-fsg: change order of initialization and deinitialization
+ * leds-pca955x: add proper error handling and fix bogus memory handling
+ * ACPI: Make /proc/acpi/wakeup interface handle PCI devices (again)
+ * clockevents: check broadcast tick device not the clock events device
+ * V4L/DVB (8919): cx18: Fix tuner audio input for Compro H900 cards
+ * V4L/DVB (8926): gspca: Bad fix of leak memory (changeset 43d2ead315b1).
+ * V4L/DVB (8933): gspca: Disable light frequency for zc3xx cs2102 Kokom.
+ * V4L/DVB (8935): em28xx-cards: Remove duplicate entry (EM2800_BOARD_KWORLD_USB2800) + * V4L/DVB (8955): bttv: Prevent NULL pointer dereference in radio_open + * V4L/DVB (8957): zr36067: Restore the default pixel format + * V4L/DVB (8958): zr36067: Return proper bytes-per-line value + * V4L/DVB (8960): drivers/media/video/cafe_ccic.c needs mm.h + * V4L/DVB (8961): zr36067: Fix RGBR pixel format + * V4L/DVB (8963): s2255drv field count fix + * V4L/DVB (8967): Use correct XC3028L firmware for AMD ATI TV Wonder 600 + * V4L/DVB (8978): sms1xxx: fix product name for Hauppauge WinTV MiniStick + * V4L/DVB (8979): sms1xxx: Add new USB product ID for Hauppauge WinTV MiniStick + * V4L/DVB (9029): Fix deadlock in demux code + * V4L/DVB (9037): Fix support for Hauppauge Nova-S SE + * V4L/DVB (9043): S5H1420: Fix size of shadow-array to avoid overflow + * V4L/DVB (9053): fix buffer overflow in uvc-video + * V4L/DVB (9075): gspca: Bad check of returned status in i2c_read() spca561. + * V4L/DVB (9080): gspca: Add a delay after writing to the sonixj sensors. + * V4L/DVB (9092): gspca: Bad init values for sonixj ov7660. + * V4L/DVB (9099): em28xx: Add detection for K-WORLD DVB-T 310U + * V4L/DVB (9103): em28xx: HVR-900 B3C0 - fix audio clicking issue + * x86: gart iommu have direct mapping when agp is present too + * ide-cd: temporary tray close fix + * ide-dma: fix ide_build_dmatable() for TRM290 + * IDE: Fix platform device registration in Swarm IDE driver (v2) + * ide-cd: Optiarc DVD RW AD-7200A does play audio + * ide: workaround for bogus gcc warning in ide_sysfs_register_port() + * [MIPS] Fix CMP Kconfig configuration and mark as broken. + * [MIPS] IP27: Fix build errors if CONFIG_MAPPED_KERNEL=y + * x86 ACPI: Blacklist two HP machines with buggy BIOSes + * kgdb, x86: Avoid invoking kgdb_nmicallback twice per NMI + * kgdb: call touch_softlockup_watchdog on resume + * atmel-mci: Initialize BLKR before sending data transfer command + * Marker depmod fix core kernel list + * Linux 2.6.27-rc9 + + -- Tim Gardner Sun, 05 Oct 2008 21:27:49 -0600 + +linux (2.6.27-5.8) intrepid; urgency=low + + [ Amit Kucheria ] + + * Update AUFS-related Kconfig + - LP: #264048 + + [ Michael Haas ] + + * add proper aufs source tree from 20080922 + * Fix AUFS compilation in vfsub.c + * Add splice-2.6.23.patch from AUFS to export a symbol needed by AUFS + * Add put_filp.patch from AUFS to export a symbol needed by AUFS + * apply (modified) lhash.patch from AUFS to export __lookup_hash() + * Add deny_write_access.patch from AUFS - export deny_write_access + * Add sec_perm-2.6.24.patch from AUFS - export security_inode_permission + * make sure TMPFS_MAGIC is defined in AUFS Makefile + + [ Tim Gardner ] + + * Enabled CONFIG_IPWIRELESS + - LP: #274748 + * Enabled CONFIG_E1000E, disabled CONFIG_E1000E_NEW + This takes advantage of the upstream NVM protection fix in + commit 4a7703582836f55a1cbad0e2c1c6ebbee3f9b3a7. + + [ Upstream Kernel Changes ] + + * Revert "[Bluetooth] Eliminate checks for impossible conditions in IRQ + handler" + * [SCSI] qla2xxx: Defer enablement of RISC interrupts until ISP + initialization completes. 
+ * PCI: Fix pcie_aspm=force
+ * PCI: fix compiler warnings in pci_get_subsys()
+ * UBIFS: create the name of the background thread in every case
+ * UBIFS: TNC / GC race fixes
+ * UBIFS: remove incorrect assert
+ * UBIFS: fix printk format warnings
+ * AMD IOMMU: set iommu sync flag after command queuing
+ * AMD IOMMU: protect completion wait loop with iommu lock
+ * sparc64: Fix disappearing PCI devices on e3500.
+ * x86, oprofile: BUG scheduling while atomic
+ * ALSA: ASoC: Fix at32-pcm build breakage with PM enabled
+ * ath9k: connectivity is lost after Group rekeying is done
+ * wireless: zd1211rw: add device ID fix wifi dongle "trust nw-3100"
+ * [IA64] Ski simulator doesn't need check_sal_cache_flush
+ * [IA64] kexec fails on systems with blocks of uncached memory
+ * ath9k: Fix IRQ nobody cared issue with ath9k
+ * [Bluetooth] Fix I/O errors on MacBooks with Broadcom chips
+ * [Bluetooth] Fix wrong URB handling of btusb driver
+ * [Bluetooth] Fix USB disconnect handling of btusb driver
+ * sparc64: Fix missing devices due to PCI bridge test in
+ of_create_pci_dev().
+ * [WATCHDOG] ibmasr: remove unnecessary spin_unlock()
+ * [WATCHDOG] wdt285: fix sparse warnings
+ * [WATCHDOG] unlocked_ioctl changes
+ * x86: fix 27-rc crash on vsmp due to paravirt during module load
+ * sched: fix init_hrtick() section mismatch warning
+ * clockevents: prevent cpu online to interfere with nohz
+ * x86: prevent stale state of c1e_mask across CPU offline/online
+ * clockevents: prevent stale tick_next_period for onlining CPUs
+ * clockevents: check broadcast device not tick device
+ * clockevents: prevent mode mismatch on cpu online
+ * x86: prevent C-states hang on AMD C1E enabled machines
+ * x86: c1e_idle: don't mark TSC unstable if CPU has invariant TSC
+ * timers: fix build error in !oneshot case
+ * ALSA: ASoC: maintainers - update email address for Liam Girdwood
+ * ibmasr: remove unnecessary spin_unlock()
+ * smb.h: do not include linux/time.h in userspace
+ * kernel-doc: allow structs whose members are all private
+ * kexec: fix segmentation fault in kimage_add_entry
+ * Documentation/DMA-mapping.txt: update for pci_dma_mapping_error()
+ changes
+ * sys_paccept: disable paccept() until API design is resolved
+ * mm: tiny-shmem fix lock ordering: mmap_sem vs i_mutex
+ * Documentation/sysctl/kernel.txt: fix softlockup_thresh description
+ * memcg: check under limit at shrink_usage
+ * atmel_serial: update the powersave handler to match serial core
+ * [SCSI] Fix hang with split requests
+ * USB Storage: Sierra: Non-configurable TRU-Install
+ * USB Serial: Sierra: Device addition & version rev
+ * USB: ehci: fix some ehci hangs and crashes
+ * USB: Fix the Nokia 6300 storage-mode.
+ * USB: Correct Sierra Wireless USB EVDO Modem Device ID
+ * USB: fix hcd interrupt disabling
+ * USB: update of Documentation/usb/anchors.txt
+ * usb gadget: fix omap_udc DMA regression
+ * USB: Fixing Nokia 3310c in storage mode
+ * usb: musb: fix include path
+ * USB: fix EHCI periodic transfers
+ * usb-serial: Add Siemens EF81 to PL-2303 hack triggers
+ * USB: SERIAL CP2101 add device IDs
+ * USB: unusual_devs addition for RockChip MP3 player
+ * USB: fsl_usb2_udc: fix VDBG() format string
+ * usb serial: ti_usb_3410_5052 obviously broken by firmware changes
+ * USB: ftdi_sio: Add 0x5050/0x0900 USB IDs (Papouch Quido USB 4/4)
+ * USB: serial: add ZTE CDMA Tech id to option driver
+ * USB Serial: Sierra: Add MC8785 VID/PID
+ * USB: drivers/usb/musb/: disable it on SuperH
+ * usb: ftdi_sio: add support for Domintell devices
+ * usb: unusual devs patch for Nokia 5310 Music Xpress
+ * USB: revert recovery from transient errors
+ * [MIPS] au1000: Fix gpio direction
+ * [MIPS] Fix the definition of PTRS_PER_PGD
+ * x86: prevent stale state of c1e_mask across CPU offline/online, fix
+ * x86: disable apm on the olpc
+ * i2c-powermac: Fix section for probe and remove functions
+ * i2c-dev: Return correct error code on class_create() failure
+ * i2c: Fix mailing lists in two MAINTAINERS entries
+ * ath9k: disable MIB interrupts to fix interrupt storm
+ * 9p: implement proper trans module refcounting and unregistration
+ * 9p-trans_fd: fix trans_fd::p9_conn_destroy()
+ * 9p-trans_fd: clean up p9_conn_create()
+ * 9p-trans_fd: don't do fs segment mangling in p9_fd_poll()
+ * 9p-trans_fd: fix and clean up module init/exit paths
+ * 9p: introduce missing kfree
+ * 9p: use an IS_ERR test rather than a NULL test
+ * 9p: fix put_data error handling
+ * netfilter: ip6t_{hbh,dst}: Rejects not-strict mode on rule insertion
+ * MN10300: Move asm-arm/cnt32_to_63.h to include/linux/
+ * MN10300: Make sched_clock() report time since boot
+ * ALSA: fix locking in snd_pcm_open*() and snd_rawmidi_open*()
+ * ALSA: remove unneeded power_mutex lock in snd_pcm_drop
+ * IPoIB: Fix crash when path record fails after path flush
+ * [XFS] Fix extent list corruption in xfs_iext_irec_compact_full().
+ * [XFS] Remove xfs_iext_irec_compact_full()
+ * kgdb: could not write to the last of valid memory with kgdb
+ * kgdb, x86, arm, mips, powerpc: ignore user space single stepping
+ * kgdb, x86_64: gdb serial has BX and DX reversed
+ * kgdb, x86_64: fix PS CS SS registers in gdb serial
+ * kgdboc,tty: Fix tty polling search to use name correctly
+ * ARM: Delete ARM's own cnt32_to_63.h
+ * m32r: remove the unused NOHIGHMEM option
+ * m32r: don't offer CONFIG_ISA
+ * m32r: export empty_zero_page
+ * m32r: export __ndelay
+ * m32r/kernel/: cleanups
+ * [MIPS] au1000: Make sure GPIO value is zero or one
+ * [MIPS] IP27: Switch to dynamic interrupt routing avoiding panic on
+ error.
+ * [MIPS] BCM47xx: Fix build error due to missing PCI functions + * [SSB] Initialise dma_mask for SSB_BUSTYPE_SSB devices + * Swarm: Fix crash due to missing initialization + * ide-tape: fix vendor strings + * ide: note that IDE generic may prevent other drivers from attaching + * cdrom: update ioctl documentation + * [SCSI] qlogicpti: fix sg list traversal error in continuation entries + * sata_nv: reinstate nv_hardreset() for non generic controllers + * scsi: fix fall out of sg-chaining patch in qlogicpti + * ALSA: make the CS4270 driver a new-style I2C driver + * ALSA: ASoC: Fix another cs4270 error path + * Fix NULL pointer dereference in proc_sys_compare + * kconfig: fix silentoldconfig + * kconfig: readd lost change count + * mm owner: fix race between swapoff and exit + * Linux 2.6.27-rc8 + * e1000e: write protect ICHx NVM to prevent malicious write/erase + + -- Amit Kucheria Tue, 30 Sep 2008 18:22:35 +0300 + +linux (2.6.27-4.7) intrepid; urgency=low + + [ Ben Collins ] + + * build/abi: Add gfs1 to perm blacklist + * build/abi: Ignored changes in gfs2 symbols + + [ Fabio M. Di Nitto ] + + * Revert "SAUCE: Export gfs2 symbols required for gfs1 kernel module" + * ubuntu: update GFS Cluster File System + + [ Stefan Bader ] + + * SAUCE: x86: Reserve FIRST_DEVICE_VECTOR in used_vectors bitmap. + - LP: #276334 + + [ Tim Gardner ] + + * Revert "Disable e1000e until the NVRAM corruption problem is found." + * Add atl1e and atl2 to Debian installer bits + - LP: #273904 + * SAUCE: e1000e: Map NV RAM dynamically only when needed. + - LP: #263555 + + -- Tim Gardner Fri, 26 Sep 2008 20:51:22 -0600 + +linux (2.6.27-4.6) intrepid; urgency=low + + [ Tim Gardner ] + + * Disable e1000e until the NVRAM corruption problem is found. + - LP: #263555 + + [ Upstream Kernel Changes ] + + * Revert "[Bluetooth] Eliminate checks for impossible conditions in IRQ + handler" + + -- Ben Collins Tue, 23 Sep 2008 09:53:57 -0400 + +linux (2.6.27-4.5) intrepid; urgency=low + + [ Upstream Kernel Changes ] + + * Revert "b43/b43legacy: add RFKILL_STATE_HARD_BLOCKED support" + * udf: Fix lock inversion between iprune_mutex and alloc_mutex (v2) + * udf: Fix error paths in udf_new_inode() + * [SCSI] sd: select CRC_T10DIF only when necessary + * [SCSI] zfcp: Fix request queue locking + * [SCSI] zfcp: Correctly query end flag in gpn_ft response + * [SCSI] zfcp: Simplify ccw notify handler + * [SCSI] zfcp: Fix reference counter for remote ports + * [SCSI] zfcp: channel cannot be detached due to refcount imbalance + * [SCSI] zfcp: Remove duplicated unlikely() macros. + * [SCSI] scsi_dh: make check_sense return ADD_TO_MLQUEUE + * [SCSI] make scsi_check_sense HARDWARE_ERROR return ADD_TO_MLQUEUE on + retry + * [SCSI] fix check of PQ and PDT bits for WLUNs + * pcm037: add rts/cts support for serial port + * i.MX serial: fix init failure + * imx serial: set RXD mux bit on i.MX27 and i.MX31 + * imx serial: fix rts handling for non imx1 based hardware + * mlx4_core: Set RAE and init mtt_sz field in FRMR MPT entries + * udf: add llseek method + * PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets + * PCI: Fix printk warnings in probe.c + * PCI: Fix printk warnings in setup-bus.c + * PCI Hotplug: fakephp: fix deadlock... again + * clockevents: remove WARN_ON which was used to gather information + * ocfs2: Fix a bug in direct IO read. 
+ * arch/x86/kernel/kdebugfs.c: introduce missing kfree + * [IA64] fix compile failure with non modular builds + * [IA64] fix up bte.h + * [IA64] arch/ia64/sn/pci/tioca_provider.c: introduce missing kfree + * PCI: fix pciehp_free_irq() + * [IA64] prevent ia64 from invoking irq handlers on offline CPUs + * ide: Fix pointer arithmetic in hpt3xx driver code (3rd try) + * add deprecated ide-scsi to feature-removal-schedule.txt + * swiotlb: fix back-off path when memory allocation fails + * sparc64: Fix interrupt register calculations on Psycho and Sabre. + * VIDEO_SH_MOBILE_CEU should depend on HAS_DMA + * m68k: Update defconfigs for 2.6.27-rc6 + * sparc32: Fix function signature of of_bus_sbus_get_flags(). + * sched: fix 2.6.27-rc5 couldn't boot on tulsa machine randomly + * sched: fix deadlock in setting scheduler parameter to zero + * KVM: SVM: fix random segfaults with NPT enabled + * KVM: SVM: fix guest global tlb flushes with NPT + * KVM: VMX: Always return old for clear_flush_young() when using EPT + * clocksource, acpi_pm.c: fix check for monotonicity + * [ARM] OMAP: Fix MMC device data + * block: disable sysfs parts of the disk command filter + * ath9k: Assign seq# when mac80211 requests this + * sg: disable interrupts inside sg_copy_buffer + * MN10300: Change the fault handler to check in_atomic() not + in_interrupt() + * [Bluetooth] Fix regression from using default link policy + * netlink: fix overrun in attribute iteration + * x86: fix possible x86_64 and EFI regression + * sparc64: Fix PCI error interrupt registry on PSYCHO. + * sparc: Fix user_regset 'n' field values. + * niu: panic on reset + * PCI: re-add debug prints for unmodified BARs + * [ARM] 5245/1: Fix warning about unused return value in drivers/pcmcia + * [ARM] 5246/1: tosa: add proper clock alias for tc6393xb clock + * [ARM] 5247/1: tosa: SW_EAR_IN support + * [ARM] Fix PCI_DMA_BUS_IS_PHYS for ARM + * ata: duplicate variable sparse warning + * sata_inic162x: enable LED blinking + * [libata] LBA28/LBA48 off-by-one bug in ata.h + * proc: more debugging for "already registered" case + * include/linux/ioport.h: add missing macro argument for devm_release_* + family + * cpuset: avoid changing cpuset's cpus when -errno returned + * cpuset: hotplug documentation fix + * coredump_filter: add description of bit 4 + * bfs: fix Lockdep warning + * mm: ifdef Quicklists in /proc/meminfo + * spi_mpc83xx: fix clockrate calculation for low speed + * spi_mpc83xx: reject invalid transfer sizes + * pxa2xx_spi: chipselect bugfixes + * pxa2xx_spi: dma bugfixes + * mm: mark the correct zone as full when scanning zonelists + * Documentation/ABI: /sys/class/gpio + * MAINTAINERS: fix USB VIDEO CLASS mail list address + * ia64: fix panic during `modprobe -r xpc' + * atmel_lcdfb: disable LCD and DMA engines when suspending + * spi_s3c24xx: fix section warning + * rescan_partitions(): make device capacity errors non-fatal + * memstick: fix MSProHG 8-bit interface mode support + * Add Uwe Kleine-König to .mailmap + * xen: fix for xen guest with mem > 3.7G + * x86/paravirt: Remove duplicate paravirt_pagetable_setup_{start, done}() + * crypto: talitos - Avoid consecutive packets going out with same IV + * slub: fixed uninitialized counter in struct kmem_cache_node + * udp: Fix rcv socket locking + * IB/mlx4: Fix up fast register page list format + * [MIPS] VR41xx: unsigned irq cannot be negative + * x86: completely disable NOPL on 32 bits + * [S390] cio: Fix driver_data handling for ccwgroup devices. 
+ * [S390] cio: fix orb initialization in cio_start_key + * sparc64: Fix OOPS in psycho_pcierr_intr_other(). + * sparc64: Fix SMP bootup with CONFIG_STACK_DEBUG or ftrace. + * RDMA/nes: Fix client side QP destroy + * IPoIB: Fix deadlock on RTNL between bcast join comp and ipoib_stop() + * clockevents: make device shutdown robust + * powerpc: Fix interrupt values for DMA2 in MPC8610 HPCD device tree + * hpplus: fix build regression + * Fix PNP build failure, bugzilla #11276 + * warn: Turn the netdev timeout WARN_ON() into a WARN() + * [XFS] Move memory allocations for log tracing out of the critical path + * [XFS] Fix regression introduced by remount fixup + * [XFS] Prevent direct I/O from mapping extents beyond eof + * [XFS] Fix barrier status change detection. + * [XFS] Prevent lockdep false positives when locking two inodes. + * [XFS] Fix use-after-free with buffers + * [XFS] Don't do I/O beyond eof when unreserving space + * powerpc: Holly board needs dtbImage target + * Fix compile failure with non modular builds + * [ARM] 5249/1: davinci: remove redundant check in davinci_psc_config() + * [ARM] omap: back out 'internal_clock' support + * sctp: set the skb->ip_summed correctly when sending over loopback. + * [ARM] 5255/1: Update jornada ssp to remove build errors/warnings + * sctp: do not enable peer features if we can't do them. + * sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH + * bnx2: Promote vector field in bnx2_irq structure from u16 to unsigned + int + * forcedeth: call restore mac addr in nv_shutdown path + * e1000: prevent corruption of EEPROM/NVM + * e100: Use pci_pme_active to clear PME_Status and disable PME# + * md: Don't wait UNINTERRUPTIBLE for other resync to finish + * atstk1000: fix build breakage with BOARD_ATSTK100X_SW2_CUSTOM=y + * avr32: add .gitignore files + * avr32: add generic_find_next_le_bit bit function + * avr32: fix sys_sync_file_range() call convention + * avr32: nmi_enter() without nmi_exit() + * KVM: ia64: 'struct fdesc' build fix + * hwmon: (atxp1) Fix device detection logic + * hwmon: (it87) Fix fan tachometer reading in IT8712F rev 0x7 (I) + * hwmon: (ad7414) Make ad7414_update_device() static + * tmio_mmc: fix compilation with debug enabled + * atmel-mci: debugfs: enable clock before dumping regs + * atmel-mci: Fix memory leak in atmci_regs_show + * atmel-mci: Fix bogus debugfs file size + * atmel-mci: Set MMC_CAP_NEEDS_POLL if no detect_pin + * mmc_block: handle error from mmc_register_driver() + * mmc_test: initialize mmc_test_lock statically + * [MIPS] Fix 64-bit IP checksum code + * [MIPS] SMTC: Clear TIF_FPUBOUND on clone / fork. + * [MIPS] Fix potential latency problem due to non-atomic cpu_wait. 
+ * [MIPS] vmlinux.lds.S: handle .text.* + * MAINTAINERS: Trivial whitespace cleanups + * MAINTAINERS: Various fixes + * Linux 2.6.27-rc7 + + -- Tim Gardner Sun, 21 Sep 2008 21:49:28 -0600 + +linux (2.6.27-3.4) intrepid; urgency=low + + [ Colin Ian King ] + + * SAUCE: fix kernel oops in VirtualBox during paravirt patching + - LP: #246067 + * SAUCE: qc-usb: Enable Logitech QuickCam Messenger + - LP: #209901 + * SAUCE: appleir: Enable driver for new MacBook Pro + - LP: #157919 + + [ Tim Gardner ] + + * Enabled CONFIG_DEBUG_RODATA=y + + [ Upstream Kernel Changes ] + + * Revert "ALSA: hda - Added model selection for iMac 24"" + * Revert "x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet + against BAR, v3" + * Revert "[ARM] use the new byteorder headers" + * Revert "mac80211: Use IWEVASSOCREQIE instead of IWEVCUSTOM" + * Revert "crypto: camellia - Use kernel-provided bitops, unaligned access + helpers" + * svcrdma: Fix race between svc_rdma_recvfrom thread and the dto_tasklet + * sched, cpuset: rework sched domains and CPU hotplug handling (v4) + * ACPI: Fix now signed module parameter. + * ACPI: Change package length error to warning + * ACPI: Fix now signed module parameter. + * ACPI: Fix typo in "Disable MWAIT via DMI on broken Compal board" + * acpi: add checking for NULL early param + * UBIFS: fix zero-length truncations + * Input: bcm5974 - add maintainer entry + * sh64: re-add the __strnlen_user() prototype + * sh: fix ptrace_64.c:user_disable_single_step() + * PNPACPI: ignore the producer/consumer bit for extended IRQ descriptors + * UBIFS: always read hashed-key nodes under TNC mutex + * UBIFS: allow for racing between GC and TNC + * [CIFS] Fix plaintext authentication + * sparc32: Implement smp_call_function_single(). + * sh: crash kernel resource fix + * sh: fix kexec entry point for crash kernels + * sh: fix platform_resource_setup_memory() section mismatch + * sh: update Migo-R defconfig + * sh: update AP325RXA defconfig + * sh: fix semtimedop syscall + * cifs: fix O_APPEND on directio mounts + * [CIFS] update cifs change log + * [CIFS] Turn off Unicode during session establishment for plaintext + authentication + * ACPI: thinkpad-acpi: wan radio control is not experimental + * sparc: Fix resource flags for PCI children in OF device tree. + * remove blk_register_filter and blk_unregister_filter in gendisk + * ALSA: oxygen: fix distorted output on AK4396-based cards + * ipv6: When we droped a packet, we should return NET_RX_DROP instead of + 0 + * pkt_sched: Fix locking of qdisc_root with qdisc_root_sleeping_lock() + * net: Unbreak userspace usage of linux/mroute.h + * Don't trigger softlockup detector on network fs blocked tasks + * Resource handling: add 'insert_resource_expand_to_fit()' function + * sparc64: setup_valid_addr_bitmap_from_pavail() should be __init + * UBIFS: do not update min_idx_lebs in stafs + * UBIFS: push empty flash hack down + * UBIFS: remove incorrect index space check + * UBIFS: improve statfs reporting + * UBIFS: fix assertion + * UBIFS: add forgotten gc_idx_lebs component + * UBIFS: introduce LEB overhead + * UBIFS: improve statfs reporting even more + * UBIFS: fill f_fsid + * drm/radeon: downgrade debug message from info to debug. + * Remove invalidate_partition call from do_md_stop. 
+ * Fix problem with waiting while holding rcu read lock in md/bitmap.c + * ALSA: hda: Distortion fix for dell_m6_core_init + * ALSA: ASoC: fix pxa2xx-i2s clk_get call + * block: restore original behavior of /proc/partition when there's no + partition + * debugobjects: fix lockdep warning + * avr32: Fix lockup after Java stack underflow in user mode + * avr32: pm_standby low-power ram bug fix + * nfsd: fix compound state allocation error handling + * sunrpc: fix possible overrun on read of /proc/sys/sunrpc/transports + * nfsd: fix buffer overrun decoding NFSv4 acl + * audit: Moved variable declaration to beginning of function + * Fix modules_install on RO nfs-exported trees. + * Remove '#include ' from mm/page_isolation.c + * dabusb_fpga_download(): fix a memory leak + * [MTD] mtdchar.c: Fix regression in MEMGETREGIONINFO ioctl() + * ALSA: hda - Fix ALC663 auto-probe + * ALSA: hda - Add mic-boost controls to ALC662/663 auto configuration + * Un-break printk strings in x86 PCI probing code + * kernel/resource.c: fix new kernel-doc warning + * softlockup: minor cleanup, don't check task->state twice + * fix typo in arch/parisc/hpux/fs.c + * m68k: atari_keyb_init operator precedence fix + * ACPI: Fix typo in "Disable MWAIT via DMI on broken Compal board" + * don't diff generated firmware files + * IDE: compile fix for sff_dma_ops + * IDE: palm_bk3710: fix compile warning for unused variable + * ide: fix hwif_to_node() + * palm_bk3710: improve IDE registration + * ide-disk: remove stale init_idedisk_capacity() documentation + * ide/Kconfig: mark ide-scsi as deprecated + * net/wireless/Kconfig: clarify the description for + CONFIG_WIRELESS_EXT_SYSFS + * iwlwifi: do not use GFP_DMA in iwl_tx_queue_init + * iwlwifi: workaround interrupt handling no some platforms + * iwlwifi: fix apm_stop (wrong bit polarity for FLAG_INIT_DONE) + * iwlwifi: fix 64bit platform firmware loading + * orinoco: Multicast to the specified addresses + * wireless/libertas/if_cs.c: fix memory leaks + * mac80211: Fix debugfs union misuse and pointer corruption + * rt2x00: Compiler warning unmasked by fix of BUILD_BUG_ON + * ath9k: Incorrect key used when group and pairwise ciphers are + different. + * ath9: Fix ath_rx_flush_tid() for IRQs disabled kernel warning message. 
+ * net/xfrm: Use an IS_ERR test rather than a NULL test + * ipv: Re-enable IP when MTU > 68 + * NTFS: update homepage + * mm: make setup_zone_migrate_reserve() aware of overlapping nodes + * VFS: fix dio write returning EIO when try_to_release_page fails + * acer-wmi: remove debugfs entries upon unloading + * mm/bootmem: silence section mismatch warning - + contig_page_data/bootmem_node_data + * MAINTAINERS: add a maintainer for the BCM5974 multitouch driver + * 8250: improve workaround for UARTs that don't re-assert THRE correctly + * mmc: at91_mci: don't use coherent dma buffers + * pid_ns: zap_pid_ns_processes: fix the ->child_reaper changing + * pid_ns: (BUG 11391) change ->child_reaper when init->group_leader exits + * cirrusfb: check_par fixes + * devcgroup: fix race against rmdir() + * mm: show quicklist usage in /proc/meminfo + * mm: size of quicklists shouldn't be proportional to the number of CPUs + * ipc: document the new auto_msgmni proc file + * hp-wmi: update to match current rfkill semantics + * hp-wmi: add proper hotkey support + * tdfxfb: fix SDRAM memory size detection + * tdfxfb: fix frame buffer name overrun + * rtc_time_to_tm: fix signed/unsigned arithmetic + * ibft: fix target info parsing in ibft module + * sysfs: document files in /sys/firmware/sgi_uv/ + * rtc-cmos: wake again from S5 + * pm_qos_requirement might sleep + * drivers/char/random.c: fix a race which can lead to a bogus BUG() + * ipsec: Fix deadlock in xfrm_state management. + * [x86] Fix TSC calibration issues + * tipc: Don't use structure names which easily globally conflict. + * sparc64: Fix IPI call locking. + * [ARM] omap: fix gpio.c build error + * sparc64: Prevent sparc64 from invoking irq handlers on offline CPUs + * powerpc: Fix uninitialised variable in VSX alignment code + * powerpc: Only make kernel text pages of linear mapping executable + * powerpc: Make sure _etext is after all kernel text + * powerpc: Work around gcc's -fno-omit-frame-pointer bug + * powerpc: Fix build error with 64K pages and !hugetlbfs + * powerpc: Fix for getting CPU number in power_save_ppc32_restore() + * UBIFS: amend f_fsid + * net/usb/pegasus: avoid hundreds of diagnostics + * ixgbe: initialize interrupt throttle rate + * pcnet-cs, axnet_cs: add new IDs, remove dup ID with less info + * netxen: Remove workaround for chipset quirk + * Split up PIT part of TSC calibration from native_calibrate_tsc + * iwlwifi: W/A for the TSF correction in IBSS + * iwlwifi: fix hidden ssid discovery in passive channels + * iwlwifi: remove false rxon if rx chain changes + * iwlwifi: fix station mimo power save values + * iwlwifi: fix rx_chain computation + * iwlwifi: fix Tx cmd memory allocation failure handling + * iwlwifi: call apm stop on exit + * iwlwifi: fix STATUS_EXIT_PENDING is not set on pci_remove + * ath9k: Fix TX status reporting + * ath9k: Fix TX control flag use for no ACK and RTS/CTS + * V4L/DVB (8555): au8522: add mechanism to configure IF frequency for vsb + and qam + * V4L/DVB (8556): au0828: add support for Hauppauge Woodbury + * V4L/DVB (8598): au8522: clean up function au8522_set_if + * V4L/DVB (8599): au8522: remove if frequency settings from vsb/qam + modulation tables + * V4L/DVB (8600): au0828: explicitly set 6 MHz IF frequency in + hauppauge_hvr950q_config + * V4L/DVB (8629): v4l2-ioctl: do not try to handle private V4L1 ioctls + * V4L/DVB (8633): ivtv: update ivtv version number + * V4L/DVB (8648): ivtv: improve CC support + * V4L/DVB (8660): gspca: Simplify the scan of URB packets in pac7311. 
+ * V4L/DVB (8661): gspca: Bug in the previous changeset about pac7311. + * V4L/DVB (8663): gspca: Webcam 0c45:6128 added in sonixj. + * V4L/DVB (8664): gspca: The bridge/sensor of the webcam 093a:2621 is a + PAC 7302. + * V4L/DVB (8665): gspca: Fix the 640x480 resolution of the webcam + 093a:2621. + * V4L/DVB (8666): gspca: Bad scanning of frames in pac7311. + * V4L/DVB (8667): gspca: Bad probe of Z-Star/Vimicro webcams with pas106 + sensor. + * V4L/DVB (8668): gspca: Conflict GSPCA / ET61X251 for the webcam + 102c:6251. + * V4L/DVB (8669): gspca: Add white balance control for spca561 rev 012A. + * V4L/DVB (8671): gspca: Remove the unused field 'dev_name' of the device + structure. + * V4L/DVB (8672): gspca: Big rewrite of spca561. + * V4L/DVB (8673): gspca: Bad frame scanning again and bad init in + pac7311. + * V4L/DVB (8674): gspca: Webcam 0c45:612e added in sonixj. + * V4L/DVB (8675): gspca: Pixmap PJPG (Pixart 73xx JPEG) added, generated + by pac7311. + * V4L/DVB (8678): Remove the dead CONFIG_RADIO_MIROPCM20{,_RDS} code + * V4L/DVB (8681): v4l2-ioctl.c: fix warning + * V4L/DVB (8682): V4L: fix return value of register video func + * V4L/DVB (8701): cx18: Add missing lock for when the irq handler + manipulates the queues + * V4L/DVB (8703): gspca: Do controls work for spca561 revision 12a. + * V4L/DVB (8705): gspca: Adjust some control limits in spca561. + * V4L/DVB (8706): Make contrast and brightness work for pac7302. + * V4L/DVB (8707): gspca: Colors, hflip and vflip controls added for + pac7302. + * V4L/DVB (8709): gspca: Fix initialization and controls of sn9x110 - + ov7630. + * V4L/DVB (8710): gspca: Bad color control in sonixj. + * V4L/DVB (8711): gspca: Bad controls and quantization table of pac7311. + * V4L/DVB (8712): gspca: Bad start of sonixj webcams since changeset + a8779025e7e8. + * V4L/DVB (8713): gspca: Bad color control again in sonixj. + * V4L/DVB (8714): gspca: Bad start of sn9c110 and sensor om6802. + * V4L/DVB (8715): gspca: Change the name of some webcam in the gspca doc. + * V4L/DVB (8716): gspca: Bad start of sn9c110 and sensor ov7630. + * V4L/DVB (8717): gspca: Frame buffer too small for small resolutions + (sonixj and t613). + * V4L/DVB (8718): gspca: suspend/resume added. + * V4L/DVB (8719): gspca: Have VIDIOC_QUERYCTRL more compliant to the + spec. + * V4L/DVB (8720): gspca: V4L2_CAP_SENSOR_UPSIDE_DOWN added as a cap for + some webcams. + * V4L/DVB (8722): sms1xxx: fix typo in license header + * V4L/DVB (8726): link tuner before saa7134 + * V4L/DVB (8727): V4L1: make PMS not autoprobe when builtin. + * V4L/DVB (8728): 1-make-pms-not-autoprobe-when-builtin update + * V4L/DVB (8749): Fix error code, when camera is not turned on by sonypi + * V4L/DVB (8750): V4L: check inval in video_register_device_index() + * V4L/DVB (8751): vivi: Fix some issues at vivi register routine + * V4L/DVB (8757): v4l-dvb: fix a bunch of sparse warnings + * V4L/DVB (8769): cx18: Simplify queue flush logic to prevent oops in + cx18_flush_queues() + * V4L/DVB (8778): radio: fix incorrect video_register_device result check + * V4L/DVB (8779): v4l: fix more incorrect video_register_device result + checks + * V4L/DVB (8790): saa7115: call i2c_set_clientdata only when state != + NULL + * V4L/DVB (8803): s5h1409: Enable QAM_AUTO mode + * V4L/DVB (8804): s5h1411: Enable QAM_AUTO mode + * V4L/DVB (8805): Steven Toth email address change + * V4L/DVB (8809): gspca: Revert commit + 9a9335776548d01525141c6e8f0c12e86bbde982 + * V4L/DVB (8810): gspca: Compile error when CONFIG_PM not defined. 
+ * V4L/DVB (8812): gspca: Do pac73xx webcams work. + * V4L/DVB (8813): gspca: Adjust SOF detection for pac73xx. + * V4L/DVB (8814): gspca: Set DISABLED the disabled controls at query + control time. + * V4L/DVB (8815): gspca: Fix problems with disabled controls. + * V4L/DVB (8816): gspca: Set disabled ctrls and fix a register pb with + ovxxxx in sonixb. + * V4L/DVB (8817): gspca: LED and proble changes in sonixb. + * V4L/DVB (8818): gspca: Reinitialize the device on resume. + * V4L/DVB (8819): gspca: Initialize the ov519 at open time and source + cleanup. + * V4L/DVB (8820): gspca: Change initialization and gamma of zc3xx - + pas106. + * V4L/DVB (8822): gspca: Change some subdriver functions for + suspend/resume. + * V4L/DVB (8823): gspca: H and V flips work for ov7670 only in ov519. + * V4L/DVB (8824): gspca: Too much code removed in the suspend/resume + changeset. + * V4L/DVB (8825): gspca: More controls for pac73xx and new webcam + 093a:2624. + * V4L/DVB (8826): gspca: Webcam Labtec 2200 (093a:2626) added in pac7311. + * V4L/DVB (8827): gspca: Stop pac7302 autogain oscillation. + * V4L/DVB (8828): gspca: Set the clock at the end of initialization in + sonixj. + * V4L/DVB (8829): gspca: Have a clean kmalloc-ated buffer for USB + exchanges. + * V4L/DVB (8830): gspca: Move some probe code to the new init function. + * V4L/DVB (8831): gspca: Resolve webcam conflicts between some drivers. + * V4L/DVB (8832): gspca: Bad pixelformat of vc0321 webcams. + * V4L/DVB (8833): gspca: Cleanup the sonixb code. + * V4L/DVB (8834): gspca: Have a bigger buffer for sn9c10x compressed + images. + * V4L/DVB (8835): gspca: Same pixfmt as the sn9c102 driver and raw Bayer + added in sonixb. + * V4L/DVB (8837): dvb: fix I2C adapters name size + * V4L/DVB (8839): dib0700: add comment to identify 35th USB id pair + * V4L/DVB (8840): dib0700: add basic support for Hauppauge Nova-TD-500 + (84xxx) + * V4L/DVB (8842): vivi_release(): fix use-after-free + * V4L/DVB (8843): tda10048_firmware_upload(): fix a memory leak + * V4L/DVB (8844): dabusb_fpga_download(): fix a memory leak + * bnx2x: Accessing un-mapped page + * SELinux: memory leak in security_context_to_sid_core + * x86: add io delay quirk for Presario F700 + * mmap: fix petty bug in anonymous shared mmap offset handling + * x86: Change warning message in TSC calibration. + * PCI: fix pbus_size_mem() resource alignment for CardBus controllers + * [ARM] omap: fix build error in ohci-omap.c + * [ARM] remove unused #include + * ACPI: Make Len Brown the ACPI maintainer again + * fujitsu-laptop: fix regression for P8010 in 2.6.27-rc + * ACPI: Avoid bogus timeout about SMbus check + * acer-wmi: remove debugfs entries upon unloading + * forgotten refcount on sysctl root table + * V4L/DVB (8868): gspca: Support for vga modes with sif sensors in + sonixb. + * V4L/DVB (8869): gspca: Move the Sonix webcams with TAS5110C1B from + sn9c102 to gspca. + * V4L/DVB (8870): gspca: Fix dark room problem with sonixb. + * V4L/DVB (8872): gspca: Bad image format and offset with rev072a of + spca561. + * V4L/DVB (8873): gspca: Bad image offset with rev012a of spca561 and + adjust exposure. + * V4L/DVB (8874): gspca: Adjust hstart for sn9c103/ov7630 and update + usb-id's. 
+ * [ARM] omap: fix virtual vs physical address space confusions + * V4L/DVB (8876): budget: udelay changed to mdelay + * V4L/DVB (8877): b2c2 and bt8xx: udelay to mdelay + * V4L/DVB (8880): PATCH: Fix parents on some webcam drivers + * V4L/DVB (8881): gspca: After 'while (retry--) {...}', retry will be -1 + but not 0. + * powerpc/spufs: Fix multiple get_spu_context() + * powerpc/spufs: Fix race for a free SPU + * Input: bcm5974 - small formatting cleanup + * Input: bcm5974 - improve finger tracking and counting + * Input: bcm5974 - add BTN_TOUCH event for mousedev benefit + * Input: i8042 - make Lenovo 3000 N100 blacklist entry more specific + * sh: resume_kernel fix for kernel oops built with CONFIG_BKL_PREEMPT=y. + * sh64: resume_kernel fix for kernel oops built with + CONFIG_BKL_PREEMPT=y. + * i2c: fix i2c-sh_mobile timing issues + * clockevents: prevent clockevent event_handler ending up handler_noop + * clockevents: prevent endless loop in periodic broadcast handler + * clockevents: enforce reprogram in oneshot setup + * clockevents: prevent multiple init/shutdown + * clockevents: prevent endless loop lockup + * HPET: make minimum reprogramming delta useful + * [MTD] [NAND] tmio_nand: fix base address programming + * Fix conditional export of kvh.h and a.out.h to userspace. + * async_tx: fix the bug in async_tx_run_dependencies + * sched_clock: fix NOHZ interaction + * sched: fix process time monotonicity + * UBIFS: fix division by zero + * UBIFS: make minimum fanout 3 + * [MIPS] Fix data bus error recovery + * [MIPS] Fix WARNING: at kernel/smp.c:290 + * [MIPS] TXx9: Fix txx9_pcode initialization + * [MIPS] TX39xx: Add missing local_flush_icache_range initialization + * [MIPS] Probe initrd header only if explicitly specified + * res_counter: fix off-by-one bug in setting limit + * forcedeth: fix kexec regression + * atmel_lcdfb: fix oops in rmmod when framebuffer fails to register + * tracehook: comment pasto fixes + * drivers/mmc/card/block.c: fix refcount leak in mmc_block_open() + * x86: boot: stub out unimplemented CPU feature words + * x86: add NOPL as a synthetic CPU feature bit + * x86: use X86_FEATURE_NOPL in alternatives + * clockevents: broadcast fixup possible waiters + * x86: HPET fix moronic 32/64bit thinko + * x86: HPET: read back compare register before reading counter + * Fix CONFIG_AC97_BUS dependency + * [ARM] 5241/1: provide ioremap_wc() + * ntp: fix calculation of the next jiffie to trigger RTC sync + * clocksource, acpi_pm.c: use proper read function also in errata mode + * clocksource, acpi_pm.c: check for monotonicity + * x86: delay early cpu initialization until cpuid is done + * x86: move mtrr cpu cap setting early in early_init_xxxx + * sched: arch_reinit_sched_domains() must destroy domains to force + rebuild + * x86, xen: Use native_pte_flags instead of native_pte_val for .pte_flags + * x86: pda_init(): fix memory leak when using CPU hotplug + * x86: cpu_init(): fix memory leak when using CPU hotplug + * powerpc/spufs: Fix possible scheduling of a context to multiple SPEs + * netfilter: nf_conntrack_sip: de-static helper pointers + * netfilter: nf_conntrack_gre: more locking around keymap list + * netfilter: nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet + * netfilter: nf_conntrack_irc: make sure string is terminated before + calling simple_strtoul + * pkt_sched: Fix qdisc state in net_tx_action() + * powerpc: Fix rare boot build breakage + * ahci, pata_marvell: play nicely together + * sata_mv: add RocketRaid 1720 PCI ID to driver + * ahci: disable PMP 
+ for marvell ahcis
+ * sata_nv: disable hardreset for generic
+ * libata-sff: kill spurious WARN_ON() in ata_hsm_move()
+ * pata_sil680: remove duplicate pcim_enable_device
+ * ahci: RAID mode SATA patch for Intel Ibex Peak DeviceIDs
+ * [MIPS] IP22: Fix detection of second HPC3 on Challenge S
+ * xen: fix 2.6.27-rc5 xen balloon driver warnings
+ * x86: disable static NOPLs on 32 bits
+ * netns : fix kernel panic in timewait socket destruction
+ * bridge: don't allow setting hello time to zero
+ * NFS: Restore missing hunk in NFS mount option parser
+ * usb: fix null deferences in low level usb serial
+ * Fix format of MAINTAINERS
+ * sparc64: Disable timer interrupts in fixup_irqs().
+ * [Bluetooth] Fix reference counting during ACL config stage
+ * [Bluetooth] Enforce correct authentication requirements
+ * [Bluetooth] Reject L2CAP connections on an insecure ACL link
+ * [S390] CVE-2008-1514: prevent ptrace padding area read/write in 31-bit
+ mode
+ * [S390] cio: Correct cleanup on error.
+ * [S390] cio: handle ssch() return codes correctly.
+ * [S390] cio: allow offline processing for disconnected devices
+ * ipsec: Restore larval states and socket policies in dump
+ * update Documentation/filesystems/Locking for 2.6.27 changes
+ * MAINTAINERS: add Atheros maintainer for atlx
+ * lib: Correct printk %pF to work on all architectures
+ * x86: fix memmap=exactmap boot argument
+ * clockevents: remove WARN_ON which was used to gather information
+ * ipv6: Fix OOPS in ip6_dst_lookup_tail().
+ * Linux 2.6.27-rc6
+
+ -- Ben Collins Tue, 02 Sep 2008 12:45:56 -0400
+
+linux (2.6.27-2.3) intrepid; urgency=low
+
+ [ Ben Collins ]
+
+ * build/retag: Make script save .orig of tags for later use
+ * ubuntu/lirc: Fix device_create call
+ * build/firmware: Put in-kernel firmware into version specific subdir
+ - LP: #262115
+ * Rebase on linux-2.6 git.
+ * ABI bump
+
+ [ Herton Ronaldo Krzesinski ]
+
+ * SAUCE: (no-up) Apparmor warning fixes
+
+ [ John Johansen ]
+
+ * SAUCE: (no-up) Proper AppArmor ptrace updates for newer lsm API
+
+ [ Mackenzie Morgan ]
+
+ * SAUCE: Add quirk for ASUS Z37E to make sound audible after resume
+ - LP: #25896
+
+ -- Ben Collins Wed, 27 Aug 2008 14:03:05 -0400
+
+linux (2.6.27-1.2) intrepid; urgency=low
+
+ [ Amit Kucheria ]
+
+ * SAUCE: make fc transport removal of target configurable
+ * SAUCE: pm: Config option to disable handling of console during
+ suspend/resume
+
+ [ Ben Collins ]
+
+ * SAUCE: Lower warning level of some PCI messages
+ * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps
+ * SAUCE: tulip: Let dmfe handle davicom on non-sparc
+ * SAUCE: tulip: Define ULI PCI ID's
+ * SAUCE: (no-up) version: Implement version_signature proc file.
+ * SAUCE: (no-up) connector.h: Add idx/val for drbd
+ * SAUCE: (no-up) swap: Add notify_swap_entry_free callback for compcache
+ * SAUCE: drivers: Remove some duplicate device entries in various modules
+ * SAUCE: (no-up) [AppArmor] merge with upstream subversion r1291
+ * SAUCE: apparmor: Update for changes to ptrace lsm hooks
+ * SAUCE: (no-up) Enable ubuntu extra subdirectory
+ * SAUCE: applesmc: Add MacBookAir
+ * SAUCE: (no-up) ACPI: initramfs DSDT override support
+ * ubuntu: Add drbd module
+ * ubuntu: Add iscsitarget module
+ * ubuntu: Add BOM for iscsitarget
+ * ubuntu: Add squashfs driver
+ * SAUCE: (no-up) Check for squashfs superblock in initramfs mounting.
+ * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Added et131x driver + * ubuntu: Add dm-raid4-5 driver + * ubuntu: Add ndiswrapper driver + * ubuntu: Added ram backed compressed swap module (compcache) + * ubuntu: Add misc drivers from hardy lum + * ubuntu: Add heci driver 3.2.0.24 + * ubuntu: Add ov511 and bt-sco drivers + * ubuntu: Add acx, prism2_usb wireless drivers + * ubuntu: Add at76 driver to build + * ubuntu: Add fsam7400 sw kill switch driver + * ubuntu: Added qc-usb driver + * ubuntu: e1000e: Upgraded module to 0.4.1.7 + * ubuntu: Added rfkill drivers + * ubuntu: VIA - Add VIA DRM Chrome9 3D engine + * ubuntu: unionfs: Added v1.4 module from hardy + * ubuntu: Add LIRC driver + * ubuntu: Add GFS driver + * ubuntu: New tlsup driver for toshiba laptops + * Update config files + * build/d-i: Remove obsolete dm modules + + [ Chuck Short ] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + + [ Colin Ian King ] + + * ubuntu: Add dm-loop + * SAUCE: Enable speedstep for sonoma processors. + + [ Dennis Noordsij ] + + * SAUCE: Work around ACPI corruption upon suspend on some Dell machines. + + [ Fabio M. Di Nitto ] + + * SAUCE: Export gfs2 symbols required for gfs1 kernel module + + [ Matthew Garrett ] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [ Michael Frey (Senior Manager, MID ] + + * SAUCE: Send HCI_RESET for Broadcomm 2046 + + [ Phillip Lougher ] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [ Stefan Bader ] + + * SAUCE: (no-up) Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: mmc: Increase power_up deleay to fix TI readers + + [ Tim Gardner ] + + * SAUCE: Add extra headers to linux-libc-dev + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: (no-up) Take care of orinoco_cs overlap with hostap_cs + * ubuntu: Add GNBD driver + + -- Ben Collins Sat, 23 Aug 2008 15:48:35 -0400 + +linux (2.6.27-0.0) intrepid; urgency=low + + * Not uploaded, placeholder for new release + + -- Ben Collins Sat, 23 Aug 2008 15:48:35 -0400 + +linux (2.6.26-5.17) intrepid; urgency=low + + [ Ben Collins ] + + * build/abi: Add tosh_smm symbol to blacklist + + -- Ben Collins Fri, 15 Aug 2008 09:29:34 -0400 + +linux (2.6.26-5.16) intrepid; urgency=low + + [ Ben Collins ] + + * Revert "SAUCE: toshiba_acpi: Rewrote most of the proc entry bits." + * Revert "SAUCE: Update toshiba_acpi.c to version 0.19a" + * build/config: Disable in-kernel toshiba driver(s) + * ubuntu/tlsup: New driver for toshiba laptops + * build/config: Enable TLSUP driver + * SAUCE: e1000e: Fix E1000E_ENABLED logic to check for our E1000E_NEW + driver as well + * ubuntu/e1000e: Remove E1000E_ENABLED option in local config + * build/config: Update configs to have E1000E_ENABLED set + * ubuntu/prism2: Remove duplicate device + + [ Fabio M. 
+ Di Nitto ]
+
+ * SAUCE: Export gfs2 symbols required for gfs1 kernel module
+
+ [ Stefan Bader ]
+
+ * SAUCE: x86: HPET rework for SB700
+ - LP: #255910
+
+ [ Tim Gardner ]
+
+ * Add GNBD driver
+ * Enable GNBD driver
+ * SAUCE: Add GFS driver
+ * SAUCE: Enable gfs driver configs
+ * b43: Linksys WMP54G (BCM4306/3) card in a PCI format has an SPROM
+ coding
+
+ [ Upstream Kernel Changes ]
+
+ * KVM: x86 emulator: emulate clflush
+ * USB: quirk PLL power down mode
+
+ -- Ben Collins Mon, 11 Aug 2008 13:19:28 -0400
+
+linux (2.6.26-5.15) intrepid; urgency=low
+
+ [ Ben Collins ]
+
+ * Revert "SAUCE: Add blacklist support to fix Belkin bluetooth dongle."
+ - Superceded by upstream changes.
+ * build/config: New option enabled for uvcvideo
+ * build/control: Add Vcs-Git meta data to control file
+ * SAUCE: toshiba_acpi: Rewrote most of the new code
+ * abi/perm-blacklist: Add emu10k1 driver to blacklist
+
+ [ Upstream Kernel Changes ]
+
+ * pxamci: trivial fix of DMA alignment register bit clearing
+ * udplite: Protection against coverage value wrap-around
+ * ipv6: use timer pending
+ * ipv6: __KERNEL__ ifdef struct ipv6_devconf
+ * hdlcdrv: Fix CRC calculation.
+ * quota: fix possible infinite loop in quota code
+ * isofs: fix minor filesystem corruption
+ * KVM: VMX: Fix a wrong usage of vmcs_config
+ * KVM: SVM: fix suspend/resume support
+ * KVM: mmu_shrink: kvm_mmu_zap_page requires slots_lock to be held
+ * KVM: VMX: Add ept_sync_context in flush_tlb
+ * KVM: x86 emulator: Fix HLT instruction
+ * KVM: MMU: nuke shadowed pgtable pages and ptes on memslot destruction
+ * KVM: MMU: Fix potential race setting upper shadow ptes on nonpae hosts
+ * Patch Upstream: x86 ptrace: fix PTRACE_GETFPXREGS error
+ * rcu: fix rcu_try_flip_waitack_needed() to prevent grace-period stall
+ * Fix typos from signal_32/64.h merge
+ * x86 reboot quirks: add Dell Precision WorkStation T5400
+ * USB: fix usb serial pm counter decrement for disconnected interfaces
+ * x86, suspend, acpi: enter Big Real Mode
+ * markers: fix duplicate modpost entry
+ * Fix build on COMPAT platforms when CONFIG_EPOLL is disabled
+ * proc: fix /proc/*/pagemap some more
+ * cpusets: fix wrong domain attr updates
+ * x86: fix crash due to missing debugctlmsr on AMD K6-3
+ * ide-cd: fix oops when using growisofs
+ * rtc-at91rm9200: avoid spurious irqs
+ * vmlinux.lds: move __attribute__((__cold__)) functions back into final
+ .text section
+ * ARM: fix fls() for 64-bit arguments
+ * tcp: Clear probes_out more aggressively in tcp_ack().
+ * sparc64: Fix lockdep issues in LDC protocol layer.
+ * sparc64: Fix cpufreq notifier registry.
+ * sparc64: Do not define BIO_VMERGE_BOUNDARY.
+ * iop-adma: fix platform driver hotplug/coldplug + * myri10ge: do not forget to setup the single slice pointers + * myri10ge: do not use mgp->max_intr_slots before loading the firmware + * ALSA: trident - pause s/pdif output + * V4L: cx18: Upgrade to newer firmware & update documentation + * DVB: dib0700: add support for Hauppauge Nova-TD Stick 52009 + * V4L: uvcvideo: Fix a buffer overflow in format descriptor parsing + * V4L: uvcvideo: Use GFP_NOIO when allocating memory during resume + * V4L: uvcvideo: Don't free URB buffers on suspend + * V4L: uvcvideo: Make input device support optional + * V4L: uvcvideo: Add support for Medion Akoya Mini E1210 integrated + webcam + * V4L: saa7134: Copy tuner data earlier to avoid overwriting manual tuner + type + * V4L: cx23885: Bugfix for concurrent use of /dev/video0 and /dev/video1 + * DVB: cx23885: Ensure PAD_CTRL is always reset to a sensible default + * DVB: cx23885: DVB Transport cards using DVB port VIDB/TS1 did not + stream + * DVB: cx23885: Reallocated the sram to avoid concurrent VIDB/C issues + * DVB: cx23885: SRAM changes for the 885 and 887 silicon parts + * x86: fix kernel_physical_mapping_init() for large x86 systems + * eCryptfs: use page_alloc not kmalloc to get a page of memory + * UML - Fix boot crash + * ixgbe: remove device ID for unsupported device + * mpc52xx_psc_spi: fix block transfer + * tmpfs: fix kernel BUG in shmem_delete_inode + * markers: fix markers read barrier for multiple probes + * VFS: increase pseudo-filesystem block size to PAGE_SIZE + * cpufreq acpi: only call _PPC after cpufreq ACPI init funcs got called + already + * b43legacy: Release mutex in error handling code + * ath5k: don't enable MSI, we cannot handle it yet + * Fix off-by-one error in iov_iter_advance() + * Linux 2.6.26.1 + * ftrace: remove unneeded documentation + * romfs_readpage: don't report errors for pages beyond i_size + * netfilter: nf_nat_sip: c= is optional for session + * SCSI: bsg: fix bsg_mutex hang with device removal + * x86: idle process - add checking for NULL early param + * x86: io delay - add checking for NULL early param + * Close race in md_probe + * Kprobe smoke test lockdep warning + * netfilter: xt_time: fix time's time_mt()'s use of do_div() + * linear: correct disk numbering error check + * SCSI: ch: fix ch_remove oops + * NFS: Ensure we zap only the access and acl caches when setting new acls + * jbd: fix race between free buffer and commit transaction + * Input: i8042 - add Intel D845PESV to nopnp list + * Input: i8042 - add Gericom Bellagio to nomux blacklist + * Input: i8042 - add Acer Aspire 1360 to nomux blacklist + * Bluetooth: Signal user-space for HIDP and BNEP socket errors + * Add compat handler for PTRACE_GETSIGINFO + * ALSA: hda - Fix wrong volumes in AD1988 auto-probe mode + * ALSA: hda - Fix DMA position inaccuracy + * ALSA: hda - Add missing Thinkpad Z60m support + * ALSA: emu10k1 - Fix inverted Analog/Digital mixer switch on Audigy2 + * vfs: fix lookup on deleted directory + * Ath5k: fix memory corruption + * Ath5k: kill tasklets on shutdown + * sound: ensure device number is valid in snd_seq_oss_synth_make_info + * Linux 2.6.26.2 + + -- Ben Collins Sun, 03 Aug 2008 13:25:02 -0400 + +linux (2.6.26-5.14) intrepid; urgency=low + + [ Ben Collins ] + + * SAUCE: applesmc: Add MacBookAir + * build: Do not build ddeb unless we are on the buildd + * build: control: Consistency in arch fields. 
+ * SAUCE: Update toshiba_acpi.c to version 0.19a + - LP: #77026 + * build: Added perm blacklist support and per-module support to abi-check + - Blacklist p80211 module from abi checks + * ubuntu/lirc: Get rid of drivers symlink and use real include stuff + + + [ Colin Ian King ] + + * SAUCE: acerhk module - add support for Amilo A1650g keyboard + - LP: #84159 + * SAUCE: rt2x00: Fix OOPS on failed creation of rt2x00lib workqueue + - LP: #249242 + + [ Mario Limonciello ] + + * Add LIRC back in + + [ Tim Gardner ] + + * Makefile race condition can lead to ndiswrapper build failure + - LP: #241547 + * update linux-wlan-ng (prism2_usb) to upstream version 1861 + - LP: #245026 + + [ Upstream Kernel Changes ] + + * Fix typos from signal_32/64.h merge + + -- Ben Collins Fri, 01 Aug 2008 00:05:01 -0400 + +linux (2.6.26-5.13) intrepid; urgency=low + + [ Ben Collins ] + + * build: Make makedumpfile an amd64/i386 only build-dep + * ubuntu/acerhk: Fixup assembly to compile with newer binutils + + -- Ben Collins Sat, 26 Jul 2008 16:41:50 -0400 + +linux (2.6.26-4.12) intrepid; urgency=low + + [ Ben Collins ] + + * e1000e: Upgraded module to 0.4.1.7 upstream. Placed in ubuntu/, + in-kernel driver disabled + * config: Disable e1000e in-kernel, and enable newer driver in ubuntu/ + * rfkill: Update to 1.3 drivers, and move to common location + * ubuntu: Actually link kconfig/kbuild into rfkill subdir + * config: Enable loading dsdt from initramfs + - LP: #246222 + * ubuntu: [compcache] Update to fix crashes in improper BUG() + * build: Create a retag scripts to recover tags from rebases + * build: Updates for dbg pkg + * build: Make sure no empty lines show up in debian/files + * ubuntu: atl1e: Add new driver from 2.6.27-pre-rc1 + - LP: #243894 + * sys_getcwd: Fix some brokeness introduced by AppArmor __d_path + changes + - LP: #251223 + * ubuntu: unionfs: Added v1.4 module from hardy + * build: Add sub-flavour infrastructure, and virtual subflav + + [ Eric Piel ] + + * ACPI: Allow custom DSDT tables to be loaded from initramfs + + [ Kees Cook ] + + * AppArmor: Smack VFS patches + + [ Mario Limonciello ] + + * Work around ACPI corruption upon suspend on some Dell machines. + - LP: #183033 + + [ Tim Gardner ] + + * Export usbhid_modify_dquirk for LBM module bcm5974 + - LP: #250838 + * VIA - Add VIA DRM Chrome9 3D engine + - LP: #251862 + * Define TRUE/FALSE for VIA DRM driver. + + -- Ben Collins Tue, 15 Jul 2008 12:51:39 -0400 + +linux (2.6.26-4.11) intrepid; urgency=low + + [ Ben Collins ] + + * config: Enable bcm5974 driver in all configs + + [ 2.6.26-4.10 ] + + [ Amit Kucheria ] + + * Fix typo in GSPCA Makefile and make it compile + + [ Ben Collins ] + + * ubuntu: Remove UVC driver in favor of in-kernel one (-rc9) + * config: Updates for -rc9 + * ubuntu: Add acx, prism2_usb wireless drivers + * config: Enable prism2_usb and acx drivers. + * ubuntu: Add at76 driver to build + * config: Enable at76_usb driver. + * iscsitarget: Fix prototype for bi_end_io callback. 
+ * acx: Fix section type mismatch warnings + * fsam7400: Add sw kill switch driver + * config: Enable fsam7400 driver + * qc-usb: Added new driver + * config: Enable qc-usb driver + * drbd: Remove built-in connector usage + * drbd: Do not define idx/val for connector here + * connector.h: Add idx/val for drbd + * bcm5974: Added new driver + + [ Kees Cook ] + + * SAUCE: [AppArmor] merge with upstream subversion r1291 + * SAUCE: [AppArmor] fix typo in selinux_inode_link + * SAUCE: [AppArmor] aufs patches + + [ Michael Frey (Senior Manager, MID ] + + * SAUCE: Send HCI_RESET for Broadcomm 2046 + - LP: #241749 + + [ Tim Gardner ] + + * SAUCE: Medion Akoya Mini E1210 + + [ Upstream Kernel Changes ] + + * Revert "BAST: Remove old IDE driver" + * ARM: OMAP: DMA: Don't mark channel active in omap_enable_channel_irq + * ARM: OMAP: Correcting the gpmc prefetch control register address + * debugobjects: fix lockdep warning + * [ARM] 5115/1: pxafb: fix ifdef for command line option handling + * [ARM] 5116/1: pxafb: cleanup and fix order of failure handling + * [ARM] 5109/1: Mark rtc sa1100 driver as wakeup source before + registering it + * [ARM] Export dma_sync_sg_for_device() + * fix cgroup-inflicted breakage in block_dev.c + * [patch for 2.6.26 2/4] vfs: utimensat(): be consistent with utime() for + immutable and append-only files + * [patch for 2.6.26 1/4] vfs: utimensat(): ignore tv_sec if tv_nsec == + UTIME_OMIT or UTIME_NOW + * [patch for 2.6.26 3/4] vfs: utimensat(): fix error checking for + {UTIME_NOW,UTIME_OMIT} case + * [patch for 2.6.26 4/4] vfs: utimensat(): fix write access check for + futimens() + * [patch 1/4] vfs: path_{get,put}() cleanups + * [patch 2/4] fs: make struct file arg to d_path const + * [patch 3/4] vfs: fix ERR_PTR abuse in generic_readlink + * [patch 4/4] flock: remove unused fields from file_lock_operations + * [patch 3/3] vfs: make d_path() consistent across mount operations + * [patch 1/3] vfs: dcache sparse fixes + * [patch 2/3] vfs: dcache cleanups + * udf: Fix regression in UDF anchor block detection + * [SCSI] ses: Fix timeout + * netfilter: ip6table_mangle: don't reroute in LOCAL_IN + * [SCSI] esp: Fix OOPS in esp_reset_cleanup(). + * kernel/audit.c: nlh->nlmsg_type is gotten more than once + * audit: fix kernel-doc parameter notation + * remove useless argument type in audit_filter_user() + * Blackfin arch: fix bug - kernel boot fails when Spinlock and rw-lock + debugging enabled + * Blackfin arch: fix up section mismatch warning + * mac80211: implement EU regulatory domain + * b43: Do not return TX_BUSY from op_tx + * b43legacy: Do not return TX_BUSY from op_tx + * b43: Fix possible MMIO access while device is down + * b43legacy: Fix possible NULL pointer dereference in DMA code + * rt2x00: Fix unbalanced mutex locking + * iwlwifi: improve scanning band selection management + * [SCSI] esp: tidy up target reference counting + * [ARM] 5117/1: pxafb: fix __devinit/exit annotations + * thermal: Create CONFIG_THERMAL_HWMON=n + * ACPI: don't walk tables if ACPI was disabled + * dock: bay: Don't call acpi_walk_namespace() when ACPI is disabled. 
+ * x86: shift bits the right way in native_read_tscp + * x86: section/warning fixes + * V4L/DVB (8004): Fix INPUT dependency at budget-ci + * V4L/DVB (8005): Fix OOPS if frontend is null + * V4L/DVB (8007): cx18/cx25840: the S-Video LUMA input can use all + In1-In8 inputs + * V4L/DVB (8008): cx18: remove duplicate audio and video input enums + * V4L/DVB (8010): em28xx: Properly register extensions for already + attached devices + * V4L/DVB (8011): em28xx: enable DVB for HVR-900 + * V4L/DVB (8012): gl861: sleep a little to avoid I2C errors + * V4L/DVB (8013): gl861: remove useless identify_state + * V4L/DVB (8015): gl861: replace non critical msleep(0) with msleep(1) to + be on the safe side + * V4L/DVB (8017): Ensure em28xx extensions only get run against devs that + support them + * V4L/DVB (8018): Add em2860 chip ID + * V4L/DVB (8020): Fix callbacks functions of saa7134_empress + * V4L/DVB (8022): saa7134: fix race between opening and closing the + device + * V4L/DVB (8026): Avoids an OOPS if dev struct can't be successfully + recovered + * V4L/DVB (8027): saa7134: Avermedia A700: only s-video and composite + input are working + * V4L/DVB (8028): Improve error messages for tda1004x attach + * V4L/DVB (8029): Improve error message at tda1004x_attach + * V4L/DVB (8034): tda18271: fix IF notch frequency handling + * V4L/DVB (8035): tda18271: dont touch EB14 if rf_cal lookup is out of + range + * V4L/DVB (8036): tda18271: toggle rf agc speed mode on TDA18271HD/C2 + only + * V4L/DVB (8037): tda18271: ensure that the thermometer is off during + channel configuration + * V4L/DVB (8039): pxa-camera: fix platform_get_irq() error handling. + * V4L/DVB (8040): soc-camera: remove soc_camera_host_class class + * V4L/DVB (8042): DVB-USB UMT-010 channel scan oops + * V4L/DVB (8043): au0828: add support for additional USB device id's + * V4L/DVB (8044): au8522: tuning optimizations + * V4L/DVB (8048): saa7134: Fix entries for Avermedia A16d and Avermedia + E506 + * V4L/DVB (8061): cx18: only select tuner / frontend modules if + !DVB_FE_CUSTOMISE + * V4L/DVB (8063): cx18: Fix unintended auto configurations in + cx18-av-core + * V4L/DVB (8066): cx18: Fix audio mux input definitions for HVR-1600 Line + In 2 and FM radio + * V4L/DVB (8067): cx18: Fix firmware load for case when digital capture + happens first + * V4L/DVB (8068): cx18: Add I2C slave reset via GPIO upon initialization + * V4L/DVB (8069): cx18: Fix S-Video and Compsite inputs for the Yuan + MPC718 and enable card entry + * V4L/DVB (8071): tda10023: Fix possible kernel oops during + initialisation + * V4L/DVB (8073): av7110: Catch another type of ARM crash + * V4L/DVB (8074): av7110: OSD transfers should not be interrupted + * V4L/DVB (8075): stv0299: Uncorrected block count and bit error rate + fixed + * V4L/DVB (8092): videodev: simplify and fix standard enumeration + * V4L/DVB (8096): au8522: prevent false-positive lock status + * V4L/DVB (8097): xc5000: check device hardware state to determine if + firmware download is needed + * V4L/DVB (8100): V4L/vivi: fix possible memory leak in vivi_fillbuff + * V4L/DVB (8108): Fix open/close race in saa7134 + * s2io: fix documentation about intr_type + * tc35815: Mark carrier-off before starting PHY + * tc35815: Fix receiver hangup on Rx FIFO overflow + * ixgbe: fix EEH recovery during reset on PPC + * igb: fix EEH recovery during reset on PPC + * e1000e: fix EEH recovery during reset on PPC + * pcnet_cs, axnet_cs: clear bogus interrupt before request_irq + * drivers/net/r6040.c: Eliminate double 
+ sizeof
+ * ipg: fix jumbo frame compilation
+ * ipg: use NULL, not zero, for pointers
+ * [netdrvr] 3c59x: remove irqs_disabled warning from local_bh_enable
+ * [netdrvr] netxen: fix netxen_pci_tbl[] breakage
+ * e100: Do pci_dma_sync after skb_alloc for proper operation on ixp4xx
+ * e1000: only enable TSO6 via ethtool when using correct hardware
+ * [netdrvr] Fix IOMMU overflow checking in s2io.c
+ * qla3xxx: Hold RTNL while calling dev_close()
+ * Hold RTNL while calling dev_close()
+ * sata_uli: hardreset is broken
+ * rt2x00: Fix lock dependency errror
+ * prism: islpci_eth.c endianness fix
+ * mac80211: fix an oops in several failure paths in key allocation
+ * firewire: fw-sbp2: fix parsing of logical unit directories
+ * kbuild: fix a.out.h export to userspace with O= build.
+ * Ensure interrupted recovery completed properly (v1 metadata plus
+ bitmap)
+ * Don't acknowlege that stripe-expand is complete until it really is.
+ * Fix error paths if md_probe fails.
+ * hamradio: remove unused variable
+ * tcp: calculate tcp_mem based on low memory instead of all memory
+ * tcp: fix for splice receive when used with software LRO
+ * af_unix: fix 'poll for write'/connected DGRAM sockets
+ * netdevice: Fix typo of dev_unicast_add() comment
+ * pkt_sched: ERR_PTR() ususally encodes an negative errno, not positive.
+ * pkt_sched: Remove CONFIG_NET_SCH_RR
+ * include/linux/netdevice.h: don't export MAX_HEADER to userspace
+ * tcp: /proc/net/tcp rto,ato values not scaled properly (v2)
+ * netlink: Fix some doc comments in net/netlink/attr.c
+ * CONNECTOR: add a proc entry to list connectors
+ * inet fragments: fix race between inet_frag_find and
+ inet_frag_secret_rebuild
+ * net/inet_lro: remove setting skb->ip_summed when not LRO-able
+ * netlabel: Fix a problem when dumping the default IPv6 static labels
+ * ipv6 route: Convert rt6_device_match() to use RT6_LOOKUP_F_xxx flags.
+ * sched: fix cpu hotplug
+ * Fix and clean top .gitignore
+ * x86: fix cpu hotplug crash
+ * ptrace GET/SET FPXREGS broken
+ * Input: add KEY_MEDIA_REPEAT definition
+ * Input: fix locking in force-feedback core
+ * [ARM] 5131/1: Annotate platform_secondary_init with trace_hardirqs_off
+ * ide: fix /proc/ide/ide?/mate reporting
+ * netfilter: nf_conntrack_tcp: fixing to check the lower bound of valid
+ ACK
+ * textsearch: fix Boyer-Moore text search bug
+ * hostap: don't report useless WDS frames by default
+ * hostap: fix sparse warnings
+ * mac80211: don't accept WEP keys other than WEP40 and WEP104
+ * V4L/DVB (8145a): USB Video Class driver
+ * [IA64] Bugfix for system with 32 cpus
+ * [IA64] export account_system_vtime
+ * sched: fix divide error when trying to configure rt_period to zero
+ * x86: fix NODES_SHIFT Kconfig range
+ * block: Fix the starving writes bug in the anticipatory IO scheduler
+ * Properly notify block layer of sync writes
+ * rcu: fix hotplug vs rcu race
+ * I2C: S3C2410: Check ACK on byte transmission
+ * I2C: S3C2410: Fixup error codes returned rom a transfer.
+ * I2C: S3C2410: Add MODULE_ALIAS() for s3c2440 device.
+ * PCI: Restrict VPD read permission to root + * powerpc/bootwrapper: update for initrd with simpleImage + * i2c: Documentation: fix device matching description + * i2c: Fix bad hint about irqs in i2c.h + * powerpc/legacy_serial: Bail if reg-offset/shift properties are present + * powerpc/mpc5200: Fix lite5200b suspend/resume + * ipv4: fix sysctl documentation of time related values + * net-sched: change tcf_destroy_chain() to clear start of filter list + * net-sched: fix filter destruction in atm/hfsc qdisc destruction + * netlink: Unneeded local variable + * net: Tyop of sk_filter() comment + * netdevice: Fix wrong string handle in kernel command line parsing + * net: fib_rules: fix error code for unsupported families + * dm crypt: use cond_resched + * V4L/DVB (8178): uvc: Fix compilation breakage for the other drivers, if + uvc is selected + * PCI: Limit VPD read/write lengths for Broadcom 5706, 5708, 5709 rev. + * PCI: acpiphp: cleanup notify handler on all root bridges + * drivers/input/ff-core.c needs + * DRM/i915: only use tiled blits on 965+ + * tty: Fix inverted logic in send_break + * x86: fix Intel Mac booting with EFI + * arch/x86/mm/init_64.c: early_memtest(): fix types + * 9p: fix O_APPEND in legacy mode + * slub: Do not use 192 byte sized cache if minimum alignment is 128 byte + * Do not overwrite nr_zones on !NUMA when initialising zlcache_ptr + * [MIPS] IP32: Fix unexpected irq 71 + * [MIPS] IP22: Fix crashes due to wrong L1_CACHE_BYTES + * [MIPS] cevt-txx9: Reset timer counter on initialization + * hrtimer: prevent migration for raising softirq + * svcrpc: fix handling of garbage args + * OHCI: Fix problem if SM501 and another platform driver is selected + * USB: fix cdc-acm resume() + * USB: ehci - fix timer regression + * USB: ohci - record data toggle after unlink + * USB: mass storage: new id for US_SC_CYP_ATACB + * sisusbvga: Fix oops on disconnect. + * USB: New device ID for ftdi_sio driver + * USB: fix interrupt disabling for HCDs with shared interrupt handlers + * USB: don't lose disconnections during suspend + * USB: another option device id + * USB: add a pl2303 device id + * USB: fix Oops on loading ipaq module since 2.6.26 + * USB: adding comment for ipaq forcing number of ports + * [MIPS] Fix bug in atomic_sub_if_positive. + * xen: fix address truncation in pte mfn<->pfn conversion + * sata_sil24: add DID for another adaptec flavor + * ahci: always clear all bits in irq_stat + * libata-sff: improve HSM violation reporting + * sata_mv: safer logic for limit_warnings + * Update maintainers for powerpc + * Christoph has moved + * mm: dirty page accounting vs VM_MIXEDMAP + * rtc: rtc_read_alarm() handles wraparound + * firmware: fix the request_firmware() dummy + * serial: fix serial_match_port() for dynamic major tty-device numbers + * get_user_pages(): fix possible page leak on oom + * rtc-x1205: Fix alarm set + * rtc: fix CMOS time error after writing /proc/acpi/alarm + * pci: VT3336 can't do MSI either + * Miguel Ojeda has moved + * ext3: add missing unlock to error path in ext3_quota_write() + * ext4: add missing unlock to an error path in ext4_quota_write() + * reiserfs: add missing unlock to an error path in reiserfs_quota_write() + * ecryptfs: remove unnecessary mux from ecryptfs_init_ecryptfs_miscdev() + * lib: taint kernel in common report_bug() WARN path. 
+ * gpio: pca953x (i2c) handles max7310 too + * fsl_diu_fb: fix build with CONFIG_PM=y, plus fix some warnings + * Update taskstats-struct document for scaled time accounting + * cciss: fix regression that no device nodes are created if no logical + drives are configured. + * delay accounting: maintainer update + * Doc*/kernel-parameters.txt: fix stale references + * hdaps: add support for various newer Lenovo thinkpads + * mn10300: export certain arch symbols required to build allmodconfig + * mn10300: provide __ucmpdi2() for MN10300 + * Introduce rculist.h + * man-pages is supported + * ntfs: update help text + * add kernel-doc for simple_read_from_buffer and memory_read_from_buffer + * w100fb: do not depend on SHARPSL + * w100fb: add 80 MHz modeline + * MFD maintainer + * cgroups: document the effect of attaching PID 0 to a cgroup + * spi: fix the read path in spidev + * doc: doc maintainers + * security: filesystem capabilities: fix fragile setuid fixup code + * security: filesystem capabilities: fix CAP_SETPCAP handling + * Alpha Linux kernel fails with inconsistent kallsyms data + * cpusets: document proc status cpus and mems allowed lists + * MAINTAINERS: update the email address of Andreas Dilger + * cciss: read config to obtain max outstanding commands per controller + * olpc: sdhci: add quirk for the Marvell CaFe's vdd/powerup issue + * olpc: sdhci: add quirk for the Marvell CaFe's interrupt timeout + * cpumask: introduce new APIs + * mm: switch node meminfo Active & Inactive pages to Kbytes + * Update MAINTAINERS file for the TPM device driver + * devcgroup: fix odd behaviour when writing 'a' to devices.allow + * doc: document the relax_domain_level kernel boot argument + * mmc: don't use DMA on newer ENE controllers + * mempolicy: mask off internal flags for userspace API + * x86 ACPI: normalize segment descriptor register on resume + * x86 ACPI: fix resume from suspend to RAM on uniprocessor x86-64 + * softlockup: print a module list on being stuck + * ide: fix hwif->gendev refcounting + * ide: ide_unregister() warm-plug bugfix + * ide: ide_unregister() locking bugfix + * ahci: give another shot at clearing all bits in irq_stat + * Fix clear_refs_write() use of struct mm_walk + * Move _RET_IP_ and _THIS_IP_ to include/linux/kernel.h + * Fix pagemap_read() use of struct mm_walk + * Linux 2.6.26-rc9 + * Revert "USB: don't explicitly reenable root-hub status interrupts" + * Revert "PCI: Correct last two HP entries in the bfsort whitelist" + * iwlwifi: fix incorrect 5GHz rates reported in monitor mode + * iwlwifi: drop skb silently for Tx request in monitor mode + * libertas: support USB persistence on suspend/resume (resend) + * tcp: net/ipv4/tcp.c needs linux/scatterlist.h + * tcp: fix a size_t < 0 comparison in tcp_read_sock + * bridge: fix use-after-free in br_cleanup_bridges() + * Add missing skb->dev assignment in Frame Relay RX code + * forcedeth: fix lockdep warning on ethtool -s + * ehea: fix might sleep problem + * ehea: add MODULE_DEVICE_TABLE + * ehea: fix race condition + * ehea: Access iph->tot_len with correct endianness + * pasemi_mac: Access iph->tot_len with correct endianness + * ibm_newemac: Fixes kernel crashes when speed of cable connected changes + * ibm_newemac: Fixes entry of short packets + * fs_enet: restore promiscuous and multicast settings in restart() + * can: add sanity checks + * x86: KVM guest: Add memory clobber to hypercalls + * KVM: IOAPIC: Fix level-triggered irq injection hang + * [SCSI] erase invalid data returned by device + * pxamci: fix 
+ byte aligned DMA transfers
+ * vsprintf: split out '%s' handling logic
+ * vsprintf: split out '%p' handling logic
+ * vsprintf: add infrastructure support for extended '%p' specifiers
+ * vsprintf: add support for '%pS' and '%pF' pointer formats
+ * powerpc: Fix unterminated of_device_id array in legacy_serial.c
+ * [UML] fix gcc ICEs and unresolved externs
+ * ocfs2/dlm: Fixes oops in dlm_new_lockres()
+ * hostap_cs: correct poor NULL checks in suspend/resume routines
+ * drivers/net/wireless/iwlwifi/iwl-3945.c Fix type issue on 64bit
+ * mac80211: move netif_carrier_on to after
+ ieee80211_bss_info_change_notify
+ * mac80211: Only flush workqueue when last interface was removed
+ * zd1211rw: add ID for AirTies WUS-201
+ * ssb-pcicore: Fix IRQ-vector init on embedded devices
+ * mac80211: don't report selected IBSS when not found
+ * crypto: tcrypt - Fix memory leak in test_cipher
+ * sctp: Mark the tsn as received after all allocations finish
+ * [S390] protect _PAGE_SPECIAL bit against mprotect
+ * irda: via-ircc proper dma freeing
+ * irda: New device ID for nsc-ircc
+ * irda: Fix netlink error path return value
+ * [SCSI] mptspi: fix oops in mptspi_dv_renegotiate_work()
+ * Correct hash flushing from huge_ptep_set_wrprotect()
+ * ide: add __ide_default_irq() inline helper
+ * palm_bk3710: fix IDECLK period calculation
+ * it8213: fix return value in it8213_init_one()
+ * [MIPS] Atlas, decstation: Fix section mismatches triggered by
+ defconfigs
+ * [MIPS] Fix 32bit kernels on R4k with 128 byte cache line size
+ * NFS: Fix readdir cache invalidation
+ * SUNRPC: Fix a double-free in rpcbind
+ * SUNRPC: Fix an rpcbind breakage for the case of IPv6 lookups
+ * reiserfs: discard prealloc in reiserfs_delete_inode
+ * Fix broken fix for fsl-diu-db
+ * RDMA/cxgb3: Fix regression caused by class_device -> device conversion
+ * ipv6: fix race between ipv6_del_addr and DAD timer
+ * sctp: Add documentation for sctp sysctl variable
+ * kernel/printk.c: Made printk_recursion_bug_msg static.
+ * powerpc: Add missing reference to coherent_dma_mask
+ * rc80211_pid: Fix fast_start parameter handling
+ * rt2x00: Disable synchronization during initialization
+ * zd1211rw: stop beacons on remove_interface
+ * libertas: fix memory alignment problems on the blackfin
+ * netfilter: nf_conntrack_tcp: fix endless loop
+ * netfilter: nf_nat_snmp_basic: fix a range check in NAT for SNMP
+ * md: ensure all blocks are uptodate or locked when syncing
+ * sched: fix cpu hotplug
+ * x86: fix /dev/mem compatibility under PAT
+ * crypto: chainiv - Invoke completion function
+ * ocfs2: Fix flags in ocfs2_file_lock
+ * kernel/kprobes.c: Made kprobe_blacklist static.
+ * arch/x86/kernel/.gitignore: Added vmlinux.lds to .gitignore file
+ because it shouldn't be tracked.
+ * ftrace: Documentation + * Fix PREEMPT_RCU without HOTPLUG_CPU + * sched: fix cpu hotplug, cleanup + * exec: fix stack excutability without PT_GNU_STACK + * slub: Fix use-after-preempt of per-CPU data structure + * Documentation: clarify tcp_{r,w}mem sysctl docs + * ip: sysctl documentation cleanup + * tcp: correct kcalloc usage + * ipv4: fib_trie: Fix lookup error return + * netlabel: netlink_unicast calls kfree_skb on error path by itself + * ipv6: missed namespace context in ipv6_rthdr_rcv + * xfrm: Add a XFRM_STATE_AF_UNSPEC flag to xfrm_usersa_info + * tun: Persistent devices can get stuck in xoff state + * tpm: add Intel TPM TIS device HID + * rapidio: fix device reference counting + * Fix name of Russell King in various comments + * rtc: fix reported IRQ rate for when HPET is enabled + * libata-acpi: filter out DIPM enable + * Added Targa Visionary 1000 IDE adapter to pata_sis.c + * libata-acpi: don't call sleeping function from invalid context + * Fix reference counting race on log buffers + * [SCSI] ipr: Fix HDIO_GET_IDENTITY oops for SATA devices + * IPMI: return correct value from ipmi_write + * x86: fix ldt limit for 64 bit + * [SCSI] fusion: default MSI to disabled for SPI and FC controllers + * [SCSI] bsg: fix oops on remove + * drivers/char/pcmcia/ipwireless/hardware.c fix resource leak + * drivers/isdn/i4l/isdn_common.c fix small resource leak + * fbdev: bugfix for multiprocess defio + * serial8250: sanity check nr_uarts on all paths. + * ov7670: clean up ov7670_read semantics + * rtc-fm3130: fix chip naming + * rtc-pcf8563: add chip id + * OProfile kernel maintainership changes + * frv: fix irqs_disabled() to return an int, not an unsigned long + * cifs: fix inode leak in cifs_get_inode_info_unix + * cifs: fix wksidarr declaration to be big-endian friendly + * cpusets, hotplug, scheduler: fix scheduler domain breakage + * Documentation/HOWTO: correct wrong kernel bugzilla FAQ URL + * devcgroup: always show positive major/minor num + * devcgroup: fix permission check when adding entry to child cgroup + * Linux 2.6.26 + + -- Ben Collins Mon, 14 Jul 2008 13:41:50 -0400 + +linux (2.6.26-3.9) intrepid; urgency=low + + * abi: Add dca and ioatdma to modules.ignore + + [ 2.6.26-3.8 ] + + [ Ben Collins ] + + * ubuntu: Add heci driver 3.2.0.24 + * ubuntu: Add heci to kconfig/kbuild + * config: Enable heci module on all flavours + * dm-bbr: Update to get it to compile with 2.6.26 + * config: Enable dm-bbr + * ubuntu: Add some media drivers + * config: Enable misc media drivers + * udeb: Switch to uvesafb in fb-modules + * abi: Add more modules to ignore (known) + + [ 2.6.26-3.7 ] + + [Amit Kucheria] + + * SAUCE: make fc transport removal of target configurable + - LP: #163075 + * SAUCE: pm: Config option to disable handling of console during + suspend/resume + + [Ben Collins] + + * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps + * SAUCE: irda: Default to dongle type 9 on IBM hardware + * SAUCE: tulip: Let dmfe handle davicom on non-sparc + * SAUCE: tulip: Define ULI PCI ID's + * SAUCE: version: Implement version_signature proc file. 
+ * build: Cleanup arches + * build: Remove remnants of unused binary-custom infrastructure + * build: Remove disable_d_i (not needed) and cleanup ppa build stuff + * ubuntu: New modules, acer-acpi + * build: Remove -virtual, and rebuild configs + * ubuntu: Add drbd module + * acer-acpi: Fix makefile + * x86/Kconfig: Fix missing quote for ubuntu Kconfig source + * ubuntu: Add iscsitarget module + * ubuntu: Added Amiga FS driver + * ubuntu: Add squashfs driver + * ubuntu: Remove asfs (Amiga FS). Need to be in linux-ports instead + * squashfs: Move headers to real include directory + * build/configs: The Great Config Consistency Check of 2008 + * ubuntu: Move third-party includes to ubuntu/include + * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Add dm-radi4-5 driver + * build: Add CONFIG_DEBUG_SECTION_MISMATCH=y to get old style warnings + from build + * ubuntu/Makefile: Fixup dm-raid4-5 and add kludge for kbuild + * squashfs: Fixes for VFS changes + * ubuntu/dm-raid4-5: Fixups for moved/renamed headers/functions in core + md + * ubuntu: Add ndiswrapper driver + * d-i: Update module listings + * build: Disable xd block device (ancient) + * ndiswrapper: Fixup makefile + * d-i: Remove efi-modules. The only module, efivars, is built-in + * build: Remove install-source, obsolete and caused build failure + * Ubuntu-2.6.26-1.3 + * build: linux-doc rules got broken when disabling html side. Fixed now. + * Ubuntu-2.6.26-1.4 + * x86: Update to -rc6 allows CONFIG_PCI_OLPC to work with PCI_GOANY + * d-i: Make virtio-ring optional (it's built-in on i386) + * Ubuntu-2.6.26-1.4 + * Ubuntu-2.6.26-1.5 + * config: Enable DVB devices + * ubuntu/aufs: Make aufs a bool config, since it needs to be built-in + * config: Build aufs into the kernels + * build: Fix arguments passed to link-headers script + * config: Disable early printk + * d-i: Move isofs to storage-core and kill st (scsi tape) from list + * config: Enable non-promiscuous access to /dev/mem + * x86: Add option to disable decompression info messages + * config: Enable no-bz-chatter config options + * build: Re-add linux-source package + * d-i: Re-add socket-modules. Accidentally removed + - LP: #241295 + * Ubuntu-2.6.26-2.6 + * Use makedumpfile to generate a vmcoreinfo file. + * build: Build-Depend on makedumpfile for vmcoreinfo generation + * build: Remove debug print from git-ubuntu-log + * Updated configs for -rc7 + * build: postinst, do not call depmod with -F + * config: Enable rtc-cmos as a built-in driver. + * control: Provide ndiswrapper-modules-1.9 + * build: Generate vmcoreinfo in image build for crashdumps without debug + image + * config: Disable vesafb, since we'll prefer uvesafb + * build: Copy uvesafb module to initrd mod directory + * abi-check: New, more robust script + * config: Enable heap randomization by default + * abi-check: Cleanup output and call with perl (not $SHELL) + * abi: Ignore missing vesafb (known) + * config: Disable pcspkr (in favor of snd-pcsp) + * swap: Add notify_swap_entry_free callback for compcache + * compcache: Added ram backed compressed swap module + * ubuntu: Enable kbuild and kconfig for compcache + * config: Enable compcache and tlsf allocator as modules + * config: Updated for -rc8. 
Disables XEN on i386 + * config: Switch i386-server to 64G, enable PAE, 64-bit res, and XEN + * ubuntu: Add misc drivers from hardy lum + * ubuntu: Enable build of misc/ subdir + * config: Enable misc drivers + * aufs: Fix warning about single non-string-literal arg to printf style + function + * drivers: Remove some duplicate device entries in various modules + * config: Disable some duplicate drivers + * keyspan: Remove duplicate device ID's + * check-aliases: Cleanup output, and fix rolling checks + * ubuntu: Disable dm-bbr for now + * dm-bbr: First cut at forward portiong. Still needs work. + * ubuntu: Disable dm-bbr in kbuild/kconfig + + [Chuck Short] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + - LP: #175834 + * SAUCE: [USB]: add ASUS LCM to the blacklist + + [Colin Ian King] + + * SAUCE: airprime.c supports more devices + - LP: #208250 + * SAUCE: Enable speedstep for sonoma processors. + - LP: #132271 + * Add dm-loop + * Add dm-loop BOM + + [Kyle McMartin] + + * SAUCE: fix orinoco_cs oops + + [Mario Limonciello] + + * SAUCE: Enable Reset and SCO workaround on Dell 410 BT adapter + + [Matthew Garrett] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [Phillip Lougher] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [Stefan Bader] + + * SAUCE: Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: Always use SCO protocol (disable eSCO support) Bug: #39414 + * SAUCE: mmc: Increase power_up deleay to fix TI readers OriginalAuthor: + Pascal Terjan Bug: #137686 + * SAUCE: Add blacklist support to fix Belkin bluetooth dongle. Bug: + #140511 + * SAUCE: Lower warning level of pci resource allocation messages. Bug: + 159241 + * SAUCE: Lower message level for PCI memory and I/O allocation. + - LP: #159241 + * Modify log generation to catch bug numbers when adding with git-am. + + [Tim Gardner] + + * Added the debian directory. Ignore: yes + * Add support for UBUNTUINCLUDE Ignore: yes + * LUM headers go in /usr/src Ignore: yes + * First pass at 2.6.25 configs Ignore: yes + * i386 -generic builds. Ignore: yes + * SAUCE: Increase CONFIG_IDE_MAX_HWIFS to 8 (from 4) + * SAUCE: Add extra headers to linux-libc-dev OriginalAuthor: Soren Hansen + OriginalLocation: + https://lists.ubuntu.com/archives/kernel-team/2007-November/001891.html + * Set CONFIG_DEVKMEM=n Ignore: yes + * Enabled ALSA and CGROUPS for i386 Ignore: yes + * Enabled amd64 configs. Ignore: yes + * CONFIG_STANDALONE=n Ignore: yes + * CONFIG_BLK_DEV_4DRIVES=n for i386 Ignore: yes + * CONFIG: CONFIG_DEFAULT_RELATIME=y for all flavours. Ignore: yes + * Set CONFIG_EDD_OFF=y Ignore: yes + * SAUCE: Blacklist Bluetooth Dell Wireless 370 for SCO MTU + OriginalAuthor: Mario Limonciello Bug: + #209715 + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: frame buffer regression - screen blank except for blinking + cursor after fbcon vtswitch OriginalAuthor: Matthew Garrett + Bug: #201591 + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + OriginalAuthor: Klaus S. Madsen + OriginalAuthor: Chuck Short + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: orinoco_cs.ko missing + * Set CONFIG_FB_VESA=m for i386/amd64 Ignore: yes + * Set CONFIG_PM_DISABLE_CONSOLE=y for all flavours Ignore: yes + * Thorough review of amd64 -generic config Ignore: yes + * Build PPA packages for Hardy until the Intrepid archive is opened. 
+ * Deleted obsolete flavours Ignore: yes + * Don't build docs for PPA Ignore: yes + * Build all standard packages in PPA. Ignore: yes + * Remove duplicate USB ids + * SAUCE: DVB-USB UMT-010 driver oops on install Bug: #115284 + * Update configs after rebase to 2.6.26-rc1 Ignore: yes + * Update configs after rebase Ignore: yes + * Disable V4L until the build issues get ironed out. Ignore: yes + * Update configs after rebase. Ignore: yes + * Another device enable pass Ignore: yes + * Update configs after merge. Ignore: yes + * SAUCE: fn key doesn't work in hardy with macbook pro fourth generation + (4,1) + - LP: #207127 + * Enabled CONFIG_CIFS_DFS_UPCALL=y and CONFIG_CIFS_UPCALL=y + - LP: #236830 + + [Upstream Kernel Changes] + + * Revert "[WATCHDOG] hpwdt: Add CFLAGS to get driver working" + * mac80211: detect driver tx bugs + * hwmon: (lm85) Fix function RANGE_TO_REG() + * hwmon: (adt7473) Initialize max_duty_at_overheat before use + * hwmon: Update the sysfs interface documentation + * hwmon: (abituguru3) Identify Abit AW8D board as such + * hwmon: (w83791d) new maintainer + * hwmon: (abituguru3) update driver detection + * hwmon: (lm75) sensor reading bugfix + * ipv6: Remove options header when setsockopt's optlen is 0 + * ipv6: Drop packets for loopback address from outside of the box. + * sched: rt: dont stop the period timer when there are tasks wanting to + run + * sched: fix wait_for_completion_timeout() spurious failure under heavy + load + * x86: fix NULL pointer deref in __switch_to + * xen: Use wmb instead of rmb in xen_evtchn_do_upcall(). + * xen: mask unwanted pte bits in __supported_pte_mask + * xen: don't drop NX bit + * sched: refactor wait_for_completion_timeout() + * Ext4: Fix online resize block group descriptor corruption + * [IA64] SN2: security hole in sn2_ptc_proc_write + * alpha: fix module load failures on smp (bug #10926) + * alpha: link failure fix + * alpha: fix compile failures with gcc-4.3 (bug #10438) + * alpha: resurrect Cypress IDE quirk + * pppoe: warning fix + * sctp: Make sure N * sizeof(union sctp_addr) does not overflow. + * netns: Don't receive new packets in a dead network namespace. + * Add return value to reserve_bootmem_node() + * Slab: Fix memory leak in fallback_alloc() + * Fix performance regression on lmbench select benchmark + * ALSA: aw2 - Fix Oops at initialization + * ALSA: sb - Fix wrong assertions + * futexes: fix fault handling in futex_lock_pi + * IB/mthca: Clear ICM pages before handing to FW + * tty_driver: Update required method documentation + * removed unused var real_tty on n_tty_ioctl() + * Fix ZERO_PAGE breakage with vmware + * mm: fix race in COW logic + * NFS: Reduce the NFS mount code stack usage. 
+ * NFS: Fix filehandle size comparisons in the mount code + * NFS: nfs_updatepage(): don't mark page as dirty if an error occurred + * alpha: fix compile error in arch/alpha/mm/init.c + * KVM: Fix race between timer migration and vcpu migration + * KVM: close timer injection race window in __vcpu_run + * KVM: MMU: Fix rmap_write_protect() hugepage iteration bug + * KVM: MMU: large page update_pte issue with non-PAE 32-bit guests + (resend) + * KVM: MMU: Fix oops on guest userspace access to guest pagetable + * KVM: ioapic: fix lost interrupt when changing a device's irq + * KVM: VMX: Fix host msr corruption with preemption enabled + * [GFS2] BUG: unable to handle kernel paging request at ffff81002690e000 + * xen: remove support for non-PAE 32-bit + * kgdb: documentation update - remove kgdboe + * kgdb: sparse fix + * [IA64] Fix boot failure on ia64/sn2 + * [IA64] Handle count==0 in sn2_ptc_proc_write() + * [IA64] Eliminate NULL test after alloc_bootmem in iosapic_alloc_rte() + * [GFS2] fix gfs2 block allocation (cleaned up) + * x86: Add structs and functions for paravirt clocksource + * x86: Make xen use the paravirt clocksource structs and functions + * KVM: Make kvm host use the paravirt clocksource structs + * x86: KVM guest: Use the paravirt clocksource structs and functions + * KVM: Remove now unused structs from kvm_para.h + * enable bus mastering on i915 at resume time + * Linux 2.6.26-rc8 + * # Ubuntu external driver commit. + * # Ubuntu commit template. + + -- Ben Collins Sat, 21 Jun 2008 09:05:15 -0400 + +linux (2.6.26-2.6) intrepid; urgency=low + + [Ben Collins] + + * Revert "SAUCE: Export symbols for aufs (in lum) (not needed) + * config: Enable DVB devices + * ubuntu/aufs: Make aufs a bool config, since it needs to be built-in + * config: Build aufs into the kernels + * build: Fix arguments passed to link-headers script + * config: Disable early printk + * d-i: Move isofs to storage-core and kill st (scsi tape) from list + * config: Enable non-promiscuous access to /dev/mem + * x86: Add option to disable decompression info messages + * config: Enable no-bz-chatter config options + * build: Re-add linux-source package + * d-i: Re-add socket-modules. Accidentally removed + - LP: #241295 + + [Colin Ian King] + + * Add dm-loop + + [Tim Gardner] + + * Revert "SAUCE: USB bluetooth device 0x0e5e:0x6622 floods errors to + syslog (merged upstream) + + -- Ben Collins Mon, 16 Jun 2008 10:56:01 -0400 + +linux (2.6.26-1.5) intrepid; urgency=low + + * d-i: Make virtio-ring optional (it's built-in on i386) + * Rebased on 2.6.26-rc6 + + [Ubuntu-2.6.26-1.4 Changes below] + + * build: linux-doc rules got broken when disabling html side. Fixed now. + + [Ubuntu-2.6.26-1.3 Changes below] + + * build: Remove install-source, obsolete and caused build failure + + [Ubuntu-2.6.26-1.2 Changes below] + + * Remove efi-modules from d-i module list (efivars is built-in). Caused a + build failure. + * Patch to arch/x86/xen/time.c to remove __divdi3 usage (build failure on + i386). 
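A note on the __divdi3 entry just above: on i386, gcc lowers a plain C division of 64-bit operands into a call to libgcc's __divdi3, which the kernel does not link, so such code builds on x86_64 but fails at link time on 32-bit. The conventional cure, and presumably the shape of the arch/x86/xen/time.c fix, is the kernel's do_div() macro. A minimal sketch, with a hypothetical helper name:

	#include <linux/types.h>
	#include <asm/div64.h>

	/* Divide a 64-bit count by a 32-bit rate without emitting a
	 * __divdi3 call on i386. do_div() divides "ns" in place and
	 * returns the remainder; writing "ns / rate" here would pull
	 * in the libgcc routine the kernel does not provide. */
	static inline u64 div_ns_by_rate(u64 ns, u32 rate)
	{
		do_div(ns, rate);
		return ns;	/* quotient */
	}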
+ + [Ubuntu-2.6.26-1.1 Changes below] + + [Amit Kucheria] + + * SAUCE: make fc transport removal of target configurable + * SAUCE: Add AGP support for Radeon Mobility 9000 chipset + * SAUCE: pm: Config option to disable handling of console during + suspend/resume + + [Ben Collins] + + * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps + * SAUCE: irda: Default to dongle type 9 on IBM hardware + * SAUCE: tulip: Let dmfe handle davicom on non-sparc + * SAUCE: tulip: Define ULI PCI ID's + * SAUCE: version: Implement version_signature proc file. + * build: Remove remnants of unused binary-custom infrastructure + * mmc_block: Fix bad allocation on 64-bit (zero len array) + * ubuntu: New modules, acer-acpi + * build: Remove -virtual, and rebuild configs + * ubuntu: Add drbd module + * ubuntu: Add iscsitarget module + * ubuntu: Add squashfs driver + * build/configs: The Great Config Consistency Check of 2008 + * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Add dm-radi4-5 driver + * build: Add CONFIG_DEBUG_SECTION_MISMATCH=y to get old style warnings + from build + * squashfs: Fixes for VFS changes + * ubuntu/dm-raid4-5: Fixups for moved/renamed headers/functions in core + md + * ubuntu: Add ndiswrapper driver + * d-i: Update module listings + + [Chuck Short] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + * SAUCE: [USB]: add ASUS LCM to the blacklist + + [Colin Ian King] + + * SAUCE: Enable speedstep for sonoma processors. + * SAUCE: airprime.c supports more devices + + [Kyle McMartin] + + * SAUCE: fix orinoco_cs oops + + [Mario Limonciello] + + * SAUCE: Enable Reset and SCO workaround on Dell 410 BT adapter + + [Matthew Garrett] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [Phillip Lougher] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [Stefan Bader] + + * SAUCE: Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: Always use SCO protocol (disable eSCO support) Bug: #39414 + * SAUCE: mmc: Increase power_up deleay to fix TI readers + * SAUCE: Add blacklist support to fix Belkin bluetooth dongle. + * SAUCE: Lower warning level of pci resource allocation messages. + * SAUCE: Lower message level for PCI memory and I/O allocation. + - LP: #159241 + * Modify log generation to catch bug numbers when adding with git-am. + + [Tim Gardner] + + * SAUCE: hdaps module does not load on Thinkpad T61P + * SAUCE: Add extra headers to linux-libc-dev + * SAUCE: Export symbols for aufs (in lum). 
+ * SAUCE: USB bluetooth device 0x0e5e:0x6622 floods errors to syslog + * SAUCE: Blacklist Bluetooth Dell Wireless 370 for SCO MTU + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: frame buffer regression - screen blank except for blinking + cursor after fbcon vtswitch + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: Modualrize vesafb + * SAUCE: DVB-USB UMT-010 driver oops on install + * SAUCE: fn key doesn't work in hardy with macbook pro fourth generation + (4,1) + - LP: #207127 + + -- Ben Collins Wed, 11 Jun 2008 05:28:35 -0400 --- linux-ec2-2.6.32.orig/debian.ec2/control +++ linux-ec2-2.6.32/debian.ec2/control @@ -0,0 +1,112 @@ +Source: linux-ec2 +Section: devel +Priority: optional +Maintainer: Ubuntu Kernel Team +Standards-Version: 3.8.4.0 +Build-Depends: debhelper (>= 5), cpio, module-init-tools, kernel-wedge (>= 2.24ubuntu1), makedumpfile [amd64 i386], gawk +Build-Depends-Indep: xmlto, docbook-utils, ghostscript, transfig, bzip2, sharutils +Build-Conflicts: findutils (= 4.4.1-1ubuntu1) +Vcs-Git: http://kernel.ubuntu.com/git-repos/ubuntu/ubuntu-karmic.git + +Package: linux-ec2-source-2.6.32 +Architecture: all +Section: devel +Priority: optional +Provides: linux-ec2-source, linux-ec2-source-2.6 +Depends: ${misc:Depends}, binutils, bzip2, coreutils | fileutils (>= 4.0) +Recommends: libc-dev, gcc, make +Suggests: libncurses-dev | ncurses-dev, kernel-package, libqt3-dev +Description: Linux kernel source for version 2.6.32 with Ubuntu patches + This package provides the source code for the Linux kernel version + 2.6.32. + . + This package is mainly meant for other packages to use, in order to build + custom flavours. + . + If you wish to use this package to create a custom Linux kernel, then it + is suggested that you investigate the package kernel-package, which has + been designed to ease the task of creating kernel image packages. + . + If you are simply trying to build third-party modules for your kernel, + you do not want this package. Install the appropriate linux-headers + package instead. + +Package: linux-ec2-doc +Architecture: all +Section: doc +Priority: optional +Depends: ${misc:Depends} +Conflicts: linux-ec2-doc-2.6 +Replaces: linux-ec2-doc-2.6 +Description: Linux kernel specific documentation for version 2.6.32 + This package provides the various documents in the 2.6.32 kernel + Documentation/ subdirectory. These document kernel subsystems, APIs, device + drivers, and so on. See + /usr/share/doc/linux-ec2-doc/00-INDEX for a list of what is + contained in each file. + +Package: linux-headers-2.6.32-343 +Architecture: all +Section: devel +Priority: optional +Depends: ${misc:Depends}, coreutils | fileutils (>= 4.0) +#Provides: linux-headers, linux-headers-2.6 +Description: Header files related to Linux kernel version 2.6.32 + This package provides kernel header files for version 2.6.32, for sites + that want the latest kernel headers. 
Please read + /usr/share/doc/linux-headers-2.6.32-343/debian.README.gz for details + + +Package: linux-image-2.6.32-343-ec2 +Architecture: i386 amd64 +Section: admin +Priority: optional +Pre-Depends: dpkg (>= 1.10.24) +Provides: linux-image, linux-image-2.6, fuse-module, kvm-api-4, redhat-cluster-modules, ivtv-modules, ndiswrapper-modules-1.9 +Depends: ${misc:Depends}, initramfs-tools (>= 0.36ubuntu6), coreutils | fileutils (>= 4.0), module-init-tools (>= 3.3-pre11-4ubuntu3) +Conflicts: hotplug (<< 0.0.20040105-1) +Recommends: +Suggests: fdutils, linux-ec2-doc-2.6.32 | linux-ec2-source-2.6.32 +Description: Linux kernel image for version 2.6.32 on x86/x86_64 + This package contains the Linux kernel image for version 2.6.32 on + x86/x86_64. + . + Also includes the corresponding System.map file, the modules built by the + packager, and scripts that try to ensure that the system is not left in an + unbootable state after an update. + . + Supports Generic processors. + . + Geared toward desktop systems. + . + You likely do not want to install this package directly. Instead, install + the linux-ec2 meta-package, which will ensure that upgrades work + correctly, and that supporting packages are also installed. + +Package: linux-headers-2.6.32-343-ec2 +Architecture: i386 amd64 +Section: devel +Priority: optional +Depends: ${misc:Depends}, coreutils | fileutils (>= 4.0), linux-headers-2.6.32-343, ${shlibs:Depends} +Description: Linux kernel headers for version 2.6.32 on x86/x86_64 + This package provides kernel header files for version 2.6.32 on + x86/x86_64. + . + This is for sites that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-2.6.32-343/debian.README.gz for details. + +Package: linux-image-2.6.32-343-ec2-dbgsym +Architecture: i386 amd64 +Section: devel +Priority: optional +Depends: ${misc:Depends} +Provides: linux-debug +Description: Linux kernel debug image for version 2.6.32 on x86/x86_64 + This package provides a kernel debug image for version 2.6.32 on + x86/x86_64. + . + This is for sites that wish to debug the kernel. + . + The kernel image contained in this package is NOT meant to boot from. It + is uncompressed, and unstripped. This package also includes the + unstripped modules. --- linux-ec2-2.6.32.orig/debian.ec2/etc/getabis +++ linux-ec2-2.6.32/debian.ec2/etc/getabis @@ -0,0 +1,9 @@ +repo_list=( + "http://archive.ubuntu.com/ubuntu/pool/main/l/linux-ec2" + "http://ports.ubuntu.com/ubuntu-ports/pool/main/l/linux-ec2" + "http://archive.ubuntu.com/ubuntu/pool/universe/l/linux-ec2" + "http://ports.ubuntu.com/ubuntu-ports/pool/universe/l/linux-ec2" +) + +getall amd64 ec2 +getall i386 ec2 --- linux-ec2-2.6.32.orig/debian.ec2/etc/kernelconfig +++ linux-ec2-2.6.32/debian.ec2/etc/kernelconfig @@ -0,0 +1,2 @@ +archs="amd64 i386" +family='ubuntu' --- linux-ec2-2.6.32.orig/debian.ec2/d-i/exclude-modules.sparc +++ linux-ec2-2.6.32/debian.ec2/d-i/exclude-modules.sparc @@ -0,0 +1,10 @@ +efi-modules +nic-pcmcia-modules +pcmcia-modules +pcmcia-storage-modules +irda-modules +floppy-modules +fb-modules +acpi-modules +virtio-modules +char-modules --- linux-ec2-2.6.32.orig/debian.ec2/d-i/package-list +++ linux-ec2-2.6.32/debian.ec2/d-i/package-list @@ -0,0 +1,177 @@ +Package: kernel-image + +Package: char-modules +Depends: kernel-image +Priority: standard +Description: Character module support + +Package: fat-modules +Depends: kernel-image +Priority: standard +Description: FAT filesystem support + This includes Windows FAT and VFAT support. 
+ +Package: fb-modules +Depends: kernel-image +Priority: standard +Description: Framebuffer modules + +Package: firewire-core-modules +Depends: kernel-image, storage-core-modules +Priority: standard +Description: Firewire (IEEE-1394) Support + +Package: floppy-modules +Depends: kernel-image +Priority: standard +Description: Floppy driver support + +Package: fs-core-modules +Depends: kernel-image +Priority: standard +Provides: jfs-modules, reiserfs-modules, xfs-modules +Description: Base filesystem modules + This includes jfs, reiserfs and xfs. + +Package: fs-secondary-modules +Depends: kernel-image, fat-modules +Priority: standard +Provides: ntfs-modules, hfs-modules +Description: Extra filesystem modules + This includes support for Windows NTFS and MacOS HFS/HFSPlus + +Package: input-modules +Depends: kernel-image, usb-modules +Priority: standard +Description: Support for various input methods + +Package: irda-modules +Depends: kernel-image, nic-shared-modules +Priority: standard +Description: Support for Infrared protocols + +Package: md-modules +Depends: kernel-image +Priority: standard +Description: Multi-device support (raid, device-mapper, lvm) + +Package: nic-modules +Depends: kernel-image, nic-shared-modules, virtio-modules +Priority: standard +Description: Network interface support + +Package: nic-pcmcia-modules +Depends: kernel-image, nic-shared-modules, nic-modules +Priority: standard +Description: PCMCIA network interface support + +Package: nic-usb-modules +Depends: kernel-image, nic-shared-modules, usb-modules +Priority: standard +Description: USB network interface support + +Package: nic-shared-modules +Depends: kernel-image, crypto-modules +Priority: standard +Description: nic shared modules + This package contains modules which support nic modules + +Package: parport-modules +Depends: kernel-image +Priority: standard +Description: Parallel port support + +Package: pata-modules +Depends: kernel-image, storage-core-modules +Priority: standard +Description: PATA support modules + +Package: pcmcia-modules +Depends: kernel-image +Priority: standard +Description: PCMCIA Modules + +Package: pcmcia-storage-modules +Depends: kernel-image, scsi-modules +Priority: standard +Description: PCMCIA storage support + +Package: plip-modules +Depends: kernel-image, nic-shared-modules, parport-modules +Priority: standard +Description: PLIP (parallel port) networking support + +Package: ppp-modules +Depends: kernel-image, nic-shared-modules, serial-modules +Priority: standard +Description: PPP (serial port) networking support + +Package: sata-modules +Depends: kernel-image, storage-core-modules +Priority: standard +Description: SATA storage support + +Package: scsi-modules +Depends: kernel-image, storage-core-modules +Priority: standard +Description: SCSI storage support + +Package: serial-modules +Depends: kernel-image +Priority: standard +Description: Serial port support + +Package: storage-core-modules +Depends: kernel-image +Priority: standard +Provides: loop-modules +Description: Core storage support + Includes core SCSI, LibATA, USB-Storage. Also includes related block + devices for CD, Disk and Tape medium (and IDE Floppy). + +Package: usb-modules +Depends: kernel-image, storage-core-modules +Priority: standard +Description: Core USB support + +Package: nfs-modules +Priority: standard +Depends: kernel-image +Description: NFS filesystem drivers + Includes the NFS client driver, and supporting modules. 
+ +Package: block-modules +Priority: standard +Depends: kernel-image, storage-core-modules, parport-modules, virtio-modules +Description: Block storage devices + This package contains the block storage devices, including DAC960 and + paride. + +Package: message-modules +Priority: standard +Depends: kernel-image, storage-core-modules, scsi-modules +Description: Fusion and i2o storage modules + This package contains the fusion and i2o storage modules. + +Package: crypto-modules +Priority: extra +Depends: kernel-image +Description: Crypto modules + This package contains crypto modules. + +Package: virtio-modules +Priority: standard +Depends: kernel-image +Description: VirtIO Modules + Includes modules for VirtIO (virtual machine, generally kvm guests). + +Package: socket-modules +Depends: kernel-image +Priority: standard +Description: Unix socket support + +Package: mouse-modules +Depends: kernel-image, input-modules, usb-modules +Priority: extra +Description: Mouse support + This package contains mouse drivers for the Linux kernel. --- linux-ec2-2.6.32.orig/debian.ec2/d-i/exclude-modules.powerpc +++ linux-ec2-2.6.32/debian.ec2/d-i/exclude-modules.powerpc @@ -0,0 +1,5 @@ +efi-modules +fb-modules +acpi-modules +virtio-modules +char-modules --- linux-ec2-2.6.32.orig/debian.ec2/d-i/kernel-versions.in +++ linux-ec2-2.6.32/debian.ec2/d-i/kernel-versions.in @@ -0,0 +1,4 @@ +# arch version flavour installedname suffix bdep +#amd64 PKGVER-ABINUM ec2 PKGVER-ABINUM-ec2 - + +#i386 PKGVER-ABINUM ec2 PKGVER-ABINUM-ec2 - --- linux-ec2-2.6.32.orig/debian.ec2/d-i/kernel-versions +++ linux-ec2-2.6.32/debian.ec2/d-i/kernel-versions @@ -0,0 +1,4 @@ +# arch version flavour installedname suffix bdep +#amd64 2.6.32-343 ec2 2.6.32-343-ec2 - + +#i386 2.6.32-343 ec2 2.6.32-343-ec2 - --- linux-ec2-2.6.32.orig/debian.ec2/d-i/exclude-modules.ia64 +++ linux-ec2-2.6.32/debian.ec2/d-i/exclude-modules.ia64 @@ -0,0 +1,5 @@ +irda-modules +floppy-modules +fb-modules +virtio-modules +char-modules --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules-sparc/block-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules-sparc/block-modules @@ -0,0 +1,9 @@ +aoe +cciss +comm +cpqarray ? +DAC960 +nbd +sx8 +umem +virtio_blk ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules-sparc/message-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules-sparc/message-modules @@ -0,0 +1,13 @@ +mptbase +mptctl +mptfc +mptlan +mptsas +mptscsih +mptspi +i2o_block +i2o_bus +i2o_config ? +i2o_core +i2o_proc +i2o_scsi --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules-powerpc/storage-core-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules-powerpc/storage-core-modules @@ -0,0 +1,13 @@ +# Core stacks +usb-storage ? + +# Block level + +# Loop modules +cryptoloop + +# Needs to be here for better cdrom initrd layout +isofs + +ps3stor_lib ? +ps3rom ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules-powerpc/block-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules-powerpc/block-modules @@ -0,0 +1,31 @@ +aoe +aten +bpck +bpck6 ? +cciss +comm +cpqarray ? +DAC960 +dstr +epat +epia +fit2 +fit3 +friq +frpw +kbic +ktti +nbd +on20 +on26 +paride +pcd +pd +pf +pg +ps3disk ? +ps3vram ? +pt +sx8 +umem +virtio_blk ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules-powerpc/message-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules-powerpc/message-modules @@ -0,0 +1,13 @@ +mptbase +mptctl +mptfc +mptlan +mptsas +mptscsih +mptspi +i2o_block +i2o_bus +i2o_config ? 
+i2o_core +i2o_proc +i2o_scsi --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules-powerpc/nic-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules-powerpc/nic-modules @@ -0,0 +1,152 @@ +3c359 ? +3c501 ? +3c503 ? +3c505 ? +3c507 ? +3c509 ? +3c515 ? +3c523 ? +3c527 ? +3c59x ? +8139cp ? +8139too ? +82596 ? +abyss ? +ac3200 ? +adm8211 ? +airo ? +airport ? +amd8111e ? +arc4 ? +arcnet ? +arc-rawmode ? +arc-rimi ? +arlan ? +at1700 ? +atl1 ? +atl1e ? +atl2 ? +atmel ? +atmel_pci ? +b44 ? +bcm43xx ? +bcm43xx-mac80211 ? +bmac ? +bnx2 ? +bnx2x ? +bonding ? +cassini ? +com20020 ? +com20020-pci ? +com90io ? +com90xx ? +cs89x0 ? +de2104x ? +de4x5 ? +de600 ? +de620 ? +defxx ? +depca ? +dl2k ? +dmfe ? +dummy ? +e100 ? +e1000 ? +e1000e ? +e2100 ? +eepro ? +eepro100 ? +eexpress ? +epic100 ? +eql ? +es3210 ? +eth16i ? +ewrk3 ? +fealnx ? +forcedeth ? +igb ? +hamachi ? +hermes ? +hp ? +hp100 ? +hp-plus ? +ibmtr ? +ipddp ? +ipw2100 ? +ipw2200 ? +ipw3945 ? +ixgb ? +lance ? +lanstreamer ? +lasi_82596 ? +lne390 ? +lp486e ? +mace ? +mv643xx_eth ? +myri_sbus ? +natsemi ? +ne ? +ne2 ? +ne2k-pci ? +ne3210 ? +netconsole ? +netxen_nic ? +ni5010 ? +ni52 ? +ni65 ? +niu ? +ns83820 ? +olympic ? +orinoco ? +orinoco_pci ? +orinoco_plx ? +orinoco_tmd ? +pcnet32 ? +prism54 ? +ps3_gelic ? +r8169 ? +rate_control ? +rfc1051 ? +rfc1201 ? +rrunner ? +rt2400 ? +rt2500 ? +rt61pci ? +s2io ? +shaper ? +sis190 ? +sis900 ? +spidernet ? +skfp ? +skge ? +sk98lin ? +sky2 ? +smc9194 ? +smc-ultra ? +smc-ultra32 ? +starfire ? +strip ? +sunbmac ? +sundance ? +sungem ? +sungem_phy ? +sunhme ? +sunlance ? +sunqe ? +sunvnet ? +tg3 ? +tlan ? +tms380tr ? +tmspci ? +tulip ? +tun ? +typhoon ? +uli526x ? +via-rhine ? +via-velocity ? +virtio_net ? +wavelan ? +wd ? +winbond-840 ? +yellowfin ? +znet ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/fb-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/fb-modules @@ -0,0 +1,3 @@ +fbcon +vesafb +vga16fb --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/pata-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/pata-modules @@ -0,0 +1,3 @@ +pata_cs5535 ? +pata_it8213 +pata_ninja32 --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/nic-pcmcia-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/nic-pcmcia-modules @@ -0,0 +1,19 @@ +3c574_cs ? +3c589_cs ? +airo_cs ? +atmel_cs ? +axnet_cs ? +com20020_cs ? +fmvj18x_cs ? +ibmtr_cs ? +netwave_cs ? +nmclan_cs ? +orinoco_cs ? +pcnet_cs ? +ray_cs ? +smc91c92_cs ? +wavelan_cs ? +wl3501_cs ? +xirc2ps_cs ? +xircom_cb ? +xircom_tulip_cb ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/sata-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/sata-modules @@ -0,0 +1,2 @@ + +sata_mv --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/usb-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/usb-modules @@ -0,0 +1,9 @@ +ehci-hcd ? +isp116x-hcd +isp1760 +ohci-hcd ? +r8a66597-hcd +sl811_cs +sl811-hcd +u132-hcd +uhci-hcd ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/crypto-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/crypto-modules @@ -0,0 +1,8 @@ +aes_generic +blowfish +twofish +serpent +sha256_generic +cbc +ecb +crc32c --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/scsi-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/scsi-modules @@ -0,0 +1,114 @@ +# SCSI +raid_class ? +scsi_transport_spi ? +scsi_transport_fc ? +scsi_transport_iscsi ? +scsi_transport_sas ? +iscsi_tcp ? +libiscsi ? +amiga7xx ? +a3000 ? +a2091 ? +gvp11 ? +mvme147 ? +sgiwd93 ? +cyberstorm ? +cyberstormII ? +blz2060 ? +blz1230 ? +fastlane ? +oktagon_esp_mod ? +atari_scsi ? +mac_scsi ? +mac_esp ? +sun3_scsi ? 
+mvme16x ? +bvme6000 ? +sim710 ? +advansys ? +psi240i ? +BusLogic ? +dpt_i2o ? +u14-34f ? +ultrastor ? +aha152x ? +aha1542 ? +aha1740 ? +aic7xxx_old ? +ips ? +fd_mcs ? +fdomain ? +in2000 ? +g_NCR5380 ? +g_NCR5380_mmio ? +NCR53c406a ? +NCR_D700 ? +NCR_Q720_mod ? +sym53c416 ? +qlogicfas408 ? +qla1280 ? +pas16 ? +seagate ? +seagate ? +t128 ? +dmx3191d ? +dtc ? +zalon7xx ? +eata_pio ? +wd7000 ? +mca_53c9x ? +ibmmca ? +eata ? +dc395x ? +tmscsim ? +megaraid ? +atp870u ? +esp ? +gdth ? +initio ? +a100u2w ? +qlogicpti ? +ide-scsi ? +mesh ? +mac53c94 ? +pluto ? +dec_esp ? +3w-xxxx ? +3w-9xxx ? +ppa ? +imm ? +jazz_esp ? +sun3x_esp ? +fcal ? +lasi700 ? +nsp32 ? +ipr ? +hptiop ? +stex ? +osst ? +sg ? +ch ? +scsi_debug ? +aacraid ? +aic7xxx ? +aic79xx ? +aic94xx ? +arcmsr ? +acornscsi_mod ? +arxescsi ? +cumana_1 ? +cumana_2 ? +ecoscsi ? +oak ? +powertec ? +eesox ? +ibmvscsic ? +libsas ? +lpfc ? +megaraid_mm ? +megaraid_mbox ? +megaraid_sas ? +qla2xxx ? +sym53c8xx ? +qla4xxx ? +mvsas ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/fat-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/fat-modules @@ -0,0 +1,7 @@ +fat +vfat + +# Supporting modules ? +nls_cp437 ? +nls_iso8859-1 ? +nls_utf8 ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/storage-core-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/storage-core-modules @@ -0,0 +1,10 @@ +# Core stacks +usb-storage ? + +# Block level + +# Loop modules +cryptoloop + +# Needs to be here for better cdrom initrd layout +isofs --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/pcmcia-storage-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/pcmcia-storage-modules @@ -0,0 +1,6 @@ +pata_pcmcia +qlogic_cs +fdomain_cs +aha152x_cs ? +nsp_cs ? +sym53c500_cs --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/firewire-core-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/firewire-core-modules @@ -0,0 +1,4 @@ +ieee1394 +ohci1394 +sbp2 +eth1394 --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/irda-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/irda-modules @@ -0,0 +1,30 @@ +act200l-sir +actisys-sir +ali-ircc +donauboe ? +esi-sir +girbil-sir +ircomm +ircomm-tty +irda +irda-usb +irlan +irnet +irport ? +irtty-sir +kingsun-sir +ks959-sir +ksdazzle-sir +litelink-sir +ma600-sir +mcp2120-sir +mcs7780 +nsc-ircc +old_belkin-sir +sir-dev +smsc-ircc2 +stir4200 +tekram-sir +via-ircc +vlsi_ir +w83977af_ir --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/block-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/block-modules @@ -0,0 +1,29 @@ +aoe +aten +bpck +bpck6 ? +cciss +comm +cpqarray +DAC960 +dstr +epat +epia +fit2 +fit3 +friq +frpw +kbic +ktti +nbd +on20 +on26 +paride +pcd +pd +pf +pg +pt +sx8 +umem +virtio_blk ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/mouse-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/mouse-modules @@ -0,0 +1,2 @@ +psmouse +usbmouse ? 
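A note on the format shared by all of these d-i module files: they are consumed by kernel-wedge (a build dependency in debian.ec2/control above), one module name per line, with a trailing " ?" marking the module as optional so the udeb build does not abort on flavours where that module was not built. A hypothetical two-line list that requires e1000 but tolerates a missing e1000e would read:

	e1000
	e1000e ?

Names without the marker, such as psmouse in the mouse-modules list above, are hard requirements: kernel-wedge fails the build if they are absent.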
--- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/floppy-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/floppy-modules @@ -0,0 +1 @@ +floppy --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/plip-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/plip-modules @@ -0,0 +1 @@ +plip --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/ppp-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/ppp-modules @@ -0,0 +1,6 @@ +ppp_async +ppp_deflate +ppp_mppe +pppoe +pppox +ppp_synctty --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/fs-core-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/fs-core-modules @@ -0,0 +1,3 @@ +jfs +reiserfs +xfs --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/char-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/char-modules @@ -0,0 +1 @@ +intel-agp --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/input-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/input-modules @@ -0,0 +1,21 @@ +hid-a4tech ? +hid-apple ? +hid-belkin ? +hid-bright ? +hid-cherry ? +hid-chicony ? +hid-cypress ? +hid-dell ? +hid-ezkey ? +hid-gyration ? +hid-logitech ? +hid-microsoft ? +hid-monterey ? +hid-petalynx ? +hid-pl ? +hid-samsung ? +hid-sony ? +hid-sunplus ? +hid-tmff ? +hid-zpff ? +usbhid --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/message-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/message-modules @@ -0,0 +1,13 @@ +mptbase +mptctl +mptfc +mptlan +mptsas +mptscsih +mptspi +i2o_block +i2o_bus +i2o_config +i2o_core +i2o_proc +i2o_scsi --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/serial-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/serial-modules @@ -0,0 +1,3 @@ +generic_serial +serial_cs +synclink_cs --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/nic-usb-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/nic-usb-modules @@ -0,0 +1,11 @@ +catc ? +kaweth ? +pegasus ? +prism2_usb ? +rtl8150 ? +usbnet ? +zd1211rw ? +zd1201 ? +rt2500usb ? +rt73usb ? +rt2570 ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/nfs-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/nfs-modules @@ -0,0 +1,4 @@ +nfs +nfs_acl ? +lockd +sunrpc --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/virtio-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/virtio-modules @@ -0,0 +1,4 @@ +virtio_balloon +virtio_pci +virtio_ring ? +virtio-rng --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/md-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/md-modules @@ -0,0 +1,13 @@ +dm-crypt +dm-zero +faulty +linear +multipath +raid0 +raid1 +raid10 +raid456 + +# Extras +dm-raid4-5 ? +dm-loop ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/nic-shared-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/nic-shared-modules @@ -0,0 +1,22 @@ +# PHY +8390 +mii + +# CRC modules +crc-ccitt +crc-itu-t +libcrc32c + +# mac80211 stuff +mac80211 +cfg80211 + +# rt2x00 lib (since rt2x00 is split across usb/pci/cb +rt2x00lib + +# Wireless 802.11 modules +lib80211 +cfg80211 +lib80211_crypt_ccmp +lib80211_crypt_tkip +lib80211_crypt_wep --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/nic-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/nic-modules @@ -0,0 +1,152 @@ +3c359 ? +3c501 ? +3c503 ? +3c505 ? +3c507 ? +3c509 ? +3c515 ? +3c523 ? +3c527 ? +3c59x ? +8139cp ? +8139too ? +82596 ? +abyss ? +ac3200 ? +adm8211 ? +airo ? +airport ? +amd8111e ? +arc4 ? +arcnet ? +arc-rawmode ? +arc-rimi ? +arlan ? +at1700 ? +atl1 ? +atl1e ? +atl2 ? +atmel ? +atmel_pci ? +b44 ? +bcm43xx ? +bcm43xx-mac80211 ? +bmac ? +bnx2 ? +bnx2x ? +bonding ? +cassini ? +com20020 ? +com20020-pci ? +com90io ? +com90xx ? +cs89x0 ? +de2104x ? +de4x5 ? +de600 ? 
+de620 ? +defxx ? +depca ? +dl2k ? +dmfe ? +dummy ? +e100 ? +e1000 ? +e1000e ? +e2100 ? +eepro ? +eepro100 ? +eexpress ? +epic100 ? +eql ? +es3210 ? +eth16i ? +ewrk3 ? +fealnx ? +forcedeth ? +igb ? +ps3_gelic ? +hamachi ? +hermes ? +hp ? +hp100 ? +hp-plus ? +ibmtr ? +ipddp ? +ipw2100 ? +ipw2200 ? +ipw3945 ? +ixgb ? +lance ? +lanstreamer ? +lasi_82596 ? +lne390 ? +lp486e ? +mace ? +mv643xx_eth ? +myri_sbus ? +natsemi ? +ne ? +ne2 ? +ne2k-pci ? +ne3210 ? +netconsole ? +netxen_nic ? +ni5010 ? +ni52 ? +ni65 ? +niu ? +ns83820 ? +olympic ? +orinoco ? +orinoco_pci ? +orinoco_plx ? +orinoco_tmd ? +pcnet32 ? +prism54 ? +r8169 ? +rate_control ? +rfc1051 ? +rfc1201 ? +rrunner ? +rt2400 ? +rt2500 ? +rt61pci ? +s2io ? +shaper ? +sis190 ? +sis900 ? +spidernet ? +skfp ? +skge ? +sk98lin ? +sky2 ? +smc9194 ? +smc-ultra ? +smc-ultra32 ? +starfire ? +strip ? +sunbmac ? +sundance ? +sungem ? +sungem_phy ? +sunhme ? +sunlance ? +sunqe ? +sunvnet ? +tg3 ? +tlan ? +tms380tr ? +tmspci ? +tulip ? +tun ? +typhoon ? +uli526x ? +via-rhine ? +via-velocity ? +virtio_net ? +wavelan ? +wd ? +winbond-840 ? +yellowfin ? +znet ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/pcmcia-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/pcmcia-modules @@ -0,0 +1,8 @@ +i82092 +i82365 ? +pcmcia +pcmcia_core +pd6729 +rsrc_nonstatic +tcic ? +yenta_socket --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/parport-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/parport-modules @@ -0,0 +1,2 @@ +parport +parport_pc --- linux-ec2-2.6.32.orig/debian.ec2/d-i/modules/fs-secondary-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/modules/fs-secondary-modules @@ -0,0 +1,4 @@ +fuse ? +ntfs ? +hfs ? +hfsplus ? --- linux-ec2-2.6.32.orig/debian.ec2/d-i/firmware/README.txt +++ linux-ec2-2.6.32/debian.ec2/d-i/firmware/README.txt @@ -0,0 +1,4 @@ +# +# Place the names of udeb modules into this directory that require +# runtime firmware. +# --- linux-ec2-2.6.32.orig/debian.ec2/d-i/firmware/nic-modules +++ linux-ec2-2.6.32/debian.ec2/d-i/firmware/nic-modules @@ -0,0 +1,6 @@ +bnx2/bnx2-mips-06-4.6.16.fw ? +bnx2/bnx2-mips-09-4.6.17.fw ? +bnx2/bnx2-rv2p-06-4.6.16.fw ? +bnx2/bnx2-rv2p-09-4.6.15.fw ? +bnx2x-e1-4.8.53.0.fw ? +bnx2x-e1h-4.8.53.0.fw ? --- linux-ec2-2.6.32.orig/debian.ec2/rules.d/i386.mk +++ linux-ec2-2.6.32/debian.ec2/rules.d/i386.mk @@ -0,0 +1,12 @@ +build_arch = x86 +header_arch = x86 +asm_link = x86 +defconfig = defconfig +flavours = ec2 +build_image = vmlinuz +kernel_file = arch/$(build_arch)/boot/vmlinuz +install_file = vmlinuz +loader = grub + +do_libc_dev_package = false +do_tools = false --- linux-ec2-2.6.32.orig/debian.ec2/rules.d/amd64.mk +++ linux-ec2-2.6.32/debian.ec2/rules.d/amd64.mk @@ -0,0 +1,12 @@ +build_arch = x86_64 +header_arch = $(build_arch) +asm_link = $(build_arch) +defconfig = defconfig +flavours = ec2 +build_image = vmlinuz +kernel_file = arch/$(build_arch)/boot/vmlinuz +install_file = vmlinuz +loader = grub + +do_libc_dev_package = false +do_tools = false --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-x86_64-pgd-alloc-order +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-x86_64-pgd-alloc-order @@ -0,0 +1,337 @@ +From: jbeulich@novell.com +Subject: don't require order-1 allocations for pgd-s +Patch-mainline: obsolete + +At the same time remove the useless user mode pair of init_level4_pgt. 
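In short: Xen kernels of this vintage allocated the kernel and user halves of an x86_64 pgd as one physically contiguous order-1 page pair, which is fragile under memory fragmentation. The hunks below switch to two independent order-0 pages and record the user half's address in the struct page of the kernel half, which is what the reworked __user_pgd() helper reads back. The allocation side, condensed from the arch/x86/mm/pgtable-xen.c hunk below with comments added:

	static inline pgd_t *user_pgd_alloc(pgd_t *pgd)
	{
	#ifdef CONFIG_X86_64
		if (pgd) {
			/* Separate order-0 page for the user half. */
			pgd_t *upgd = (void *)__get_free_page(PGALLOC_GFP);

			if (upgd)
				/* Stash its address in the kernel half's
				 * struct page so __user_pgd() can find it. */
				virt_to_page(pgd)->index = (long)upgd;
			else {
				/* Fail the whole allocation atomically. */
				free_page((unsigned long)pgd);
				pgd = NULL;
			}
		}
	#endif
		return pgd;
	}

init_level4_pgt keeps no user pair at all, which is why __user_pgd() returns NULL for it and callers such as xen_pgd_pin() and xen_new_user_pt() grow NULL checks in the hunks that follow.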
+ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:55:40.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/hypervisor.h 2009-12-04 12:11:43.000000000 +0100 +@@ -104,8 +104,8 @@ void do_hypervisor_callback(struct pt_re + * be MACHINE addresses. + */ + +-void xen_pt_switch(unsigned long ptr); +-void xen_new_user_pt(unsigned long ptr); /* x86_64 only */ ++void xen_pt_switch(pgd_t *); ++void xen_new_user_pt(pgd_t *); /* x86_64 only */ + void xen_load_gs(unsigned int selector); /* x86_64 only */ + void xen_tlb_flush(void); + void xen_invlpg(unsigned long ptr); +@@ -113,7 +113,7 @@ void xen_invlpg(unsigned long ptr); + void xen_l1_entry_update(pte_t *ptr, pte_t val); + void xen_l2_entry_update(pmd_t *ptr, pmd_t val); + void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */ +-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */ ++void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */ + void xen_pgd_pin(pgd_t *); + void xen_pgd_unpin(pgd_t *); + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:52:22.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context.h 2009-12-04 12:11:43.000000000 +0100 +@@ -82,6 +82,9 @@ static inline void switch_mm(struct mm_s + { + unsigned cpu = smp_processor_id(); + struct mmuext_op _op[2 + (sizeof(long) > 4)], *op = _op; ++#ifdef CONFIG_X86_64 ++ pgd_t *upgd; ++#endif + + if (likely(prev != next)) { + BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && +@@ -100,10 +103,11 @@ static inline void switch_mm(struct mm_s + op->arg1.mfn = virt_to_mfn(next->pgd); + op++; + +- /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */ ++ /* xen_new_user_pt(next->pgd) */ + #ifdef CONFIG_X86_64 + op->cmd = MMUEXT_NEW_USER_BASEPTR; +- op->arg1.mfn = virt_to_mfn(__user_pgd(next->pgd)); ++ upgd = __user_pgd(next->pgd); ++ op->arg1.mfn = likely(upgd) ? virt_to_mfn(upgd) : 0; + op++; + #endif + +@@ -131,7 +135,7 @@ static inline void switch_mm(struct mm_s + * to make sure to use no freed page tables. 
+ */ + load_cr3(next->pgd); +- xen_new_user_pt(__pa(__user_pgd(next->pgd))); ++ xen_new_user_pt(next->pgd); + load_LDT_nolock(&next->context); + } + } +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2009-10-13 17:22:09.000000000 +0200 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgalloc.h 2009-12-04 12:11:43.000000000 +0100 +@@ -118,15 +118,13 @@ static inline void pud_populate(struct m + #endif /* CONFIG_X86_PAE */ + + #if PAGETABLE_LEVELS > 3 +-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) +- + static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) + { + pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud)); + + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + if (unlikely(PagePinned(virt_to_page(pgd)))) +- xen_l4_entry_update(pgd, 1, ent); ++ xen_l4_entry_update(pgd, ent); + else + *__user_pgd(pgd) = *pgd = ent; + } +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 11:12:01.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-12-04 12:11:43.000000000 +0100 +@@ -100,18 +100,25 @@ static inline void xen_set_pud(pud_t *pu + : (void)(*__pudp = xen_make_pud(0)); \ + }) + +-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) ++static inline pgd_t *__user_pgd(pgd_t *pgd) ++{ ++ if (unlikely(((unsigned long)pgd & PAGE_MASK) ++ == (unsigned long)init_level4_pgt)) ++ return NULL; ++ return (pgd_t *)(virt_to_page(pgd)->index ++ + ((unsigned long)pgd & ~PAGE_MASK)); ++} + + static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd) + { +- xen_l4_entry_update(pgdp, 0, pgd); ++ xen_l4_entry_update(pgdp, pgd); + } + + #define xen_pgd_clear(pgd) \ + ({ \ + pgd_t *__pgdp = (pgd); \ + PagePinned(virt_to_page(__pgdp)) \ +- ? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \ ++ ? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \ + : (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \ + }) + +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 11:12:01.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/common-xen.c 2010-01-18 17:14:45.000000000 +0100 +@@ -1026,8 +1026,7 @@ DEFINE_PER_CPU_FIRST(union irq_stack_uni + void xen_switch_pt(void) + { + #ifdef CONFIG_XEN +- xen_pt_switch(__pa_symbol(init_level4_pgt)); +- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); ++ xen_pt_switch(init_level4_pgt); + #endif + } + +--- head-2010-01-18.orig/arch/x86/kernel/head_64-xen.S 2009-12-04 14:37:14.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head_64-xen.S 2009-12-04 14:37:53.000000000 +0100 +@@ -56,14 +56,6 @@ ENTRY(name) + __PAGE_ALIGNED_BSS + NEXT_PAGE(init_level4_pgt) + .fill 512,8,0 +- /* +- * We update two pgd entries to make kernel and user pgd consistent +- * at pgd_populate(). It can be used for kernel modules. So we place +- * this page here for those cases to avoid memory corruption. +- * We also use this page to establish the initial mapping for the +- * vsyscall area. 
+- */ +- .fill 512,8,0 + + NEXT_PAGE(level3_kernel_pgt) + .fill 512,8,0 +--- head-2010-01-18.orig/arch/x86/mm/hypervisor.c 2010-01-05 16:47:51.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/hypervisor.c 2010-01-05 16:47:55.000000000 +0100 +@@ -525,7 +525,7 @@ void xen_l3_entry_update(pud_t *ptr, pud + #endif + + #ifdef CONFIG_X86_64 +-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val) ++void xen_l4_entry_update(pgd_t *ptr, pgd_t val) + { + mmu_update_t u[2]; + struct page *page = NULL; +@@ -538,8 +538,11 @@ void xen_l4_entry_update(pgd_t *ptr, int + } + u[0].ptr = virt_to_machine(ptr); + u[0].val = __pgd_val(val); +- if (user) { +- u[1].ptr = virt_to_machine(__user_pgd(ptr)); ++ if (((unsigned long)ptr & ~PAGE_MASK) ++ <= pgd_index(TASK_SIZE_MAX) * sizeof(*ptr)) { ++ ptr = __user_pgd(ptr); ++ BUG_ON(!ptr); ++ u[1].ptr = virt_to_machine(ptr); + u[1].val = __pgd_val(val); + do_lN_entry_update(u, 2, page); + } else +@@ -547,21 +550,25 @@ void xen_l4_entry_update(pgd_t *ptr, int + } + #endif /* CONFIG_X86_64 */ + +-void xen_pt_switch(unsigned long ptr) ++#ifdef CONFIG_X86_64 ++void xen_pt_switch(pgd_t *pgd) + { + struct mmuext_op op; + op.cmd = MMUEXT_NEW_BASEPTR; +- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); ++ op.arg1.mfn = virt_to_mfn(pgd); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } + +-void xen_new_user_pt(unsigned long ptr) ++void xen_new_user_pt(pgd_t *pgd) + { + struct mmuext_op op; ++ ++ pgd = __user_pgd(pgd); + op.cmd = MMUEXT_NEW_USER_BASEPTR; +- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); ++ op.arg1.mfn = pgd ? virt_to_mfn(pgd) : 0; + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } ++#endif + + void xen_tlb_flush(void) + { +@@ -638,7 +645,14 @@ void xen_pgd_pin(pgd_t *pgd) + op[0].arg1.mfn = virt_to_mfn(pgd); + #ifdef CONFIG_X86_64 + op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE; +- op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd)); ++ pgd = __user_pgd(pgd); ++ if (pgd) ++ op[1].arg1.mfn = virt_to_mfn(pgd); ++ else { ++ op[1].cmd = MMUEXT_PIN_L3_TABLE; ++ op[1].arg1.mfn = pfn_to_mfn(__pa_symbol(level3_user_pgt) ++ >> PAGE_SHIFT); ++ } + #endif + if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) + BUG(); +@@ -651,8 +665,10 @@ void xen_pgd_unpin(pgd_t *pgd) + op[0].cmd = MMUEXT_UNPIN_TABLE; + op[0].arg1.mfn = virt_to_mfn(pgd); + #ifdef CONFIG_X86_64 ++ pgd = __user_pgd(pgd); ++ BUG_ON(!pgd); + op[1].cmd = MMUEXT_UNPIN_TABLE; +- op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd)); ++ op[1].arg1.mfn = virt_to_mfn(pgd); + #endif + if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) + BUG(); +--- head-2010-01-18.orig/arch/x86/mm/init_64-xen.c 2009-10-13 17:25:37.000000000 +0200 ++++ head-2010-01-18/arch/x86/mm/init_64-xen.c 2009-12-04 12:11:43.000000000 +0100 +@@ -718,9 +718,6 @@ void __init xen_init_pt(void) + (PTRS_PER_PUD - pud_index(__START_KERNEL_map)) + * sizeof(*level3_kernel_pgt)); + +- __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] = +- __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE); +- + /* Do an early initialization of the fixmap area. 
*/ + addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); + if (pud_present(level3_kernel_pgt[pud_index(addr)])) { +@@ -736,8 +733,6 @@ void __init xen_init_pt(void) + + early_make_page_readonly(init_level4_pgt, + XENFEAT_writable_page_tables); +- early_make_page_readonly(__user_pgd(init_level4_pgt), +- XENFEAT_writable_page_tables); + early_make_page_readonly(level3_kernel_pgt, + XENFEAT_writable_page_tables); + early_make_page_readonly(level3_user_pgt, +--- head-2010-01-18.orig/arch/x86/mm/pgtable-xen.c 2009-10-14 14:20:47.000000000 +0200 ++++ head-2010-01-18/arch/x86/mm/pgtable-xen.c 2009-12-04 12:11:43.000000000 +0100 +@@ -269,9 +269,11 @@ static void pgd_walk(pgd_t *pgd_base, pg + BUG(); + seq = 0; + } ++ pgd = __user_pgd(pgd_base); ++ BUG_ON(!pgd); + MULTI_update_va_mapping(mcl + seq, +- (unsigned long)__user_pgd(pgd_base), +- pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), ++ (unsigned long)pgd, ++ pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags), + 0); + MULTI_update_va_mapping(mcl + seq + 1, + (unsigned long)pgd_base, +@@ -659,12 +661,29 @@ static void pgd_prepopulate_pmd(struct m + } + } + ++static inline pgd_t *user_pgd_alloc(pgd_t *pgd) ++{ + #ifdef CONFIG_X86_64 +-/* We allocate two contiguous pages for kernel and user. */ +-#define PGD_ORDER 1 +-#else +-#define PGD_ORDER 0 ++ if (pgd) { ++ pgd_t *upgd = (void *)__get_free_page(PGALLOC_GFP); ++ ++ if (upgd) ++ virt_to_page(pgd)->index = (long)upgd; ++ else { ++ free_page((unsigned long)pgd); ++ pgd = NULL; ++ } ++ } ++#endif ++ return pgd; ++} ++ ++static inline void user_pgd_free(pgd_t *pgd) ++{ ++#ifdef CONFIG_X86_64 ++ free_page(virt_to_page(pgd)->index); + #endif ++} + + pgd_t *pgd_alloc(struct mm_struct *mm) + { +@@ -672,7 +691,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + pmd_t *pmds[PREALLOCATED_PMDS]; + unsigned long flags; + +- pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER); ++ pgd = user_pgd_alloc((void *)__get_free_page(PGALLOC_GFP)); + + if (pgd == NULL) + goto out; +@@ -711,7 +730,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + out_free_pmds: + free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb)); + out_free_pgd: +- free_pages((unsigned long)pgd, PGD_ORDER); ++ user_pgd_free(pgd); ++ free_page((unsigned long)pgd); + out: + return NULL; + } +@@ -730,7 +750,8 @@ void pgd_free(struct mm_struct *mm, pgd_ + + pgd_mop_up_pmds(mm, pgd); + paravirt_pgd_free(mm, pgd); +- free_pages((unsigned long)pgd, PGD_ORDER); ++ user_pgd_free(pgd); ++ free_page((unsigned long)pgd); + } + + /* blktap and gntdev need this, as otherwise they would implicitly (and +--- head-2010-01-18.orig/drivers/xen/core/machine_reboot.c 2009-12-18 14:15:17.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/machine_reboot.c 2009-12-18 14:15:58.000000000 +0100 +@@ -188,8 +188,7 @@ static int take_machine_down(void *_susp + * in fast-suspend mode as that implies a new enough Xen. 
+ */ + if (!suspend->fast_suspend) +- xen_new_user_pt(__pa(__user_pgd( +- current->active_mm->pgd))); ++ xen_new_user_pt(current->active_mm->pgd); + #endif + } + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.1-2 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.1-2 @@ -0,0 +1,93 @@ +From: Greg Kroah-Hartman +Subject: Linux 2.6.32.2 + +Signed-off-by: Greg Kroah-Hartman + +Automatically created from "patches.kernel.org/patch-2.6.32.1-2" by xen-port-patches.py + +--- sle11sp1-2010-09-22.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-18 14:54:16.000000000 +0100 ++++ sle11sp1-2010-09-22/arch/x86/kernel/pci-dma-xen.c 2010-01-04 12:50:03.000000000 +0100 +@@ -268,7 +268,7 @@ static __init int iommu_setup(char *p) + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) +- forbid_dac = -1; ++ forbid_dac = 1; + if (!strncmp(p, "usedac", 6)) { + forbid_dac = -1; + return 1; +--- sle11sp1-2010-09-22.orig/arch/x86/kernel/setup-xen.c 2009-11-18 14:54:16.000000000 +0100 ++++ sle11sp1-2010-09-22/arch/x86/kernel/setup-xen.c 2010-01-04 12:50:03.000000000 +0100 +@@ -109,6 +109,7 @@ + #ifdef CONFIG_X86_64 + #include + #endif ++#include + + #ifdef CONFIG_XEN + #include +@@ -1260,6 +1261,8 @@ void __init setup_arch(char **cmdline_p) + #endif + #endif /* CONFIG_XEN */ + x86_init.oem.banner(); ++ ++ mcheck_intel_therm_init(); + } + + #ifdef CONFIG_X86_32 +--- sle11sp1-2010-09-22.orig/drivers/xen/blktap2/sysfs.c 2010-09-23 11:01:08.000000000 +0200 ++++ sle11sp1-2010-09-22/drivers/xen/blktap2/sysfs.c 2010-09-23 11:01:20.000000000 +0200 +@@ -39,11 +39,11 @@ blktap_sysfs_exit(struct blktap *tap) + static ssize_t blktap_sysfs_pause_device(struct device *, + struct device_attribute *, + const char *, size_t); +-DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device); ++static DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device); + static ssize_t blktap_sysfs_resume_device(struct device *, + struct device_attribute *, + const char *, size_t); +-DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device); ++static DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device); + + static ssize_t + blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, +@@ -103,8 +103,8 @@ blktap_sysfs_get_name(struct device *dev + + return size; + } +-DEVICE_ATTR(name, S_IRUSR | S_IWUSR, +- blktap_sysfs_get_name, blktap_sysfs_set_name); ++static DEVICE_ATTR(name, S_IRUSR | S_IWUSR, ++ blktap_sysfs_get_name, blktap_sysfs_set_name); + + static ssize_t + blktap_sysfs_remove_device(struct device *dev, struct device_attribute *attr, +@@ -123,7 +123,7 @@ blktap_sysfs_remove_device(struct device + + return (err ? 
: size); + } +-DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); ++static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); + + static ssize_t + blktap_sysfs_pause_device(struct device *dev, struct device_attribute *attr, +@@ -293,7 +293,7 @@ out: + + return ret; + } +-DEVICE_ATTR(debug, S_IRUSR, blktap_sysfs_debug_device, NULL); ++static DEVICE_ATTR(debug, S_IRUSR, blktap_sysfs_debug_device, NULL); + + int + blktap_sysfs_create(struct blktap *tap) +--- sle11sp1-2010-09-22.orig/drivers/xen/xenbus/xenbus_probe.c 2010-08-17 13:30:17.000000000 +0200 ++++ sle11sp1-2010-09-22/drivers/xen/xenbus/xenbus_probe.c 2010-08-17 13:32:20.000000000 +0200 +@@ -562,7 +562,7 @@ static ssize_t xendev_show_modalias(stru + { + return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); + } +-DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); ++static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); + + int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.20 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.20 @@ -0,0 +1,7156 @@ +From: www.kernel.org +Subject: Linux 2.6.20 +Patch-mainline: 2.6.20 + +Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py + +Acked-by: jbeulich@novell.com + +--- head-2010-01-18.orig/arch/x86/Kconfig 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig 2009-11-20 11:00:05.000000000 +0100 +@@ -1657,6 +1657,7 @@ config PHYSICAL_START + + config RELOCATABLE + bool "Build a relocatable kernel" ++ depends on !X86_XEN + default y + ---help--- + This builds a kernel image that retains relocation information +@@ -1679,7 +1680,8 @@ config X86_NEED_RELOCS + + config PHYSICAL_ALIGN + hex +- prompt "Alignment value to which kernel should be aligned" if X86_32 ++ prompt "Alignment value to which kernel should be aligned" if X86_32 && !XEN ++ default 0x2000 if XEN + default "0x1000000" + range 0x2000 0x1000000 + ---help--- +--- head-2010-01-18.orig/arch/x86/kernel/asm-offsets_32.c 2009-12-04 10:44:46.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/asm-offsets_32.c 2009-11-06 10:46:27.000000000 +0100 +@@ -55,6 +55,7 @@ void foo(void) + OFFSET(TI_exec_domain, thread_info, exec_domain); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_status, thread_info, status); ++ OFFSET(TI_cpu, thread_info, cpu); + OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_addr_limit, thread_info, addr_limit); + OFFSET(TI_restart_block, thread_info, restart_block); +@@ -110,6 +111,11 @@ void foo(void) + + OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + ++#ifdef CONFIG_XEN ++ BLANK(); ++ OFFSET(XEN_START_mfn_list, start_info, mfn_list); ++#endif ++ + #ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -22,6 +22,7 @@ + #define phys_pkg_id(a,b) a + #endif + #endif ++#include + #include + + #include "cpu.h" +@@ -29,10 +30,8 @@ + DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); + EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); + +-#ifndef CONFIG_XEN +-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); +-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); +-#endif ++struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; 
++EXPORT_SYMBOL(_cpu_pda); + + static int cachesize_override __cpuinitdata = -1; + static int disable_x86_fxsr __cpuinitdata; +@@ -60,7 +59,7 @@ static struct cpu_dev __cpuinitdata defa + .c_init = default_init, + .c_vendor = "Unknown", + }; +-static struct cpu_dev * this_cpu = &default_cpu; ++static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; + + static int __init cachesize_setup(char *str) + { +@@ -242,29 +241,14 @@ static int __cpuinit have_cpuid_p(void) + return flag_is_changeable_p(X86_EFLAGS_ID); + } + +-/* Do minimum CPU detection early. +- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. +- The others are not touched to avoid unwanted side effects. +- +- WARNING: this function is only called on the BP. Don't add code here +- that is supposed to run on all CPUs. */ +-static void __init early_cpu_detect(void) ++void __init cpu_detect(struct cpuinfo_x86 *c) + { +- struct cpuinfo_x86 *c = &boot_cpu_data; +- +- c->x86_cache_alignment = 32; +- +- if (!have_cpuid_p()) +- return; +- + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + +- get_cpu_vendor(c, 1); +- + c->x86 = 4; + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; +@@ -281,6 +265,26 @@ static void __init early_cpu_detect(void + } + } + ++/* Do minimum CPU detection early. ++ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. ++ The others are not touched to avoid unwanted side effects. ++ ++ WARNING: this function is only called on the BP. Don't add code here ++ that is supposed to run on all CPUs. */ ++static void __init early_cpu_detect(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ c->x86_cache_alignment = 32; ++ ++ if (!have_cpuid_p()) ++ return; ++ ++ cpu_detect(c); ++ ++ get_cpu_vendor(c, 1); ++} ++ + static void __cpuinit generic_identify(struct cpuinfo_x86 * c) + { + u32 tfms, xlvl; +@@ -317,6 +321,8 @@ static void __cpuinit generic_identify(s + c->apicid = (ebx >> 24) & 0xFF; + #endif + #endif ++ if (c->x86_capability[0] & (1<<19)) ++ c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; +@@ -381,6 +387,7 @@ void __cpuinit identify_cpu(struct cpuin + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_max_cores = 1; ++ c->x86_clflush_size = 32; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if (!have_cpuid_p()) { +@@ -601,61 +608,23 @@ void __init early_cpu_init(void) + #endif + } + +-static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr) ++/* Make sure %gs is initialized properly in idle threads */ ++struct pt_regs * __devinit idle_regs(struct pt_regs *regs) + { +- unsigned long frames[16]; +- unsigned long va; +- int f; +- +- for (va = gdt_descr->address, f = 0; +- va < gdt_descr->address + gdt_descr->size; +- va += PAGE_SIZE, f++) { +- frames[f] = virt_to_mfn(va); +- make_lowmem_page_readonly( +- (void *)va, XENFEAT_writable_descriptor_tables); +- } +- if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8)) +- BUG(); ++ memset(regs, 0, sizeof(struct pt_regs)); ++ regs->xgs = __KERNEL_PDA; ++ return regs; + } + +-/* +- * cpu_init() initializes state that is per-CPU. Some data is already +- * initialized (naturally) in the bootstrap process, such as the GDT +- * and IDT. We reload them nevertheless, this function acts as a +- * 'CPU state barrier', nothing should get across. 
+- */ +-void __cpuinit cpu_init(void) ++static __cpuinit int alloc_gdt(int cpu) + { +- int cpu = smp_processor_id(); +-#ifndef CONFIG_X86_NO_TSS +- struct tss_struct * t = &per_cpu(init_tss, cpu); +-#endif +- struct thread_struct *thread = ¤t->thread; +- struct desc_struct *gdt; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ struct desc_struct *gdt; ++ struct i386_pda *pda; + +- if (cpu_test_and_set(cpu, cpu_initialized)) { +- printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); +- for (;;) local_irq_enable(); +- } +- printk(KERN_INFO "Initializing CPU#%d\n", cpu); +- +- if (cpu_has_vme || cpu_has_de) +- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); +- if (tsc_disable && cpu_has_tsc) { +- printk(KERN_NOTICE "Disabling TSC...\n"); +- /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ +- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); +- set_in_cr4(X86_CR4_TSD); +- } ++ gdt = (struct desc_struct *)cpu_gdt_descr->address; ++ pda = cpu_pda(cpu); + +-#ifndef CONFIG_XEN +- /* The CPU hotplug case */ +- if (cpu_gdt_descr->address) { +- gdt = (struct desc_struct *)cpu_gdt_descr->address; +- memset(gdt, 0, PAGE_SIZE); +- goto old_gdt; +- } + /* + * This is a horrible hack to allocate the GDT. The problem + * is that cpu_init() is called really early for the boot CPU +@@ -663,54 +632,141 @@ void __cpuinit cpu_init(void) + * CPUs, when bootmem will have gone away + */ + if (NODE_DATA(0)->bdata->node_bootmem_map) { +- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); +- /* alloc_bootmem_pages panics on failure, so no check */ ++ BUG_ON(gdt != NULL || pda != NULL); ++ ++ gdt = alloc_bootmem_pages(PAGE_SIZE); ++ pda = alloc_bootmem(sizeof(*pda)); ++ /* alloc_bootmem(_pages) panics on failure, so no check */ ++ + memset(gdt, 0, PAGE_SIZE); ++ memset(pda, 0, sizeof(*pda)); + } else { +- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); +- if (unlikely(!gdt)) { +- printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); +- for (;;) +- local_irq_enable(); ++ /* GDT and PDA might already have been allocated if ++ this is a CPU hotplug re-insertion. */ ++ if (gdt == NULL) ++ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); ++ ++ if (pda == NULL) ++ pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); ++ ++ if (unlikely(!gdt || !pda)) { ++ free_pages((unsigned long)gdt, 0); ++ kfree(pda); ++ return 0; + } + } +-old_gdt: ++ ++ cpu_gdt_descr->address = (unsigned long)gdt; ++ cpu_pda(cpu) = pda; ++ ++ return 1; ++} ++ ++/* Initial PDA used by boot CPU */ ++struct i386_pda boot_pda = { ++ ._pda = &boot_pda, ++ .cpu_number = 0, ++ .pcurrent = &init_task, ++}; ++ ++static inline void set_kernel_gs(void) ++{ ++ /* Set %gs for this CPU's PDA. Memory clobber is to create a ++ barrier with respect to any PDA operations, so the compiler ++ doesn't move any before here. */ ++ asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); ++} ++ ++/* Initialize the CPU's GDT and PDA. The boot CPU does this for ++ itself, but secondaries find this done for them. */ ++__cpuinit int init_gdt(int cpu, struct task_struct *idle) ++{ ++ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ struct desc_struct *gdt; ++ struct i386_pda *pda; ++ ++ /* For non-boot CPUs, the GDT and PDA should already have been ++ allocated. 
*/ ++ if (!alloc_gdt(cpu)) { ++ printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); ++ return 0; ++ } ++ ++ gdt = (struct desc_struct *)cpu_gdt_descr->address; ++ pda = cpu_pda(cpu); ++ ++ BUG_ON(gdt == NULL || pda == NULL); ++ + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ cpu_gdt_descr->size = GDT_SIZE - 1; + +- /* Set up GDT entry for 16bit stack */ +- *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= +- ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | +- ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | +- (CPU_16BIT_STACK_SIZE - 1); ++ pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, ++ (u32 *)&gdt[GDT_ENTRY_PDA].b, ++ (unsigned long)pda, sizeof(*pda) - 1, ++ 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ ++ ++ memset(pda, 0, sizeof(*pda)); ++ pda->_pda = pda; ++ pda->cpu_number = cpu; ++ pda->pcurrent = idle; + +- cpu_gdt_descr->size = GDT_SIZE - 1; +- cpu_gdt_descr->address = (unsigned long)gdt; +-#else +- if (cpu == 0 && cpu_gdt_descr->address == 0) { +- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); +- /* alloc_bootmem_pages panics on failure, so no check */ +- memset(gdt, 0, PAGE_SIZE); ++ return 1; ++} + +- memcpy(gdt, cpu_gdt_table, GDT_SIZE); +- +- cpu_gdt_descr->size = GDT_SIZE; +- cpu_gdt_descr->address = (unsigned long)gdt; ++void __cpuinit cpu_set_gdt(int cpu) ++{ ++ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ unsigned long va, frames[16]; ++ int f; ++ ++ for (va = cpu_gdt_descr->address, f = 0; ++ va < cpu_gdt_descr->address + cpu_gdt_descr->size; ++ va += PAGE_SIZE, f++) { ++ frames[f] = virt_to_mfn(va); ++ make_lowmem_page_readonly( ++ (void *)va, XENFEAT_writable_descriptor_tables); + } ++ BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); ++ ++ set_kernel_gs(); ++} ++ ++/* Common CPU init for both boot and secondary CPUs */ ++static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) ++{ ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct * t = &per_cpu(init_tss, cpu); + #endif ++ struct thread_struct *thread = &curr->thread; ++ ++ if (cpu_test_and_set(cpu, cpu_initialized)) { ++ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); ++ for (;;) local_irq_enable(); ++ } + +- cpu_gdt_init(cpu_gdt_descr); ++ printk(KERN_INFO "Initializing CPU#%d\n", cpu); ++ ++ if (cpu_has_vme || cpu_has_de) ++ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ if (tsc_disable && cpu_has_tsc) { ++ printk(KERN_NOTICE "Disabling TSC...\n"); ++ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ ++ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); ++ set_in_cr4(X86_CR4_TSD); ++ } + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); +- current->active_mm = &init_mm; +- BUG_ON(current->mm); +- enter_lazy_tlb(&init_mm, current); ++ curr->active_mm = &init_mm; ++ if (curr->mm) ++ BUG(); ++ enter_lazy_tlb(&init_mm, curr); + + load_esp0(t, thread); + +@@ -721,8 +777,8 @@ old_gdt: + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + #endif + +- /* Clear %fs and %gs. */ +- asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); ++ /* Clear %fs. 
 */
++	asm volatile ("mov %0, %%fs" : : "r" (0));
+ 
+ 	/* Clear all 6 debug registers: */
+ 	set_debugreg(0, 0);
+@@ -740,6 +796,38 @@ old_gdt:
+ 	mxcsr_feature_mask_init();
+ }
+ 
++/* Entrypoint to initialize secondary CPU */
++void __cpuinit secondary_cpu_init(void)
++{
++	int cpu = smp_processor_id();
++	struct task_struct *curr = current;
++
++	_cpu_init(cpu, curr);
++}
++
++/*
++ * cpu_init() initializes state that is per-CPU. Some data is already
++ * initialized (naturally) in the bootstrap process, such as the GDT
++ * and IDT. We reload them nevertheless, this function acts as a
++ * 'CPU state barrier', nothing should get across.
++ */
++void __cpuinit cpu_init(void)
++{
++	int cpu = smp_processor_id();
++	struct task_struct *curr = current;
++
++	/* Set up the real GDT and PDA, so we can transition from the
++	   boot versions. */
++	if (!init_gdt(cpu, curr)) {
++		/* failed to allocate something; not much we can do... */
++		for (;;)
++			local_irq_enable();
++	}
++
++	cpu_set_gdt(cpu);
++	_cpu_init(cpu, curr);
++}
++
+ #ifdef CONFIG_HOTPLUG_CPU
+ void __cpuinit cpu_uninit(void)
+ {
+--- head-2010-01-18.orig/arch/x86/kernel/cpu/mtrr/main-xen.c	2008-01-28 12:24:18.000000000 +0100
++++ head-2010-01-18/arch/x86/kernel/cpu/mtrr/main-xen.c	2009-11-06 10:46:27.000000000 +0100
+@@ -12,7 +12,7 @@
+ static DEFINE_MUTEX(mtrr_mutex);
+ 
+ void generic_get_mtrr(unsigned int reg, unsigned long *base,
+-		      unsigned int *size, mtrr_type * type)
++		      unsigned long *size, mtrr_type * type)
+ {
+ 	struct xen_platform_op op;
+ 
+@@ -115,8 +115,7 @@ int mtrr_del_page(int reg, unsigned long
+ {
+ 	unsigned i;
+ 	mtrr_type ltype;
+-	unsigned long lbase;
+-	unsigned int lsize;
++	unsigned long lbase, lsize;
+ 	int error = -EINVAL;
+ 	struct xen_platform_op op;
+ 
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/arch/x86/kernel/e820_32-xen.c	2009-11-06 10:46:27.000000000 +0100
+@@ -0,0 +1,1002 @@
++#include <linux/kernel.h>
++#include <linux/types.h>
++#include <linux/init.h>
++#include <linux/bootmem.h>
++#include <linux/ioport.h>
++#include <linux/string.h>
++#include <linux/kexec.h>
++#include <linux/module.h>
++#include <linux/mm.h>
++#include <linux/efi.h>
++#include <linux/pfn.h>
++#include <linux/uaccess.h>
++
++#include <asm/pgtable.h>
++#include <asm/page.h>
++#include <asm/e820.h>
++#include <asm/setup.h>
++
++#ifdef CONFIG_EFI
++int efi_enabled = 0;
++EXPORT_SYMBOL(efi_enabled);
++#endif
++
++struct e820map e820;
++struct change_member {
++	struct e820entry *pbios; /* pointer to original bios entry */
++	unsigned long long addr; /* address for this change point */
++};
++static struct change_member change_point_list[2*E820MAX] __initdata;
++static struct change_member *change_point[2*E820MAX] __initdata;
++static struct e820entry *overlap_list[E820MAX] __initdata;
++static struct e820entry new_bios[E820MAX] __initdata;
++/* For PCI or other memory-mapped resources */
++unsigned long pci_mem_start = 0x10000000;
++#ifdef CONFIG_PCI
++EXPORT_SYMBOL(pci_mem_start);
++#endif
++extern int user_defined_memmap;
++struct resource data_resource = {
++	.name	= "Kernel data",
++	.start	= 0,
++	.end	= 0,
++	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
++};
++
++struct resource code_resource = {
++	.name	= "Kernel code",
++	.start	= 0,
++	.end	= 0,
++	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
++};
++
++static struct resource system_rom_resource = {
++	.name	= "System ROM",
++	.start	= 0xf0000,
++	.end	= 0xfffff,
++	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
++};
++
++static struct resource extension_rom_resource = {
++	.name	= "Extension ROM",
++	.start	= 0xe0000,
++	.end	= 0xeffff,
++	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
++};
++
++static struct resource adapter_rom_resources[] = { {
++	.name	= "Adapter 
ROM", ++ .start = 0xc8000, ++ .end = 0, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++}, { ++ .name = "Adapter ROM", ++ .start = 0, ++ .end = 0, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++}, { ++ .name = "Adapter ROM", ++ .start = 0, ++ .end = 0, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++}, { ++ .name = "Adapter ROM", ++ .start = 0, ++ .end = 0, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++}, { ++ .name = "Adapter ROM", ++ .start = 0, ++ .end = 0, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++}, { ++ .name = "Adapter ROM", ++ .start = 0, ++ .end = 0, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++} }; ++ ++static struct resource video_rom_resource = { ++ .name = "Video ROM", ++ .start = 0xc0000, ++ .end = 0xc7fff, ++ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM ++}; ++ ++static struct resource video_ram_resource = { ++ .name = "Video RAM area", ++ .start = 0xa0000, ++ .end = 0xbffff, ++ .flags = IORESOURCE_BUSY | IORESOURCE_MEM ++}; ++ ++static struct resource standard_io_resources[] = { { ++ .name = "dma1", ++ .start = 0x0000, ++ .end = 0x001f, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "pic1", ++ .start = 0x0020, ++ .end = 0x0021, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "timer0", ++ .start = 0x0040, ++ .end = 0x0043, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "timer1", ++ .start = 0x0050, ++ .end = 0x0053, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "keyboard", ++ .start = 0x0060, ++ .end = 0x006f, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "dma page reg", ++ .start = 0x0080, ++ .end = 0x008f, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "pic2", ++ .start = 0x00a0, ++ .end = 0x00a1, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "dma2", ++ .start = 0x00c0, ++ .end = 0x00df, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++}, { ++ .name = "fpu", ++ .start = 0x00f0, ++ .end = 0x00ff, ++ .flags = IORESOURCE_BUSY | IORESOURCE_IO ++} }; ++ ++static int romsignature(const unsigned char *x) ++{ ++ unsigned short sig; ++ int ret = 0; ++ if (probe_kernel_address((const unsigned short *)x, sig) == 0) ++ ret = (sig == 0xaa55); ++ return ret; ++} ++ ++static int __init romchecksum(unsigned char *rom, unsigned long length) ++{ ++ unsigned char *p, sum = 0; ++ ++ for (p = rom; p < rom + length; p++) ++ sum += *p; ++ return sum == 0; ++} ++ ++static void __init probe_roms(void) ++{ ++ unsigned long start, length, upper; ++ unsigned char *rom; ++ int i; ++ ++#ifdef CONFIG_XEN ++ /* Nothing to do if not running in dom0. 
*/ ++ if (!is_initial_xendomain()) ++ return; ++#endif ++ ++ /* video rom */ ++ upper = adapter_rom_resources[0].start; ++ for (start = video_rom_resource.start; start < upper; start += 2048) { ++ rom = isa_bus_to_virt(start); ++ if (!romsignature(rom)) ++ continue; ++ ++ video_rom_resource.start = start; ++ ++ /* 0 < length <= 0x7f * 512, historically */ ++ length = rom[2] * 512; ++ ++ /* if checksum okay, trust length byte */ ++ if (length && romchecksum(rom, length)) ++ video_rom_resource.end = start + length - 1; ++ ++ request_resource(&iomem_resource, &video_rom_resource); ++ break; ++ } ++ ++ start = (video_rom_resource.end + 1 + 2047) & ~2047UL; ++ if (start < upper) ++ start = upper; ++ ++ /* system rom */ ++ request_resource(&iomem_resource, &system_rom_resource); ++ upper = system_rom_resource.start; ++ ++ /* check for extension rom (ignore length byte!) */ ++ rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start); ++ if (romsignature(rom)) { ++ length = extension_rom_resource.end - extension_rom_resource.start + 1; ++ if (romchecksum(rom, length)) { ++ request_resource(&iomem_resource, &extension_rom_resource); ++ upper = extension_rom_resource.start; ++ } ++ } ++ ++ /* check for adapter roms on 2k boundaries */ ++ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { ++ rom = isa_bus_to_virt(start); ++ if (!romsignature(rom)) ++ continue; ++ ++ /* 0 < length <= 0x7f * 512, historically */ ++ length = rom[2] * 512; ++ ++ /* but accept any length that fits if checksum okay */ ++ if (!length || start + length > upper || !romchecksum(rom, length)) ++ continue; ++ ++ adapter_rom_resources[i].start = start; ++ adapter_rom_resources[i].end = start + length - 1; ++ request_resource(&iomem_resource, &adapter_rom_resources[i]); ++ ++ start = adapter_rom_resources[i++].end & ~2047UL; ++ } ++} ++ ++#ifdef CONFIG_XEN ++static struct e820map machine_e820 __initdata; ++#define e820 machine_e820 ++#endif ++ ++/* ++ * Request address space for all standard RAM and ROM resources ++ * and also for regions reported as reserved by the e820. ++ */ ++static void __init ++legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) ++{ ++ int i; ++ ++ probe_roms(); ++ for (i = 0; i < e820.nr_map; i++) { ++ struct resource *res; ++#ifndef CONFIG_RESOURCES_64BIT ++ if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) ++ continue; ++#endif ++ res = kzalloc(sizeof(struct resource), GFP_ATOMIC); ++ switch (e820.map[i].type) { ++ case E820_RAM: res->name = "System RAM"; break; ++ case E820_ACPI: res->name = "ACPI Tables"; break; ++ case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; ++ default: res->name = "reserved"; ++ } ++ res->start = e820.map[i].addr; ++ res->end = res->start + e820.map[i].size - 1; ++ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ if (request_resource(&iomem_resource, res)) { ++ kfree(res); ++ continue; ++ } ++ if (e820.map[i].type == E820_RAM) { ++ /* ++ * We don't know which RAM region contains kernel data, ++ * so we try it repeatedly and let the resource manager ++ * test it. 
++ */ ++#ifndef CONFIG_XEN ++ request_resource(res, code_resource); ++ request_resource(res, data_resource); ++#endif ++#ifdef CONFIG_KEXEC ++ request_resource(res, &crashk_res); ++#ifdef CONFIG_XEN ++ xen_machine_kexec_register_resources(res); ++#endif ++#endif ++ } ++ } ++} ++ ++#undef e820 ++ ++/* ++ * Request address space for all standard resources ++ * ++ * This is called just before pcibios_init(), which is also a ++ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). ++ */ ++static int __init request_standard_resources(void) ++{ ++ int i; ++ ++ /* Nothing to do if not running in dom0. */ ++ if (!is_initial_xendomain()) ++ return 0; ++ ++ printk("Setting up standard PCI resources\n"); ++ if (efi_enabled) ++ efi_initialize_iomem_resources(&code_resource, &data_resource); ++ else ++ legacy_init_iomem_resources(&code_resource, &data_resource); ++ ++ /* EFI systems may still have VGA */ ++ request_resource(&iomem_resource, &video_ram_resource); ++ ++ /* request I/O space for devices used on all i[345]86 PCs */ ++ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) ++ request_resource(&ioport_resource, &standard_io_resources[i]); ++ return 0; ++} ++ ++subsys_initcall(request_standard_resources); ++ ++void __init add_memory_region(unsigned long long start, ++ unsigned long long size, int type) ++{ ++ int x; ++ ++ if (!efi_enabled) { ++ x = e820.nr_map; ++ ++ if (x == E820MAX) { ++ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); ++ return; ++ } ++ ++ e820.map[x].addr = start; ++ e820.map[x].size = size; ++ e820.map[x].type = type; ++ e820.nr_map++; ++ } ++} /* add_memory_region */ ++ ++/* ++ * Sanitize the BIOS e820 map. ++ * ++ * Some e820 responses include overlapping entries. The following ++ * replaces the original e820 map with a new one, removing overlaps. ++ * ++ */ ++int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) ++{ ++ struct change_member *change_tmp; ++ unsigned long current_type, last_type; ++ unsigned long long last_addr; ++ int chgidx, still_changing; ++ int overlap_entries; ++ int new_bios_entry; ++ int old_nr, new_nr, chg_nr; ++ int i; ++ ++ /* ++ Visually we're performing the following (1,2,3,4 = memory types)... 
++
++	Sample memory map (w/overlaps):
++	   ____22__________________
++	   ______________________4_
++	   ____1111________________
++	   _44_____________________
++	   11111111________________
++	   ____________________33__
++	   ___________44___________
++	   __________33333_________
++	   ______________22________
++	   ___________________2222_
++	   _________111111111______
++	   _____________________11_
++	   _________________4______
++
++	Sanitized equivalent (no overlap):
++	   1_______________________
++	   _44_____________________
++	   ___1____________________
++	   ____22__________________
++	   ______11________________
++	   _________1______________
++	   __________3_____________
++	   ___________44___________
++	   _____________33_________
++	   _______________2________
++	   ________________1_______
++	   _________________4______
++	   ___________________2____
++	   ____________________33__
++	   ______________________4_
++	*/
++	printk("sanitize start\n");
++	/* if there's only one memory region, don't bother */
++	if (*pnr_map < 2) {
++		printk("sanitize bail 0\n");
++		return -1;
++	}
++
++	old_nr = *pnr_map;
++
++	/* bail out if we find any unreasonable addresses in bios map */
++	for (i=0; i<old_nr; i++)
++		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
++			printk("sanitize bail 1\n");
++			return -1;
++		}
++
++	/* create pointers for initial change-point information (for sorting) */
++	for (i=0; i < 2*old_nr; i++)
++		change_point[i] = &change_point_list[i];
++
++	/* record all known change-points (starting and ending addresses),
++	   omitting those that are for empty memory regions */
++	chgidx = 0;
++	for (i=0; i < old_nr; i++)	{
++		if (biosmap[i].size != 0) {
++			change_point[chgidx]->addr = biosmap[i].addr;
++			change_point[chgidx++]->pbios = &biosmap[i];
++			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
++			change_point[chgidx++]->pbios = &biosmap[i];
++		}
++	}
++	chg_nr = chgidx;	/* true number of change-points */
++
++	/* sort change-point list by memory addresses (low -> high) */
++	still_changing = 1;
++	while (still_changing)	{
++		still_changing = 0;
++		for (i=1; i < chg_nr; i++)  {
++			/* if <current_addr> > <last_addr>, swap */
++			/* or, if current=<start_addr> & last=<end_addr>, swap */
++			if ((change_point[i]->addr < change_point[i-1]->addr) ||
++				((change_point[i]->addr == change_point[i-1]->addr) &&
++				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
++				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
++			   )
++			{
++				change_tmp = change_point[i];
++				change_point[i] = change_point[i-1];
++				change_point[i-1] = change_tmp;
++				still_changing=1;
++			}
++		}
++	}
++
++	/* create a new bios memory map, removing overlaps */
++	overlap_entries=0;	 /* number of entries in the overlap table */
++	new_bios_entry=0;	 /* index for creating new bios map entries */
++	last_type = 0;		 /* start with undefined memory type */
++	last_addr = 0;		 /* start with 0 as last starting address */
++	/* loop through change-points, determining affect on the new bios map */
++	for (chgidx=0; chgidx < chg_nr; chgidx++)
++	{
++		/* keep track of all overlapping bios entries */
++		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
++		{
++			/* add map entry to overlap list (> 1 entry implies an overlap) */
++			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
++		}
++		else
++		{
++			/* remove entry from list (order independent, so swap with last) */
++			for (i=0; i<overlap_entries; i++)
++			{
++				if (overlap_list[i] == change_point[chgidx]->pbios)
++					overlap_list[i] = overlap_list[overlap_entries-1];
++			}
++			overlap_entries--;
++		}
++		/* if there are overlapping entries, decide which "type" to use */
++		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
++		current_type = 0;
++		for (i=0; i<overlap_entries; i++)
++			if (overlap_list[i]->type > current_type)
++				current_type = overlap_list[i]->type;
++		/* continue building up new bios map based on this information */
++		if (current_type != last_type)	{
++			if (last_type != 0)	 {
++				new_bios[new_bios_entry].size =
++					change_point[chgidx]->addr - last_addr;
++				/* move forward only if the new size was non-zero */
++				if (new_bios[new_bios_entry].size != 0)
++					if (++new_bios_entry >= E820MAX)
++						break; 	/* no more space left for new bios 
entries */ ++ } ++ if (current_type != 0) { ++ new_bios[new_bios_entry].addr = change_point[chgidx]->addr; ++ new_bios[new_bios_entry].type = current_type; ++ last_addr=change_point[chgidx]->addr; ++ } ++ last_type = current_type; ++ } ++ } ++ new_nr = new_bios_entry; /* retain count for new bios entries */ ++ ++ /* copy new bios mapping into original location */ ++ memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); ++ *pnr_map = new_nr; ++ ++ printk("sanitize end\n"); ++ return 0; ++} ++ ++/* ++ * Copy the BIOS e820 map into a safe place. ++ * ++ * Sanity-check it while we're at it.. ++ * ++ * If we're lucky and live on a modern system, the setup code ++ * will have given us a memory map that we can use to properly ++ * set up memory. If we aren't, we'll fake a memory map. ++ * ++ * We check to see that the memory map contains at least 2 elements ++ * before we'll use it, because the detection code in setup.S may ++ * not be perfect and most every PC known to man has two memory ++ * regions: one from 0 to 640k, and one from 1mb up. (The IBM ++ * thinkpad 560x, for example, does not cooperate with the memory ++ * detection code.) ++ */ ++int __init copy_e820_map(struct e820entry * biosmap, int nr_map) ++{ ++#ifndef CONFIG_XEN ++ /* Only one memory region (or negative)? Ignore it */ ++ if (nr_map < 2) ++ return -1; ++#else ++ BUG_ON(nr_map < 1); ++#endif ++ ++ do { ++ unsigned long long start = biosmap->addr; ++ unsigned long long size = biosmap->size; ++ unsigned long long end = start + size; ++ unsigned long type = biosmap->type; ++ printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); ++ ++ /* Overflow in 64 bits? Ignore the memory map. */ ++ if (start > end) ++ return -1; ++ ++#ifndef CONFIG_XEN ++ /* ++ * Some BIOSes claim RAM in the 640k - 1M region. ++ * Not right. Fix it up. ++ */ ++ if (type == E820_RAM) { ++ printk("copy_e820_map() type is E820_RAM\n"); ++ if (start < 0x100000ULL && end > 0xA0000ULL) { ++ printk("copy_e820_map() lies in range...\n"); ++ if (start < 0xA0000ULL) { ++ printk("copy_e820_map() start < 0xA0000ULL\n"); ++ add_memory_region(start, 0xA0000ULL-start, type); ++ } ++ if (end <= 0x100000ULL) { ++ printk("copy_e820_map() end <= 0x100000ULL\n"); ++ continue; ++ } ++ start = 0x100000ULL; ++ size = end - start; ++ } ++ } ++#endif ++ add_memory_region(start, size, type); ++ } while (biosmap++,--nr_map); ++ ++#ifdef CONFIG_XEN ++ if (is_initial_xendomain()) { ++ struct xen_memory_map memmap; ++ ++ memmap.nr_entries = E820MAX; ++ set_xen_guest_handle(memmap.buffer, machine_e820.map); ++ ++ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) ++ BUG(); ++ machine_e820.nr_map = memmap.nr_entries; ++ } else ++ machine_e820 = e820; ++#endif ++ ++ return 0; ++} ++ ++/* ++ * Callback for efi_memory_walk. 
++ */ ++static int __init ++efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) ++{ ++ unsigned long *max_pfn = arg, pfn; ++ ++ if (start < end) { ++ pfn = PFN_UP(end -1); ++ if (pfn > *max_pfn) ++ *max_pfn = pfn; ++ } ++ return 0; ++} ++ ++static int __init ++efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) ++{ ++ memory_present(0, PFN_UP(start), PFN_DOWN(end)); ++ return 0; ++} ++ ++/* ++ * Find the highest page frame number we have available ++ */ ++void __init find_max_pfn(void) ++{ ++ int i; ++ ++ max_pfn = 0; ++ if (efi_enabled) { ++ efi_memmap_walk(efi_find_max_pfn, &max_pfn); ++ efi_memmap_walk(efi_memory_present_wrapper, NULL); ++ return; ++ } ++ ++ for (i = 0; i < e820.nr_map; i++) { ++ unsigned long start, end; ++ /* RAM? */ ++ if (e820.map[i].type != E820_RAM) ++ continue; ++ start = PFN_UP(e820.map[i].addr); ++ end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); ++ if (start >= end) ++ continue; ++ if (end > max_pfn) ++ max_pfn = end; ++ memory_present(0, start, end); ++ } ++} ++ ++/* ++ * Free all available memory for boot time allocation. Used ++ * as a callback function by efi_memory_walk() ++ */ ++ ++static int __init ++free_available_memory(unsigned long start, unsigned long end, void *arg) ++{ ++ /* check max_low_pfn */ ++ if (start >= (max_low_pfn << PAGE_SHIFT)) ++ return 0; ++ if (end >= (max_low_pfn << PAGE_SHIFT)) ++ end = max_low_pfn << PAGE_SHIFT; ++ if (start < end) ++ free_bootmem(start, end - start); ++ ++ return 0; ++} ++/* ++ * Register fully available low RAM pages with the bootmem allocator. ++ */ ++void __init register_bootmem_low_pages(unsigned long max_low_pfn) ++{ ++ int i; ++ ++ if (efi_enabled) { ++ efi_memmap_walk(free_available_memory, NULL); ++ return; ++ } ++ for (i = 0; i < e820.nr_map; i++) { ++ unsigned long curr_pfn, last_pfn, size; ++ /* ++ * Reserve usable low memory ++ */ ++ if (e820.map[i].type != E820_RAM) ++ continue; ++ /* ++ * We are rounding up the start address of usable memory: ++ */ ++ curr_pfn = PFN_UP(e820.map[i].addr); ++ if (curr_pfn >= max_low_pfn) ++ continue; ++ /* ++ * ... and at the end of the usable range downwards: ++ */ ++ last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); ++ ++#ifdef CONFIG_XEN ++ /* ++ * Truncate to the number of actual pages currently ++ * present. ++ */ ++ if (last_pfn > xen_start_info->nr_pages) ++ last_pfn = xen_start_info->nr_pages; ++#endif ++ ++ if (last_pfn > max_low_pfn) ++ last_pfn = max_low_pfn; ++ ++ /* ++ * .. finally, did all the rounding and playing ++ * around just make the area go away? ++ */ ++ if (last_pfn <= curr_pfn) ++ continue; ++ ++ size = last_pfn - curr_pfn; ++ free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); ++ } ++} ++ ++void __init e820_register_memory(void) ++{ ++ unsigned long gapstart, gapsize, round; ++ unsigned long long last; ++ int i; ++ ++#ifdef CONFIG_XEN ++#define e820 machine_e820 ++#endif ++ /* ++ * Search for the bigest gap in the low 32 bits of the e820 ++ * memory space. 
++ */ ++ last = 0x100000000ull; ++ gapstart = 0x10000000; ++ gapsize = 0x400000; ++ i = e820.nr_map; ++ while (--i >= 0) { ++ unsigned long long start = e820.map[i].addr; ++ unsigned long long end = start + e820.map[i].size; ++ ++ /* ++ * Since "last" is at most 4GB, we know we'll ++ * fit in 32 bits if this condition is true ++ */ ++ if (last > end) { ++ unsigned long gap = last - end; ++ ++ if (gap > gapsize) { ++ gapsize = gap; ++ gapstart = end; ++ } ++ } ++ if (start < last) ++ last = start; ++ } ++#undef e820 ++ ++ /* ++ * See how much we want to round up: start off with ++ * rounding to the next 1MB area. ++ */ ++ round = 0x100000; ++ while ((gapsize >> 4) > round) ++ round += round; ++ /* Fun with two's complement */ ++ pci_mem_start = (gapstart + round) & -round; ++ ++ printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", ++ pci_mem_start, gapstart, gapsize); ++} ++ ++void __init print_memory_map(char *who) ++{ ++ int i; ++ ++ for (i = 0; i < e820.nr_map; i++) { ++ printk(" %s: %016Lx - %016Lx ", who, ++ e820.map[i].addr, ++ e820.map[i].addr + e820.map[i].size); ++ switch (e820.map[i].type) { ++ case E820_RAM: printk("(usable)\n"); ++ break; ++ case E820_RESERVED: ++ printk("(reserved)\n"); ++ break; ++ case E820_ACPI: ++ printk("(ACPI data)\n"); ++ break; ++ case E820_NVS: ++ printk("(ACPI NVS)\n"); ++ break; ++ default: printk("type %lu\n", e820.map[i].type); ++ break; ++ } ++ } ++} ++ ++static __init __always_inline void efi_limit_regions(unsigned long long size) ++{ ++ unsigned long long current_addr = 0; ++ efi_memory_desc_t *md, *next_md; ++ void *p, *p1; ++ int i, j; ++ ++ j = 0; ++ p1 = memmap.map; ++ for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { ++ md = p; ++ next_md = p1; ++ current_addr = md->phys_addr + ++ PFN_PHYS(md->num_pages); ++ if (is_available_memory(md)) { ++ if (md->phys_addr >= size) continue; ++ memcpy(next_md, md, memmap.desc_size); ++ if (current_addr >= size) { ++ next_md->num_pages -= ++ PFN_UP(current_addr-size); ++ } ++ p1 += memmap.desc_size; ++ next_md = p1; ++ j++; ++ } else if ((md->attribute & EFI_MEMORY_RUNTIME) == ++ EFI_MEMORY_RUNTIME) { ++ /* In order to make runtime services ++ * available we have to include runtime ++ * memory regions in memory map */ ++ memcpy(next_md, md, memmap.desc_size); ++ p1 += memmap.desc_size; ++ next_md = p1; ++ j++; ++ } ++ } ++ memmap.nr_map = j; ++ memmap.map_end = memmap.map + ++ (memmap.nr_map * memmap.desc_size); ++} ++ ++void __init limit_regions(unsigned long long size) ++{ ++ unsigned long long current_addr = 0; ++ int i; ++ ++ print_memory_map("limit_regions start"); ++ if (efi_enabled) { ++ efi_limit_regions(size); ++ return; ++ } ++ for (i = 0; i < e820.nr_map; i++) { ++ current_addr = e820.map[i].addr + e820.map[i].size; ++ if (current_addr < size) ++ continue; ++ ++ if (e820.map[i].type != E820_RAM) ++ continue; ++ ++ if (e820.map[i].addr >= size) { ++ /* ++ * This region starts past the end of the ++ * requested size, skip it completely. ++ */ ++ e820.nr_map = i; ++ } else { ++ e820.nr_map = i + 1; ++ e820.map[i].size -= current_addr - size; ++ } ++ print_memory_map("limit_regions endfor"); ++ return; ++ } ++#ifdef CONFIG_XEN ++ if (current_addr < size) { ++ /* ++ * The e820 map finished before our requested size so ++ * extend the final entry to the requested address. 
 */
++		--i;
++		if (e820.map[i].type == E820_RAM)
++			e820.map[i].size -= current_addr - size;
++		else
++			add_memory_region(current_addr, size - current_addr, E820_RAM);
++	}
++#endif
++	print_memory_map("limit_regions endfunc");
++}
++
++/*
++ * This function checks if any part of the range <start,end> is mapped
++ * with type.
++ */
++int
++e820_any_mapped(u64 start, u64 end, unsigned type)
++{
++	int i;
++
++#ifndef CONFIG_XEN
++	for (i = 0; i < e820.nr_map; i++) {
++		const struct e820entry *ei = &e820.map[i];
++#else
++	if (!is_initial_xendomain())
++		return 0;
++	for (i = 0; i < machine_e820.nr_map; ++i) {
++		const struct e820entry *ei = &machine_e820.map[i];
++#endif
++
++		if (type && ei->type != type)
++			continue;
++		if (ei->addr >= end || ei->addr + ei->size <= start)
++			continue;
++		return 1;
++	}
++	return 0;
++}
++EXPORT_SYMBOL_GPL(e820_any_mapped);
++
++ /*
++ * This function checks if the entire range <start,end> is mapped with type.
++ *
++ * Note: this function only works correct if the e820 table is sorted and
++ * not-overlapping, which is the case
++ */
++int __init
++e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
++{
++	u64 start = s;
++	u64 end = e;
++	int i;
++
++#ifndef CONFIG_XEN
++	for (i = 0; i < e820.nr_map; i++) {
++		struct e820entry *ei = &e820.map[i];
++#else
++	if (!is_initial_xendomain())
++		return 0;
++	for (i = 0; i < machine_e820.nr_map; ++i) {
++		const struct e820entry *ei = &machine_e820.map[i];
++#endif
++
++		if (type && ei->type != type)
++			continue;
++		/* is the region (part) in overlap with the current region ?*/
++		if (ei->addr >= end || ei->addr + ei->size <= start)
++			continue;
++		/* if the region is at the beginning of <start,end> we move
++		 * start to the end of the region since it's ok until there
++		 */
++		if (ei->addr <= start)
++			start = ei->addr + ei->size;
++		/* if start is now at or beyond end, we're done, full
++		 * coverage */
++		if (start >= end)
++			return 1; /* we're done */
++	}
++	return 0;
++}
++
++static int __init parse_memmap(char *arg)
++{
++	if (!arg)
++		return -EINVAL;
++
++	if (strcmp(arg, "exactmap") == 0) {
++#ifdef CONFIG_CRASH_DUMP
++		/* If we are doing a crash dump, we
++		 * still need to know the real mem
++		 * size before original memory map is
++		 * reset.
++		 */
++		find_max_pfn();
++		saved_max_pfn = max_pfn;
++#endif
++		e820.nr_map = 0;
++		user_defined_memmap = 1;
++	} else {
++		/* If the user specifies memory size, we
++		 * limit the BIOS-provided memory map to
++		 * that size. exactmap can be used to specify
++		 * the exact map. mem=number can be used to
++		 * trim the existing memory map. 
++ */ ++ unsigned long long start_at, mem_size; ++ ++ mem_size = memparse(arg, &arg); ++ if (*arg == '@') { ++ start_at = memparse(arg+1, &arg); ++ add_memory_region(start_at, mem_size, E820_RAM); ++ } else if (*arg == '#') { ++ start_at = memparse(arg+1, &arg); ++ add_memory_region(start_at, mem_size, E820_ACPI); ++ } else if (*arg == '$') { ++ start_at = memparse(arg+1, &arg); ++ add_memory_region(start_at, mem_size, E820_RESERVED); ++ } else { ++ limit_regions(mem_size); ++ user_defined_memmap = 1; ++ } ++ } ++ return 0; ++} ++early_param("memmap", parse_memmap); +--- head-2010-01-18.orig/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:46:27.000000000 +0100 +@@ -30,12 +30,13 @@ + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es +- * 24(%esp) - orig_eax +- * 28(%esp) - %eip +- * 2C(%esp) - %cs +- * 30(%esp) - %eflags +- * 34(%esp) - %oldesp +- * 38(%esp) - %oldss ++ * 24(%esp) - %gs ++ * 28(%esp) - orig_eax ++ * 2C(%esp) - %eip ++ * 30(%esp) - %cs ++ * 34(%esp) - %eflags ++ * 38(%esp) - %oldesp ++ * 3C(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ +@@ -48,27 +49,25 @@ + #include + #include + #include ++#include + #include + #include "irq_vectors.h" + #include + +-#define nr_syscalls ((syscall_table_size)/4) ++/* ++ * We use macros for low-level operations which need to be overridden ++ * for paravirtualization. The following will never clobber any registers: ++ * INTERRUPT_RETURN (aka. "iret") ++ * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") ++ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). ++ * ++ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must ++ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). ++ * Allowing a register to be clobbered can shrink the paravirt replacement ++ * enough to patch inline, increasing performance. ++ */ + +-EBX = 0x00 +-ECX = 0x04 +-EDX = 0x08 +-ESI = 0x0C +-EDI = 0x10 +-EBP = 0x14 +-EAX = 0x18 +-DS = 0x1C +-ES = 0x20 +-ORIG_EAX = 0x24 +-EIP = 0x28 +-CS = 0x2C +-EFLAGS = 0x30 +-OLDESP = 0x34 +-OLDSS = 0x38 ++#define nr_syscalls ((syscall_table_size)/4) + + CF_MASK = 0x00000001 + TF_MASK = 0x00000100 +@@ -79,61 +78,16 @@ VM_MASK = 0x00020000 + /* Pseudo-eflags. */ + NMI_MASK = 0x80000000 + +-#ifndef CONFIG_XEN +-/* These are replaces for paravirtualization */ +-#define DISABLE_INTERRUPTS cli +-#define ENABLE_INTERRUPTS sti +-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +-#define INTERRUPT_RETURN iret +-#define GET_CR0_INTO_EAX movl %cr0, %eax +-#else +-/* Offsets into shared_info_t. */ +-#define evtchn_upcall_pending /* 0 */ +-#define evtchn_upcall_mask 1 +- +-#define sizeof_vcpu_shift 6 +- +-#ifdef CONFIG_SMP +-#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ +- shl $sizeof_vcpu_shift,%esi ; \ +- addl HYPERVISOR_shared_info,%esi +-#else +-#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi +-#endif +- +-#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) +-#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) +-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) +-#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ +- __DISABLE_INTERRUPTS +-#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ +- __ENABLE_INTERRUPTS +-#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ +-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ +- __TEST_PENDING ; \ +- jnz 14f # process more events if necessary... 
; \ +- movl ESI(%esp), %esi ; \ +- sysexit ; \ +-14: __DISABLE_INTERRUPTS ; \ +- TRACE_IRQS_OFF ; \ +-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ +- push %esp ; \ +- call evtchn_do_upcall ; \ +- add $4,%esp ; \ +- jmp ret_from_intr +-#define INTERRUPT_RETURN iret +-#endif +- + #ifdef CONFIG_PREEMPT +-#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF ++#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF + #else +-#define preempt_stop ++#define preempt_stop(clobbers) + #define resume_kernel restore_nocheck + #endif + + .macro TRACE_IRQS_IRET + #ifdef CONFIG_TRACE_IRQFLAGS +- testl $IF_MASK,EFLAGS(%esp) # interrupts off? ++ testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? + jz 1f + TRACE_IRQS_ON + 1: +@@ -148,6 +102,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT + + #define SAVE_ALL \ + cld; \ ++ pushl %gs; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET gs, 0;*/\ + pushl %es; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET es, 0;*/\ +@@ -177,7 +134,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT + CFI_REL_OFFSET ebx, 0;\ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ +- movl %edx, %es; ++ movl %edx, %es; \ ++ movl $(__KERNEL_PDA), %edx; \ ++ movl %edx, %gs + + #define RESTORE_INT_REGS \ + popl %ebx; \ +@@ -210,17 +169,22 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT + 2: popl %es; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE es;*/\ +-.section .fixup,"ax"; \ +-3: movl $0,(%esp); \ +- jmp 1b; \ ++3: popl %gs; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE gs;*/\ ++.pushsection .fixup,"ax"; \ + 4: movl $0,(%esp); \ ++ jmp 1b; \ ++5: movl $0,(%esp); \ + jmp 2b; \ +-.previous; \ ++6: movl $0,(%esp); \ ++ jmp 3b; \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,3b; \ +- .long 2b,4b; \ +-.previous ++ .long 1b,4b; \ ++ .long 2b,5b; \ ++ .long 3b,6b; \ ++.popsection + + #define RING0_INT_FRAME \ + CFI_STARTPROC simple;\ +@@ -239,18 +203,18 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT + #define RING0_PTREGS_FRAME \ + CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, OLDESP-EBX;\ +- /*CFI_OFFSET cs, CS-OLDESP;*/\ +- CFI_OFFSET eip, EIP-OLDESP;\ +- /*CFI_OFFSET es, ES-OLDESP;*/\ +- /*CFI_OFFSET ds, DS-OLDESP;*/\ +- CFI_OFFSET eax, EAX-OLDESP;\ +- CFI_OFFSET ebp, EBP-OLDESP;\ +- CFI_OFFSET edi, EDI-OLDESP;\ +- CFI_OFFSET esi, ESI-OLDESP;\ +- CFI_OFFSET edx, EDX-OLDESP;\ +- CFI_OFFSET ecx, ECX-OLDESP;\ +- CFI_OFFSET ebx, EBX-OLDESP ++ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ ++ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ ++ CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ ++ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ ++ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ ++ CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ ++ CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ ++ CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ ++ CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ ++ CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ ++ CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ ++ CFI_OFFSET ebx, PT_EBX-PT_OLDESP + + ENTRY(ret_from_fork) + CFI_STARTPROC +@@ -278,17 +242,18 @@ ENTRY(ret_from_fork) + ALIGN + RING0_PTREGS_FRAME + ret_from_exception: +- preempt_stop ++ preempt_stop(CLBR_ANY) + ret_from_intr: + GET_THREAD_INFO(%ebp) + check_userspace: +- movl EFLAGS(%esp), %eax # mix EFLAGS and CS +- movb CS(%esp), %al ++ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb PT_CS(%esp), %al + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace ++ + ENTRY(resume_userspace) +- DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an 
interrupt + # setting need_resched or sigpending + # between sampling and the iret + movl TI_flags(%ebp), %ecx +@@ -299,14 +264,14 @@ ENTRY(resume_userspace) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +- DISABLE_INTERRUPTS ++ DISABLE_INTERRUPTS(CLBR_ANY) + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_nocheck + need_resched: + movl TI_flags(%ebp), %ecx # need_resched set ? + testb $_TIF_NEED_RESCHED, %cl + jz restore_all +- testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? ++ testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? + jz restore_all + call preempt_schedule_irq + jmp need_resched +@@ -328,7 +293,7 @@ sysenter_past_esp: + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ +- ENABLE_INTERRUPTS ++ ENABLE_INTERRUPTS(CLBR_NONE) + pushl $(__USER_DS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ss, 0*/ +@@ -340,12 +305,16 @@ sysenter_past_esp: + pushl $(__USER_CS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET cs, 0*/ ++#ifndef CONFIG_COMPAT_VDSO + /* + * Push current_thread_info()->sysenter_return to the stack. + * A tiny bit of offset fixup is necessary - 4*4 means the 4 words + * pushed above; +8 corresponds to copy_thread's esp0 setting. + */ + pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++#else ++ pushl $SYSENTER_RETURN ++#endif + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 + +@@ -372,19 +341,27 @@ sysenter_past_esp: + cmpl $(nr_syscalls), %eax + jae syscall_badsys + call *sys_call_table(,%eax,4) +- movl %eax,EAX(%esp) +- DISABLE_INTERRUPTS ++ movl %eax,PT_EAX(%esp) ++ DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx + jne syscall_exit_work + /* if something modifies registers it must also disable sysexit */ +- movl EIP(%esp), %edx +- movl OLDESP(%esp), %ecx ++ movl PT_EIP(%esp), %edx ++ movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp + TRACE_IRQS_ON ++1: mov PT_GS(%esp), %gs + ENABLE_INTERRUPTS_SYSEXIT + CFI_ENDPROC ++.pushsection .fixup,"ax" ++2: movl $0,PT_GS(%esp) ++ jmp 1b ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,2b ++.popsection + + # pv sysenter call handler stub + ENTRY(sysenter_entry_pv) +@@ -419,7 +396,7 @@ ENTRY(system_call) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + GET_THREAD_INFO(%ebp) +- testl $TF_MASK,EFLAGS(%esp) ++ testl $TF_MASK,PT_EFLAGS(%esp) + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) + no_singlestep: +@@ -431,9 +408,9 @@ no_singlestep: + jae syscall_badsys + syscall_call: + call *sys_call_table(,%eax,4) +- movl %eax,EAX(%esp) # store the return value ++ movl %eax,PT_EAX(%esp) # store the return value + syscall_exit: +- DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret + TRACE_IRQS_OFF +@@ -443,12 +420,12 @@ syscall_exit: + + restore_all: + #ifndef CONFIG_XEN +- movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS +- # Warning: OLDSS(%esp) contains the wrong/random values if we ++ movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS ++ # Warning: PT_OLDSS(%esp) contains the wrong/random values if we + # are returning to the kernel. + # See comments in process.c:copy_thread() for details. 
+- movb OLDSS(%esp), %ah +- movb CS(%esp), %al ++ movb PT_OLDSS(%esp), %ah ++ movb PT_CS(%esp), %al + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax + CFI_REMEMBER_STATE +@@ -456,7 +433,7 @@ restore_all: + restore_nocheck: + #else + restore_nocheck: +- movl EFLAGS(%esp), %eax ++ movl PT_EFLAGS(%esp), %eax + testl $(VM_MASK|NMI_MASK), %eax + CFI_REMEMBER_STATE + jnz hypervisor_iret +@@ -470,13 +447,13 @@ restore_nocheck: + TRACE_IRQS_IRET + restore_nocheck_notrace: + RESTORE_REGS +- addl $4, %esp ++ addl $4, %esp # skip orig_eax/error_code + CFI_ADJUST_CFA_OFFSET -4 + 1: INTERRUPT_RETURN + .section .fixup,"ax" + iret_exc: + #ifndef CONFIG_XEN +- ENABLE_INTERRUPTS ++ ENABLE_INTERRUPTS(CLBR_NONE) + #endif + pushl $0 # no error code + pushl $do_iret_error +@@ -490,33 +467,42 @@ iret_exc: + CFI_RESTORE_STATE + #ifndef CONFIG_XEN + ldt_ss: +- larl OLDSS(%esp), %eax ++ larl PT_OLDSS(%esp), %eax + jnz restore_nocheck + testl $0x00400000, %eax # returning to 32bit stack? + jnz restore_nocheck # allright, normal return ++ ++#ifdef CONFIG_PARAVIRT ++ /* ++ * The kernel can't run on a non-flat stack if paravirt mode ++ * is active. Rather than try to fixup the high bits of ++ * ESP, bypass this code entirely. This may break DOSemu ++ * and/or Wine support in a paravirt VM, although the option ++ * is still available to implement the setting of the high ++ * 16-bits in the INTERRUPT_RETURN paravirt-op. ++ */ ++ cmpl $0, paravirt_ops+PARAVIRT_enabled ++ jne restore_nocheck ++#endif ++ + /* If returning to userspace with 16bit stack, + * try to fix the higher word of ESP, as the CPU + * won't restore it. + * This is an "official" bug of all the x86-compatible + * CPUs, which we can try to work around to make + * dosemu and wine happy. */ +- subl $8, %esp # reserve space for switch16 pointer +- CFI_ADJUST_CFA_OFFSET 8 +- DISABLE_INTERRUPTS ++ movl PT_OLDESP(%esp), %eax ++ movl %esp, %edx ++ call patch_espfix_desc ++ pushl $__ESPFIX_SS ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ DISABLE_INTERRUPTS(CLBR_EAX) + TRACE_IRQS_OFF +- movl %esp, %eax +- /* Set up the 16bit stack frame with switch32 pointer on top, +- * and a switch16 pointer on top of the current frame. 
*/ +- call setup_x86_bogus_stack +- CFI_ADJUST_CFA_OFFSET -8 # frame has moved +- TRACE_IRQS_IRET +- RESTORE_REGS +- lss 20+4(%esp), %esp # switch to 16bit stack +-1: INTERRUPT_RETURN +-.section __ex_table,"a" +- .align 4 +- .long 1b,iret_exc +-.previous ++ lss (%esp), %esp ++ CFI_ADJUST_CFA_OFFSET -8 ++ jmp restore_nocheck + #else + ALIGN + restore_all_enable_events: +@@ -540,7 +526,7 @@ ecrit: /**** END OF CRITICAL REGION *** + + CFI_RESTORE_STATE + hypervisor_iret: +- andl $~NMI_MASK, EFLAGS(%esp) ++ andl $~NMI_MASK, PT_EFLAGS(%esp) + RESTORE_REGS + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 +@@ -556,7 +542,7 @@ work_pending: + jz work_notifysig + work_resched: + call schedule +- DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret + TRACE_IRQS_OFF +@@ -569,7 +555,8 @@ work_resched: + + work_notifysig: # deal with pending signals and + # notify-resume requests +- testl $VM_MASK, EFLAGS(%esp) ++#ifdef CONFIG_VM86 ++ testl $VM_MASK, PT_EFLAGS(%esp) + movl %esp, %eax + jne work_notifysig_v86 # returning to kernel-space or + # vm86-space +@@ -579,29 +566,30 @@ work_notifysig: # deal with pending s + + ALIGN + work_notifysig_v86: +-#ifdef CONFIG_VM86 + pushl %ecx # save ti_flags for do_notify_resume + CFI_ADJUST_CFA_OFFSET 4 + call save_v86_state # %eax contains pt_regs pointer + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + movl %eax, %esp ++#else ++ movl %esp, %eax ++#endif + xorl %edx, %edx + call do_notify_resume + jmp resume_userspace_sig +-#endif + + # perform syscall exit tracing + ALIGN + syscall_trace_entry: +- movl $-ENOSYS,EAX(%esp) ++ movl $-ENOSYS,PT_EAX(%esp) + movl %esp, %eax + xorl %edx,%edx + call do_syscall_trace + cmpl $0, %eax + jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, + # so must skip actual syscall +- movl ORIG_EAX(%esp), %eax ++ movl PT_ORIG_EAX(%esp), %eax + cmpl $(nr_syscalls), %eax + jnae syscall_call + jmp syscall_exit +@@ -612,7 +600,7 @@ syscall_exit_work: + testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl + jz work_pending + TRACE_IRQS_ON +- ENABLE_INTERRUPTS # could let do_syscall_trace() call ++ ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call + # schedule() instead + movl %esp, %eax + movl $1, %edx +@@ -626,40 +614,39 @@ syscall_fault: + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + GET_THREAD_INFO(%ebp) +- movl $-EFAULT,EAX(%esp) ++ movl $-EFAULT,PT_EAX(%esp) + jmp resume_userspace + + syscall_badsys: +- movl $-ENOSYS,EAX(%esp) ++ movl $-ENOSYS,PT_EAX(%esp) + jmp resume_userspace + CFI_ENDPROC + + #ifndef CONFIG_XEN + #define FIXUP_ESPFIX_STACK \ +- movl %esp, %eax; \ +- /* switch to 32bit stack using the pointer on top of 16bit stack */ \ +- lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ +- /* copy data from 16bit stack to 32bit stack */ \ +- call fixup_x86_bogus_stack; \ +- /* put ESP to the proper location */ \ +- movl %eax, %esp; +-#define UNWIND_ESPFIX_STACK \ ++ /* since we are on a wrong stack, we cant make it a C code :( */ \ ++ movl %gs:PDA_cpu, %ebx; \ ++ PER_CPU(cpu_gdt_descr, %ebx); \ ++ movl GDS_address(%ebx), %ebx; \ ++ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ ++ addl %esp, %eax; \ ++ pushl $__KERNEL_DS; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ + pushl %eax; \ + CFI_ADJUST_CFA_OFFSET 4; \ ++ lss (%esp), %esp; \ ++ CFI_ADJUST_CFA_OFFSET -8; ++#define UNWIND_ESPFIX_STACK \ + movl %ss, %eax; \ +- /* see if on 16bit stack */ \ ++ /* see if on espfix stack 
*/ \ + cmpw $__ESPFIX_SS, %ax; \ +- je 28f; \ +-27: popl %eax; \ +- CFI_ADJUST_CFA_OFFSET -4; \ +-.section .fixup,"ax"; \ +-28: movl $__KERNEL_DS, %eax; \ ++ jne 27f; \ ++ movl $__KERNEL_DS, %eax; \ + movl %eax, %ds; \ + movl %eax, %es; \ +- /* switch to 32bit stack */ \ ++ /* switch to normal stack */ \ + FIXUP_ESPFIX_STACK; \ +- jmp 27b; \ +-.previous ++27:; + + /* + * Build the entry stubs and pointer table with +@@ -723,13 +710,16 @@ KPROBE_ENTRY(page_fault) + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: ++ /* the function address is in %gs's slot on the stack */ ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 +- xorl %eax, %eax + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 +@@ -742,7 +732,6 @@ error_code: + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 +- decl %eax # eax = -1 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 +@@ -750,18 +739,20 @@ error_code: + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld +- pushl %es ++ pushl %gs + CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET es, 0*/ ++ /*CFI_REL_OFFSET gs, 0*/ ++ movl $(__KERNEL_PDA), %ecx ++ movl %ecx, %gs + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ +- movl ES(%esp), %edi # get the function address +- movl ORIG_EAX(%esp), %edx # get the error code +- movl %eax, ORIG_EAX(%esp) +- movl %ecx, ES(%esp) +- /*CFI_REL_OFFSET es, ES*/ ++ movl PT_GS(%esp), %edi # get the function address ++ movl PT_ORIG_EAX(%esp), %edx # get the error code ++ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart ++ mov %ecx, PT_GS(%esp) ++ /*CFI_REL_OFFSET gs, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es +@@ -793,8 +784,8 @@ ENTRY(hypervisor_callback) + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL +- movl CS(%esp),%ecx +- movl EIP(%esp),%eax ++ movl PT_CS(%esp),%ecx ++ movl PT_EIP(%esp),%eax + andl $SEGMENT_RPL_MASK,%ecx + cmpl $USER_RPL,%ecx + jae .Ldo_upcall +@@ -808,7 +799,7 @@ ENTRY(hypervisor_callback) + jb .Ldo_upcall + cmpl $sysexit_ecrit,%eax + ja .Ldo_upcall +- addl $OLDESP,%esp # Remove eflags...ebx from stack frame. ++ addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame. 
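
The wholesale rename running through these entry.S hunks (EAX -> PT_EAX, ORIG_EAX -> PT_ORIG_EAX, CS -> PT_CS, OLDESP -> PT_OLDESP, and the new PT_GS) tracks the pt_regs layout change in this series: a gs slot is added after xes, so every offset past the segment registers moves and the entry code switches to PT_-prefixed constants generated from the structure itself. The standalone C sketch below is illustrative only (the kernel derives these values in asm-offsets.c; struct pt_regs_sketch and the PT_* names here are stand-ins, not the kernel's own definitions), but it shows how such offsets fall out of the layout:

#include <stddef.h>
#include <stdio.h>

/* Layout mirroring the i386 struct pt_regs of this era, including the
 * new xgs slot this series introduces (hypothetical stand-in). */
struct pt_regs_sketch {
	long ebx, ecx, edx, esi, edi, ebp, eax;
	int xds, xes, xgs;	/* xgs: new slot for the PDA segment */
	long orig_eax, eip;
	int xcs;
	long eflags, esp;
	int xss;
};

/* asm-offsets style: emit one #define per field offset */
#define OFFSET(sym, str, mem) \
	printf("#define %-12s %zu\n", #sym, offsetof(struct str, mem))

int main(void)
{
	OFFSET(PT_EBX,      pt_regs_sketch, ebx);
	OFFSET(PT_EAX,      pt_regs_sketch, eax);
	OFFSET(PT_GS,       pt_regs_sketch, xgs);
	OFFSET(PT_ORIG_EAX, pt_regs_sketch, orig_eax);
	OFFSET(PT_EIP,      pt_regs_sketch, eip);
	OFFSET(PT_CS,       pt_regs_sketch, xcs);
	OFFSET(PT_EFLAGS,   pt_regs_sketch, eflags);
	OFFSET(PT_OLDESP,   pt_regs_sketch, esp);
	return 0;
}

With the extra xgs word between xes and orig_eax, every later offset shifts by four bytes, which is why the entry code cannot keep the old hard-coded names and must take them from generated constants.
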
+ #endif + .Ldo_upcall: + push %esp +@@ -830,7 +821,7 @@ critical_region_fixup: + movsbl critical_fixup_table-scrit(%eax),%ecx # %ecx contains num slots popped + testl %ecx,%ecx + leal (%esp,%ecx,4),%esi # %esi points at end of src region +- leal OLDESP(%esp),%edi # %edi points at end of dst region ++ leal PT_OLDESP(%esp),%edi # %edi points at end of dst region + jle 17f # skip loop if nothing to copy + 16: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi +@@ -853,8 +844,9 @@ critical_fixup_table: + .byte 6 # pop %eax + .byte 7 # pop %ds + .byte 8 # pop %es +- .byte 9,9,9 # add $4,%esp +- .byte 10 # iret ++ .byte 9,9 # pop %gs ++ .byte 10,10,10 # add $4,%esp ++ .byte 11 # iret + .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS + .previous + +@@ -944,7 +936,7 @@ ENTRY(device_not_available) + jmp ret_from_exception + device_available_emulate: + #endif +- preempt_stop ++ preempt_stop(CLBR_ANY) + call math_state_restore + jmp ret_from_exception + CFI_ENDPROC +@@ -1014,7 +1006,7 @@ KPROBE_ENTRY(nmi) + cmpw $__ESPFIX_SS, %ax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 +- je nmi_16bit_stack ++ je nmi_espfix_stack + cmpl $sysenter_entry,(%esp) + je nmi_stack_fixup + pushl %eax +@@ -1057,7 +1049,7 @@ nmi_debug_stack_check: + FIX_STACK(24,nmi_stack_correct, 1) + jmp nmi_stack_correct + +-nmi_16bit_stack: ++nmi_espfix_stack: + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back +@@ -1066,7 +1058,6 @@ nmi_16bit_stack: + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 +- movzwl %sp, %esp + addw $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 +@@ -1077,11 +1068,11 @@ nmi_16bit_stack: + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + FIXUP_ESPFIX_STACK # %eax == %esp +- CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved + xorl %edx,%edx # zero error code + call do_nmi + RESTORE_REGS +- lss 12+4(%esp), %esp # back to 16bit stack ++ lss 12+4(%esp), %esp # back to espfix stack ++ CFI_ADJUST_CFA_OFFSET -24 + 1: INTERRUPT_RETURN + CFI_ENDPROC + .section __ex_table,"a" +@@ -1097,12 +1088,25 @@ KPROBE_ENTRY(nmi) + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi +- orl $NMI_MASK, EFLAGS(%esp) ++ orl $NMI_MASK, PT_EFLAGS(%esp) + jmp restore_all + CFI_ENDPROC + #endif + KPROBE_END(nmi) + ++#ifdef CONFIG_PARAVIRT ++ENTRY(native_iret) ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++ ++ENTRY(native_irq_enable_sysexit) ++ sti ++ sysexit ++#endif ++ + KPROBE_ENTRY(int3) + RING0_INT_FRAME + pushl $-1 # mark this as an int +@@ -1218,37 +1222,6 @@ ENTRY(spurious_interrupt_bug) + CFI_ENDPROC + #endif /* !CONFIG_XEN */ + +-#ifdef CONFIG_STACK_UNWIND +-ENTRY(arch_unwind_init_running) +- CFI_STARTPROC +- movl 4(%esp), %edx +- movl (%esp), %ecx +- leal 4(%esp), %eax +- movl %ebx, EBX(%edx) +- xorl %ebx, %ebx +- movl %ebx, ECX(%edx) +- movl %ebx, EDX(%edx) +- movl %esi, ESI(%edx) +- movl %edi, EDI(%edx) +- movl %ebp, EBP(%edx) +- movl %ebx, EAX(%edx) +- movl $__USER_DS, DS(%edx) +- movl $__USER_DS, ES(%edx) +- movl %ebx, ORIG_EAX(%edx) +- movl %ecx, EIP(%edx) +- movl 12(%esp), %ecx +- movl $__KERNEL_CS, CS(%edx) +- movl %ebx, EFLAGS(%edx) +- movl %eax, OLDESP(%edx) +- movl 8(%esp), %eax +- movl %ecx, 8(%esp) +- movl EBX(%edx), %ebx +- movl $__KERNEL_DS, OLDSS(%edx) +- jmpl *%eax +- CFI_ENDPROC +-ENDPROC(arch_unwind_init_running) +-#endif +- + ENTRY(fixup_4gb_segment) + RING0_EC_FRAME + pushl $do_fixup_4gb_segment +--- head-2010-01-18.orig/arch/x86/kernel/head_32-xen.S 2009-11-06 10:45:48.000000000 +0100 ++++ 
head-2010-01-18/arch/x86/kernel/head_32-xen.S 2009-11-06 10:46:27.000000000 +0100 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -35,6 +36,8 @@ ENTRY(startup_32) + /* Set up the stack pointer */ + movl $(init_thread_union+THREAD_SIZE),%esp + ++ call setup_pda ++ + /* get vendor info */ + xorl %eax,%eax # call CPUID with 0 -> return vendor ID + XEN_CPUID +@@ -57,14 +60,58 @@ ENTRY(startup_32) + + movb $1,X86_HARD_MATH + +- xorl %eax,%eax # Clear FS/GS and LDT ++ xorl %eax,%eax # Clear FS + movl %eax,%fs +- movl %eax,%gs ++ ++ movl $(__KERNEL_PDA),%eax ++ mov %eax,%gs ++ + cld # gcc2 wants the direction flag cleared at all times + + pushl $0 # fake return address for unwinder + jmp start_kernel + ++/* ++ * Point the GDT at this CPU's PDA. This will be ++ * cpu_gdt_table and boot_pda. ++ */ ++setup_pda: ++ /* get the PDA pointer */ ++ movl $boot_pda, %eax ++ ++ /* slot the PDA address into the GDT */ ++ mov $cpu_gdt_table, %ecx ++ mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ ++ shr $16, %eax ++ mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ ++ mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ ++ ++ # %esi still points to start_info, and no registers ++ # need to be preserved. ++ ++ movl XEN_START_mfn_list(%esi), %ebx ++ movl $(cpu_gdt_table - __PAGE_OFFSET), %eax ++ shrl $PAGE_SHIFT, %eax ++ movl (%ebx,%eax,4), %ecx ++ pushl %ecx # frame number for set_gdt below ++ ++ xorl %esi, %esi ++ xorl %edx, %edx ++ shldl $PAGE_SHIFT, %ecx, %edx ++ shll $PAGE_SHIFT, %ecx ++ orl $0x61, %ecx ++ movl $cpu_gdt_table, %ebx ++ movl $__HYPERVISOR_update_va_mapping, %eax ++ int $0x82 ++ ++ movl $(PAGE_SIZE_asm / 8), %ecx ++ movl %esp, %ebx ++ movl $__HYPERVISOR_set_gdt, %eax ++ int $0x82 ++ ++ popl %ecx ++ ret ++ + #define HYPERCALL_PAGE_OFFSET 0x1000 + .org HYPERCALL_PAGE_OFFSET + ENTRY(hypercall_page) +@@ -93,7 +140,8 @@ ENTRY(empty_zero_page) + /* + * The Global Descriptor Table contains 28 quadwords, per-CPU. 
+ */ +- .align L1_CACHE_BYTES ++ .section .data.page_aligned, "aw" ++ .align PAGE_SIZE_asm + ENTRY(cpu_gdt_table) + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ +@@ -135,12 +183,13 @@ ENTRY(cpu_gdt_table) + .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x0000000000000000 /* 0xc8 APM DS data */ + +- .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ +- .quad 0x0000000000000000 /* 0xd8 - unused */ ++ .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */ ++ .quad 0x00cf92000000ffff /* 0xd8 - PDA */ + .quad 0x0000000000000000 /* 0xe0 - unused */ + .quad 0x0000000000000000 /* 0xe8 - unused */ + .quad 0x0000000000000000 /* 0xf0 - unused */ + .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ ++ .align PAGE_SIZE_asm + + #if CONFIG_XEN_COMPAT <= 0x030002 + /* +@@ -165,9 +214,9 @@ ENTRY(cpu_gdt_table) + .ascii ",ELF_PADDR_OFFSET=0x" + utoa __PAGE_OFFSET + .ascii ",VIRT_ENTRY=0x" +- utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) ++ utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET) + .ascii ",HYPERCALL_PAGE=0x" +- utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) ++ utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) + .ascii ",FEATURES=writable_page_tables" + .ascii "|writable_descriptor_tables" + .ascii "|auto_translated_physmap" +--- head-2010-01-18.orig/arch/x86/kernel/io_apic_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/io_apic_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -199,14 +200,20 @@ static struct IO_APIC_route_entry ioapic + * the interrupt, and we need to make sure the entry is fully populated + * before that happens. + */ +-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++static void ++__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) + { +- unsigned long flags; + union entry_union eu; + eu.entry = e; +- spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++} ++ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, e); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -889,8 +896,7 @@ static int __init find_isa_irq_pin(int i + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || +- mp_bus_id_to_type[lbus] == MP_BUS_MCA || +- mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA + ) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) +@@ -909,8 +915,7 @@ static int __init find_isa_irq_apic(int + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || +- mp_bus_id_to_type[lbus] == MP_BUS_MCA || +- mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA + ) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) +@@ -1043,12 +1048,6 @@ static int EISA_ELCR(unsigned int irq) + #define default_MCA_trigger(idx) (1) + #define default_MCA_polarity(idx) (0) + +-/* NEC98 interrupts are always polarity zero edge triggered, +- * when listed as conforming in the MP table. 
*/ +- +-#define default_NEC98_trigger(idx) (0) +-#define default_NEC98_polarity(idx) (0) +- + static int __init MPBIOS_polarity(int idx) + { + int bus = mp_irqs[idx].mpc_srcbus; +@@ -1083,11 +1082,6 @@ static int __init MPBIOS_polarity(int id + polarity = default_MCA_polarity(idx); + break; + } +- case MP_BUS_NEC98: /* NEC 98 pin */ +- { +- polarity = default_NEC98_polarity(idx); +- break; +- } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); +@@ -1157,11 +1151,6 @@ static int MPBIOS_trigger(int idx) + trigger = default_MCA_trigger(idx); + break; + } +- case MP_BUS_NEC98: /* NEC 98 pin */ +- { +- trigger = default_NEC98_trigger(idx); +- break; +- } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); +@@ -1223,7 +1212,6 @@ static int pin_2_irq(int idx, int apic, + case MP_BUS_ISA: /* ISA pin */ + case MP_BUS_EISA: + case MP_BUS_MCA: +- case MP_BUS_NEC98: + { + irq = mp_irqs[idx].mpc_srcbusirq; + break; +@@ -1291,7 +1279,7 @@ static inline int IO_APIC_irq_trigger(in + } + + /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ +-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ ++static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ + + static int __assign_irq_vector(int irq) + { +@@ -1417,8 +1405,8 @@ static void __init setup_IO_APIC_irqs(vo + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } +- ioapic_write_entry(apic, pin, entry); + spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, entry); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + } +@@ -1988,6 +1976,15 @@ static void __init setup_ioapic_ids_from + #endif + + #ifndef CONFIG_XEN ++static int no_timer_check __initdata; ++ ++static int __init notimercheck(char *s) ++{ ++ no_timer_check = 1; ++ return 1; ++} ++__setup("no_timer_check", notimercheck); ++ + /* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. We do the following to work around the situation: +@@ -1996,10 +1993,13 @@ static void __init setup_ioapic_ids_from + * - if this function detects that timer IRQs are defunct, then we fall + * back to ISA timer IRQs + */ +-static int __init timer_irq_works(void) ++int __init timer_irq_works(void) + { + unsigned long t1 = jiffies; + ++ if (no_timer_check) ++ return 1; ++ + local_irq_enable(); + /* Let ten ticks pass... */ + mdelay((10 * 1000) / HZ); +@@ -2226,9 +2226,15 @@ static inline void unlock_ExtINT_logic(v + unsigned char save_control, save_freq_select; + + pin = find_isa_irq_pin(8, mp_INT); ++ if (pin == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } + apic = find_isa_irq_apic(8, mp_INT); +- if (pin == -1) ++ if (apic == -1) { ++ WARN_ON_ONCE(1); + return; ++ } + + entry0 = ioapic_read_entry(apic, pin); + clear_IO_APIC_pin(apic, pin); +@@ -2273,7 +2279,7 @@ int timer_uses_ioapic_pin_0; + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast + * fanatically on his truly buggy board. 
+ */ +-static inline void check_timer(void) ++static inline void __init check_timer(void) + { + int apic1, pin1, apic2, pin2; + int vector; +@@ -2558,7 +2564,7 @@ device_initcall(ioapic_init_sysfs); + int create_irq(void) + { + /* Allocate an unused irq */ +- int irq, new, vector; ++ int irq, new, vector = 0; + unsigned long flags; + + irq = -ENOSPC; +@@ -2939,8 +2945,8 @@ int io_apic_set_pci_routing (int ioapic, + if (!ioapic && (irq < 16)) + disable_8259A_irq(irq); + +- ioapic_write_entry(ioapic, pin, entry); + spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(ioapic, pin, entry); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + +--- head-2010-01-18.orig/arch/x86/kernel/ldt_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/ldt_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -177,16 +177,14 @@ static int read_default_ldt(void __user + { + int err; + unsigned long size; +- void *address; + + err = 0; +- address = &default_ldt[0]; + size = 5*sizeof(struct desc_struct); + if (size > bytecount) + size = bytecount; + + err = size; +- if (copy_to_user(ptr, address, size)) ++ if (clear_user(ptr, size)) + err = -EFAULT; + + return err; +--- head-2010-01-18.orig/arch/x86/kernel/microcode-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/microcode-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -1,7 +1,7 @@ + /* + * Intel CPU Microcode Update Driver for Linux + * +- * Copyright (C) 2000-2004 Tigran Aivazian ++ * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors +@@ -43,7 +43,7 @@ + #include + + MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +-MODULE_AUTHOR("Tigran Aivazian "); ++MODULE_AUTHOR("Tigran Aivazian "); + MODULE_LICENSE("GPL"); + + static int verbose; +@@ -195,7 +195,7 @@ static int __init microcode_init (void) + request_microcode(); + + printk(KERN_INFO +- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); ++ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + return 0; + } + +--- head-2010-01-18.orig/arch/x86/kernel/mpparse_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/mpparse_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -36,7 +36,7 @@ + + /* Have we found an MP table */ + int smp_found_config; +-unsigned int __initdata maxcpus = NR_CPUS; ++unsigned int __cpuinitdata maxcpus = NR_CPUS; + + /* + * Various Linux-internal data structures created from the +@@ -102,10 +102,10 @@ static int __init mpf_checksum(unsigned + */ + + static int mpc_record; +-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; ++static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; + + #ifndef CONFIG_XEN +-static void __devinit MP_processor_info (struct mpc_config_processor *m) ++static void __cpuinit MP_processor_info (struct mpc_config_processor *m) + { + int ver, apicid; + physid_mask_t phys_cpu; +@@ -221,7 +221,7 @@ static void __devinit MP_processor_info + bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; + } + #else +-void __init MP_processor_info (struct mpc_config_processor *m) ++static void __cpuinit MP_processor_info (struct mpc_config_processor *m) + { + num_processors++; + } +@@ -256,8 +256,6 @@ static void __init MP_bus_info (struct m + mp_current_pci_id++; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = 
MP_BUS_MCA; +- } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; + } else { + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); + } +@@ -842,7 +840,7 @@ void __init mp_register_lapic_address(u6 + #endif + } + +-void __devinit mp_register_lapic (u8 id, u8 enabled) ++void __cpuinit mp_register_lapic (u8 id, u8 enabled) + { + struct mpc_config_processor processor; + int boot_cpu = 0; +--- head-2010-01-18.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -273,7 +273,7 @@ EXPORT_SYMBOL(dma_free_coherent); + int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) + { +- void __iomem *mem_base; ++ void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = (pages + 31)/32; + +@@ -290,14 +290,12 @@ int dma_declare_coherent_memory(struct d + if (!mem_base) + goto out; + +- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); ++ dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; +- memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); +- dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); ++ dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; +- memset(dev->dma_mem->bitmap, 0, bitmap_size); + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; +@@ -312,6 +310,8 @@ int dma_declare_coherent_memory(struct d + free1_out: + kfree(dev->dma_mem->bitmap); + out: ++ if (mem_base) ++ iounmap(mem_base); + return 0; + } + EXPORT_SYMBOL(dma_declare_coherent_memory); +--- head-2010-01-18.orig/arch/x86/kernel/process_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -60,6 +60,7 @@ + + #include + #include ++#include + + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + +@@ -104,28 +105,24 @@ EXPORT_SYMBOL(enable_hlt); + */ + static void poll_idle (void) + { +- local_irq_enable(); +- +- asm volatile( +- "2:" +- "testl %0, %1;" +- "rep; nop;" +- "je 2b;" +- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); ++ cpu_relax(); + } + + static void xen_idle(void) + { +- local_irq_disable(); ++ current_thread_info()->status &= ~TS_POLLING; ++ /* ++ * TS_POLLING-cleared state must be visible before we ++ * test NEED_RESCHED: ++ */ ++ smp_mb(); + +- if (need_resched()) ++ local_irq_disable(); ++ if (!need_resched()) ++ safe_halt(); /* enables interrupts racelessly */ ++ else + local_irq_enable(); +- else { +- current_thread_info()->status &= ~TS_POLLING; +- smp_mb__after_clear_bit(); +- safe_halt(); +- current_thread_info()->status |= TS_POLLING; +- } ++ current_thread_info()->status |= TS_POLLING; + } + #ifdef CONFIG_APM_MODULE + EXPORT_SYMBOL(default_idle); +@@ -250,8 +247,8 @@ void show_regs(struct pt_regs * regs) + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); +- printk(" DS: %04x ES: %04x\n", +- 0xffff & regs->xds,0xffff & regs->xes); ++ printk(" DS: %04x ES: %04x GS: %04x\n", ++ 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); + + cr0 = read_cr0(); + cr2 = read_cr2(); +@@ -282,6 +279,7 @@ int kernel_thread(int (*fn)(void *), voi + + regs.xds = __USER_DS; + regs.xes = __USER_DS; ++ 
regs.xgs = __KERNEL_PDA; + regs.orig_eax = -1; + regs.eip = (unsigned long) kernel_thread_helper; + regs.xcs = __KERNEL_CS | get_kernel_rpl(); +@@ -359,7 +357,6 @@ int copy_thread(int nr, unsigned long cl + p->thread.eip = (unsigned long) ret_from_fork; + + savesegment(fs,p->thread.fs); +- savesegment(gs,p->thread.gs); + + tsk = current; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { +@@ -438,7 +435,7 @@ void dump_thread(struct pt_regs * regs, + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; + savesegment(fs,dump->regs.fs); +- savesegment(gs,dump->regs.gs); ++ dump->regs.gs = regs->xgs; + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; +@@ -635,17 +632,19 @@ struct task_struct fastcall * __switch_t + if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) + BUG(); + ++ /* we're going to use this soon, after a few expensive things */ ++ if (next_p->fpu_counter > 5) ++ prefetch(&next->i387.fxsave); ++ + /* +- * Restore %fs and %gs if needed. ++ * Restore %fs if needed. + * +- * Glibc normally makes %fs be zero, and %gs is one of +- * the TLS segments. ++ * Glibc normally makes %fs be zero. + */ + if (unlikely(next->fs)) + loadsegment(fs, next->fs); + +- if (next->gs) +- loadsegment(gs, next->gs); ++ write_pda(pcurrent, next_p); + + /* + * Now maybe handle debug registers +@@ -655,6 +654,13 @@ struct task_struct fastcall * __switch_t + + disable_tsc(prev_p, next_p); + ++ /* If the task has used fpu the last 5 timeslices, just do a full ++ * restore of the math state immediately to avoid the trap; the ++ * chances of needing FPU soon are obviously high now ++ */ ++ if (next_p->fpu_counter > 5) ++ math_state_restore(); ++ + return prev_p; + } + +--- head-2010-01-18.orig/arch/x86/kernel/quirks-xen.c 2008-01-28 12:24:19.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/quirks-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -3,10 +3,12 @@ + */ + #include + #include ++#include ++#include ++#include + + #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) +- +-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) ++static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) + { + u8 config, rev; + u32 word; +@@ -14,14 +16,12 @@ static void __devinit quirk_intel_irqbal + /* BIOS may enable hardware IRQ balancing for + * E7520/E7320/E7525(revision ID 0x9 and below) + * based platforms. +- * Disable SW irqbalance/affinity on those platforms. ++ * For those platforms, make sure that the genapic is set to 'flat' + */ + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev > 0x9) + return; + +- printk(KERN_INFO "Intel E7520/7320/7525 detected."); +- + /* enable access to config space*/ + pci_read_config_byte(dev, 0xf4, &config); + pci_write_config_byte(dev, 0xf4, config|0x2); +@@ -30,6 +30,46 @@ static void __devinit quirk_intel_irqbal + raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); + + if (!(word & (1 << 13))) { ++#ifndef CONFIG_XEN ++#ifdef CONFIG_X86_64 ++ if (genapic != &apic_flat) ++ panic("APIC mode must be flat on this system\n"); ++#elif defined(CONFIG_X86_GENERICARCH) ++ if (genapic != &apic_default) ++ panic("APIC mode must be default(flat) on this system. 
Use apic=default\n"); ++#endif ++#endif ++ } ++ ++ /* put back the original value for config space*/ ++ if (!(config & 0x2)) ++ pci_write_config_byte(dev, 0xf4, config); ++} ++ ++void __init quirk_intel_irqbalance(void) ++{ ++ u8 config, rev; ++ u32 word; ++ ++ /* BIOS may enable hardware IRQ balancing for ++ * E7520/E7320/E7525(revision ID 0x9 and below) ++ * based platforms. ++ * Disable SW irqbalance/affinity on those platforms. ++ */ ++ rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); ++ if (rev > 0x9) ++ return; ++ ++ printk(KERN_INFO "Intel E7520/7320/7525 detected."); ++ ++ /* enable access to config space */ ++ config = read_pci_config_byte(0, 0, 0, 0xf4); ++ write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); ++ ++ /* read xTPR register */ ++ word = read_pci_config_16(0, 0, 0x40, 0x4c); ++ ++ if (!(word & (1 << 13))) { + struct xen_platform_op op; + printk(KERN_INFO "Disabling irq balancing and affinity\n"); + op.cmd = XENPF_platform_quirk; +@@ -37,11 +77,12 @@ static void __devinit quirk_intel_irqbal + WARN_ON(HYPERVISOR_platform_op(&op)); + } + +- /* put back the original value for config space*/ ++ /* put back the original value for config space */ + if (!(config & 0x2)) +- pci_write_config_byte(dev, 0xf4, config); ++ write_pci_config_byte(0, 0, 0, 0xf4, config); + } +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); ++ + #endif +--- head-2010-01-18.orig/arch/x86/kernel/setup_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -76,9 +76,6 @@ + #include + #endif + +-/* Forward Declaration. */ +-void __init find_max_pfn(void); +- + static int xen_panic_event(struct notifier_block *, unsigned long, void *); + static struct notifier_block xen_panic_block = { + xen_panic_event, NULL, 0 /* try to go last */ +@@ -89,14 +86,11 @@ int disable_pse __devinitdata = 0; + /* + * Machine setup.. 
+ */ +- +-#ifdef CONFIG_EFI +-int efi_enabled = 0; +-EXPORT_SYMBOL(efi_enabled); +-#endif ++extern struct resource code_resource; ++extern struct resource data_resource; + + /* cpu data as detected by the assembly code in head.S */ +-struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; ++struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + /* common cpu data for all cpus */ + struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + EXPORT_SYMBOL(boot_cpu_data); +@@ -112,12 +106,6 @@ unsigned int machine_submodel_id; + unsigned int BIOS_revision; + unsigned int mca_pentium_flag; + +-/* For PCI or other memory-mapped resources */ +-unsigned long pci_mem_start = 0x10000000; +-#ifdef CONFIG_PCI +-EXPORT_SYMBOL(pci_mem_start); +-#endif +- + /* Boot loader ID as an integer, for the benefit of proc_dointvec */ + int bootloader_type; + +@@ -150,10 +138,6 @@ struct ist_info ist_info; + defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) + EXPORT_SYMBOL(ist_info); + #endif +-struct e820map e820; +-#ifdef CONFIG_XEN +-struct e820map machine_e820; +-#endif + + extern void early_cpu_init(void); + extern int root_mountflags; +@@ -168,209 +152,6 @@ static char command_line[COMMAND_LINE_SI + + unsigned char __initdata boot_params[PARAM_SIZE]; + +-static struct resource data_resource = { +- .name = "Kernel data", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_MEM +-}; +- +-static struct resource code_resource = { +- .name = "Kernel code", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_MEM +-}; +- +-static struct resource system_rom_resource = { +- .name = "System ROM", +- .start = 0xf0000, +- .end = 0xfffff, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}; +- +-static struct resource extension_rom_resource = { +- .name = "Extension ROM", +- .start = 0xe0000, +- .end = 0xeffff, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}; +- +-static struct resource adapter_rom_resources[] = { { +- .name = "Adapter ROM", +- .start = 0xc8000, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}, { +- .name = "Adapter ROM", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}, { +- .name = "Adapter ROM", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}, { +- .name = "Adapter ROM", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}, { +- .name = "Adapter ROM", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}, { +- .name = "Adapter ROM", +- .start = 0, +- .end = 0, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-} }; +- +-static struct resource video_rom_resource = { +- .name = "Video ROM", +- .start = 0xc0000, +- .end = 0xc7fff, +- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +-}; +- +-static struct resource video_ram_resource = { +- .name = "Video RAM area", +- .start = 0xa0000, +- .end = 0xbffff, +- .flags = IORESOURCE_BUSY | IORESOURCE_MEM +-}; +- +-static struct resource standard_io_resources[] = { { +- .name = "dma1", +- .start = 0x0000, +- .end = 0x001f, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "pic1", +- .start = 0x0020, +- .end = 0x0021, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "timer0", +- .start = 0x0040, +- .end = 0x0043, +- .flags = IORESOURCE_BUSY | 
IORESOURCE_IO +-}, { +- .name = "timer1", +- .start = 0x0050, +- .end = 0x0053, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "keyboard", +- .start = 0x0060, +- .end = 0x006f, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "dma page reg", +- .start = 0x0080, +- .end = 0x008f, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "pic2", +- .start = 0x00a0, +- .end = 0x00a1, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "dma2", +- .start = 0x00c0, +- .end = 0x00df, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-}, { +- .name = "fpu", +- .start = 0x00f0, +- .end = 0x00ff, +- .flags = IORESOURCE_BUSY | IORESOURCE_IO +-} }; +- +-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) +- +-static int __init romchecksum(unsigned char *rom, unsigned long length) +-{ +- unsigned char *p, sum = 0; +- +- for (p = rom; p < rom + length; p++) +- sum += *p; +- return sum == 0; +-} +- +-static void __init probe_roms(void) +-{ +- unsigned long start, length, upper; +- unsigned char *rom; +- int i; +- +-#ifdef CONFIG_XEN +- /* Nothing to do if not running in dom0. */ +- if (!is_initial_xendomain()) +- return; +-#endif +- +- /* video rom */ +- upper = adapter_rom_resources[0].start; +- for (start = video_rom_resource.start; start < upper; start += 2048) { +- rom = isa_bus_to_virt(start); +- if (!romsignature(rom)) +- continue; +- +- video_rom_resource.start = start; +- +- /* 0 < length <= 0x7f * 512, historically */ +- length = rom[2] * 512; +- +- /* if checksum okay, trust length byte */ +- if (length && romchecksum(rom, length)) +- video_rom_resource.end = start + length - 1; +- +- request_resource(&iomem_resource, &video_rom_resource); +- break; +- } +- +- start = (video_rom_resource.end + 1 + 2047) & ~2047UL; +- if (start < upper) +- start = upper; +- +- /* system rom */ +- request_resource(&iomem_resource, &system_rom_resource); +- upper = system_rom_resource.start; +- +- /* check for extension rom (ignore length byte!) */ +- rom = isa_bus_to_virt(extension_rom_resource.start); +- if (romsignature(rom)) { +- length = extension_rom_resource.end - extension_rom_resource.start + 1; +- if (romchecksum(rom, length)) { +- request_resource(&iomem_resource, &extension_rom_resource); +- upper = extension_rom_resource.start; +- } +- } +- +- /* check for adapter roms on 2k boundaries */ +- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { +- rom = isa_bus_to_virt(start); +- if (!romsignature(rom)) +- continue; +- +- /* 0 < length <= 0x7f * 512, historically */ +- length = rom[2] * 512; +- +- /* but accept any length that fits if checksum okay */ +- if (!length || start + length > upper || !romchecksum(rom, length)) +- continue; +- +- adapter_rom_resources[i].start = start; +- adapter_rom_resources[i].end = start + length - 1; +- request_resource(&iomem_resource, &adapter_rom_resources[i]); +- +- start = adapter_rom_resources[i++].end & ~2047UL; +- } +-} +- + /* + * Point at the empty zero page to start with. We map the real shared_info + * page as soon as fixmap is up and running. +@@ -386,353 +167,6 @@ EXPORT_SYMBOL(phys_to_machine_mapping); + start_info_t *xen_start_info; + EXPORT_SYMBOL(xen_start_info); + +-void __init add_memory_region(unsigned long long start, +- unsigned long long size, int type) +-{ +- int x; +- +- if (!efi_enabled) { +- x = e820.nr_map; +- +- if (x == E820MAX) { +- printk(KERN_ERR "Ooops! 
Too many entries in the memory map!\n"); +- return; +- } +- +- e820.map[x].addr = start; +- e820.map[x].size = size; +- e820.map[x].type = type; +- e820.nr_map++; +- } +-} /* add_memory_region */ +- +-static void __init limit_regions(unsigned long long size) +-{ +- unsigned long long current_addr = 0; +- int i; +- +- if (efi_enabled) { +- efi_memory_desc_t *md; +- void *p; +- +- for (p = memmap.map, i = 0; p < memmap.map_end; +- p += memmap.desc_size, i++) { +- md = p; +- current_addr = md->phys_addr + (md->num_pages << 12); +- if (md->type == EFI_CONVENTIONAL_MEMORY) { +- if (current_addr >= size) { +- md->num_pages -= +- (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); +- memmap.nr_map = i + 1; +- return; +- } +- } +- } +- } +- for (i = 0; i < e820.nr_map; i++) { +- current_addr = e820.map[i].addr + e820.map[i].size; +- if (current_addr < size) +- continue; +- +- if (e820.map[i].type != E820_RAM) +- continue; +- +- if (e820.map[i].addr >= size) { +- /* +- * This region starts past the end of the +- * requested size, skip it completely. +- */ +- e820.nr_map = i; +- } else { +- e820.nr_map = i + 1; +- e820.map[i].size -= current_addr - size; +- } +- return; +- } +-#ifdef CONFIG_XEN +- if (i==e820.nr_map && current_addr < size) { +- /* +- * The e820 map finished before our requested size so +- * extend the final entry to the requested address. +- */ +- --i; +- if (e820.map[i].type == E820_RAM) +- e820.map[i].size -= current_addr - size; +- else +- add_memory_region(current_addr, size - current_addr, E820_RAM); +- } +-#endif +-} +- +-#define E820_DEBUG 1 +- +-static void __init print_memory_map(char *who) +-{ +- int i; +- +- for (i = 0; i < e820.nr_map; i++) { +- printk(" %s: %016Lx - %016Lx ", who, +- e820.map[i].addr, +- e820.map[i].addr + e820.map[i].size); +- switch (e820.map[i].type) { +- case E820_RAM: printk("(usable)\n"); +- break; +- case E820_RESERVED: +- printk("(reserved)\n"); +- break; +- case E820_ACPI: +- printk("(ACPI data)\n"); +- break; +- case E820_NVS: +- printk("(ACPI NVS)\n"); +- break; +- default: printk("type %lu\n", e820.map[i].type); +- break; +- } +- } +-} +- +-/* +- * Sanitize the BIOS e820 map. +- * +- * Some e820 responses include overlapping entries. The following +- * replaces the original e820 map with a new one, removing overlaps. +- * +- */ +-struct change_member { +- struct e820entry *pbios; /* pointer to original bios entry */ +- unsigned long long addr; /* address for this change point */ +-}; +-static struct change_member change_point_list[2*E820MAX] __initdata; +-static struct change_member *change_point[2*E820MAX] __initdata; +-static struct e820entry *overlap_list[E820MAX] __initdata; +-static struct e820entry new_bios[E820MAX] __initdata; +- +-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +-{ +- struct change_member *change_tmp; +- unsigned long current_type, last_type; +- unsigned long long last_addr; +- int chgidx, still_changing; +- int overlap_entries; +- int new_bios_entry; +- int old_nr, new_nr, chg_nr; +- int i; +- +- /* +- Visually we're performing the following (1,2,3,4 = memory types)... 
+-
+- Sample memory map (w/overlaps):
+- ____22__________________
+- ______________________4_
+- ____1111________________
+- _44_____________________
+- 11111111________________
+- ____________________33__
+- ___________44___________
+- __________33333_________
+- ______________22________
+- ___________________2222_
+- _________111111111______
+- _____________________11_
+- _________________4______
+-
+- Sanitized equivalent (no overlap):
+- 1_______________________
+- _44_____________________
+- ___1____________________
+- ____22__________________
+- ______11________________
+- _________1______________
+- __________3_____________
+- ___________44___________
+- _____________33_________
+- _______________2________
+- ________________1_______
+- _________________4______
+- ___________________2____
+- ____________________33__
+- ______________________4_
+- */
+-
+- /* if there's only one memory region, don't bother */
+- if (*pnr_map < 2)
+- return -1;
+-
+- old_nr = *pnr_map;
+-
+- /* bail out if we find any unreasonable addresses in bios map */
+- for (i=0; i<old_nr; i++)
+- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+- return -1;
+-
+- /* create pointers for initial change-point information (for sorting) */
+- for (i=0; i < 2*old_nr; i++)
+- change_point[i] = &change_point_list[i];
+-
+- /* record all known change-points (starting and ending addresses),
+- omitting those that are for empty memory regions */
+- chgidx = 0;
+- for (i=0; i < old_nr; i++) {
+- if (biosmap[i].size != 0) {
+- change_point[chgidx]->addr = biosmap[i].addr;
+- change_point[chgidx++]->pbios = &biosmap[i];
+- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+- change_point[chgidx++]->pbios = &biosmap[i];
+- }
+- }
+- chg_nr = chgidx; /* true number of change-points */
+-
+- /* sort change-point list by memory addresses (low -> high) */
+- still_changing = 1;
+- while (still_changing) {
+- still_changing = 0;
+- for (i=1; i < chg_nr; i++) {
+- /* if <current_addr> > <last_addr>, swap */
+- /* or, if current=<end_addr> & last=<start_addr>, swap */
+- if ((change_point[i]->addr < change_point[i-1]->addr) ||
+- ((change_point[i]->addr == change_point[i-1]->addr) &&
+- (change_point[i]->addr == change_point[i]->pbios->addr) &&
+- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+- )
+- {
+- change_tmp = change_point[i];
+- change_point[i] = change_point[i-1];
+- change_point[i-1] = change_tmp;
+- still_changing=1;
+- }
+- }
+- }
+-
+- /* create a new bios memory map, removing overlaps */
+- overlap_entries=0; /* number of entries in the overlap table */
+- new_bios_entry=0; /* index for creating new bios map entries */
+- last_type = 0; /* start with undefined memory type */
+- last_addr = 0; /* start with 0 as last starting address */
+- /* loop through change-points, determining affect on the new bios map */
+- for (chgidx=0; chgidx < chg_nr; chgidx++)
+- {
+- /* keep track of all overlapping bios entries */
+- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+- {
+- /* add map entry to overlap list (> 1 entry implies an overlap) */
+- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+- }
+- else
+- {
+- /* remove entry from list (order independent, so swap with last) */
+- for (i=0; i<overlap_entries; i++)
+- {
+- if (overlap_list[i] == change_point[chgidx]->pbios)
+- overlap_list[i] = overlap_list[overlap_entries-1];
+- }
+- overlap_entries--;
+- }
+- /* if there are overlapping entries, decide which "type" to use */
+- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+- current_type = 0;
+- for (i=0; i<overlap_entries; i++)
+- if (overlap_list[i]->type > current_type)
+- current_type = overlap_list[i]->type;
+- /* continue building up new bios map based on this information */
+- if (current_type != last_type) {
+- if (last_type != 0) {
+- new_bios[new_bios_entry].size =
+- change_point[chgidx]->addr - last_addr;
+- /* move forward only if the new size was non-zero */
+- if (new_bios[new_bios_entry].size != 0)
+- if (++new_bios_entry >= E820MAX)
+- break; /* no more space left for new bios entries */
+- }
+- if (current_type != 0) {
+-
new_bios[new_bios_entry].addr = change_point[chgidx]->addr; +- new_bios[new_bios_entry].type = current_type; +- last_addr=change_point[chgidx]->addr; +- } +- last_type = current_type; +- } +- } +- new_nr = new_bios_entry; /* retain count for new bios entries */ +- +- /* copy new bios mapping into original location */ +- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); +- *pnr_map = new_nr; +- +- return 0; +-} +- +-/* +- * Copy the BIOS e820 map into a safe place. +- * +- * Sanity-check it while we're at it.. +- * +- * If we're lucky and live on a modern system, the setup code +- * will have given us a memory map that we can use to properly +- * set up memory. If we aren't, we'll fake a memory map. +- * +- * We check to see that the memory map contains at least 2 elements +- * before we'll use it, because the detection code in setup.S may +- * not be perfect and most every PC known to man has two memory +- * regions: one from 0 to 640k, and one from 1mb up. (The IBM +- * thinkpad 560x, for example, does not cooperate with the memory +- * detection code.) +- */ +-int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +-{ +-#ifndef CONFIG_XEN +- /* Only one memory region (or negative)? Ignore it */ +- if (nr_map < 2) +- return -1; +-#else +- BUG_ON(nr_map < 1); +-#endif +- +- do { +- unsigned long long start = biosmap->addr; +- unsigned long long size = biosmap->size; +- unsigned long long end = start + size; +- unsigned long type = biosmap->type; +- +- /* Overflow in 64 bits? Ignore the memory map. */ +- if (start > end) +- return -1; +- +-#ifndef CONFIG_XEN +- /* +- * Some BIOSes claim RAM in the 640k - 1M region. +- * Not right. Fix it up. +- */ +- if (type == E820_RAM) { +- if (start < 0x100000ULL && end > 0xA0000ULL) { +- if (start < 0xA0000ULL) +- add_memory_region(start, 0xA0000ULL-start, type); +- if (end <= 0x100000ULL) +- continue; +- start = 0x100000ULL; +- size = end - start; +- } +- } +-#endif +- add_memory_region(start, size, type); +- } while (biosmap++,--nr_map); +- +-#ifdef CONFIG_XEN +- if (is_initial_xendomain()) { +- struct xen_memory_map memmap; +- +- memmap.nr_entries = E820MAX; +- set_xen_guest_handle(memmap.buffer, machine_e820.map); +- +- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) +- BUG(); +- machine_e820.nr_map = memmap.nr_entries; +- } else +- machine_e820 = e820; +-#endif +- +- return 0; +-} +- + #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + struct edd edd; + #ifdef CONFIG_EDD_MODULE +@@ -758,7 +192,7 @@ static inline void copy_edd(void) + } + #endif + +-static int __initdata user_defined_memmap = 0; ++int __initdata user_defined_memmap = 0; + + /* + * "mem=nopentium" disables the 4MB page tables. +@@ -795,51 +229,6 @@ static int __init parse_mem(char *arg) + } + early_param("mem", parse_mem); + +-static int __init parse_memmap(char *arg) +-{ +- if (!arg) +- return -EINVAL; +- +- if (strcmp(arg, "exactmap") == 0) { +-#ifdef CONFIG_CRASH_DUMP +- /* If we are doing a crash dump, we +- * still need to know the real mem +- * size before original memory map is +- * reset. +- */ +- find_max_pfn(); +- saved_max_pfn = max_pfn; +-#endif +- e820.nr_map = 0; +- user_defined_memmap = 1; +- } else { +- /* If the user specifies memory size, we +- * limit the BIOS-provided memory map to +- * that size. exactmap can be used to specify +- * the exact map. mem=number can be used to +- * trim the existing memory map. 
+- */ +- unsigned long long start_at, mem_size; +- +- mem_size = memparse(arg, &arg); +- if (*arg == '@') { +- start_at = memparse(arg+1, &arg); +- add_memory_region(start_at, mem_size, E820_RAM); +- } else if (*arg == '#') { +- start_at = memparse(arg+1, &arg); +- add_memory_region(start_at, mem_size, E820_ACPI); +- } else if (*arg == '$') { +- start_at = memparse(arg+1, &arg); +- add_memory_region(start_at, mem_size, E820_RESERVED); +- } else { +- limit_regions(mem_size); +- user_defined_memmap = 1; +- } +- } +- return 0; +-} +-early_param("memmap", parse_memmap); +- + #ifdef CONFIG_PROC_VMCORE + /* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. +@@ -906,127 +295,6 @@ early_param("reservetop", parse_reservet + #endif + + /* +- * Callback for efi_memory_walk. +- */ +-static int __init +-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) +-{ +- unsigned long *max_pfn = arg, pfn; +- +- if (start < end) { +- pfn = PFN_UP(end -1); +- if (pfn > *max_pfn) +- *max_pfn = pfn; +- } +- return 0; +-} +- +-static int __init +-efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +-{ +- memory_present(0, PFN_UP(start), PFN_DOWN(end)); +- return 0; +-} +- +-/* +- * This function checks if any part of the range is mapped +- * with type. +- */ +-int +-e820_any_mapped(u64 start, u64 end, unsigned type) +-{ +- int i; +- +-#ifndef CONFIG_XEN +- for (i = 0; i < e820.nr_map; i++) { +- const struct e820entry *ei = &e820.map[i]; +-#else +- if (!is_initial_xendomain()) +- return 0; +- for (i = 0; i < machine_e820.nr_map; ++i) { +- const struct e820entry *ei = &machine_e820.map[i]; +-#endif +- +- if (type && ei->type != type) +- continue; +- if (ei->addr >= end || ei->addr + ei->size <= start) +- continue; +- return 1; +- } +- return 0; +-} +-EXPORT_SYMBOL_GPL(e820_any_mapped); +- +- /* +- * This function checks if the entire range is mapped with type. +- * +- * Note: this function only works correct if the e820 table is sorted and +- * not-overlapping, which is the case +- */ +-int __init +-e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +-{ +- u64 start = s; +- u64 end = e; +- int i; +- +-#ifndef CONFIG_XEN +- for (i = 0; i < e820.nr_map; i++) { +- struct e820entry *ei = &e820.map[i]; +-#else +- if (!is_initial_xendomain()) +- return 0; +- for (i = 0; i < machine_e820.nr_map; ++i) { +- const struct e820entry *ei = &machine_e820.map[i]; +-#endif +- if (type && ei->type != type) +- continue; +- /* is the region (part) in overlap with the current region ?*/ +- if (ei->addr >= end || ei->addr + ei->size <= start) +- continue; +- /* if the region is at the beginning of we move +- * start to the end of the region since it's ok until there +- */ +- if (ei->addr <= start) +- start = ei->addr + ei->size; +- /* if start is now at or beyond end, we're done, full +- * coverage */ +- if (start >= end) +- return 1; /* we're done */ +- } +- return 0; +-} +- +-/* +- * Find the highest page frame number we have available +- */ +-void __init find_max_pfn(void) +-{ +- int i; +- +- max_pfn = 0; +- if (efi_enabled) { +- efi_memmap_walk(efi_find_max_pfn, &max_pfn); +- efi_memmap_walk(efi_memory_present_wrapper, NULL); +- return; +- } +- +- for (i = 0; i < e820.nr_map; i++) { +- unsigned long start, end; +- /* RAM? 
*/ +- if (e820.map[i].type != E820_RAM) +- continue; +- start = PFN_UP(e820.map[i].addr); +- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); +- if (start >= end) +- continue; +- if (end > max_pfn) +- max_pfn = end; +- memory_present(0, start, end); +- } +-} +- +-/* + * Determine low and high memory ranges: + */ + unsigned long __init find_max_low_pfn(void) +@@ -1085,77 +353,6 @@ unsigned long __init find_max_low_pfn(vo + return max_low_pfn; + } + +-/* +- * Free all available memory for boot time allocation. Used +- * as a callback function by efi_memory_walk() +- */ +- +-static int __init +-free_available_memory(unsigned long start, unsigned long end, void *arg) +-{ +- /* check max_low_pfn */ +- if (start >= (max_low_pfn << PAGE_SHIFT)) +- return 0; +- if (end >= (max_low_pfn << PAGE_SHIFT)) +- end = max_low_pfn << PAGE_SHIFT; +- if (start < end) +- free_bootmem(start, end - start); +- +- return 0; +-} +-/* +- * Register fully available low RAM pages with the bootmem allocator. +- */ +-static void __init register_bootmem_low_pages(unsigned long max_low_pfn) +-{ +- int i; +- +- if (efi_enabled) { +- efi_memmap_walk(free_available_memory, NULL); +- return; +- } +- for (i = 0; i < e820.nr_map; i++) { +- unsigned long curr_pfn, last_pfn, size; +- /* +- * Reserve usable low memory +- */ +- if (e820.map[i].type != E820_RAM) +- continue; +- /* +- * We are rounding up the start address of usable memory: +- */ +- curr_pfn = PFN_UP(e820.map[i].addr); +- if (curr_pfn >= max_low_pfn) +- continue; +- /* +- * ... and at the end of the usable range downwards: +- */ +- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); +- +-#ifdef CONFIG_XEN +- /* +- * Truncate to the number of actual pages currently +- * present. +- */ +- if (last_pfn > xen_start_info->nr_pages) +- last_pfn = xen_start_info->nr_pages; +-#endif +- +- if (last_pfn > max_low_pfn) +- last_pfn = max_low_pfn; +- +- /* +- * .. finally, did all the rounding and playing +- * around just make the area go away? +- */ +- if (last_pfn <= curr_pfn) +- continue; +- +- size = last_pfn - curr_pfn; +- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); +- } +-} +- + #ifndef CONFIG_XEN + /* + * workaround for Dell systems that neglect to reserve EBDA +@@ -1247,8 +444,8 @@ void __init setup_bootmem_allocator(void + * the (very unlikely) case of us accidentally initializing the + * bootmem allocator with an invalid RAM area. + */ +- reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + +- bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); ++ reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + ++ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); + + #ifndef CONFIG_XEN + /* +@@ -1330,160 +527,6 @@ void __init remapped_pgdat_init(void) + } + } + +-/* +- * Request address space for all standard RAM and ROM resources +- * and also for regions reported as reserved by the e820. 
+- */ +-static void __init +-legacy_init_iomem_resources(struct e820entry *e820, int nr_map, +- struct resource *code_resource, +- struct resource *data_resource) +-{ +- int i; +- +- probe_roms(); +- +- for (i = 0; i < nr_map; i++) { +- struct resource *res; +-#ifndef CONFIG_RESOURCES_64BIT +- if (e820[i].addr + e820[i].size > 0x100000000ULL) +- continue; +-#endif +- res = kzalloc(sizeof(struct resource), GFP_ATOMIC); +- switch (e820[i].type) { +- case E820_RAM: res->name = "System RAM"; break; +- case E820_ACPI: res->name = "ACPI Tables"; break; +- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; +- default: res->name = "reserved"; +- } +- res->start = e820[i].addr; +- res->end = res->start + e820[i].size - 1; +- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; +- if (request_resource(&iomem_resource, res)) { +- kfree(res); +- continue; +- } +- if (e820[i].type == E820_RAM) { +- /* +- * We don't know which RAM region contains kernel data, +- * so we try it repeatedly and let the resource manager +- * test it. +- */ +-#ifndef CONFIG_XEN +- request_resource(res, code_resource); +- request_resource(res, data_resource); +-#endif +-#ifdef CONFIG_KEXEC +- if (crashk_res.start != crashk_res.end) +- request_resource(res, &crashk_res); +-#ifdef CONFIG_XEN +- xen_machine_kexec_register_resources(res); +-#endif +-#endif +- } +- } +-} +- +-/* +- * Locate a unused range of the physical address space below 4G which +- * can be used for PCI mappings. +- */ +-static void __init +-e820_setup_gap(struct e820entry *e820, int nr_map) +-{ +- unsigned long gapstart, gapsize, round; +- unsigned long long last; +- int i; +- +- /* +- * Search for the bigest gap in the low 32 bits of the e820 +- * memory space. +- */ +- last = 0x100000000ull; +- gapstart = 0x10000000; +- gapsize = 0x400000; +- i = nr_map; +- while (--i >= 0) { +- unsigned long long start = e820[i].addr; +- unsigned long long end = start + e820[i].size; +- +- /* +- * Since "last" is at most 4GB, we know we'll +- * fit in 32 bits if this condition is true +- */ +- if (last > end) { +- unsigned long gap = last - end; +- +- if (gap > gapsize) { +- gapsize = gap; +- gapstart = end; +- } +- } +- if (start < last) +- last = start; +- } +- +- /* +- * See how much we want to round up: start off with +- * rounding to the next 1MB area. +- */ +- round = 0x100000; +- while ((gapsize >> 4) > round) +- round += round; +- /* Fun with two's complement */ +- pci_mem_start = (gapstart + round) & -round; +- +- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", +- pci_mem_start, gapstart, gapsize); +-} +- +-/* +- * Request address space for all standard resources +- * +- * This is called just before pcibios_init(), which is also a +- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). +- */ +-static int __init request_standard_resources(void) +-{ +- int i; +- +- /* Nothing to do if not running in dom0. 
*/ +- if (!is_initial_xendomain()) +- return 0; +- +- printk("Setting up standard PCI resources\n"); +-#ifdef CONFIG_XEN +- legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map, +- &code_resource, &data_resource); +-#else +- if (efi_enabled) +- efi_initialize_iomem_resources(&code_resource, &data_resource); +- else +- legacy_init_iomem_resources(e820.map, e820.nr_map, +- &code_resource, &data_resource); +-#endif +- +- /* EFI systems may still have VGA */ +- request_resource(&iomem_resource, &video_ram_resource); +- +- /* request I/O space for devices used on all i[345]86 PCs */ +- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) +- request_resource(&ioport_resource, &standard_io_resources[i]); +- return 0; +-} +- +-subsys_initcall(request_standard_resources); +- +-static void __init register_memory(void) +-{ +-#ifdef CONFIG_XEN +- if (is_initial_xendomain()) +- e820_setup_gap(machine_e820.map, machine_e820.nr_map); +- else +-#endif +- e820_setup_gap(e820.map, e820.nr_map); +-} +- + #ifdef CONFIG_MCA + static void set_mca_bus(int x) + { +@@ -1493,6 +536,12 @@ static void set_mca_bus(int x) + static void set_mca_bus(int x) { } + #endif + ++/* Overridden in paravirt.c if CONFIG_PARAVIRT */ ++char * __init __attribute__((weak)) memory_setup(void) ++{ ++ return machine_specific_memory_setup(); ++} ++ + /* + * Determine if we were loaded by an EFI loader. If so, then we have also been + * passed the efi memmap, systab, etc., so we should use these data structures +@@ -1580,7 +629,7 @@ void __init setup_arch(char **cmdline_p) + efi_init(); + else { + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); +- print_memory_map(machine_specific_memory_setup()); ++ print_memory_map(memory_setup()); + } + + copy_edd(); +@@ -1759,7 +808,7 @@ void __init setup_arch(char **cmdline_p) + get_smp_config(); + #endif + +- register_memory(); ++ e820_register_memory(); + + if (is_initial_xendomain()) { + #ifdef CONFIG_VT +--- head-2010-01-18.orig/arch/x86/kernel/smp_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/smp_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo + put_cpu(); + return -EBUSY; + } ++ ++ /* Can deadlock when called with interrupts disabled */ ++ WARN_ON(irqs_disabled()); ++ + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); + spin_unlock_bh(&call_lock); +--- head-2010-01-18.orig/arch/x86/kernel/time-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/time-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -61,6 +61,7 @@ + #include + #include + #include ++#include + #include + + #include "mach_time.h" +@@ -129,11 +130,11 @@ static DEFINE_PER_CPU(struct vcpu_runsta + /* Must be signed, as it's compared with s64 quantities which can be -ve. */ + #define NS_PER_TICK (1000000000LL/HZ) + +-static void __clock_was_set(void *unused) ++static void __clock_was_set(struct work_struct *unused) + { + clock_was_set(); + } +-static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL); ++static DECLARE_WORK(clock_was_set_work, __clock_was_set); + + /* + * GCC 4.3 can turn loops over an induction variable into division. We do +@@ -528,10 +529,7 @@ static int set_rtc_mmss(unsigned long no + /* gets recalled with irq locally disabled */ + /* XXX - does irqsave resolve this? 
-johnstul */ + spin_lock_irqsave(&rtc_lock, flags); +- if (efi_enabled) +- retval = efi_set_rtc_mmss(nowtime); +- else +- retval = mach_set_rtc_mmss(nowtime); ++ retval = set_wallclock(nowtime); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +@@ -859,10 +857,7 @@ unsigned long get_cmos_time(void) + + spin_lock_irqsave(&rtc_lock, flags); + +- if (efi_enabled) +- retval = efi_get_time(); +- else +- retval = mach_get_cmos_time(); ++ retval = get_wallclock(); + + spin_unlock_irqrestore(&rtc_lock, flags); + +@@ -964,7 +959,7 @@ static void __init hpet_time_init(void) + printk("Using HPET for base-timer\n"); + } + +- time_init_hook(); ++ do_time_init(); + } + #endif + +--- head-2010-01-18.orig/arch/x86/kernel/traps_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/traps_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -29,6 +29,8 @@ + #include + #include + #include ++#include ++#include + + #ifdef CONFIG_EISA + #include +@@ -61,9 +63,6 @@ int panic_on_unrecovered_nmi; + + asmlinkage int system_call(void); + +-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, +- { 0, 0 }, { 0, 0 } }; +- + /* Do we ignore FPU interrupts ? */ + char ignore_fpu_irq = 0; + +@@ -100,12 +99,7 @@ asmlinkage void fixup_4gb_segment(void); + #endif + asmlinkage void machine_check(void); + +-static int kstack_depth_to_print = 24; +-#ifdef CONFIG_STACK_UNWIND +-static int call_trace = 1; +-#else +-#define call_trace (-1) +-#endif ++int kstack_depth_to_print = 24; + ATOMIC_NOTIFIER_HEAD(i386die_chain); + + int register_die_notifier(struct notifier_block *nb) +@@ -159,25 +153,7 @@ static inline unsigned long print_contex + return ebp; + } + +-struct ops_and_data { +- struct stacktrace_ops *ops; +- void *data; +-}; +- +-static asmlinkage int +-dump_trace_unwind(struct unwind_frame_info *info, void *data) +-{ +- struct ops_and_data *oad = (struct ops_and_data *)data; +- int n = 0; +- +- while (unwind(info) == 0 && UNW_PC(info)) { +- n++; +- oad->ops->address(oad->data, UNW_PC(info)); +- if (arch_unw_user_mode(info)) +- break; +- } +- return n; +-} ++#define MSG(msg) ops->warning(data, msg) + + void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, +@@ -188,39 +164,6 @@ void dump_trace(struct task_struct *task + if (!task) + task = current; + +- if (call_trace >= 0) { +- int unw_ret = 0; +- struct unwind_frame_info info; +- struct ops_and_data oad = { .ops = ops, .data = data }; +- +- if (regs) { +- if (unwind_init_frame_info(&info, task, regs) == 0) +- unw_ret = dump_trace_unwind(&info, &oad); +- } else if (task == current) +- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); +- else { +- if (unwind_init_blocked(&info, task) == 0) +- unw_ret = dump_trace_unwind(&info, &oad); +- } +- if (unw_ret > 0) { +- if (call_trace == 1 && !arch_unw_user_mode(&info)) { +- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", +- UNW_PC(&info)); +- if (UNW_SP(&info) >= PAGE_OFFSET) { +- ops->warning(data, "Leftover inexact backtrace:\n"); +- stack = (void *)UNW_SP(&info); +- if (!stack) +- return; +- ebp = UNW_FP(&info); +- } else +- ops->warning(data, "Full inexact backtrace again:\n"); +- } else if (call_trace >= 1) +- return; +- else +- ops->warning(data, "Full inexact backtrace again:\n"); +- } else +- ops->warning(data, "Inexact backtrace:\n"); +- } + if (!stack) { + unsigned long dummy; + stack = &dummy; +@@ -253,6 +196,7 @@ void dump_trace(struct task_struct *task + stack = (unsigned long*)context->previous_esp; + if 
(!stack) + break; ++ touch_nmi_watchdog(); + } + } + EXPORT_SYMBOL(dump_trace); +@@ -385,7 +329,7 @@ void show_registers(struct pt_regs *regs + * time of the fault.. + */ + if (in_kernel) { +- u8 __user *eip; ++ u8 *eip; + int code_bytes = 64; + unsigned char c; + +@@ -394,18 +338,20 @@ void show_registers(struct pt_regs *regs + + printk(KERN_EMERG "Code: "); + +- eip = (u8 __user *)regs->eip - 43; +- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { ++ eip = (u8 *)regs->eip - 43; ++ if (eip < (u8 *)PAGE_OFFSET || ++ probe_kernel_address(eip, c)) { + /* try starting at EIP */ +- eip = (u8 __user *)regs->eip; ++ eip = (u8 *)regs->eip; + code_bytes = 32; + } + for (i = 0; i < code_bytes; i++, eip++) { +- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { ++ if (eip < (u8 *)PAGE_OFFSET || ++ probe_kernel_address(eip, c)) { + printk(" Bad EIP value."); + break; + } +- if (eip == (u8 __user *)regs->eip) ++ if (eip == (u8 *)regs->eip) + printk("<%02x> ", c); + else + printk("%02x ", c); +@@ -414,43 +360,22 @@ void show_registers(struct pt_regs *regs + printk("\n"); + } + +-static void handle_BUG(struct pt_regs *regs) ++int is_valid_bugaddr(unsigned long eip) + { +- unsigned long eip = regs->eip; + unsigned short ud2; + + if (eip < PAGE_OFFSET) +- return; +- if (probe_kernel_address((unsigned short __user *)eip, ud2)) +- return; +- if (ud2 != 0x0b0f) +- return; ++ return 0; ++ if (probe_kernel_address((unsigned short *)eip, ud2)) ++ return 0; + +- printk(KERN_EMERG "------------[ cut here ]------------\n"); +- +-#ifdef CONFIG_DEBUG_BUGVERBOSE +- do { +- unsigned short line; +- char *file; +- char c; +- +- if (probe_kernel_address((unsigned short __user *)(eip + 2), +- line)) +- break; +- if (__get_user(file, (char * __user *)(eip + 4)) || +- (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) +- file = ""; +- +- printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); +- return; +- } while (0); +-#endif +- printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); ++ return ud2 == 0x0b0f; + } + +-/* This is gone through when something in the kernel +- * has done something bad and is about to be terminated. +-*/ ++/* ++ * This is gone through when something in the kernel has done something bad and ++ * is about to be terminated. ++ */ + void die(const char * str, struct pt_regs * regs, long err) + { + static struct { +@@ -458,7 +383,7 @@ void die(const char * str, struct pt_reg + u32 lock_owner; + int lock_owner_depth; + } die = { +- .lock = SPIN_LOCK_UNLOCKED, ++ .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock_owner = -1, + .lock_owner_depth = 0 + }; +@@ -482,7 +407,8 @@ void die(const char * str, struct pt_reg + unsigned long esp; + unsigned short ss; + +- handle_BUG(regs); ++ report_bug(regs->eip); ++ + printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + #ifdef CONFIG_PREEMPT + printk(KERN_EMERG "PREEMPT "); +@@ -682,8 +608,7 @@ mem_parity_error(unsigned char reason, s + { + printk(KERN_EMERG "Uhhuh. 
NMI received for unknown reason %02x on " + "CPU %d.\n", reason, smp_processor_id()); +- printk(KERN_EMERG "You probably have a hardware problem with your RAM " +- "chips\n"); ++ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + +@@ -741,7 +666,6 @@ void __kprobes die_nmi(struct pt_regs *r + printk(" on CPU%d, eip %08lx, registers:\n", + smp_processor_id(), regs->eip); + show_registers(regs); +- printk(KERN_EMERG "console shuts up ...\n"); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); +@@ -1057,49 +981,24 @@ fastcall void do_spurious_interrupt_bug( + #endif + } + +-fastcall void setup_x86_bogus_stack(unsigned char * stk) ++fastcall unsigned long patch_espfix_desc(unsigned long uesp, ++ unsigned long kesp) + { +- unsigned long *switch16_ptr, *switch32_ptr; +- struct pt_regs *regs; +- unsigned long stack_top, stack_bot; +- unsigned short iret_frame16_off; +- int cpu = smp_processor_id(); +- /* reserve the space on 32bit stack for the magic switch16 pointer */ +- memmove(stk, stk + 8, sizeof(struct pt_regs)); +- switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); +- regs = (struct pt_regs *)stk; +- /* now the switch32 on 16bit stack */ +- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); +- stack_top = stack_bot + CPU_16BIT_STACK_SIZE; +- switch32_ptr = (unsigned long *)(stack_top - 8); +- iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; +- /* copy iret frame on 16bit stack */ +- memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); +- /* fill in the switch pointers */ +- switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; +- switch16_ptr[1] = __ESPFIX_SS; +- switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + +- 8 - CPU_16BIT_STACK_SIZE; +- switch32_ptr[1] = __KERNEL_DS; +-} +- +-fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) +-{ +- unsigned long *switch32_ptr; +- unsigned char *stack16, *stack32; +- unsigned long stack_top, stack_bot; +- int len; + int cpu = smp_processor_id(); +- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); +- stack_top = stack_bot + CPU_16BIT_STACK_SIZE; +- switch32_ptr = (unsigned long *)(stack_top - 8); +- /* copy the data from 16bit stack to 32bit stack */ +- len = CPU_16BIT_STACK_SIZE - 8 - sp; +- stack16 = (unsigned char *)(stack_bot + sp); +- stack32 = (unsigned char *) +- (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); +- memcpy(stack32, stack16, len); +- return stack32; ++ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; ++ unsigned long base = (kesp - uesp) & -THREAD_SIZE; ++ unsigned long new_kesp = kesp - base; ++ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; ++ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; ++ /* Set up base for espfix segment */ ++ desc &= 0x00f0ff0000000000ULL; ++ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | ++ ((((__u64)base) << 32) & 0xff00000000000000ULL) | ++ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | ++ (lim_pages & 0xffff); ++ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; ++ return new_kesp; + } + #endif + +@@ -1113,7 +1012,7 @@ fastcall unsigned char * fixup_x86_bogus + * Must be called with kernel preemption disabled (in this case, + * local interrupts are disabled at the call-site in entry.S). 
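The descriptor arithmetic in patch_espfix_desc() above is dense because an x86 segment descriptor scatters its fields: base bits 0-23 live in descriptor bits 16-39, base bits 24-31 in bits 56-63, limit bits 0-15 in bits 0-15, and limit bits 16-19 in bits 48-51. The same packing written out field by field, purely as a sketch (the helper name is not a kernel symbol):

#include <linux/types.h>

/* Rebuild an x86 descriptor around a new base/limit, keeping type bits. */
static __u64 pack_segment_desc(__u64 desc, __u32 base, __u32 limit)
{
	desc &= 0x00f0ff0000000000ULL;				/* preserve type/flag bits */
	desc |= ((__u64)base << 16) & 0x000000ffffff0000ULL;	/* base  23..0  */
	desc |= ((__u64)base << 32) & 0xff00000000000000ULL;	/* base  31..24 */
	desc |= ((__u64)limit << 32) & 0x000f000000000000ULL;	/* limit 19..16 */
	desc |= limit & 0xffff;					/* limit 15..0  */
	return desc;
}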
+ */ +-asmlinkage void math_state_restore(struct pt_regs regs) ++asmlinkage void math_state_restore(void) + { + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = thread->task; +@@ -1123,6 +1022,7 @@ asmlinkage void math_state_restore(struc + init_fpu(tsk); + restore_fpu(tsk); + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ ++ tsk->fpu_counter++; + } + + #ifndef CONFIG_MATH_EMULATION +@@ -1234,19 +1134,3 @@ static int __init kstack_setup(char *s) + return 1; + } + __setup("kstack=", kstack_setup); +- +-#ifdef CONFIG_STACK_UNWIND +-static int __init call_trace_setup(char *s) +-{ +- if (strcmp(s, "old") == 0) +- call_trace = -1; +- else if (strcmp(s, "both") == 0) +- call_trace = 0; +- else if (strcmp(s, "newfallback") == 0) +- call_trace = 1; +- else if (strcmp(s, "new") == 2) +- call_trace = 2; +- return 1; +-} +-__setup("call_trace=", call_trace_setup); +-#endif +--- head-2010-01-18.orig/arch/x86/kernel/vmlinux.lds.S 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/vmlinux.lds.S 2009-12-04 10:51:06.000000000 +0100 +@@ -84,6 +84,10 @@ SECTIONS + { + #ifdef CONFIG_X86_32 + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; ++#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002 ++#undef LOAD_OFFSET ++#define LOAD_OFFSET 0 ++#endif + phys_startup_32 = startup_32 - LOAD_OFFSET; + #else + . = __START_KERNEL; +--- head-2010-01-18.orig/arch/x86/kvm/Kconfig 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/kvm/Kconfig 2009-11-06 10:46:27.000000000 +0100 +@@ -7,6 +7,7 @@ source "virt/kvm/Kconfig" + menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || X86 ++ depends on !XEN + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other +--- head-2010-01-18.orig/arch/x86/mm/fault_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/fault_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -22,9 +22,9 @@ + #include + #include + #include ++#include + + #include +-#include + #include + #include + #include +@@ -167,7 +167,7 @@ static inline unsigned long get_segment_ + static int __is_prefetch(struct pt_regs *regs, unsigned long addr) + { + unsigned long limit; +- unsigned long instr = get_segment_eip (regs, &limit); ++ unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); + int scan_more = 1; + int prefetch = 0; + int i; +@@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs + unsigned char instr_hi; + unsigned char instr_lo; + +- if (instr > limit) ++ if (instr > (unsigned char *)limit) + break; +- if (__get_user(opcode, (unsigned char __user *) instr)) ++ if (probe_kernel_address(instr, opcode)) + break; + + instr_hi = opcode & 0xf0; +@@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + scan_more = 0; +- if (instr > limit) ++ if (instr > (unsigned char *)limit) + break; +- if (__get_user(opcode, (unsigned char __user *) instr)) ++ if (probe_kernel_address(instr, opcode)) + break; + prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); +--- head-2010-01-18.orig/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page * + unsigned long vaddr; + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ +- inc_preempt_count(); ++ pagefault_disable(); + if (!PageHighMem(page)) + 
return page_address(page); + +@@ -63,26 +63,22 @@ void kunmap_atomic(void *kvaddr, enum km + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + +-#ifdef CONFIG_DEBUG_HIGHMEM +- if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { +- dec_preempt_count(); +- preempt_check_resched(); +- return; +- } +- +- if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) +- BUG(); +-#endif + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. + */ +- kpte_clear_flush(kmap_pte-idx, vaddr); ++ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) ++ kpte_clear_flush(kmap_pte-idx, vaddr); ++ else { ++#ifdef CONFIG_DEBUG_HIGHMEM ++ BUG_ON(vaddr < PAGE_OFFSET); ++ BUG_ON(vaddr >= (unsigned long)high_memory); ++#endif ++ } + +- dec_preempt_count(); +- preempt_check_resched(); ++ pagefault_enable(); + } + + /* This is the same as kmap_atomic() but can map memory that doesn't +@@ -93,7 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn, + enum fixed_addresses idx; + unsigned long vaddr; + +- inc_preempt_count(); ++ pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +--- head-2010-01-18.orig/arch/x86/mm/init_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -233,8 +233,6 @@ static inline int page_kills_ppro(unsign + + #endif + +-extern int is_available_memory(efi_memory_desc_t *); +- + int page_is_ram(unsigned long pagenr) + { + int i; +@@ -326,7 +324,7 @@ void __init add_one_highpage_init(struct + SetPageReserved(page); + } + +-static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) ++static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) + { + free_new_highpage(page, pfn); + totalram_pages++; +@@ -343,7 +341,7 @@ static int add_one_highpage_hotplug(stru + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +-void online_page(struct page *page) ++void __meminit online_page(struct page *page) + { + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +@@ -738,16 +736,10 @@ void __init mem_init(void) + set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags); + } + +-/* +- * this is for the non-NUMA, single node SMP system case. +- * Specifically, in the case of x86, we will always add +- * memory to the highmem for now. 
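The comment being deleted above described the old single-node assumption; the replacement code in this hunk looks the target node up instead of hard-wiring contig_page_data. On kernels without CONFIG_NEED_MULTIPLE_NODES, NODE_DATA(nid) simply expands to &contig_page_data, so one function now serves both memory models. The lookup in isolation, as a sketch:

#include <linux/mmzone.h>

/* Zone that hot-added highmem joins on node nid. */
static struct zone *highmem_zone_for(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);	/* &contig_page_data on flat kernels */

	return pgdat->node_zones + ZONE_HIGHMEM;
}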
+- */ + #ifdef CONFIG_MEMORY_HOTPLUG +-#ifndef CONFIG_NEED_MULTIPLE_NODES + int arch_add_memory(int nid, u64 start, u64 size) + { +- struct pglist_data *pgdata = &contig_page_data; ++ struct pglist_data *pgdata = NODE_DATA(nid); + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +@@ -759,11 +751,11 @@ int remove_memory(u64 start, u64 size) + { + return -EINVAL; + } +-#endif ++EXPORT_SYMBOL_GPL(remove_memory); + #endif + +-kmem_cache_t *pgd_cache; +-kmem_cache_t *pmd_cache; ++struct kmem_cache *pgd_cache; ++struct kmem_cache *pmd_cache; + + void __init pgtable_cache_init(void) + { +--- head-2010-01-18.orig/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -203,7 +203,7 @@ void pte_free(struct page *pte) + __free_page(pte); + } + +-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) ++void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) + { + memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); + } +@@ -243,7 +243,7 @@ static inline void pgd_list_del(pgd_t *p + set_page_private(next, (unsigned long)pprev); + } + +-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) ++void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) + { + unsigned long flags; + +@@ -264,7 +264,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c + } + + /* never called when PTRS_PER_PMD > 1 */ +-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) ++void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) + { + unsigned long flags; /* can be called from interrupt context */ + +--- head-2010-01-18.orig/arch/x86/pci/irq-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/pci/irq-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -768,7 +768,7 @@ static void __init pirq_find_router(stru + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", + rt->rtr_vendor, rt->rtr_device); + +- pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); ++ pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); + if (!pirq_router_dev) { + DBG(KERN_DEBUG "PCI: Interrupt router not found at " + "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); +@@ -788,6 +788,8 @@ static void __init pirq_find_router(stru + pirq_router_dev->vendor, + pirq_router_dev->device, + pci_name(pirq_router_dev)); ++ ++ /* The device remains referenced for the kernel lifetime */ + } + + static struct irq_info *pirq_get_info(struct pci_dev *dev) +--- head-2010-01-18.orig/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:46:27.000000000 +0100 +@@ -261,7 +261,6 @@ ENTRY(system_call) + movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + GET_THREAD_INFO(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) +- CFI_REMEMBER_STATE + jnz tracesys + cmpq $__NR_syscall_max,%rax + ja badsys +@@ -272,7 +271,6 @@ ENTRY(system_call) + * Syscall return path ending with SYSRET (fast path) + * Has incomplete stack frame and undefined top of stack. 
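The irq-xen.c hunk above replaces pci_find_slot() with pci_get_bus_and_slot(), which returns a referenced struct pci_dev; the added comment notes that this particular reference is deliberately held for the kernel's lifetime. Callers that do not keep the device must pair the get with pci_dev_put(). The usual discipline, as a sketch:

#include <linux/pci.h>
#include <linux/errno.h>

static int probe_slot(unsigned int bus, unsigned int devfn)
{
	struct pci_dev *dev = pci_get_bus_and_slot(bus, devfn);

	if (!dev)
		return -ENODEV;
	/* ... interrogate dev ... */
	pci_dev_put(dev);	/* drop the reference the get took */
	return 0;
}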
+ */ +- .globl ret_from_sys_call + ret_from_sys_call: + movl $_TIF_ALLWORK_MASK,%edi + /* edi: flagmask */ +@@ -282,8 +280,8 @@ sysret_check: + TRACE_IRQS_OFF + movl threadinfo_flags(%rcx),%edx + andl %edi,%edx +- CFI_REMEMBER_STATE + jnz sysret_careful ++ CFI_REMEMBER_STATE + /* + * sysretq will re-enable interrupts: + */ +@@ -292,10 +290,10 @@ sysret_check: + RESTORE_ARGS 0,8,0 + HYPERVISOR_IRET VGCF_IN_SYSCALL + ++ CFI_RESTORE_STATE + /* Handle reschedules */ + /* edx: work, edi: workmask */ + sysret_careful: +- CFI_RESTORE_STATE + bt $TIF_NEED_RESCHED,%edx + jnc sysret_signal + TRACE_IRQS_ON +@@ -334,7 +332,6 @@ badsys: + + /* Do syscall tracing */ + tracesys: +- CFI_RESTORE_STATE + SAVE_REST + movq $-ENOSYS,RAX(%rsp) + FIXUP_TOP_OF_STACK %rdi +@@ -350,32 +347,13 @@ tracesys: + call *sys_call_table(,%rax,8) + 1: movq %rax,RAX-ARGOFFSET(%rsp) + /* Use IRET because user could have changed frame */ +- jmp int_ret_from_sys_call +- CFI_ENDPROC +-END(system_call) + + /* + * Syscall return path ending with IRET. + * Has correct top of stack, but partial stack frame. +- */ +-ENTRY(int_ret_from_sys_call) +- CFI_STARTPROC simple +- CFI_SIGNAL_FRAME +- CFI_DEF_CFA rsp,SS+8-ARGOFFSET +- /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ +- CFI_REL_OFFSET rsp,RSP-ARGOFFSET +- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ +- /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ +- CFI_REL_OFFSET rip,RIP-ARGOFFSET +- CFI_REL_OFFSET rdx,RDX-ARGOFFSET +- CFI_REL_OFFSET rcx,RCX-ARGOFFSET +- CFI_REL_OFFSET rax,RAX-ARGOFFSET +- CFI_REL_OFFSET rdi,RDI-ARGOFFSET +- CFI_REL_OFFSET rsi,RSI-ARGOFFSET +- CFI_REL_OFFSET r8,R8-ARGOFFSET +- CFI_REL_OFFSET r9,R9-ARGOFFSET +- CFI_REL_OFFSET r10,R10-ARGOFFSET +- CFI_REL_OFFSET r11,R11-ARGOFFSET ++ */ ++ .globl int_ret_from_sys_call ++int_ret_from_sys_call: + XEN_BLOCK_EVENTS(%rsi) + TRACE_IRQS_OFF + testb $3,CS-ARGOFFSET(%rsp) +@@ -428,8 +406,6 @@ int_very_careful: + popq %rdi + CFI_ADJUST_CFA_OFFSET -8 + andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi +- XEN_BLOCK_EVENTS(%rsi) +- TRACE_IRQS_OFF + jmp int_restore_rest + + int_signal: +@@ -445,7 +421,7 @@ int_restore_rest: + TRACE_IRQS_OFF + jmp int_with_check + CFI_ENDPROC +-END(int_ret_from_sys_call) ++END(system_call) + + /* + * Certain special system calls that need to save a complete full stack frame. 
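The sysret_check/sysret_careful/sysret_signal flow above is the assembly form of a simple loop: with interrupts off, test the thread-info work flags; if none are set, leave via sysretq, otherwise re-enable interrupts, handle one piece of work (a reschedule or signal delivery), and test again. The shape in C, strictly as a sketch (the helper names below are illustrative, not kernel symbols):

#include <linux/sched.h>

static void deliver_pending_signals(void)	/* stand-in for do_notify_resume() */
{
}

/* C-level shape of the syscall-exit work loop in the hunks above. */
static void syscall_exit_work_loop(struct thread_info *ti)
{
	for (;;) {
		local_irq_disable();
		if (!(ti->flags & _TIF_ALLWORK_MASK))
			return;			/* fast path: sysretq */
		local_irq_enable();
		if (ti->flags & _TIF_NEED_RESCHED)
			schedule();		/* sysret_careful */
		else
			deliver_pending_signals();	/* sysret_signal */
	}
}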
+@@ -1270,36 +1246,3 @@ ENTRY(call_softirq) + ret + CFI_ENDPROC + ENDPROC(call_softirq) +- +-#ifdef CONFIG_STACK_UNWIND +-ENTRY(arch_unwind_init_running) +- CFI_STARTPROC +- movq %r15, R15(%rdi) +- movq %r14, R14(%rdi) +- xchgq %rsi, %rdx +- movq %r13, R13(%rdi) +- movq %r12, R12(%rdi) +- xorl %eax, %eax +- movq %rbp, RBP(%rdi) +- movq %rbx, RBX(%rdi) +- movq (%rsp), %rcx +- movq %rax, R11(%rdi) +- movq %rax, R10(%rdi) +- movq %rax, R9(%rdi) +- movq %rax, R8(%rdi) +- movq %rax, RAX(%rdi) +- movq %rax, RCX(%rdi) +- movq %rax, RDX(%rdi) +- movq %rax, RSI(%rdi) +- movq %rax, RDI(%rdi) +- movq %rax, ORIG_RAX(%rdi) +- movq %rcx, RIP(%rdi) +- leaq 8(%rsp), %rcx +- movq $__KERNEL_CS, CS(%rdi) +- movq %rax, EFLAGS(%rdi) +- movq %rcx, RSP(%rdi) +- movq $__KERNEL_DS, SS(%rdi) +- jmpq *%rdx +- CFI_ENDPROC +-ENDPROC(arch_unwind_init_running) +-#endif +--- head-2010-01-18.orig/arch/x86/kernel/head64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -104,7 +104,10 @@ void __init x86_64_start_kernel(char * r + machine_to_phys_order++; + + #if 0 +- for (i = 0; i < 256; i++) ++ /* clear bss before set_intr_gate with early_idt_handler */ ++ clear_bss(); ++ ++ for (i = 0; i < IDT_ENTRIES; i++) + set_intr_gate(i, early_idt_handler); + asm volatile("lidt %0" :: "m" (idt_descr)); + #endif +--- head-2010-01-18.orig/arch/x86/kernel/io_apic_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/io_apic_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -202,14 +202,20 @@ static struct IO_APIC_route_entry ioapic + * the interrupt, and we need to make sure the entry is fully populated + * before that happens. + */ +-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++static void ++__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) + { +- unsigned long flags; + union entry_union eu; + eu.entry = e; +- spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++} ++ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, e); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -720,6 +726,22 @@ static int assign_irq_vector(int irq, cp + } + + #ifndef CONFIG_XEN ++static void __clear_irq_vector(int irq) ++{ ++ cpumask_t mask; ++ int cpu, vector; ++ ++ BUG_ON(!irq_vector[irq]); ++ ++ vector = irq_vector[irq]; ++ cpus_and(mask, irq_domain[irq], cpu_online_map); ++ for_each_cpu_mask(cpu, mask) ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ ++ irq_vector[irq] = 0; ++ irq_domain[irq] = CPU_MASK_NONE; ++} ++ + void __setup_vector_irq(int cpu) + { + /* Initialize vector_irq on a new cpu */ +@@ -767,26 +789,65 @@ static void ioapic_register_intr(int irq + #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq) + #endif /* !CONFIG_XEN */ + +-static void __init setup_IO_APIC_irqs(void) ++static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) + { + struct IO_APIC_route_entry entry; +- int apic, pin, idx, irq, first_notcon = 1, vector; ++ int vector; + unsigned long flags; + +- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ /* ++ * add it to the IO-APIC irq-routing table: ++ */ ++ memset(&entry,0,sizeof(entry)); + 
+- /* +- * add it to the IO-APIC irq-routing table: +- */ +- memset(&entry,0,sizeof(entry)); ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* enable IRQ */ ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.dest_mode = INT_DEST_MODE; +- entry.mask = 0; /* enable IRQ */ ++ entry.trigger = irq_trigger(idx); ++ entry.polarity = irq_polarity(idx); ++ ++ if (irq_trigger(idx)) { ++ entry.trigger = 1; ++ entry.mask = 1; + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ } ++ ++ if (/* !apic && */ !IO_APIC_IRQ(irq)) ++ return; ++ ++ if (IO_APIC_IRQ(irq)) { ++ cpumask_t mask; ++ vector = assign_irq_vector(irq, TARGET_CPUS, &mask); ++ if (vector < 0) ++ return; ++ ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); ++ entry.vector = vector; ++ ++ ioapic_register_intr(irq, vector, IOAPIC_AUTO); ++ if (!apic && (irq < 16)) ++ disable_8259A_irq(irq); ++ } ++ ++ ioapic_write_entry(apic, pin, entry); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ set_native_irq_info(irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++} ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ int apic, pin, idx, irq, first_notcon = 1; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + idx = find_irq_entry(apic,pin,mp_INT); + if (idx == -1) { +@@ -798,39 +859,11 @@ static void __init setup_IO_APIC_irqs(vo + continue; + } + +- entry.trigger = irq_trigger(idx); +- entry.polarity = irq_polarity(idx); +- +- if (irq_trigger(idx)) { +- entry.trigger = 1; +- entry.mask = 1; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); +- } +- + irq = pin_2_irq(idx, apic, pin); + add_pin_to_irq(irq, apic, pin); + +- if (/* !apic && */ !IO_APIC_IRQ(irq)) +- continue; +- +- if (IO_APIC_IRQ(irq)) { +- cpumask_t mask; +- vector = assign_irq_vector(irq, TARGET_CPUS, &mask); +- if (vector < 0) +- continue; +- +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); +- entry.vector = vector; ++ setup_IO_APIC_irq(apic, pin, idx, irq); + +- ioapic_register_intr(irq, vector, IOAPIC_AUTO); +- if (!apic && (irq < 16)) +- disable_8259A_irq(irq); +- } +- ioapic_write_entry(apic, pin, entry); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- set_native_irq_info(irq, TARGET_CPUS); +- spin_unlock_irqrestore(&ioapic_lock, flags); + } + } + +@@ -1826,7 +1859,7 @@ void destroy_irq(unsigned int irq) + dynamic_irq_cleanup(irq); + + spin_lock_irqsave(&vector_lock, flags); +- irq_vector[irq] = 0; ++ __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); + } + +@@ -2131,7 +2164,15 @@ void __init setup_ioapic_dest(void) + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); +- set_ioapic_affinity_irq(irq, TARGET_CPUS); ++ ++ /* setup_IO_APIC_irqs could fail to get vector for some device ++ * when you have too many devices, because at that time only boot ++ * cpu is online. 
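The __ioapic_write_entry()/ioapic_write_entry() split earlier in this hunk is the standard kernel idiom for lock layering: a bare double-underscore worker that requires the lock to be held, plus a thin wrapper that takes it. Callers already holding the lock use the bare version; everyone else goes through the wrapper. The idiom generically, with a hypothetical device and lock:

#include <linux/spinlock.h>
#include <linux/io.h>

static DEFINE_SPINLOCK(dev_lock);
static void __iomem *dev_base;		/* hypothetical MMIO window */

/* Caller must hold dev_lock. */
static void __dev_write(unsigned reg, u32 val)
{
	writel(val, dev_base + reg);
}

static void dev_write(unsigned reg, u32 val)
{
	unsigned long flags;

	spin_lock_irqsave(&dev_lock, flags);
	__dev_write(reg, val);
	spin_unlock_irqrestore(&dev_lock, flags);
}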
++ */ ++ if(!irq_vector[irq]) ++ setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); ++ else ++ set_ioapic_affinity_irq(irq, TARGET_CPUS); + } + + } +--- head-2010-01-18.orig/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt + + if (likely(irq < NR_IRQS)) + generic_handle_irq(irq); +- else ++ else if (printk_ratelimit()) + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", + __func__, smp_processor_id(), irq); + +--- head-2010-01-18.orig/arch/x86/kernel/mpparse_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/mpparse_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -35,8 +35,6 @@ + int smp_found_config; + unsigned int __initdata maxcpus = NR_CPUS; + +-int acpi_found_madt; +- + /* + * Various Linux-internal data structures created from the + * MP-table. +--- head-2010-01-18.orig/arch/x86/kernel/process_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -119,29 +119,23 @@ void exit_idle(void) + static void poll_idle (void) + { + local_irq_enable(); +- +- asm volatile( +- "2:" +- "testl %0,%1;" +- "rep; nop;" +- "je 2b;" +- : : +- "i" (_TIF_NEED_RESCHED), +- "m" (current_thread_info()->flags)); ++ cpu_relax(); + } + + static void xen_idle(void) + { ++ current_thread_info()->status &= ~TS_POLLING; ++ /* ++ * TS_POLLING-cleared state must be visible before we ++ * test NEED_RESCHED: ++ */ ++ smp_mb(); + local_irq_disable(); +- +- if (need_resched()) +- local_irq_enable(); +- else { +- current_thread_info()->status &= ~TS_POLLING; +- smp_mb__after_clear_bit(); ++ if (!need_resched()) + safe_halt(); +- current_thread_info()->status |= TS_POLLING; +- } ++ else ++ local_irq_enable(); ++ current_thread_info()->status |= TS_POLLING; + } + + #ifdef CONFIG_HOTPLUG_CPU +@@ -181,6 +175,12 @@ void cpu_idle (void) + idle = xen_idle; /* no alternatives */ + if (cpu_is_offline(smp_processor_id())) + play_dead(); ++ /* ++ * Idle routines should keep interrupts disabled ++ * from here on, until they go to idle. ++ * Otherwise, idle callbacks can misfire. ++ */ ++ local_irq_disable(); + enter_idle(); + idle(); + /* In many cases the interrupt that ended idle +--- head-2010-01-18.orig/arch/x86/kernel/setup_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -574,8 +574,7 @@ void __init setup_arch(char **cmdline_p) + if (LOADER_TYPE && INITRD_START) { + if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { + reserve_bootmem_generic(INITRD_START, INITRD_SIZE); +- initrd_start = +- INITRD_START ? 
INITRD_START + PAGE_OFFSET : 0; ++ initrd_start = INITRD_START + PAGE_OFFSET; + initrd_end = initrd_start+INITRD_SIZE; + } + else { +@@ -991,11 +990,8 @@ static void __cpuinit init_amd(struct cp + /* Fix cpuid4 emulation for more */ + num_cache_leaves = 3; + +- /* When there is only one core no need to synchronize RDTSC */ +- if (num_possible_cpus() == 1) +- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); +- else +- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); ++ /* RDTSC can be speculated around */ ++ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + } + + static void __cpuinit detect_ht(struct cpuinfo_x86 *c) +@@ -1094,6 +1090,15 @@ static void __cpuinit init_intel(struct + set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); + } + ++ if (cpu_has_ds) { ++ unsigned int l1, l2; ++ rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); ++ if (!(l1 & (1<<11))) ++ set_bit(X86_FEATURE_BTS, c->x86_capability); ++ if (!(l1 & (1<<12))) ++ set_bit(X86_FEATURE_PEBS, c->x86_capability); ++ } ++ + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); +@@ -1113,7 +1118,10 @@ static void __cpuinit init_intel(struct + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + if (c->x86 == 6) + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); +- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); ++ if (c->x86 == 15) ++ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); ++ else ++ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); +--- head-2010-01-18.orig/arch/x86/kernel/smp_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/smp_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v + put_cpu(); + return 0; + } ++ ++ /* Can deadlock when called with interrupts disabled */ ++ WARN_ON(irqs_disabled()); ++ + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); + spin_unlock_bh(&call_lock); + put_cpu(); + return 0; + } ++EXPORT_SYMBOL(smp_call_function_single); + + /* + * this function sends a 'generic call function' IPI to all other CPUs +--- head-2010-01-18.orig/arch/x86/kernel/traps_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/traps_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -30,9 +30,10 @@ + #include + #include + #include ++#include ++#include + + #include +-#include + #include + #include + #include +@@ -108,12 +109,7 @@ static inline void preempt_conditional_c + preempt_enable_no_resched(); + } + +-static int kstack_depth_to_print = 12; +-#ifdef CONFIG_STACK_UNWIND +-static int call_trace = 1; +-#else +-#define call_trace (-1) +-#endif ++int kstack_depth_to_print = 12; + + #ifdef CONFIG_KALLSYMS + void printk_address(unsigned long address) +@@ -218,24 +214,7 @@ static unsigned long *in_exception_stack + return NULL; + } + +-struct ops_and_data { +- struct stacktrace_ops *ops; +- void *data; +-}; +- +-static int dump_trace_unwind(struct unwind_frame_info *info, void *context) +-{ +- struct ops_and_data *oad = (struct ops_and_data *)context; +- int n = 0; +- +- while (unwind(info) == 0 && UNW_PC(info)) { +- n++; +- oad->ops->address(oad->data, UNW_PC(info)); +- if (arch_unw_user_mode(info)) +- break; +- } +- return n; +-} ++#define MSG(txt) ops->warning(data, txt) + + /* + * x86-64 can have upto three kernel stacks: +@@ -250,61 +229,24 @@ static inline int valid_stack_ptr(struct + return p > t && p < t + THREAD_SIZE - 3; 
+ } + +-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, ++void dump_trace(struct task_struct *tsk, struct pt_regs *regs, ++ unsigned long *stack, + struct stacktrace_ops *ops, void *data) + { +- const unsigned cpu = smp_processor_id(); +- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; ++ const unsigned cpu = get_cpu(); ++ unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned used = 0; + struct thread_info *tinfo; + + if (!tsk) + tsk = current; + +- if (call_trace >= 0) { +- int unw_ret = 0; +- struct unwind_frame_info info; +- struct ops_and_data oad = { .ops = ops, .data = data }; +- +- if (regs) { +- if (unwind_init_frame_info(&info, tsk, regs) == 0) +- unw_ret = dump_trace_unwind(&info, &oad); +- } else if (tsk == current) +- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); +- else { +- if (unwind_init_blocked(&info, tsk) == 0) +- unw_ret = dump_trace_unwind(&info, &oad); +- } +- if (unw_ret > 0) { +- if (call_trace == 1 && !arch_unw_user_mode(&info)) { +- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", +- UNW_PC(&info)); +- if ((long)UNW_SP(&info) < 0) { +- ops->warning(data, "Leftover inexact backtrace:\n"); +- stack = (unsigned long *)UNW_SP(&info); +- if (!stack) +- return; +- } else +- ops->warning(data, "Full inexact backtrace again:\n"); +- } else if (call_trace >= 1) +- return; +- else +- ops->warning(data, "Full inexact backtrace again:\n"); +- } else +- ops->warning(data, "Inexact backtrace:\n"); +- } + if (!stack) { + unsigned long dummy; + stack = &dummy; + if (tsk && tsk != current) + stack = (unsigned long *)tsk->thread.rsp; + } +- /* +- * Align the stack pointer on word boundary, later loops +- * rely on that (and corruption / debug info bugs can cause +- * unaligned values here): +- */ +- stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); + + /* + * Print function call entries within a stack. 'cond' is the +@@ -314,9 +256,9 @@ void dump_trace(struct task_struct *tsk, + #define HANDLE_STACK(cond) \ + do while (cond) { \ + unsigned long addr = *stack++; \ +- if (oops_in_progress ? 
\ +- __kernel_text_address(addr) : \ +- kernel_text_address(addr)) { \ ++ /* Use unlocked access here because except for NMIs \ ++ we should be already protected against module unloads */ \ ++ if (__kernel_text_address(addr)) { \ + /* \ + * If the address is either in the text segment of the \ + * kernel, or in the region which contains vmalloc'ed \ +@@ -379,9 +321,10 @@ void dump_trace(struct task_struct *tsk, + /* + * This handles the process stack: + */ +- tinfo = current_thread_info(); ++ tinfo = task_thread_info(tsk); + HANDLE_STACK (valid_stack_ptr(tinfo, stack)); + #undef HANDLE_STACK ++ put_cpu(); + } + EXPORT_SYMBOL(dump_trace); + +@@ -518,30 +461,15 @@ bad: + printk("\n"); + } + +-void handle_BUG(struct pt_regs *regs) +-{ +- struct bug_frame f; +- long len; +- const char *prefix = ""; ++int is_valid_bugaddr(unsigned long rip) ++{ ++ unsigned short ud2; + +- if (user_mode(regs)) +- return; +- if (__copy_from_user(&f, (const void __user *) regs->rip, +- sizeof(struct bug_frame))) +- return; +- if (f.filename >= 0 || +- f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) +- return; +- len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1; +- if (len < 0 || len >= PATH_MAX) +- f.filename = (int)(long)"unmapped filename"; +- else if (len > 50) { +- f.filename += len - 50; +- prefix = "..."; +- } +- printk("----------- [cut here ] --------- [please bite here ] ---------\n"); +- printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line); +-} ++ if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) ++ return 0; ++ ++ return ud2 == 0x0b0f; ++} + + #ifdef CONFIG_BUG + void out_of_line_bug(void) +@@ -621,7 +549,9 @@ void die(const char * str, struct pt_reg + { + unsigned long flags = oops_begin(); + +- handle_BUG(regs); ++ if (!user_mode(regs)) ++ report_bug(regs->rip); ++ + __die(str, regs, err); + oops_end(flags); + do_exit(SIGSEGV); +@@ -790,8 +720,7 @@ mem_parity_error(unsigned char reason, s + { + printk(KERN_EMERG "Uhhuh. 
NMI received for unknown reason %02x.\n", + reason); +- printk(KERN_EMERG "You probably have a hardware problem with your " +- "RAM chips\n"); ++ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); +@@ -1227,21 +1156,3 @@ static int __init kstack_setup(char *s) + return 0; + } + early_param("kstack", kstack_setup); +- +-#ifdef CONFIG_STACK_UNWIND +-static int __init call_trace_setup(char *s) +-{ +- if (!s) +- return -EINVAL; +- if (strcmp(s, "old") == 0) +- call_trace = -1; +- else if (strcmp(s, "both") == 0) +- call_trace = 0; +- else if (strcmp(s, "newfallback") == 0) +- call_trace = 1; +- else if (strcmp(s, "new") == 0) +- call_trace = 2; +- return 0; +-} +-early_param("call_trace", call_trace_setup); +-#endif +--- head-2010-01-18.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/vsyscall_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -42,6 +42,7 @@ + #include + + #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) ++#define __syscall_clobber "r11","rcx","memory" + + int __sysctl_vsyscall __section_sysctl_vsyscall = 1; + seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +@@ -224,8 +225,7 @@ out: + + static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, +- void __user *newval, size_t newlen, +- void **context) ++ void __user *newval, size_t newlen) + { + return -ENOSYS; + } +@@ -277,7 +277,6 @@ static void __cpuinit cpu_vsyscall_init( + vsyscall_set_cpu(raw_smp_processor_id()); + } + +-#ifdef CONFIG_HOTPLUG_CPU + static int __cpuinit + cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) + { +@@ -286,13 +285,13 @@ cpu_vsyscall_notifier(struct notifier_bl + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); + return NOTIFY_DONE; + } +-#endif + + static void __init map_vsyscall(void) + { + extern char __vsyscall_0; + unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + ++ /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. 
*/ + __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); + } + +--- head-2010-01-18.orig/arch/x86/mm/fault_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/fault_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -23,9 +23,9 @@ + #include + #include + #include ++#include + + #include +-#include + #include + #include + #include +@@ -96,7 +96,7 @@ void bust_spinlocks(int yes) + static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, + unsigned long error_code) + { +- unsigned char __user *instr; ++ unsigned char *instr; + int scan_more = 1; + int prefetch = 0; + unsigned char *max_instr; +@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct p + unsigned char instr_hi; + unsigned char instr_lo; + +- if (__get_user(opcode, (char __user *)instr)) ++ if (probe_kernel_address(instr, opcode)) + break; + + instr_hi = opcode & 0xf0; +@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct p + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + scan_more = 0; +- if (__get_user(opcode, (char __user *)instr)) ++ if (probe_kernel_address(instr, opcode)) + break; + prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); +@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct p + static int bad_address(void *p) + { + unsigned long dummy; +- return __get_user(dummy, (unsigned long __user *)p); ++ return probe_kernel_address((unsigned long *)p, dummy); + } + + void dump_pagetable(unsigned long address) +--- head-2010-01-18.orig/arch/x86/mm/init_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -1151,14 +1151,15 @@ static __init int x8664_sysctl_init(void + __initcall(x8664_sysctl_init); + #endif + +-/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only ++/* A pseudo VMA to allow ptrace access for the vsyscall page. This only + covers the 64bit vsyscall page now. 32bit has a real VMA now and does + not need special handling anymore. */ + + static struct vm_area_struct gate_vma = { + .vm_start = VSYSCALL_START, +- .vm_end = VSYSCALL_END, +- .vm_page_prot = PAGE_READONLY ++ .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), ++ .vm_page_prot = PAGE_READONLY_EXEC, ++ .vm_flags = VM_READ | VM_EXEC + }; + + struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +--- head-2010-01-18.orig/arch/x86/mm/pageattr_64-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pageattr_64-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -330,34 +330,40 @@ static struct page *split_large_page(uns + return base; + } + +- +-static void flush_kernel_map(void *address) ++static void cache_flush_page(void *adr) + { +- if (0 && address && cpu_has_clflush) { +- /* is this worth it? 
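The conversions in this fault_64 hunk, like the fault_32 ones earlier, all apply one rule: __get_user() is for user pointers, while probe_kernel_address() is the safe way to read a kernel address that may not be mapped, returning non-zero instead of oopsing on a bad pointer. The pattern in isolation, as a sketch:

#include <linux/uaccess.h>
#include <linux/errno.h>

/* Read one word from a possibly-unmapped kernel address. */
static int peek_kernel_word(const unsigned long *addr, unsigned long *out)
{
	unsigned long v;

	if (probe_kernel_address(addr, v))	/* non-zero on fault */
		return -EFAULT;
	*out = v;
	return 0;
}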
*/ +- int i; +- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) +- asm volatile("clflush (%0)" :: "r" (address + i)); +- } else +- asm volatile("wbinvd":::"memory"); +- if (address) +- __flush_tlb_one(address); +- else +- __flush_tlb_all(); ++ int i; ++ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) ++ asm volatile("clflush (%0)" :: "r" (adr + i)); + } + ++static void flush_kernel_map(void *arg) ++{ ++ struct list_head *l = (struct list_head *)arg; ++ struct page *pg; + +-static inline void flush_map(unsigned long address) ++ /* When clflush is available always use it because it is ++ much cheaper than WBINVD */ ++ if (!cpu_has_clflush) ++ asm volatile("wbinvd" ::: "memory"); ++ list_for_each_entry(pg, l, lru) { ++ void *adr = page_address(pg); ++ if (cpu_has_clflush) ++ cache_flush_page(adr); ++ __flush_tlb_one(adr); ++ } ++} ++ ++static inline void flush_map(struct list_head *l) + { +- on_each_cpu(flush_kernel_map, (void *)address, 1, 1); ++ on_each_cpu(flush_kernel_map, l, 1, 1); + } + +-static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ ++static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ + + static inline void save_page(struct page *fpage) + { +- fpage->lru.next = (struct list_head *)deferred_pages; +- deferred_pages = fpage; ++ list_add(&fpage->lru, &deferred_pages); + } + + /* +@@ -487,18 +493,18 @@ int change_page_attr(struct page *page, + + void global_flush_tlb(void) + { +- struct page *dpage; ++ struct page *pg, *next; ++ struct list_head l; + + down_read(&init_mm.mmap_sem); +- dpage = xchg(&deferred_pages, NULL); ++ list_replace_init(&deferred_pages, &l); + up_read(&init_mm.mmap_sem); + +- flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); +- while (dpage) { +- struct page *tmp = dpage; +- dpage = (struct page *)dpage->lru.next; +- ClearPagePrivate(tmp); +- __free_page(tmp); ++ flush_map(&l); ++ ++ list_for_each_entry_safe(pg, next, &l, lru) { ++ ClearPagePrivate(pg); ++ __free_page(pg); + } + } + +--- head-2010-01-18.orig/drivers/pci/msi-xen.c 2009-12-04 10:49:14.000000000 +0100 ++++ head-2010-01-18/drivers/pci/msi-xen.c 2009-11-06 10:46:27.000000000 +0100 +@@ -273,10 +273,8 @@ void disable_msi_mode(struct pci_dev *de + pci_write_config_word(dev, msi_control_reg(pos), control); + dev->msix_enabled = 0; + } +- if (pci_find_capability(dev, PCI_CAP_ID_EXP)) { +- /* PCI Express Endpoint device detected */ +- pci_intx(dev, 1); /* enable intx */ +- } ++ ++ pci_intx(dev, 1); /* enable intx */ + } + + static void enable_msi_mode(struct pci_dev *dev, int pos, int type) +@@ -294,10 +292,8 @@ static void enable_msi_mode(struct pci_d + pci_write_config_word(dev, msi_control_reg(pos), control); + dev->msix_enabled = 1; + } +- if (pci_find_capability(dev, PCI_CAP_ID_EXP)) { +- /* PCI Express Endpoint device detected */ +- pci_intx(dev, 0); /* disable intx */ +- } ++ ++ pci_intx(dev, 0); /* disable intx */ + } + + #ifdef CONFIG_PM +--- head-2010-01-18.orig/drivers/xen/balloon/balloon.c 2010-01-04 12:19:20.000000000 +0100 ++++ head-2010-01-18/drivers/xen/balloon/balloon.c 2009-11-06 10:46:27.000000000 +0100 +@@ -94,8 +94,8 @@ static unsigned long frame_list[PAGE_SIZ + static LIST_HEAD(ballooned_pages); + + /* Main work function, always executed in process context. 
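global_flush_tlb() above stops chaining deferred pages through a hand-rolled xchg'd pointer and uses a real list_head: detach everything in one step with list_replace_init() while holding the lock, then walk the now-private list with list_for_each_entry_safe() so entries can be freed during the walk. The drain pattern on its own, with an illustrative item type and lock:

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct ditem {
	struct list_head lru;
};

static LIST_HEAD(deferred_items);
static DEFINE_MUTEX(deferred_lock);	/* stands in for init_mm.mmap_sem */

static void drain_deferred(void)
{
	struct ditem *it, *next;
	LIST_HEAD(local);

	mutex_lock(&deferred_lock);
	list_replace_init(&deferred_items, &local);	/* detach all at once */
	mutex_unlock(&deferred_lock);

	list_for_each_entry_safe(it, next, &local, lru)
		kfree(it);	/* safe: 'next' is saved before 'it' is freed */
}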
*/ +-static void balloon_process(void *unused); +-static DECLARE_WORK(balloon_worker, balloon_process, NULL); ++static void balloon_process(struct work_struct *unused); ++static DECLARE_WORK(balloon_worker, balloon_process); + static struct timer_list balloon_timer; + + /* When ballooning out (allocating memory to return to Xen) we don't really +@@ -402,7 +402,7 @@ static int decrease_reservation(unsigned + * by the balloon lock), or with changes to the Xen hard limit, but we will + * recover from these in time. + */ +-static void balloon_process(void *unused) ++static void balloon_process(struct work_struct *unused) + { + int need_sleep = 0; + long credit; +--- head-2010-01-18.orig/drivers/xen/blkback/blkback.c 2010-01-04 12:22:06.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkback/blkback.c 2009-11-06 10:46:27.000000000 +0100 +@@ -37,6 +37,7 @@ + + #include + #include ++#include + #include + #include + #include +--- head-2010-01-18.orig/drivers/xen/blkback/interface.c 2010-01-04 11:56:34.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkback/interface.c 2010-01-04 12:24:22.000000000 +0100 +@@ -35,7 +35,7 @@ + #include + #include + +-static kmem_cache_t *blkif_cachep; ++static struct kmem_cache *blkif_cachep; + + blkif_t *blkif_alloc(domid_t domid) + { +--- head-2010-01-18.orig/drivers/xen/blkfront/blkfront.c 2010-01-18 16:17:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkfront/blkfront.c 2010-01-18 16:17:45.000000000 +0100 +@@ -71,7 +71,7 @@ static int setup_blkring(struct xenbus_d + static void kick_pending_request_queues(struct blkfront_info *); + + static irqreturn_t blkif_int(int irq, void *dev_id); +-static void blkif_restart_queue(void *arg); ++static void blkif_restart_queue(struct work_struct *arg); + static void blkif_recover(struct blkfront_info *); + static void blkif_completion(struct blk_shadow *); + static void blkif_free(struct blkfront_info *, int); +@@ -111,7 +111,7 @@ static int blkfront_probe(struct xenbus_ + info->xbdev = dev; + info->vdevice = vdevice; + info->connected = BLKIF_STATE_DISCONNECTED; +- INIT_WORK(&info->work, blkif_restart_queue, (void *)info); ++ INIT_WORK(&info->work, blkif_restart_queue); + + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; +@@ -465,9 +465,9 @@ static void kick_pending_request_queues( + } + } + +-static void blkif_restart_queue(void *arg) ++static void blkif_restart_queue(struct work_struct *arg) + { +- struct blkfront_info *info = (struct blkfront_info *)arg; ++ struct blkfront_info *info = container_of(arg, struct blkfront_info, work); + spin_lock_irq(&blkif_io_lock); + if (info->connected == BLKIF_STATE_CONNECTED) + kick_pending_request_queues(info); +--- head-2010-01-18.orig/drivers/xen/blktap/blktap.c 2010-01-04 12:22:21.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blktap/blktap.c 2009-11-06 10:46:27.000000000 +0100 +@@ -40,6 +40,7 @@ + + #include + #include ++#include + #include + #include + #include "common.h" +--- head-2010-01-18.orig/drivers/xen/blktap/interface.c 2010-01-04 11:56:34.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blktap/interface.c 2010-01-04 12:24:32.000000000 +0100 +@@ -35,7 +35,7 @@ + #include + #include + +-static kmem_cache_t *blkif_cachep; ++static struct kmem_cache *blkif_cachep; + + blkif_t *tap_alloc_blkif(domid_t domid) + { +--- head-2010-01-18.orig/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200 ++++ head-2010-01-18/drivers/xen/char/mem.c 2009-11-06 10:46:27.000000000 +0100 +@@ -157,7 +157,7 @@ static loff_t memory_lseek(struct file * + { + loff_t 
ret; + +- mutex_lock(&file->f_dentry->d_inode->i_mutex); ++ mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + switch (orig) { + case 0: + file->f_pos = offset; +@@ -172,7 +172,7 @@ static loff_t memory_lseek(struct file * + default: + ret = -EINVAL; + } +- mutex_unlock(&file->f_dentry->d_inode->i_mutex); ++ mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + return ret; + } + +--- head-2010-01-18.orig/drivers/xen/console/console.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/console/console.c 2009-11-06 10:46:27.000000000 +0100 +@@ -85,11 +85,6 @@ static int xc_num = -1; + #define XEN_HVC_MAJOR 229 + #define XEN_HVC_MINOR 0 + +-#ifdef CONFIG_MAGIC_SYSRQ +-static unsigned long sysrq_requested; +-extern int sysrq_enabled; +-#endif +- + static int __init xencons_setup(char *str) + { + char *q; +@@ -354,8 +349,8 @@ void __init dom0_init_screen_info(const + #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \ + ((_tty)->index != (xc_num - 1))) + +-static struct termios *xencons_termios[MAX_NR_CONSOLES]; +-static struct termios *xencons_termios_locked[MAX_NR_CONSOLES]; ++static struct ktermios *xencons_termios[MAX_NR_CONSOLES]; ++static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES]; + static struct tty_struct *xencons_tty; + static int xencons_priv_irq; + static char x_char; +@@ -371,7 +366,9 @@ void xencons_rx(char *buf, unsigned len) + + for (i = 0; i < len; i++) { + #ifdef CONFIG_MAGIC_SYSRQ +- if (sysrq_enabled) { ++ if (sysrq_on()) { ++ static unsigned long sysrq_requested; ++ + if (buf[i] == '\x0f') { /* ^O */ + if (!sysrq_requested) { + sysrq_requested = jiffies; +--- head-2010-01-18.orig/drivers/xen/core/reboot.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/reboot.c 2009-11-06 10:46:27.000000000 +0100 +@@ -34,8 +34,8 @@ static int suspend_cancelled; + /* Can we leave APs online when we suspend? */ + static int fast_suspend; + +-static void __shutdown_handler(void *unused); +-static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); ++static void __shutdown_handler(struct work_struct *unused); ++static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler); + + static int setup_suspend_evtchn(void); + +@@ -105,7 +105,7 @@ static int xen_suspend(void *__unused) + case SHUTDOWN_RESUMING: + break; + default: +- schedule_work(&shutdown_work); ++ schedule_delayed_work(&shutdown_work, 0); + break; + } + +@@ -137,12 +137,12 @@ static void switch_shutdown_state(int ne + + /* Either we kick off the work, or we leave it to xen_suspend(). 
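The reboot.c hunk above shows both halves of the 2.6.20 workqueue conversion that runs through all of these driver hunks: work handlers now receive the struct work_struct * itself, recovering their context with container_of() instead of a stored void *, and work that is ever queued with a timeout must become a delayed_work (hence shutdown_work turning into DECLARE_DELAYED_WORK queued via schedule_delayed_work(&work, 0)). A self-contained sketch of the new-style usage, with an illustrative foo type:

#include <linux/workqueue.h>

struct foo {
	int pending;
	struct work_struct work;
};

static void foo_worker(struct work_struct *w)
{
	/* Recover the owning object from the embedded work item. */
	struct foo *f = container_of(w, struct foo, work);

	f->pending = 0;
}

static void foo_start(struct foo *f)
{
	f->pending = 1;
	INIT_WORK(&f->work, foo_worker);	/* no data argument any more */
	schedule_work(&f->work);
}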
*/ + if (old_state == SHUTDOWN_INVALID) +- schedule_work(&shutdown_work); ++ schedule_delayed_work(&shutdown_work, 0); + else + BUG_ON(old_state != SHUTDOWN_RESUMING); + } + +-static void __shutdown_handler(void *unused) ++static void __shutdown_handler(struct work_struct *unused) + { + int err; + +--- head-2010-01-18.orig/drivers/xen/core/smpboot.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/smpboot.c 2009-11-06 10:46:27.000000000 +0100 +@@ -160,7 +160,12 @@ static void xen_smp_intr_exit(unsigned i + + void __cpuinit cpu_bringup(void) + { ++#ifdef __i386__ ++ cpu_set_gdt(current_thread_info()->cpu); ++ secondary_cpu_init(); ++#else + cpu_init(); ++#endif + identify_cpu(cpu_data + smp_processor_id()); + touch_softlockup_watchdog(); + preempt_disable(); +@@ -299,11 +304,12 @@ void __init smp_prepare_cpus(unsigned in + if (cpu == 0) + continue; + ++ idle = fork_idle(cpu); ++ if (IS_ERR(idle)) ++ panic("failed fork for CPU %d", cpu); ++ + #ifdef __x86_64__ + gdt_descr = &cpu_gdt_descr[cpu]; +-#else +- gdt_descr = &per_cpu(cpu_gdt_descr, cpu); +-#endif + gdt_descr->address = get_zeroed_page(GFP_KERNEL); + if (unlikely(!gdt_descr->address)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", +@@ -312,6 +318,11 @@ void __init smp_prepare_cpus(unsigned in + } + gdt_descr->size = GDT_SIZE; + memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); ++#else ++ if (unlikely(!init_gdt(cpu, idle))) ++ continue; ++ gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++#endif + make_page_readonly( + (void *)gdt_descr->address, + XENFEAT_writable_descriptor_tables); +@@ -331,10 +342,6 @@ void __init smp_prepare_cpus(unsigned in + cpu_2_logical_apicid[cpu] = apicid; + x86_cpu_to_apicid[cpu] = apicid; + +- idle = fork_idle(cpu); +- if (IS_ERR(idle)) +- panic("failed fork for CPU %d", cpu); +- + #ifdef __x86_64__ + cpu_pda(cpu)->pcurrent = idle; + cpu_pda(cpu)->cpunumber = cpu; +--- head-2010-01-18.orig/drivers/xen/fbfront/xenfb.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/fbfront/xenfb.c 2009-11-06 10:46:27.000000000 +0100 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +--- head-2010-01-18.orig/drivers/xen/netback/loopback.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netback/loopback.c 2009-11-06 10:46:27.000000000 +0100 +@@ -54,6 +54,7 @@ + #include + #include /* secpath_reset() */ + #include /* is_initial_xendomain() */ ++#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */ + + static int nloopbacks = -1; + module_param(nloopbacks, int, 0); +--- head-2010-01-18.orig/drivers/xen/pciback/conf_space_header.c 2008-10-29 09:55:56.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pciback/conf_space_header.c 2009-11-06 10:46:27.000000000 +0100 +@@ -22,14 +22,14 @@ static int command_write(struct pci_dev + { + int err; + +- if (!dev->is_enabled && is_enable_cmd(value)) { ++ if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable\n", + pci_name(dev)); + err = pci_enable_device(dev); + if (err) + return err; +- } else if (dev->is_enabled && !is_enable_cmd(value)) { ++ } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable\n", + pci_name(dev)); +--- head-2010-01-18.orig/drivers/xen/pciback/pciback.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pciback/pciback.h 2009-11-06 10:46:27.000000000 
+0100 +@@ -100,7 +100,7 @@ void pciback_release_devices(struct pcib + + /* Handles events from front-end */ + irqreturn_t pciback_handle_event(int irq, void *dev_id); +-void pciback_do_op(void *data); ++void pciback_do_op(struct work_struct *work); + + int pciback_xenbus_register(void); + void pciback_xenbus_unregister(void); +--- head-2010-01-18.orig/drivers/xen/pciback/pciback_ops.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pciback/pciback_ops.c 2009-11-06 10:46:27.000000000 +0100 +@@ -26,7 +26,7 @@ void pciback_reset_device(struct pci_dev + + pci_write_config_word(dev, PCI_COMMAND, 0); + +- dev->is_enabled = 0; ++ atomic_set(&dev->enable_cnt, 0); + dev->is_busmaster = 0; + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); +@@ -67,9 +67,9 @@ void test_and_schedule_op(struct pciback + * context because some of the pci_* functions can sleep (mostly due to ACPI + * use of semaphores). This function is intended to be called from a work + * queue in process context taking a struct pciback_device as a parameter */ +-void pciback_do_op(void *data) ++void pciback_do_op(struct work_struct *work) + { +- struct pciback_device *pdev = data; ++ struct pciback_device *pdev = container_of(work, struct pciback_device, op_work); + struct pci_dev *dev; + struct xen_pci_op *op = &pdev->sh_info->op; + +--- head-2010-01-18.orig/drivers/xen/pciback/xenbus.c 2009-04-07 13:58:48.000000000 +0200 ++++ head-2010-01-18/drivers/xen/pciback/xenbus.c 2009-11-06 10:46:27.000000000 +0100 +@@ -33,7 +33,7 @@ static struct pciback_device *alloc_pdev + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + pdev->be_watching = 0; + +- INIT_WORK(&pdev->op_work, pciback_do_op, pdev); ++ INIT_WORK(&pdev->op_work, pciback_do_op); + + if (pciback_init_devices(pdev)) { + kfree(pdev); +--- head-2010-01-18.orig/drivers/xen/pcifront/pci_op.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pcifront/pci_op.c 2009-11-06 10:46:27.000000000 +0100 +@@ -636,9 +636,9 @@ static pci_ers_result_t pcifront_common_ + } + + +-void pcifront_do_aer(void *data) ++void pcifront_do_aer(struct work_struct *data) + { +- struct pcifront_device *pdev = data; ++ struct pcifront_device *pdev = container_of(data, struct pcifront_device, op_work); + int cmd = pdev->sh_info->aer_op.cmd; + pci_channel_state_t state = + (pci_channel_state_t)pdev->sh_info->aer_op.err; +--- head-2010-01-18.orig/drivers/xen/pcifront/pcifront.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pcifront/pcifront.h 2009-11-06 10:46:27.000000000 +0100 +@@ -48,7 +48,7 @@ int pcifront_rescan_root(struct pcifront + unsigned int domain, unsigned int bus); + void pcifront_free_roots(struct pcifront_device *pdev); + +-void pcifront_do_aer( void *data); ++void pcifront_do_aer(struct work_struct *data); + + irqreturn_t pcifront_handler_aer(int irq, void *dev); + +--- head-2010-01-18.orig/drivers/xen/pcifront/xenbus.c 2009-04-07 13:58:48.000000000 +0200 ++++ head-2010-01-18/drivers/xen/pcifront/xenbus.c 2009-11-06 10:46:27.000000000 +0100 +@@ -49,7 +49,7 @@ static struct pcifront_device *alloc_pde + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + +- INIT_WORK(&pdev->op_work, pcifront_do_aer, pdev); ++ INIT_WORK(&pdev->op_work, pcifront_do_aer); + + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", + pdev, pdev->sh_info); +--- head-2010-01-18.orig/drivers/xen/scsiback/interface.c 2010-01-04 11:56:34.000000000 +0100 ++++ head-2010-01-18/drivers/xen/scsiback/interface.c 2010-01-04 12:24:50.000000000 
+0100 +@@ -40,7 +40,7 @@ + #include + + +-static kmem_cache_t *scsiback_cachep; ++static struct kmem_cache *scsiback_cachep; + + struct vscsibk_info *vscsibk_info_alloc(domid_t domid) + { +--- head-2010-01-18.orig/drivers/xen/scsiback/scsiback.c 2010-01-04 12:23:07.000000000 +0100 ++++ head-2010-01-18/drivers/xen/scsiback/scsiback.c 2009-11-06 10:46:27.000000000 +0100 +@@ -349,13 +349,11 @@ static int scsiback_merge_bio(struct req + + if (!rq->bio) + blk_rq_bio_prep(q, rq, bio); +- else if (!q->back_merge_fn(q, rq, bio)) ++ else if (!ll_back_merge_fn(q, rq, bio)) + return -EINVAL; + else { + rq->biotail->bi_next = bio; + rq->biotail = bio; +- rq->hard_nr_sectors += bio_sectors(bio); +- rq->nr_sectors = rq->hard_nr_sectors; + } + + return 0; +--- head-2010-01-18.orig/drivers/xen/sfc_netfront/accel_vi.c 2010-01-18 16:17:16.000000000 +0100 ++++ head-2010-01-18/drivers/xen/sfc_netfront/accel_vi.c 2010-01-18 16:17:54.000000000 +0100 +@@ -465,7 +465,7 @@ netfront_accel_enqueue_skb_multi(netfron + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + /* Set to zero to encourage falcon to work it out for us */ +- *(u16*)(skb->h.raw + skb->csum) = 0; ++ *(u16*)(skb->h.raw + skb->csum_offset) = 0; + } + + if (multi_post_start_new_buffer(vnic, &state)) { +@@ -584,7 +584,7 @@ netfront_accel_enqueue_skb_single(netfro + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + /* Set to zero to encourage falcon to work it out for us */ +- *(u16*)(skb->h.raw + skb->csum) = 0; ++ *(u16*)(skb->h.raw + skb->csum_offset) = 0; + } + NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT + (skb, idx, frag_data, frag_len, { +--- head-2010-01-18.orig/drivers/xen/tpmback/interface.c 2010-01-04 11:56:34.000000000 +0100 ++++ head-2010-01-18/drivers/xen/tpmback/interface.c 2010-01-04 12:25:38.000000000 +0100 +@@ -16,7 +16,7 @@ + #include + #include + +-static kmem_cache_t *tpmif_cachep; ++static struct kmem_cache *tpmif_cachep; + int num_frontends = 0; + + LIST_HEAD(tpmif_list); +--- head-2010-01-18.orig/drivers/xen/usbback/usbback.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/usbback/usbback.c 2010-01-04 12:25:52.000000000 +0100 +@@ -540,9 +540,10 @@ struct set_interface_request { + struct work_struct work; + }; + +-static void usbbk_set_interface_work(void *data) ++static void usbbk_set_interface_work(struct work_struct *arg) + { +- struct set_interface_request *req = (struct set_interface_request *) data; ++ struct set_interface_request *req ++ = container_of(arg, struct set_interface_request, work); + pending_req_t *pending_req = req->pending_req; + struct usb_device *udev = req->pending_req->stub->udev; + +@@ -570,7 +571,7 @@ static int usbbk_set_interface(pending_r + req->pending_req = pending_req; + req->interface = interface; + req->alternate = alternate; +- INIT_WORK(&req->work, usbbk_set_interface_work, req); ++ INIT_WORK(&req->work, usbbk_set_interface_work); + usb_get_dev(udev); + schedule_work(&req->work); + return 0; +@@ -582,9 +583,10 @@ struct clear_halt_request { + struct work_struct work; + }; + +-static void usbbk_clear_halt_work(void *data) ++static void usbbk_clear_halt_work(struct work_struct *arg) + { +- struct clear_halt_request *req = (struct clear_halt_request *) data; ++ struct clear_halt_request *req ++ = container_of(arg, struct clear_halt_request, work); + pending_req_t *pending_req = req->pending_req; + struct usb_device *udev = req->pending_req->stub->udev; + int ret; +@@ -610,7 +612,7 @@ static int usbbk_clear_halt(pending_req_ + return -ENOMEM; + req->pending_req = pending_req; + req->pipe = 
pipe; +- INIT_WORK(&req->work, usbbk_clear_halt_work, req); ++ INIT_WORK(&req->work, usbbk_clear_halt_work); + + usb_get_dev(udev); + schedule_work(&req->work); +@@ -623,9 +625,10 @@ struct port_reset_request { + struct work_struct work; + }; + +-static void usbbk_port_reset_work(void *data) ++static void usbbk_port_reset_work(struct work_struct *arg) + { +- struct port_reset_request *req = (struct port_reset_request *) data; ++ struct port_reset_request *req ++ = container_of(arg, struct port_reset_request, work); + pending_req_t *pending_req = req->pending_req; + struct usb_device *udev = pending_req->stub->udev; + int ret, ret_lock; +@@ -654,7 +657,7 @@ static int usbbk_port_reset(pending_req_ + return -ENOMEM; + + req->pending_req = pending_req; +- INIT_WORK(&req->work, usbbk_port_reset_work, req); ++ INIT_WORK(&req->work, usbbk_port_reset_work); + + usb_get_dev(udev); + schedule_work(&req->work); +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_comms.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_comms.c 2009-11-06 10:46:27.000000000 +0100 +@@ -49,8 +49,8 @@ + + static int xenbus_irq; + +-extern void xenbus_probe(void *); +-static DECLARE_WORK(probe_work, xenbus_probe, NULL); ++extern void xenbus_probe(struct work_struct *); ++static DECLARE_WORK(probe_work, xenbus_probe); + + static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); + +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe.c 2010-01-07 09:53:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe.c 2009-12-04 10:51:20.000000000 +0100 +@@ -856,7 +856,7 @@ void unregister_xenstore_notifier(struct + EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); + + +-void xenbus_probe(void *unused) ++void xenbus_probe(struct work_struct *unused) + { + BUG_ON(!is_xenstored_ready()); + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/desc_32.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/desc_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -4,8 +4,6 @@ + #include + #include + +-#define CPU_16BIT_STACK_SIZE 1024 +- + #ifndef __ASSEMBLY__ + + #include +@@ -15,8 +13,6 @@ + + extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; + +-DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); +- + struct Xgt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); +@@ -32,11 +28,6 @@ static inline struct desc_struct *get_cp + return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; + } + +-/* +- * This is the ldt that every process will get unless we need +- * something other than this. +- */ +-extern struct desc_struct default_ldt[]; + extern struct desc_struct idt_table[]; + extern void set_intr_gate(unsigned int irq, void * addr); + +@@ -63,8 +54,8 @@ static inline void pack_gate(__u32 *a, _ + #define DESCTYPE_DPL3 0x60 /* DPL-3 */ + #define DESCTYPE_S 0x10 /* !system */ + ++#ifndef CONFIG_XEN + #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) +-#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) + + #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) + #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) +@@ -75,6 +66,7 @@ static inline void pack_gate(__u32 *a, _ + #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) + #define store_tr(tr) __asm__ ("str %0":"=m" (tr)) + #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) ++#endif + + #if TLS_SIZE != 24 + # error update this code. 
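Editor's note: the pciback, pcifront, usbback and xenbus hunks above are all instances of one conversion. Since the 2.6.20 workqueue rework, INIT_WORK() takes no data pointer, and a work handler receives the work_struct itself, recovering its containing object with container_of(). A minimal sketch of the pattern, using a hypothetical frob_device (names are illustrative, not from this tree):

	#include <linux/workqueue.h>
	#include <linux/kernel.h>	/* container_of() */

	struct frob_device {
		int pending;
		struct work_struct op_work;	/* embedded in the state it acts on */
	};

	/* New-style handler: gets the work item, not a void * payload. */
	static void frob_do_op(struct work_struct *work)
	{
		struct frob_device *dev =
			container_of(work, struct frob_device, op_work);

		dev->pending = 0;	/* operate on the containing object */
	}

	static void frob_start(struct frob_device *dev)
	{
		INIT_WORK(&dev->op_work, frob_do_op);	/* no third argument */
		schedule_work(&dev->op_work);
	}

Where a handler needs more than the containing device, the usbback hunks show the matching idiom: wrap the extra arguments together with the work_struct in a small request structure, and container_of() back to that.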
+@@ -90,22 +82,43 @@ static inline void load_TLS(struct threa + } + + #ifndef CONFIG_XEN ++#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++ + static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) + { + __u32 *lp = (__u32 *)((char *)dt + entry*8); + *lp = entry_a; + *(lp+1) = entry_b; + } +- +-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) +-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++#define set_ldt native_set_ldt + #else + extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); + extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); ++#define set_ldt xen_set_ldt ++#endif ++ ++#ifndef CONFIG_XEN ++static inline fastcall void native_set_ldt(const void *addr, ++ unsigned int entries) ++{ ++ if (likely(entries == 0)) ++ __asm__ __volatile__("lldt %w0"::"q" (0)); ++ else { ++ unsigned cpu = smp_processor_id(); ++ __u32 a, b; ++ ++ pack_descriptor(&a, &b, (unsigned long)addr, ++ entries * sizeof(struct desc_struct) - 1, ++ DESCTYPE_LDT, 0); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); ++ __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); ++ } ++} + #endif +-#ifndef CONFIG_X86_NO_IDT +-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) + ++#ifndef CONFIG_X86_NO_IDT + static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) + { + __u32 a, b; +@@ -125,14 +138,6 @@ static inline void __set_tss_desc(unsign + } + #endif + +-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) +-{ +- __u32 a, b; +- pack_descriptor(&a, &b, (unsigned long)addr, +- entries * sizeof(struct desc_struct) - 1, +- DESCTYPE_LDT, 0); +- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); +-} + + #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) + +@@ -163,36 +168,22 @@ static inline void set_ldt_desc(unsigned + + static inline void clear_LDT(void) + { +- int cpu = get_cpu(); +- +- /* +- * NB. We load the default_ldt for lcall7/27 handling on demand, as +- * it slows down context switching. Noone uses it anyway. +- */ +- cpu = cpu; /* XXX avoid compiler warning */ +- xen_set_ldt(NULL, 0); +- put_cpu(); ++ set_ldt(NULL, 0); + } + + /* + * load one particular LDT into the current CPU + */ +-static inline void load_LDT_nolock(mm_context_t *pc, int cpu) ++static inline void load_LDT_nolock(mm_context_t *pc) + { +- void *segments = pc->ldt; +- int count = pc->size; +- +- if (likely(!count)) +- segments = NULL; +- +- xen_set_ldt(segments, count); ++ set_ldt(pc->ldt, pc->size); + } + + static inline void load_LDT(mm_context_t *pc) + { +- int cpu = get_cpu(); +- load_LDT_nolock(pc, cpu); +- put_cpu(); ++ preempt_disable(); ++ load_LDT_nolock(pc); ++ preempt_enable(); + } + + static inline unsigned long get_desc_base(unsigned long *desc) +@@ -204,6 +195,29 @@ static inline unsigned long get_desc_bas + return base; + } + ++#else /* __ASSEMBLY__ */ ++ ++/* ++ * GET_DESC_BASE reads the descriptor base of the specified segment. 
++ * ++ * Args: ++ * idx - descriptor index ++ * gdt - GDT pointer ++ * base - 32bit register to which the base will be written ++ * lo_w - lo word of the "base" register ++ * lo_b - lo byte of the "base" register ++ * hi_b - hi byte of the low word of the "base" register ++ * ++ * Example: ++ * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) ++ * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. ++ */ ++#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ ++ movb idx*8+4(gdt), lo_b; \ ++ movb idx*8+7(gdt), hi_b; \ ++ shll $16, base; \ ++ movw idx*8+2(gdt), lo_w; ++ + #endif /* !__ASSEMBLY__ */ + + #endif +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/fixmap_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -13,13 +13,16 @@ + #ifndef _ASM_FIXMAP_H + #define _ASM_FIXMAP_H + +- + /* used by vmalloc.c, vsyscall.lds.S. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ + extern unsigned long __FIXADDR_TOP; ++#ifdef CONFIG_COMPAT_VDSO ++#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) ++#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) ++#endif + + #ifndef __ASSEMBLY__ + #include +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/highmem.h 2008-10-29 09:55:56.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:46:27.000000000 +0100 +@@ -85,7 +85,7 @@ static inline void clear_user_highpage(s + + void copy_highpage(struct page *to, struct page *from); + static inline void copy_user_highpage(struct page *to, struct page *from, +- unsigned long vaddr) ++ unsigned long vaddr, struct vm_area_struct *vma) + { + copy_highpage(to, from); + } +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-06 10:46:27.000000000 +0100 +@@ -47,15 +47,6 @@ + #include + #include + #include +-#if defined(__i386__) +-# ifdef CONFIG_X86_PAE +-# include +-# else +-# include +-# endif +-#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) +-# include +-#endif + + extern shared_info_t *HYPERVISOR_shared_info; + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irqflags_32.h 2007-06-12 13:14:02.000000000 +0200 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irqflags_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -22,9 +22,6 @@ + + #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) + +-#define raw_local_save_flags(flags) \ +- do { (flags) = __raw_local_save_flags(); } while (0) +- + #define raw_local_irq_restore(x) \ + do { \ + vcpu_info_t *_vcpu; \ +@@ -66,18 +63,6 @@ void raw_safe_halt(void); + */ + void halt(void); + +-static inline int raw_irqs_disabled_flags(unsigned long flags) +-{ +- return (flags != 0); +-} +- +-#define raw_irqs_disabled() \ +-({ \ +- unsigned long flags = __raw_local_save_flags(); \ +- \ +- raw_irqs_disabled_flags(flags); \ +-}) +- + /* + * For spinlocks, etc: + */ +@@ -90,9 +75,64 @@ static inline int raw_irqs_disabled_flag + flags; \ + }) + ++#else ++/* Offsets into shared_info_t. 
*/ ++#define evtchn_upcall_pending /* 0 */ ++#define evtchn_upcall_mask 1 ++ ++#define sizeof_vcpu_shift 6 ++ ++#ifdef CONFIG_SMP ++#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ ++ shl $sizeof_vcpu_shift,%esi ; \ ++ addl HYPERVISOR_shared_info,%esi ++#else ++#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi ++#endif ++ ++#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) ++#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) ++#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) ++#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ ++ __DISABLE_INTERRUPTS ++#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ ++ __ENABLE_INTERRUPTS ++#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ ++sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ ++ __TEST_PENDING ; \ ++ jnz 14f /* process more events if necessary... */ ; \ ++ movl PT_ESI(%esp), %esi ; \ ++ sysexit ; \ ++14: __DISABLE_INTERRUPTS ; \ ++ TRACE_IRQS_OFF ; \ ++sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ ++ mov $__KERNEL_PDA, %ecx ; \ ++ push %esp ; \ ++ mov %ecx, %gs ; \ ++ call evtchn_do_upcall ; \ ++ add $4,%esp ; \ ++ jmp ret_from_intr ++#define INTERRUPT_RETURN iret ++#endif /* __ASSEMBLY__ */ ++ ++#ifndef __ASSEMBLY__ ++#define raw_local_save_flags(flags) \ ++ do { (flags) = __raw_local_save_flags(); } while (0) ++ + #define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + ++static inline int raw_irqs_disabled_flags(unsigned long flags) ++{ ++ return (flags != 0); ++} ++ ++#define raw_irqs_disabled() \ ++({ \ ++ unsigned long flags = __raw_local_save_flags(); \ ++ \ ++ raw_irqs_disabled_flags(flags); \ ++}) + #endif /* __ASSEMBLY__ */ + + /* +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2007-06-12 13:14:02.000000000 +0200 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -27,14 +27,13 @@ static inline void enter_lazy_tlb(struct + static inline void __prepare_arch_switch(void) + { + /* +- * Save away %fs and %gs. No need to save %es and %ds, as those +- * are always kernel segments while inside the kernel. Must +- * happen before reload of cr3/ldt (i.e., not in __switch_to). ++ * Save away %fs. No need to save %gs, as it was saved on the ++ * stack on entry. No need to save %es and %ds, as those are ++ * always kernel segments while inside the kernel. + */ +- asm volatile ( "mov %%fs,%0 ; mov %%gs,%1" +- : "=m" (current->thread.fs), +- "=m" (current->thread.gs)); +- asm volatile ( "movl %0,%%fs ; movl %0,%%gs" ++ asm volatile ( "mov %%fs,%0" ++ : "=m" (current->thread.fs)); ++ asm volatile ( "movl %0,%%fs" + : : "r" (0) ); + } + +@@ -89,14 +88,14 @@ static inline void switch_mm(struct mm_s + * tlb flush IPI delivery. We must reload %cr3. 
+ */ + load_cr3(next->pgd); +- load_LDT_nolock(&next->context, cpu); ++ load_LDT_nolock(&next->context); + } + } + #endif + } + +-#define deactivate_mm(tsk, mm) \ +- asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) ++#define deactivate_mm(tsk, mm) \ ++ asm("movl %0,%%fs": :"r" (0)); + + static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) + { +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:46:27.000000000 +0100 +@@ -1,8 +1,6 @@ + #ifndef _I386_PGTABLE_3LEVEL_H + #define _I386_PGTABLE_3LEVEL_H + +-#include +- + /* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. +@@ -75,6 +73,23 @@ static inline void set_pte(pte_t *ptep, + xen_l3_entry_update((pudptr), (pudval)) + + /* ++ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table ++ * entry, so clear the bottom half first and enforce ordering with a compiler ++ * barrier. ++ */ ++static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) ++{ ++ if ((mm != current->mm && mm != &init_mm) ++ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { ++ ptep->pte_low = 0; ++ smp_wmb(); ++ ptep->pte_high = 0; ++ } ++} ++ ++#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) ++ ++/* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... + * We do not let the generic code free and clear pgd entries due to +@@ -93,45 +108,16 @@ static inline void pud_clear (pud_t * pu + #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ + pmd_index(address)) + +-static inline int pte_none(pte_t pte) +-{ +- return !(pte.pte_low | pte.pte_high); +-} +- +-/* +- * For PTEs and PDEs, we must clear the P-bit first when clearing a page table +- * entry, so clear the bottom half first and enforce ordering with a compiler +- * barrier. 
+- */ +-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) ++static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) + { +- if ((mm != current->mm && mm != &init_mm) +- || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { +- ptep->pte_low = 0; +- smp_wmb(); ++ uint64_t val = __pte_val(res); ++ if (__cmpxchg64(ptep, val, 0) != val) { ++ /* xchg acts as a barrier before the setting of the high bits */ ++ res.pte_low = xchg(&ptep->pte_low, 0); ++ res.pte_high = ptep->pte_high; + ptep->pte_high = 0; + } +-} +- +-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +- +-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +-{ +- pte_t pte = *ptep; +- if (!pte_none(pte)) { +- if ((mm != &init_mm) || +- HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { +- uint64_t val = __pte_val(pte); +- if (__cmpxchg64(ptep, val, 0) != val) { +- /* xchg acts as a barrier before the setting of the high bits */ +- pte.pte_low = xchg(&ptep->pte_low, 0); +- pte.pte_high = ptep->pte_high; +- ptep->pte_high = 0; +- } +- } +- } +- return pte; ++ return res; + } + + #define __HAVE_ARCH_PTEP_CLEAR_FLUSH +@@ -160,6 +146,11 @@ static inline int pte_same(pte_t a, pte_ + + #define pte_page(x) pfn_to_page(pte_pfn(x)) + ++static inline int pte_none(pte_t pte) ++{ ++ return !(pte.pte_low | pte.pte_high); ++} ++ + #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ + ((_pte).pte_high << (32-PAGE_SHIFT))) + #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -38,14 +38,14 @@ struct vm_area_struct; + #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + extern unsigned long empty_zero_page[1024]; + extern pgd_t *swapper_pg_dir; +-extern kmem_cache_t *pgd_cache; +-extern kmem_cache_t *pmd_cache; ++extern struct kmem_cache *pgd_cache; ++extern struct kmem_cache *pmd_cache; + extern spinlock_t pgd_lock; + extern struct page *pgd_list; + +-void pmd_ctor(void *, kmem_cache_t *, unsigned long); +-void pgd_ctor(void *, kmem_cache_t *, unsigned long); +-void pgd_dtor(void *, kmem_cache_t *, unsigned long); ++void pmd_ctor(void *, struct kmem_cache *, unsigned long); ++void pgd_ctor(void *, struct kmem_cache *, unsigned long); ++void pgd_dtor(void *, struct kmem_cache *, unsigned long); + void pgtable_cache_init(void); + void paging_init(void); + +@@ -276,7 +276,6 @@ static inline pte_t pte_mkhuge(pte_t pte + #define pte_update(mm, addr, ptep) do { } while (0) + #define pte_update_defer(mm, addr, ptep) do { } while (0) + +- + /* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware +@@ -342,6 +341,19 @@ do { \ + __young; \ + }) + ++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR ++static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) ++{ ++ pte_t pte = *ptep; ++ if (!pte_none(pte) ++ && (mm != &init_mm ++ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { ++ pte = raw_ptep_get_and_clear(ptep, pte); ++ pte_update(mm, addr, ptep); ++ } ++ return pte; ++} ++ + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL + #define ptep_get_and_clear_full(mm, addr, ptep, full) \ + ((full) ? 
({ \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/processor_32.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/processor_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + + /* flag for disabling the tsc */ +@@ -73,6 +74,7 @@ struct cpuinfo_x86 { + #endif + unsigned char x86_max_cores; /* cpuid returned max cores value */ + unsigned char apicid; ++ unsigned short x86_clflush_size; + #ifdef CONFIG_SMP + unsigned char booted_cores; /* number of cores as seen by OS */ + __u8 phys_proc_id; /* Physical processor id. */ +@@ -114,6 +116,8 @@ extern struct cpuinfo_x86 cpu_data[]; + extern int cpu_llc_id[NR_CPUS]; + extern char ignore_fpu_irq; + ++void __init cpu_detect(struct cpuinfo_x86 *c); ++ + extern void identify_cpu(struct cpuinfo_x86 *); + extern void print_cpu_info(struct cpuinfo_x86 *); + extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +@@ -146,8 +150,8 @@ static inline void detect_ht(struct cpui + #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ + #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +-static inline void __cpuid(unsigned int *eax, unsigned int *ebx, +- unsigned int *ecx, unsigned int *edx) ++static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, ++ unsigned int *ecx, unsigned int *edx) + { + /* ecx is often an input as well as an output. */ + __asm__(XEN_CPUID +@@ -158,59 +162,6 @@ static inline void __cpuid(unsigned int + : "0" (*eax), "2" (*ecx)); + } + +-/* +- * Generic CPUID function +- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx +- * resulting in stale register contents being returned. +- */ +-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) +-{ +- *eax = op; +- *ecx = 0; +- __cpuid(eax, ebx, ecx, edx); +-} +- +-/* Some CPUID calls want 'count' to be placed in ecx */ +-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, +- int *edx) +-{ +- *eax = op; +- *ecx = count; +- __cpuid(eax, ebx, ecx, edx); +-} +- +-/* +- * CPUID functions returning a single datum +- */ +-static inline unsigned int cpuid_eax(unsigned int op) +-{ +- unsigned int eax, ebx, ecx, edx; +- +- cpuid(op, &eax, &ebx, &ecx, &edx); +- return eax; +-} +-static inline unsigned int cpuid_ebx(unsigned int op) +-{ +- unsigned int eax, ebx, ecx, edx; +- +- cpuid(op, &eax, &ebx, &ecx, &edx); +- return ebx; +-} +-static inline unsigned int cpuid_ecx(unsigned int op) +-{ +- unsigned int eax, ebx, ecx, edx; +- +- cpuid(op, &eax, &ebx, &ecx, &edx); +- return ecx; +-} +-static inline unsigned int cpuid_edx(unsigned int op) +-{ +- unsigned int eax, ebx, ecx, edx; +- +- cpuid(op, &eax, &ebx, &ecx, &edx); +- return edx; +-} +- + #define load_cr3(pgdir) write_cr3(__pa(pgdir)) + + /* +@@ -480,9 +431,9 @@ struct thread_struct { + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ ++ .gs = __KERNEL_PDA, \ + } + +-#ifndef CONFIG_X86_NO_TSS + /* + * Note that the .io_bitmap member must be extra-big. This is because + * the CPU will access an additional byte beyond the end of the IO +@@ -497,26 +448,9 @@ struct thread_struct { + .io_bitmap = { [ 0 ... 
IO_BITMAP_LONGS] = ~0 }, \ + } + +-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) +-{ +- tss->esp0 = thread->esp0; +- /* This can only happen when SEP is enabled, no need to test "SEP"arately */ +- if (unlikely(tss->ss1 != thread->sysenter_cs)) { +- tss->ss1 = thread->sysenter_cs; +- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); +- } +-} +-#define load_esp0(tss, thread) \ +- __load_esp0(tss, thread) +-#else +-#define load_esp0(tss, thread) do { \ +- if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ +- BUG(); \ +-} while (0) +-#endif +- + #define start_thread(regs, new_eip, new_esp) do { \ +- __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ ++ __asm__("movl %0,%%fs": :"r" (0)); \ ++ regs->xgs = 0; \ + set_fs(USER_DS); \ + regs->xds = __USER_DS; \ + regs->xes = __USER_DS; \ +@@ -526,26 +460,6 @@ static inline void __load_esp0(struct ts + regs->esp = new_esp; \ + } while (0) + +-/* +- * These special macros can be used to get or set a debugging register +- */ +-#define get_debugreg(var, register) \ +- (var) = HYPERVISOR_get_debugreg((register)) +-#define set_debugreg(value, register) \ +- WARN_ON(HYPERVISOR_set_debugreg((register), (value))) +- +-/* +- * Set IOPL bits in EFLAGS from given mask +- */ +-static inline void set_iopl_mask(unsigned mask) +-{ +- struct physdev_set_iopl set_iopl; +- +- /* Force the change at ring 0. */ +- set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; +- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); +-} +- + /* Forward declaration, a strange C thing */ + struct task_struct; + struct mm_struct; +@@ -637,6 +551,105 @@ static inline void rep_nop(void) + + #define cpu_relax() rep_nop() + ++#define paravirt_enabled() 0 ++#define __cpuid xen_cpuid ++ ++#ifndef CONFIG_X86_NO_TSS ++static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) ++{ ++ tss->esp0 = thread->esp0; ++ /* This can only happen when SEP is enabled, no need to test "SEP"arately */ ++ if (unlikely(tss->ss1 != thread->sysenter_cs)) { ++ tss->ss1 = thread->sysenter_cs; ++ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); ++ } ++} ++#define load_esp0(tss, thread) \ ++ __load_esp0(tss, thread) ++#else ++#define load_esp0(tss, thread) do { \ ++ if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ ++ BUG(); \ ++} while (0) ++#endif ++ ++ ++/* ++ * These special macros can be used to get or set a debugging register ++ */ ++#define get_debugreg(var, register) \ ++ (var) = HYPERVISOR_get_debugreg(register) ++#define set_debugreg(value, register) \ ++ WARN_ON(HYPERVISOR_set_debugreg(register, value)) ++ ++#define set_iopl_mask xen_set_iopl_mask ++ ++/* ++ * Set IOPL bits in EFLAGS from given mask ++ */ ++static inline void xen_set_iopl_mask(unsigned mask) ++{ ++ struct physdev_set_iopl set_iopl; ++ ++ /* Force the change at ring 0. */ ++ set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); ++} ++ ++ ++/* ++ * Generic CPUID function ++ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx ++ * resulting in stale register contents being returned. 
++ */ ++static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) ++{ ++ *eax = op; ++ *ecx = 0; ++ __cpuid(eax, ebx, ecx, edx); ++} ++ ++/* Some CPUID calls want 'count' to be placed in ecx */ ++static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, ++ int *edx) ++{ ++ *eax = op; ++ *ecx = count; ++ __cpuid(eax, ebx, ecx, edx); ++} ++ ++/* ++ * CPUID functions returning a single datum ++ */ ++static inline unsigned int cpuid_eax(unsigned int op) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ cpuid(op, &eax, &ebx, &ecx, &edx); ++ return eax; ++} ++static inline unsigned int cpuid_ebx(unsigned int op) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ cpuid(op, &eax, &ebx, &ecx, &edx); ++ return ebx; ++} ++static inline unsigned int cpuid_ecx(unsigned int op) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ cpuid(op, &eax, &ebx, &ecx, &edx); ++ return ecx; ++} ++static inline unsigned int cpuid_edx(unsigned int op) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ cpuid(op, &eax, &ebx, &ecx, &edx); ++ return edx; ++} ++ + /* generic versions from gas */ + #define GENERIC_NOP1 ".byte 0x90\n" + #define GENERIC_NOP2 ".byte 0x89,0xf6\n" +@@ -736,4 +749,8 @@ extern unsigned long boot_option_idle_ov + extern void enable_sep_cpu(void); + extern int sysenter_setup(void); + ++extern int init_gdt(int cpu, struct task_struct *idle); ++extern void cpu_set_gdt(int); ++extern void secondary_cpu_init(void); ++ + #endif /* __ASM_I386_PROCESSOR_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp_32.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #endif + + #ifdef CONFIG_X86_LOCAL_APIC +@@ -56,7 +57,7 @@ extern void cpu_uninit(void); + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. 
+ */ +-#define raw_smp_processor_id() (current_thread_info()->cpu) ++#define raw_smp_processor_id() (read_pda(cpu_number)) + + extern cpumask_t cpu_possible_map; + #define cpu_callin_map cpu_possible_map +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/system_32.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/system_32.h 2009-11-06 10:46:27.000000000 +0100 +@@ -139,17 +139,17 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" + #define write_cr4(x) \ + __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) + +-/* +- * Clear and set 'TS' bit respectively +- */ ++#define wbinvd() \ ++ __asm__ __volatile__ ("wbinvd": : :"memory") ++ ++/* Clear the 'TS' bit */ + #define clts() (HYPERVISOR_fpu_taskswitch(0)) ++ ++/* Set the 'TS' bit */ + #define stts() (HYPERVISOR_fpu_taskswitch(1)) + + #endif /* __KERNEL__ */ + +-#define wbinvd() \ +- __asm__ __volatile__ ("wbinvd": : :"memory") +- + static inline unsigned long get_limit(unsigned long segment) + { + unsigned long __limit; +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/desc_64.h 2008-01-28 12:24:19.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/desc_64.h 2009-11-06 10:46:27.000000000 +0100 +@@ -9,62 +9,11 @@ + + #include + #include ++#include + + #include + #include + +-// 8 byte segment descriptor +-struct desc_struct { +- u16 limit0; +- u16 base0; +- unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; +- unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; +-} __attribute__((packed)); +- +-struct n_desc_struct { +- unsigned int a,b; +-}; +- +-enum { +- GATE_INTERRUPT = 0xE, +- GATE_TRAP = 0xF, +- GATE_CALL = 0xC, +-}; +- +-// 16byte gate +-struct gate_struct { +- u16 offset_low; +- u16 segment; +- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; +- u16 offset_middle; +- u32 offset_high; +- u32 zero1; +-} __attribute__((packed)); +- +-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) +-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) +-#define PTR_HIGH(x) ((unsigned long)(x) >> 32) +- +-enum { +- DESC_TSS = 0x9, +- DESC_LDT = 0x2, +-}; +- +-// LDT or TSS descriptor in the GDT. 16 bytes. 
+-struct ldttss_desc { +- u16 limit0; +- u16 base0; +- unsigned base1 : 8, type : 5, dpl : 2, p : 1; +- unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; +- u32 base3; +- u32 zero1; +-} __attribute__((packed)); +- +-struct desc_ptr { +- unsigned short size; +- unsigned long address; +-} __attribute__((packed)) ; +- + extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS]; + + extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:46:27.000000000 +0100 +@@ -237,19 +237,18 @@ extern unsigned int __kernel_page_user; + + static inline unsigned long pgd_bad(pgd_t pgd) + { +- unsigned long val = __pgd_val(pgd); +- val &= ~PTE_MASK; +- val &= ~(_PAGE_USER | _PAGE_DIRTY); +- return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); ++ return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); + } + +-static inline unsigned long pud_bad(pud_t pud) +-{ +- unsigned long val = __pud_val(pud); +- val &= ~PTE_MASK; +- val &= ~(_PAGE_USER | _PAGE_DIRTY); +- return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); +-} ++static inline unsigned long pud_bad(pud_t pud) ++{ ++ return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); ++} ++ ++static inline unsigned long pmd_bad(pmd_t pmd) ++{ ++ return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); ++} + + #define set_pte_at(_mm,addr,ptep,pteval) do { \ + if (((_mm) != current->mm && (_mm) != &init_mm) || \ +@@ -404,8 +403,6 @@ static inline int pmd_large(pmd_t pte) { + #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) + #endif + #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +-#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \ +- != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT))) + #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) + #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/processor_64.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/processor_64.h 2009-11-06 10:46:27.000000000 +0100 +@@ -484,6 +484,14 @@ static inline void __mwait(unsigned long + : :"a" (eax), "c" (ecx)); + } + ++static inline void __sti_mwait(unsigned long eax, unsigned long ecx) ++{ ++ /* "mwait %eax,%ecx;" */ ++ asm volatile( ++ "sti; .byte 0x0f,0x01,0xc9;" ++ : :"a" (eax), "c" (ecx)); ++} ++ + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + + #define stack_current() \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp_64.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp_64.h 2009-11-06 10:46:27.000000000 +0100 +@@ -88,11 +88,6 @@ extern u8 x86_cpu_to_log_apicid[NR_CPUS] + extern u8 bios_cpu_apicid[]; + + #ifdef CONFIG_X86_LOCAL_APIC +-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +-{ +- return cpus_addr(cpumask)[0]; +-} +- + static inline int cpu_present_to_apicid(int mps_cpu) + { + if (mps_cpu < NR_CPUS) +@@ -127,13 +122,6 @@ static __inline int logical_smp_processo + #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] + #else + #define cpu_physical_id(cpu) boot_cpu_id +-static inline int smp_call_function_single(int cpuid, void (*func) (void *info), +- void *info, int retry, int wait) +-{ +- /* Disable interrupts here? 
*/ +- func(info); +- return 0; +-} + #endif /* !CONFIG_SMP */ + #endif + +--- head-2010-01-18.orig/kernel/kexec.c 2009-11-06 10:45:37.000000000 +0100 ++++ head-2010-01-18/kernel/kexec.c 2009-11-06 10:46:27.000000000 +0100 +@@ -375,7 +375,7 @@ static struct page *kimage_alloc_pages(g + if (limit == ~0UL) + address_bits = BITS_PER_LONG; + else +- address_bits = long_log2(limit); ++ address_bits = ilog2(limit); + + if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { + __free_pages(pages, order); +--- head-2010-01-18.orig/net/core/dev.c 2009-12-04 10:49:48.000000000 +0100 ++++ head-2010-01-18/net/core/dev.c 2009-12-04 10:51:26.000000000 +0100 +@@ -1834,10 +1834,10 @@ inline int skb_checksum_setup(struct sk_ + goto out; + switch (skb->nh.iph->protocol) { + case IPPROTO_TCP: +- skb->csum = offsetof(struct tcphdr, check); ++ skb->csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: +- skb->csum = offsetof(struct udphdr, check); ++ skb->csum_offset = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) +@@ -1846,7 +1846,7 @@ inline int skb_checksum_setup(struct sk_ + " %d packet", skb->nh.iph->protocol); + goto out; + } +- if ((skb->h.raw + skb->csum + 2) > skb->tail) ++ if ((skb->h.raw + skb->csum_offset + 2) > skb->tail) + goto out; + skb->ip_summed = CHECKSUM_PARTIAL; + skb->proto_csum_blank = 0; --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-auto-xen-kconfig.diff +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-auto-xen-kconfig.diff @@ -0,0 +1,854 @@ +Subject: xen3 xen-kconfig +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 983:3358caa6b3a3) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +--- head-2010-01-18.orig/arch/x86/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -63,6 +63,7 @@ config ARCH_DEFCONFIG + + config GENERIC_TIME + def_bool y ++ depends on !X86_XEN + + config GENERIC_CMOS_UPDATE + def_bool y +@@ -213,12 +214,23 @@ config X86_64_SMP + + config X86_HT + bool +- depends on SMP ++ depends on SMP && !XEN + default y + + config X86_TRAMPOLINE + bool + depends on SMP || (64BIT && ACPI_SLEEP) ++ depends on !XEN ++ default y ++ ++config X86_NO_TSS ++ bool ++ depends on X86_XEN || X86_64_XEN ++ default y ++ ++config X86_NO_IDT ++ bool ++ depends on X86_XEN || X86_64_XEN + default y + + config X86_32_LAZY_GS +@@ -298,6 +310,17 @@ config X86_MPPARSE + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it + ++config X86_XEN ++ bool "Xen-compatible" ++ select XEN ++ select X86_PAE ++ select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST ++ select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST ++ select SWIOTLB ++ help ++ Choose this option if you plan to run this kernel on top of the ++ Xen Hypervisor. ++ + config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32 && SMP +@@ -327,6 +350,13 @@ config X86_EXTENDED_PLATFORM + generic distribution kernel, say Y here - otherwise say N. 
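Editor's note: the skb_checksum_setup() hunk above follows the split of the old skb->csum overload into separate fields. For a CHECKSUM_PARTIAL skb, csum_offset now names where, relative to the transport header (skb->h.raw in this tree), the finished checksum must be stored. A sketch of the software fallback that consumes these fields, modeled on skb_checksum_help() (assumption: this tree's skb layout; the checksum typedefs differ between kernel versions):

	#include <linux/skbuff.h>
	#include <net/checksum.h>

	/* Finish a CHECKSUM_PARTIAL skb in software, e.g. before handing it
	 * to a device that cannot offload the checksum. The field at
	 * h.raw + csum_offset already holds the pseudo-header sum, so it is
	 * folded in rather than zeroed first. */
	static void finish_partial_csum(struct sk_buff *skb)
	{
		unsigned int off = skb->h.raw - skb->data; /* transport hdr offset */

		if (skb->ip_summed != CHECKSUM_PARTIAL)
			return;

		*(u16 *)(skb->h.raw + skb->csum_offset) =
			csum_fold(skb_checksum(skb, off, skb->len - off, 0));
		skb->ip_summed = CHECKSUM_NONE;
	}

This also explains the bounds check in the hunk: h.raw + csum_offset + 2 must stay inside the buffer, since a 16-bit checksum is written there.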
+ endif + ++config X86_64_XEN ++ bool "Enable Xen compatible kernel" ++ select XEN ++ select SWIOTLB ++ help ++ This option will compile a kernel compatible with Xen hypervisor ++ + if X86_64 + config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" +@@ -639,6 +669,7 @@ source "arch/x86/Kconfig.cpu" + config HPET_TIMER + def_bool X86_64 + prompt "HPET Timer Support" if X86_32 ++ depends on !X86_XEN && !X86_64_XEN + ---help--- + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is +@@ -674,7 +705,7 @@ config GART_IOMMU + bool "GART IOMMU support" if EMBEDDED + default y + select SWIOTLB +- depends on X86_64 && PCI ++ depends on X86_64 && PCI && !X86_64_XEN + ---help--- + Support for full DMA access of devices with 32bit memory access only + on systems with more than 3GB. This is usually needed for USB, +@@ -689,7 +720,7 @@ config GART_IOMMU + config CALGARY_IOMMU + bool "IBM Calgary IOMMU support" + select SWIOTLB +- depends on X86_64 && PCI && EXPERIMENTAL ++ depends on X86_64 && PCI && !X86_64_XEN && EXPERIMENTAL + ---help--- + Support for hardware IOMMUs in IBM's xSeries x366 and x460 + systems. Needed to run systems with more than 3GB of memory +@@ -773,6 +804,7 @@ config NR_CPUS + default "1" if !SMP + default "4096" if MAXSMP + default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) ++ default "16" if X86_64_XEN + default "8" if SMP + ---help--- + This allows you to specify the maximum number of CPUs which this +@@ -804,7 +836,7 @@ source "kernel/Kconfig.preempt" + + config X86_UP_APIC + bool "Local APIC support on uniprocessors" +- depends on X86_32 && !SMP && !X86_32_NON_STANDARD ++ depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !XEN_UNPRIVILEGED_GUEST + ---help--- + A local APIC (Advanced Programmable Interrupt Controller) is an + integrated interrupt controller in the CPU. If you have a single-CPU +@@ -830,15 +862,22 @@ config X86_UP_IOAPIC + config X86_LOCAL_APIC + def_bool y + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC ++ depends on !XEN_UNPRIVILEGED_GUEST + + config X86_IO_APIC + def_bool y + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC ++ depends on !XEN_UNPRIVILEGED_GUEST + + config X86_VISWS_APIC + def_bool y + depends on X86_32 && X86_VISWS + ++config X86_XEN_GENAPIC ++ bool ++ depends on X86_64_XEN ++ default y ++ + config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n +@@ -865,6 +904,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + + config X86_MCE + bool "Machine Check / overheating reporting" ++ depends on !X86_XEN && !XEN_UNPRIVILEGED_GUEST + ---help--- + Machine Check support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, data corruption). +@@ -874,7 +914,7 @@ config X86_MCE + config X86_MCE_INTEL + def_bool y + prompt "Intel MCE features" +- depends on X86_MCE && X86_LOCAL_APIC ++ depends on X86_MCE && X86_LOCAL_APIC && !XEN + ---help--- + Additional support for intel specific MCE features such as + the thermal monitor. +@@ -882,7 +922,7 @@ config X86_MCE_INTEL + config X86_MCE_AMD + def_bool y + prompt "AMD MCE features" +- depends on X86_MCE && X86_LOCAL_APIC ++ depends on X86_MCE && X86_LOCAL_APIC && !XEN + ---help--- + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. +@@ -909,6 +949,10 @@ config X86_MCE_INJECT + If you don't know what a machine check is and you don't do kernel + QA it is safe to say n. 
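Editor's note: X86_NO_TSS and X86_NO_IDT, added above, carry no prompt; they are def_bool y selectors that let arch code compile out privileged operations a Xen guest must not perform directly. A hypothetical consumer, following the pattern the desc_32.h hunks in this series use (set_intr_gate() is declared in those headers; page_fault and trap_table here stand in for the real entry stub and trap table, and the hypercall branch is a sketch, not code from this patch):

	#include <xen/interface/xen.h>	/* trap_info_t */

	extern void page_fault(void);		/* illustrative asm entry stub */
	extern trap_info_t trap_table[];	/* illustrative virtual trap table */

	static void example_install_traps(void)
	{
	#ifndef CONFIG_X86_NO_IDT
		/* Bare metal owns the IDT: write the gate directly. */
		set_intr_gate(14, &page_fault);
	#else
		/* A Xen guest must not touch the real IDT; register a
		 * virtualized trap table with the hypervisor instead. */
		HYPERVISOR_set_trap_table(trap_table);
	#endif
	}

Keeping these as hidden symbols rather than user prompts means the choice is made once, by selecting X86_XEN or X86_64_XEN, and every dependent site stays consistent.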
+ ++config X86_XEN_MCE ++ def_bool y ++ depends on XEN && X86_MCE ++ + config X86_THERMAL_VECTOR + def_bool y + depends on X86_MCE_INTEL +@@ -961,7 +1005,7 @@ config I8K + + config X86_REBOOTFIXUPS + bool "Enable X86 board specific fixups for reboot" +- depends on X86_32 ++ depends on X86_32 && !X86_XEN + ---help--- + This enables chipset and/or board specific fixups to be done + in order to get reboot to work correctly. This is only needed on +@@ -978,6 +1022,7 @@ config X86_REBOOTFIXUPS + + config MICROCODE + tristate "/dev/cpu/microcode - microcode support" ++ depends on !XEN_UNPRIVILEGED_GUEST + select FW_LOADER + ---help--- + If you say Y here, you will be able to update the microcode on +@@ -1174,7 +1219,7 @@ config DIRECT_GBPAGES + # Common NUMA Features + config NUMA + bool "Numa Memory Allocation and Scheduler Support" +- depends on SMP ++ depends on SMP && !XEN + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) + default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) + ---help--- +@@ -1283,6 +1328,7 @@ config ARCH_SPARSEMEM_DEFAULT + config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD ++ depends on !XEN + select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 + +@@ -1358,6 +1404,7 @@ config X86_RESERVE_LOW_64K + config MATH_EMULATION + bool + prompt "Math emulation" if X86_32 ++ depends on !X86_XEN + ---help--- + Linux can emulate a math coprocessor (used for floating point + operations) if you don't have one. 486DX and Pentium processors have +@@ -1383,6 +1430,7 @@ config MATH_EMULATION + + config MTRR + bool "MTRR (Memory Type Range Register) support" ++ depends on !XEN_UNPRIVILEGED_GUEST + ---help--- + On Intel P6 family processors (Pentium Pro, Pentium II and later) + the Memory Type Range Registers (MTRRs) may be used to control +@@ -1467,7 +1515,7 @@ config ARCH_USES_PG_UNCACHED + + config EFI + bool "EFI runtime service support" +- depends on ACPI ++ depends on ACPI && !XEN + ---help--- + This enables the kernel to use EFI runtime services that are + available (such as the EFI variable services). +@@ -1527,6 +1575,7 @@ source kernel/Kconfig.hz + + config KEXEC + bool "kexec system call" ++ depends on !XEN_UNPRIVILEGED_GUEST + ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot +@@ -1544,6 +1593,7 @@ config KEXEC + config CRASH_DUMP + bool "kernel crash dumps" + depends on X86_64 || (X86_32 && HIGHMEM) ++ depends on !XEN + ---help--- + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels +@@ -1664,6 +1714,7 @@ config COMPAT_VDSO + def_bool y + prompt "Compat VDSO support" + depends on X86_32 || IA32_EMULATION ++ depends on !X86_XEN + ---help--- + Map the 32-bit VDSO to the predictable old-style address too. + ---help--- +@@ -1733,6 +1784,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID + depends on NUMA + + menu "Power management and ACPI options" ++ depends on !XEN_UNPRIVILEGED_GUEST + + config ARCH_HIBERNATION_HEADER + def_bool y +@@ -1751,7 +1803,7 @@ config X86_APM_BOOT + + menuconfig APM + tristate "APM (Advanced Power Management) BIOS support" +- depends on X86_32 && PM_SLEEP ++ depends on X86_32 && PM_SLEEP && !XEN + ---help--- + APM is a BIOS specification for saving power using several different + techniques. 
This is mostly useful for battery powered laptops with +@@ -1912,6 +1964,7 @@ choice + + config PCI_GOBIOS + bool "BIOS" ++ depends on !X86_XEN + + config PCI_GOMMCONFIG + bool "MMConfig" +@@ -1923,6 +1976,13 @@ config PCI_GOOLPC + bool "OLPC" + depends on OLPC + ++config PCI_GOXEN_FE ++ bool "Xen PCI Frontend" ++ depends on X86_XEN ++ help ++ The PCI device frontend driver allows the kernel to import arbitrary ++ PCI devices from a PCI backend to support PCI driver domains. ++ + config PCI_GOANY + bool "Any" + +@@ -1930,7 +1990,7 @@ endchoice + + config PCI_BIOS + def_bool y +- depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY) ++ depends on X86_32 && PCI && !XEN && (PCI_GOBIOS || PCI_GOANY) + + # x86-64 doesn't support PCI BIOS access from long mode so always go direct. + config PCI_DIRECT +@@ -1953,6 +2013,22 @@ config PCI_MMCONFIG + bool "Support mmconfig PCI config space access" + depends on X86_64 && PCI && ACPI + ++config XEN_PCIDEV_FRONTEND ++ bool "Xen PCI Frontend" if X86_64 ++ depends on PCI && ((X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)) || X86_64_XEN) ++ select HOTPLUG ++ default y ++ help ++ The PCI device frontend driver allows the kernel to import arbitrary ++ PCI devices from a PCI backend to support PCI driver domains. ++ ++config XEN_PCIDEV_FE_DEBUG ++ bool "Xen PCI Frontend Debugging" ++ depends on XEN_PCIDEV_FRONTEND ++ default n ++ help ++ Enables some debug statements within the PCI Frontend. ++ + config DMAR + bool "Support for DMA Remapping Devices (EXPERIMENTAL)" + depends on PCI_MSI && ACPI && EXPERIMENTAL +@@ -2015,6 +2091,7 @@ if X86_32 + + config ISA + bool "ISA support" ++ depends on !XEN + ---help--- + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff +@@ -2042,6 +2119,7 @@ source "drivers/eisa/Kconfig" + + config MCA + bool "MCA support" ++ depends on !XEN + ---help--- + MicroChannel Architecture is found in some IBM PS/2 machines and + laptops. It is a bus system similar to PCI or ISA. See +@@ -2155,4 +2233,6 @@ source "crypto/Kconfig" + + source "arch/x86/kvm/Kconfig" + ++source "drivers/xen/Kconfig" ++ + source "lib/Kconfig" +--- head-2010-01-18.orig/arch/x86/Kconfig.cpu 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig.cpu 2009-12-04 10:44:40.000000000 +0100 +@@ -340,7 +340,7 @@ config X86_PPRO_FENCE + + config X86_F00F_BUG + def_bool y +- depends on M586MMX || M586TSC || M586 || M486 || M386 ++ depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT + + config X86_WP_WORKS_OK + def_bool y +@@ -397,6 +397,7 @@ config X86_P6_NOP + config X86_TSC + def_bool y + depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 ++ depends on !XEN + + config X86_CMPXCHG64 + def_bool y +--- head-2010-01-18.orig/arch/x86/Kconfig.debug 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig.debug 2009-12-04 10:44:40.000000000 +0100 +@@ -136,7 +136,7 @@ config 4KSTACKS + config DOUBLEFAULT + default y + bool "Enable doublefault exception handler" if EMBEDDED +- depends on X86_32 ++ depends on X86_32 && !X86_NO_TSS + ---help--- + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. 
Disabling this +--- head-2010-01-18.orig/drivers/acpi/Kconfig 2010-01-19 10:53:23.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -307,6 +307,7 @@ config ACPI_PCI_SLOT + config X86_PM_TIMER + bool "Power Management Timer Support" if EMBEDDED + depends on X86 ++ depends on !XEN + default y + help + The Power Management Timer is available on all ACPI-capable, +@@ -360,4 +361,13 @@ config ACPI_SBS + To compile this driver as a module, choose M here: + the modules will be called sbs and sbshc. + ++config ACPI_PV_SLEEP ++ bool ++ depends on X86 && XEN && ACPI_SLEEP ++ default y ++ ++config PROCESSOR_EXTERNAL_CONTROL ++ bool ++ depends on (X86 || IA64) && XEN ++ default y + endif # ACPI +--- head-2010-01-18.orig/drivers/char/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/drivers/char/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -1052,7 +1052,7 @@ config MAX_RAW_DEVS + config HPET + bool "HPET - High Precision Event Timer" if (X86 || IA64) + default n +- depends on ACPI ++ depends on ACPI && !XEN + help + If you say Y here, you will have a miscdevice named "/dev/hpet/". Each + open selects one of the timers supported by the HPET. The timers are +--- head-2010-01-18.orig/drivers/char/tpm/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/drivers/char/tpm/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -58,4 +58,13 @@ config TCG_INFINEON + Further information on this driver and the supported hardware + can be found at http://www.prosec.rub.de/tpm + ++config TCG_XEN ++ tristate "XEN TPM Interface" ++ depends on XEN ++ ---help--- ++ If you want to make TPM support available to a Xen user domain, ++ say Yes and it will be accessible from within Linux. ++ To compile this driver as a module, choose M here; the module ++ will be called tpm_xenu. ++ + endif # TCG_TPM +--- head-2010-01-18.orig/drivers/cpufreq/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/drivers/cpufreq/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -1,5 +1,6 @@ + config CPU_FREQ + bool "CPU Frequency scaling" ++ depends on !PROCESSOR_EXTERNAL_CONTROL + help + CPU Frequency scaling allows you to change the clock speed of + CPUs on the fly. 
This is a nice method to save power, because +--- head-2010-01-18.orig/drivers/serial/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/drivers/serial/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -9,6 +9,7 @@ menu "Serial drivers" + # The new 8250/16550 serial drivers + config SERIAL_8250 + tristate "8250/16550 and compatible serial support" ++ depends on !XEN_DISABLE_SERIAL + select SERIAL_CORE + ---help--- + This selects whether you want to include the driver for the standard +--- head-2010-01-18.orig/drivers/xen/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/drivers/xen/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -1,6 +1,354 @@ ++# ++# This Kconfig describe xen options ++# ++ ++mainmenu "Xen Configuration" ++ ++config XEN ++ bool ++ ++if XEN ++config XEN_INTERFACE_VERSION ++ hex ++ default 0x00030207 ++ ++menu "XEN" ++ ++config XEN_PRIVILEGED_GUEST ++ bool "Privileged Guest (domain 0)" ++ select PCI_REASSIGN if PCI ++ help ++ Support for privileged operation (domain 0) ++ ++config XEN_UNPRIVILEGED_GUEST ++ def_bool !XEN_PRIVILEGED_GUEST ++ ++config XEN_PRIVCMD ++ def_bool y ++ depends on PROC_FS ++ ++config XEN_XENBUS_DEV ++ def_bool y ++ depends on PROC_FS ++ ++config XEN_NETDEV_ACCEL_SFC_UTIL ++ depends on X86 ++ tristate ++ ++config XEN_BACKEND ++ tristate "Backend driver support" ++ default XEN_PRIVILEGED_GUEST ++ help ++ Support for backend device drivers that provide I/O services ++ to other virtual machines. ++ ++config XEN_BLKDEV_BACKEND ++ tristate "Block-device backend driver" ++ depends on XEN_BACKEND ++ default XEN_BACKEND ++ help ++ The block-device backend driver allows the kernel to export its ++ block devices to other guests via a high-performance shared-memory ++ interface. ++ ++config XEN_BLKDEV_TAP ++ tristate "Block-device tap backend driver" ++ depends on XEN_BACKEND ++ default XEN_BACKEND ++ help ++ The block tap driver is an alternative to the block back driver ++ and allows VM block requests to be redirected to userspace through ++ a device interface. The tap allows user-space development of ++ high-performance block backends, where disk images may be implemented ++ as files, in memory, or on other hosts across the network. This ++ driver can safely coexist with the existing blockback driver. ++ ++config XEN_BLKDEV_TAP2 ++ tristate "Block-device tap backend driver 2" ++ depends on XEN_BACKEND ++ default XEN_BACKEND ++ help ++ The block tap driver is an alternative to the block back driver ++ and allows VM block requests to be redirected to userspace through ++ a device interface. The tap allows user-space development of ++ high-performance block backends, where disk images may be implemented ++ as files, in memory, or on other hosts across the network. This ++ driver can safely coexist with the existing blockback driver. ++ ++config XEN_BLKBACK_PAGEMAP ++ tristate ++ depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP2 != n ++ default XEN_BLKDEV_BACKEND || XEN_BLKDEV_TAP2 ++ ++config XEN_NETDEV_BACKEND ++ tristate "Network-device backend driver" ++ depends on XEN_BACKEND && NET ++ default XEN_BACKEND ++ help ++ The network-device backend driver allows the kernel to export its ++ network devices to other guests via a high-performance shared-memory ++ interface. 
++
++config XEN_NETDEV_PIPELINED_TRANSMITTER
++	bool "Pipelined transmitter (DANGEROUS)"
++	depends on XEN_NETDEV_BACKEND
++	help
++	  If the net backend is a dumb domain, such as a transparent Ethernet
++	  bridge with no local IP interface, it is safe to say Y here to get
++	  slightly lower network overhead.
++	  If the backend has a local IP interface; or may be doing smart things
++	  like reassembling packets to perform firewall filtering; or if you
++	  are unsure; or if you experience network hangs when this option is
++	  enabled; then you must say N here.
++
++config XEN_NETDEV_ACCEL_SFC_BACKEND
++	tristate "Network-device backend driver acceleration for Solarflare NICs"
++	depends on XEN_NETDEV_BACKEND && SFC && SFC_RESOURCE && X86
++	select XEN_NETDEV_ACCEL_SFC_UTIL
++	default m
++
++config XEN_NETDEV_LOOPBACK
++	tristate "Network-device loopback driver"
++	depends on XEN_NETDEV_BACKEND
++	help
++	  A two-interface loopback device to emulate a local netfront-netback
++	  connection. If unsure, it is probably safe to say N here.
++
++config XEN_PCIDEV_BACKEND
++	tristate "PCI-device backend driver"
++	depends on PCI && XEN_BACKEND
++	default XEN_BACKEND
++	help
++	  The PCI device backend driver allows the kernel to export arbitrary
++	  PCI devices to other guests. If you select this to be a module, you
++	  will need to make sure no other driver has bound to the device(s)
++	  you want to make visible to other guests.
++
++choice
++	prompt "PCI Backend Mode"
++	depends on XEN_PCIDEV_BACKEND
++	default XEN_PCIDEV_BACKEND_VPCI if !IA64
++	default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
++
++config XEN_PCIDEV_BACKEND_VPCI
++	bool "Virtual PCI"
++	---help---
++	  This PCI Backend hides the true PCI topology and makes the frontend
++	  think there is a single PCI bus with only the exported devices on it.
++	  For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
++	  second device at 02:1a.1 will be re-assigned to 00:01.1.
++
++config XEN_PCIDEV_BACKEND_PASS
++	bool "Passthrough"
++	---help---
++	  This PCI Backend provides a real view of the PCI topology to the
++	  frontend (for example, a device at 06:01.b will still appear at
++	  06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
++	  PCI devices to its driver domains. This may be required for drivers
++	  which depend on finding their hardware in certain bus/slot
++	  locations.
++
++config XEN_PCIDEV_BACKEND_SLOT
++	bool "Slot"
++	---help---
++	  This PCI Backend hides the true PCI topology and makes the frontend
++	  think there is a single PCI bus with only the exported devices on it.
++	  Contrary to the virtual PCI backend, a function becomes a new slot.
++	  For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
++	  second device at 02:1a.1 will be re-assigned to 00:01.0.
++
++config XEN_PCIDEV_BACKEND_CONTROLLER
++	bool "Controller"
++	depends on IA64
++	---help---
++	  This PCI backend virtualizes the PCI bus topology by providing a
++	  virtual bus per PCI root device. Devices which are physically under
++	  the same root bus will appear on the same virtual bus. For systems
++	  with complex I/O addressing, this is the only backend which supports
++	  extended I/O port spaces and MMIO translation offsets. This backend
++	  also supports slot virtualization. For example, a device at
++	  0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device
++	  at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
++	  re-assigned to 0000:00:01.0.
A third device at 0000:16:05.0 (under ++ a different PCI root bus) will be re-assigned to 0000:01:00.0. ++ ++endchoice ++ ++config XEN_PCIDEV_BE_DEBUG ++ bool "PCI Backend Debugging" ++ depends on XEN_PCIDEV_BACKEND ++ ++config XEN_TPMDEV_BACKEND ++ tristate "TPM-device backend driver" ++ depends on XEN_BACKEND ++ help ++ The TPM-device backend driver ++ ++config XEN_SCSI_BACKEND ++ tristate "SCSI backend driver" ++ depends on SCSI && XEN_BACKEND ++ default m ++ help ++ The SCSI backend driver allows the kernel to export its SCSI Devices ++ to other guests via a high-performance shared-memory interface. ++ ++config XEN_USB_BACKEND ++ tristate "USB backend driver" ++ depends on USB && XEN_BACKEND ++ default m ++ help ++ The USB backend driver allows the kernel to export its USB Devices ++ to other guests. ++ ++config XEN_BLKDEV_FRONTEND ++ tristate "Block-device frontend driver" ++ default y ++ help ++ The block-device frontend driver allows the kernel to access block ++ devices mounted within another guest OS. Unless you are building a ++ dedicated device-driver domain, or your master control domain ++ (domain 0), then you almost certainly want to say Y here. ++ ++config XEN_NETDEV_FRONTEND ++ tristate "Network-device frontend driver" ++ depends on NET ++ default y ++ help ++ The network-device frontend driver allows the kernel to access ++ network interfaces within another guest OS. Unless you are building a ++ dedicated device-driver domain, or your master control domain ++ (domain 0), then you almost certainly want to say Y here. ++ ++config XEN_NETDEV_ACCEL_SFC_FRONTEND ++ tristate "Network-device frontend driver acceleration for Solarflare NICs" ++ depends on XEN_NETDEV_FRONTEND && X86 ++ select XEN_NETDEV_ACCEL_SFC_UTIL ++ default m ++ ++config XEN_SCSI_FRONTEND ++ tristate "SCSI frontend driver" ++ depends on SCSI ++ default m ++ help ++ The SCSI frontend driver allows the kernel to access SCSI Devices ++ within another guest OS. ++ ++config XEN_USB_FRONTEND ++ tristate "USB frontend driver" ++ depends on USB ++ default m ++ help ++ The USB frontend driver allows the kernel to access USB Devices ++ within another guest OS. ++ ++config XEN_USB_FRONTEND_HCD_STATS ++ bool "Taking the HCD statistics (for debug)" ++ depends on XEN_USB_FRONTEND ++ default y ++ help ++ Count the transferred urb status and the RING_FULL occurrence. ++ ++config XEN_USB_FRONTEND_HCD_PM ++ bool "HCD suspend/resume support (DO NOT USE)" ++ depends on XEN_USB_FRONTEND ++ default n ++ help ++ Experimental bus suspend/resume feature support. ++ ++config XEN_GRANT_DEV ++ tristate "User-space granted page access driver" ++ default XEN_PRIVILEGED_GUEST ++ help ++ Device for accessing (in user-space) pages that have been granted ++ by other domains. ++ ++config XEN_FRAMEBUFFER ++ tristate "Framebuffer-device frontend driver" ++ depends on FB ++ select FB_CFB_FILLRECT ++ select FB_CFB_COPYAREA ++ select FB_CFB_IMAGEBLIT ++ default y ++ help ++ The framebuffer-device frontend drivers allows the kernel to create a ++ virtual framebuffer. This framebuffer can be viewed in another ++ domain. Unless this domain has access to a real video card, you ++ probably want to say Y here. ++ ++config XEN_KEYBOARD ++ tristate "Keyboard-device frontend driver" ++ depends on XEN_FRAMEBUFFER && INPUT ++ default y ++ help ++ The keyboard-device frontend driver allows the kernel to create a ++ virtual keyboard. This keyboard can then be driven by another ++ domain. 
If you've said Y to CONFIG_XEN_FRAMEBUFFER, you probably ++ want to say Y here. ++ ++config XEN_DISABLE_SERIAL ++ bool "Disable serial port drivers" ++ default y ++ help ++ Disable serial port drivers, allowing the Xen console driver ++ to provide a serial console at ttyS0. ++ ++config XEN_SYSFS ++ tristate "Export Xen attributes in sysfs" ++ depends on SYSFS ++ select SYS_HYPERVISOR ++ default y ++ help ++ Xen hypervisor attributes will show up under /sys/hypervisor/. ++ ++choice ++ prompt "Xen version compatibility" ++ default XEN_COMPAT_030002_AND_LATER ++ ++ config XEN_COMPAT_030002_AND_LATER ++ bool "3.0.2 and later" ++ ++ config XEN_COMPAT_030004_AND_LATER ++ bool "3.0.4 and later" ++ ++ config XEN_COMPAT_030100_AND_LATER ++ bool "3.1.0 and later" ++ ++ config XEN_COMPAT_LATEST_ONLY ++ bool "no compatibility code" ++ ++endchoice ++ ++config XEN_COMPAT ++ hex ++ default 0xffffff if XEN_COMPAT_LATEST_ONLY ++ default 0x030100 if XEN_COMPAT_030100_AND_LATER ++ default 0x030004 if XEN_COMPAT_030004_AND_LATER ++ default 0x030002 if XEN_COMPAT_030002_AND_LATER ++ default 0 ++ ++endmenu ++ ++config HAVE_IRQ_IGNORE_UNHANDLED ++ def_bool y ++ ++config NO_IDLE_HZ ++ def_bool y ++ ++config XEN_SMPBOOT ++ def_bool y ++ depends on SMP && !PPC_XEN ++ ++config XEN_XENCOMM ++ bool ++ ++config XEN_DEVMEM ++ def_bool y ++ ++endif ++ + config XEN_BALLOON +- bool "Xen memory balloon driver" +- depends on XEN ++ bool "Xen memory balloon driver" if PARAVIRT_XEN ++ depends on (XEN && !PPC_XEN) || PARAVIRT_XEN + default y + help + The balloon driver allows the Xen domain to request more memory from +@@ -8,14 +356,16 @@ config XEN_BALLOON + return unneeded memory to the system. + + config XEN_SCRUB_PAGES +- bool "Scrub pages before returning them to system" +- depends on XEN_BALLOON ++ bool "Scrub memory before freeing it to Xen" ++ depends on XEN || XEN_BALLOON + default y + help +- Scrub pages before returning them to the system for reuse by +- other domains. This makes sure that any confidential data +- is not accidentally visible to other domains. Is it more +- secure, but slightly less efficient. ++ Erase memory contents before freeing it back to Xen's global ++ pool. This ensures that any secrets contained within that ++ memory (e.g., private keys) cannot be found by other guests that ++ may be running on the machine. Most people will want to say Y here. ++ If security is not a concern then you may increase performance by ++ saying N. + If in doubt, say yes. + + config XEN_DEV_EVTCHN +--- head-2010-01-18.orig/fs/Kconfig 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/fs/Kconfig 2009-12-04 10:44:40.000000000 +0100 +@@ -160,6 +160,7 @@ config HUGETLBFS + bool "HugeTLB file system support" + depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ + SYS_SUPPORTS_HUGETLBFS || BROKEN ++ depends on !XEN + help + hugetlbfs is a filesystem backing for HugeTLB pages, based on + ramfs. 
For architectures that support it, say Y here and read +--- head-2010-01-18.orig/kernel/Kconfig.preempt 2010-01-19 10:52:41.000000000 +0100 ++++ head-2010-01-18/kernel/Kconfig.preempt 2009-12-04 10:44:40.000000000 +0100 +@@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY + + config PREEMPT + bool "Preemptible Kernel (Low-Latency Desktop)" ++ depends on !XEN + help + This option reduces the latency of the kernel by making + all kernel code (that is not executing in a critical section) --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-netback-notify-multi +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-netback-notify-multi @@ -0,0 +1,85 @@ +From: jbeulich@novell.com +Subject: netback: use multicall for send multiple notifications +Patch-mainline: obsolete + +This also does a small fairness improvement since now notifications +get sent in the order requests came in rather than in the inverse one. + +--- head-2010-01-04.orig/drivers/xen/core/evtchn.c 2009-11-06 11:10:22.000000000 +0100 ++++ head-2010-01-04/drivers/xen/core/evtchn.c 2009-11-06 11:10:30.000000000 +0100 +@@ -1329,6 +1329,21 @@ void notify_remote_via_irq(int irq) + } + EXPORT_SYMBOL_GPL(notify_remote_via_irq); + ++int multi_notify_remote_via_irq(multicall_entry_t *mcl, int irq) ++{ ++ int evtchn = evtchn_from_irq(irq); ++ ++ BUG_ON(type_from_irq(irq) == IRQT_VIRQ); ++ BUG_IF_IPI(irq); ++ ++ if (!VALID_EVTCHN(evtchn)) ++ return -EINVAL; ++ ++ multi_notify_remote_via_evtchn(mcl, evtchn); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(multi_notify_remote_via_irq); ++ + int irq_to_evtchn_port(int irq) + { + BUG_IF_VIRQ_PER_CPU(irq); +--- head-2010-01-04.orig/drivers/xen/netback/netback.c 2010-01-04 13:31:44.000000000 +0100 ++++ head-2010-01-04/drivers/xen/netback/netback.c 2010-01-04 13:31:57.000000000 +0100 +@@ -778,10 +778,20 @@ static void net_rx_action(unsigned long + npo.meta_cons += nr_frags + 1; + } + +- while (notify_nr != 0) { +- irq = notify_list[--notify_nr]; ++ if (notify_nr == 1) { ++ irq = *notify_list; + __clear_bit(irq, rx_notify); + notify_remote_via_irq(irq + DYNIRQ_BASE); ++ } else { ++ for (count = ret = 0; ret < notify_nr; ++ret) { ++ irq = notify_list[ret]; ++ __clear_bit(irq, rx_notify); ++ if (!multi_notify_remote_via_irq(rx_mcl + count, ++ irq + DYNIRQ_BASE)) ++ ++count; ++ } ++ if (HYPERVISOR_multicall(rx_mcl, count)) ++ BUG(); + } + + /* More work to do? */ +--- head-2010-01-04.orig/include/xen/evtchn.h 2009-12-18 10:13:32.000000000 +0100 ++++ head-2010-01-04/include/xen/evtchn.h 2009-12-18 10:13:40.000000000 +0100 +@@ -193,6 +193,18 @@ static inline void notify_remote_via_evt + VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send)); + } + ++static inline void ++multi_notify_remote_via_evtchn(multicall_entry_t *mcl, int port) ++{ ++ struct evtchn_send *send = (void *)(mcl->args + 2); ++ ++ BUILD_BUG_ON(sizeof(*send) > sizeof(mcl->args) - 2 * sizeof(*mcl->args)); ++ send->port = port; ++ mcl->op = __HYPERVISOR_event_channel_op; ++ mcl->args[0] = EVTCHNOP_send; ++ mcl->args[1] = (unsigned long)send; ++} ++ + /* Clear an irq's pending state, in preparation for polling on it. */ + void xen_clear_irq_pending(int irq); + +@@ -211,6 +223,7 @@ void xen_poll_irq(int irq); + * by bind_*_to_irqhandler(). 
+ */ + void notify_remote_via_irq(int irq); ++int multi_notify_remote_via_irq(multicall_entry_t *, int irq); + int irq_to_evtchn_port(int irq); + + #if defined(CONFIG_SMP) && !defined(MODULE) && defined(CONFIG_X86) --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-seccomp-disable-tsc-option +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-seccomp-disable-tsc-option @@ -0,0 +1,33 @@ +From: Andrea Arcangeli +Subject: [PATCH seccomp: make tsc disabling optional +Patch-mainline: unknown +References: 191123 + +Make the TSC disable purely paranoid feature optional, so by default seccomp +returns absolutely zerocost. + +Ported from 2.6.19 to 2.6.24-rc7 by Jeff Mahoney. +Addition of x86-64 by Jan Beulich. + +Signed-off-by: Andrea Arcangeli +Acked-by: Jeff Mahoney +Automatically created from "patches.fixes/seccomp-disable-tsc-option" by xen-port-patches.py + +--- head-2009-10-12.orig/arch/x86/kernel/process-xen.c 2009-10-13 11:49:03.000000000 +0200 ++++ head-2009-10-12/arch/x86/kernel/process-xen.c 2009-10-13 17:01:41.000000000 +0200 +@@ -129,6 +129,7 @@ static void hard_disable_TSC(void) + + void disable_TSC(void) + { ++#ifdef CONFIG_SECCOMP_DISABLE_TSC + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* +@@ -137,6 +138,7 @@ void disable_TSC(void) + */ + hard_disable_TSC(); + preempt_enable(); ++#endif + } + + static void hard_enable_TSC(void) --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.15-16 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.15-16 @@ -0,0 +1,28 @@ +From: Greg Kroah-Hartman +Subject: Linux 2.6.32.16 +Patch-mainline: 2.6.32.16 + +Signed-off-by: Greg Kroah-Hartman + +Automatically created from "patches.kernel.org/patch-2.6.32.15-16" by xen-port-patches.py + +--- sle11sp1-2010-08-06.orig/arch/x86/kernel/setup-xen.c 2010-02-09 17:12:56.000000000 +0100 ++++ sle11sp1-2010-08-06/arch/x86/kernel/setup-xen.c 2010-08-06 17:06:51.000000000 +0200 +@@ -757,6 +757,17 @@ static struct dmi_system_id __initdata b + DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), + }, + }, ++ /* ++ * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so ++ * match on the product name. ++ */ ++ { ++ .callback = dmi_low_memory_corruption, ++ .ident = "Phoenix BIOS", ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), ++ }, ++ }, + #endif + {} + }; --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-configurable-guest-devices +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-configurable-guest-devices @@ -0,0 +1,74 @@ +From: jbeulich@novell.com +Subject: allow number of guest devices to be configurable +Patch-mainline: obsolete + +... and derive NR_DYNIRQS from this (rather than having a hard-coded +value). +Similarly, allow the number of simultaneous transmits in netback to be +configurable. + +--- head-2010-01-04.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-04/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-12-22 13:21:47.000000000 +0100 +@@ -89,7 +89,7 @@ extern int nr_pirqs; + #endif + + #define DYNIRQ_BASE (PIRQ_BASE + nr_pirqs) +-#define NR_DYNIRQS 256 ++#define NR_DYNIRQS (64 + CONFIG_XEN_NR_GUEST_DEVICES) + + #define NR_IRQS (NR_PIRQS + NR_DYNIRQS) + +--- head-2010-01-04.orig/drivers/xen/Kconfig 2009-12-18 12:30:54.000000000 +0100 ++++ head-2010-01-04/drivers/xen/Kconfig 2009-12-18 12:31:00.000000000 +0100 +@@ -96,6 +96,15 @@ config XEN_NETDEV_BACKEND + network devices to other guests via a high-performance shared-memory + interface. 
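The irq_vectors.h hunk above replaces the hard-coded 256 with a value derived from the new Kconfig option, so the dynamic-IRQ space scales with the number of configured guest devices. A small sketch of the resulting arithmetic follows; CONFIG_XEN_NR_GUEST_DEVICES is assumed to be 256 here (the default when XEN_BACKEND is set, per the Kconfig hunk below), and NR_PIRQS is an invented placeholder since the real value comes from the surrounding header.

    /* Derived IRQ-space constants, mirroring the hunk above. The config
     * value and NR_PIRQS are stand-ins for illustration. */
    #include <stdio.h>

    #define CONFIG_XEN_NR_GUEST_DEVICES 256   /* assumed backend default */
    #define NR_PIRQS   256                    /* invented placeholder */
    #define NR_DYNIRQS (64 + CONFIG_XEN_NR_GUEST_DEVICES)
    #define NR_IRQS    (NR_PIRQS + NR_DYNIRQS)

    /* Compile-time guard in the spirit of the kernel's BUILD_BUG_ON(). */
    _Static_assert(NR_DYNIRQS > 64, "no headroom left for guest devices");

    int main(void)
    {
        printf("NR_DYNIRQS=%d NR_IRQS=%d\n", NR_DYNIRQS, NR_IRQS);
        return 0;
    }

The fixed 64 presumably reserves room for interrupts that exist regardless of the configured device count, so even CONFIG_XEN_NR_GUEST_DEVICES=0 leaves a usable dynamic range.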
+ ++config XEN_NETDEV_TX_SHIFT ++ int "Maximum simultaneous transmit requests (as a power of 2)" ++ depends on XEN_NETDEV_BACKEND ++ range 5 16 ++ default 8 ++ help ++ The maximum number transmits the driver can hold pending, expressed ++ as the exponent of a power of 2. ++ + config XEN_NETDEV_PIPELINED_TRANSMITTER + bool "Pipelined transmitter (DANGEROUS)" + depends on XEN_NETDEV_BACKEND +@@ -307,6 +316,16 @@ config XEN_SYSFS + help + Xen hypervisor attributes will show up under /sys/hypervisor/. + ++config XEN_NR_GUEST_DEVICES ++ int "Number of guest devices" ++ range 0 4032 if 64BIT ++ range 0 960 ++ default 256 if XEN_BACKEND ++ default 16 ++ help ++ Specify the total number of virtual devices (i.e. both frontend ++ and backend) that you want the kernel to be able to service. ++ + choice + prompt "Xen version compatibility" + default XEN_COMPAT_030002_AND_LATER +--- head-2010-01-04.orig/drivers/xen/netback/netback.c 2010-01-04 13:31:26.000000000 +0100 ++++ head-2010-01-04/drivers/xen/netback/netback.c 2010-01-04 13:31:38.000000000 +0100 +@@ -71,7 +71,7 @@ static DECLARE_TASKLET(net_rx_tasklet, n + static struct timer_list net_timer; + static struct timer_list netbk_tx_pending_timer; + +-#define MAX_PENDING_REQS 256 ++#define MAX_PENDING_REQS (1U << CONFIG_XEN_NETDEV_TX_SHIFT) + + static struct sk_buff_head rx_queue; + +@@ -1265,6 +1265,7 @@ static void net_tx_action(unsigned long + net_tx_action_dealloc(); + + mop = tx_map_ops; ++ BUILD_BUG_ON(MAX_SKB_FRAGS >= MAX_PENDING_REQS); + while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&net_schedule_list)) { + /* Get a netif from the list with work to do. */ --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-auto-arch-x86.diff +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-auto-arch-x86.diff @@ -0,0 +1,436 @@ +Subject: xen3 arch-x86 +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 983:3358caa6b3a3) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +List of files that don't require modification anymore (and hence +removed from this patch), for reference and in case upstream wants to +take the forward porting patches: +2.6.26/arch/x86/kernel/crash.c +2.6.30/arch/x86/kernel/acpi/boot.c + +--- + arch/x86/Makefile | 24 +++++++++++++++++++++++- + arch/x86/boot/Makefile | 9 +++++++++ + arch/x86/include/asm/acpi.h | 27 +++++++++++++++++++++++++++ + arch/x86/include/asm/apic.h | 2 ++ + arch/x86/include/asm/kexec.h | 13 +++++++++++++ + arch/x86/kernel/Makefile | 9 +++++++++ + arch/x86/kernel/acpi/Makefile | 4 ++++ + arch/x86/kernel/acpi/processor.c | 11 +++++++++++ + arch/x86/kernel/cpu/mcheck/Makefile | 1 + + arch/x86/kernel/cpu/mcheck/mce.c | 21 +++++++++++++++++++++ + arch/x86/kernel/cpu/mtrr/Makefile | 1 + + arch/x86/lib/Makefile | 2 ++ + arch/x86/mm/Makefile | 2 ++ + arch/x86/oprofile/Makefile | 7 +++++++ + arch/x86/pci/Makefile | 3 +++ + arch/x86/power/cpu.c | 4 ++++ + 16 files changed, 139 insertions(+), 1 deletion(-) + +Index: linux-2.6.32-master/arch/x86/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/Makefile ++++ linux-2.6.32-master/arch/x86/Makefile +@@ -111,6 +111,10 @@ endif + # prevent gcc from generating any FP code by mistake + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) + ++# Xen subarch support ++mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen ++mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/ ++ + KBUILD_CFLAGS += $(mflags-y) + KBUILD_AFLAGS += $(mflags-y) + +@@ -151,9 
+155,26 @@ boot := arch/x86/boot + + BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage + +-PHONY += bzImage $(BOOT_TARGETS) ++PHONY += bzImage vmlinuz $(BOOT_TARGETS) ++ ++ifdef CONFIG_XEN ++CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ ++ -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) ++ ++ifdef CONFIG_X86_64 ++LDFLAGS_vmlinux := -e startup_64 ++endif + + # Default kernel to build ++all: vmlinuz ++ ++# KBUILD_IMAGE specifies the target image being built ++KBUILD_IMAGE := $(boot)/vmlinuz ++ ++vmlinuz: vmlinux ++ $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) ++else ++# Default kernel to build + all: bzImage + + # KBUILD_IMAGE specify target image being built +@@ -166,6 +187,7 @@ bzImage: vmlinux + + $(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ ++endif + + PHONY += install + install: +Index: linux-2.6.32-master/arch/x86/boot/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/boot/Makefile ++++ linux-2.6.32-master/arch/x86/boot/Makefile +@@ -23,6 +23,7 @@ ROOT_DEV := CURRENT + SVGA_MODE := -DSVGA_MODE=NORMAL_VGA + + targets := vmlinux.bin setup.bin setup.elf bzImage ++targets += vmlinuz vmlinux-stripped + targets += fdimage fdimage144 fdimage288 image.iso mtools.conf + subdir- := compressed + +@@ -195,6 +196,14 @@ bzlilo: $(obj)/bzImage + cp System.map $(INSTALL_PATH)/ + if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi + ++$(obj)/vmlinuz: $(obj)/vmlinux-stripped FORCE ++ $(call if_changed,gzip) ++ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' ++ ++$(obj)/vmlinux-stripped: OBJCOPYFLAGS := -g --strip-unneeded ++$(obj)/vmlinux-stripped: vmlinux FORCE ++ $(call if_changed,objcopy) ++ + install: + sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ + System.map "$(INSTALL_PATH)" +Index: linux-2.6.32-master/arch/x86/kernel/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/kernel/Makefile ++++ linux-2.6.32-master/arch/x86/kernel/Makefile +@@ -117,9 +117,12 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) + + obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o + ++obj-$(CONFIG_X86_XEN) += fixup.o ++ + ### + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) ++ obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o + obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o + obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o + obj-$(CONFIG_AUDIT) += audit_64.o +@@ -130,4 +133,10 @@ ifeq ($(CONFIG_X86_64),y) + + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-y += vsmp_64.o ++ ++ time_64-$(CONFIG_XEN) += time_32.o ++ pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o + endif ++ ++disabled-obj-$(CONFIG_XEN) := i8259_$(BITS).o reboot.o smpboot_$(BITS).o ++%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := +Index: linux-2.6.32-master/arch/x86/kernel/acpi/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/kernel/acpi/Makefile ++++ linux-2.6.32-master/arch/x86/kernel/acpi/Makefile +@@ -5,6 +5,9 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wake + + ifneq ($(CONFIG_ACPI_PROCESSOR),) + obj-y += cstate.o processor.o ++ifneq ($(CONFIG_PROCESSOR_EXTERNAL_CONTROL),) ++obj-$(CONFIG_XEN) += processor_extcntl_xen.o ++endif + endif + + $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin +@@ -12,3 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w + $(obj)/realmode/wakeup.bin: FORCE + $(Q)$(MAKE) $(build)=$(obj)/realmode + 
++disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o +Index: linux-2.6.32-master/arch/x86/kernel/acpi/processor.c +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/kernel/acpi/processor.c ++++ linux-2.6.32-master/arch/x86/kernel/acpi/processor.c +@@ -76,7 +76,18 @@ static void init_intel_pdc(struct acpi_p + /* Initialize _PDC data based on the CPU vendor */ + void arch_acpi_processor_init_pdc(struct acpi_processor *pr) + { ++#ifdef CONFIG_XEN ++ /* ++ * As a work-around, just use cpu0's cpuinfo for all processors. ++ * Further work is required to expose xen hypervisor interface of ++ * getting physical cpuinfo to dom0 kernel and then ++ * arch_acpi_processor_init_pdc can set _PDC parameters according ++ * to Xen's phys information. ++ */ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++#else + struct cpuinfo_x86 *c = &cpu_data(pr->id); ++#endif + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL || +Index: linux-2.6.32-master/arch/x86/kernel/cpu/mcheck/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/kernel/cpu/mcheck/Makefile ++++ linux-2.6.32-master/arch/x86/kernel/cpu/mcheck/Makefile +@@ -4,6 +4,7 @@ obj-$(CONFIG_X86_ANCIENT_MCE) += winchip + obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o + obj-$(CONFIG_X86_MCE_XEON75XX) += mce-xeon75xx.o + obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o ++obj-$(CONFIG_X86_XEN_MCE) += mce_dom0.o + obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o + obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o + +Index: linux-2.6.32-master/arch/x86/kernel/cpu/mcheck/mce.c +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/kernel/cpu/mcheck/mce.c ++++ linux-2.6.32-master/arch/x86/kernel/cpu/mcheck/mce.c +@@ -1127,8 +1127,15 @@ void mce_log_therm_throt_event(__u64 sta + * Periodic polling timer for "silent" machine check errors. If the + * poller finds an MCE, poll 2x faster. When the poller finds no more + * errors, poll 2x slower (up to check_interval seconds). 
++ * ++ * We will disable polling in DOM0 since all CMCI/Polling ++ * mechanism will be done in XEN for Intel CPUs + */ ++#if defined (CONFIG_X86_XEN_MCE) ++static int check_interval = 0; /* disable polling */ ++#else + static int check_interval = 5 * 60; /* 5 minutes */ ++#endif + + static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ + static DEFINE_PER_CPU(struct timer_list, mce_timer); +@@ -1293,6 +1300,7 @@ static int __cpuinit mce_cpu_quirks(stru + + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86_vendor == X86_VENDOR_AMD) { ++#ifndef CONFIG_XEN + if (c->x86 == 15 && banks > 4) { + /* + * disable GART TBL walk error reporting, which +@@ -1301,6 +1309,7 @@ static int __cpuinit mce_cpu_quirks(stru + */ + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); + } ++#endif + if (c->x86 <= 17 && mce_bootlog < 0) { + /* + * Lots of broken BIOS around that don't clear them +@@ -1368,6 +1377,7 @@ static void __cpuinit mce_ancient_init(s + + static void mce_cpu_features(struct cpuinfo_x86 *c) + { ++#ifndef CONFIG_X86_64_XEN + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + mce_intel_feature_init(c); +@@ -1378,6 +1388,7 @@ static void mce_cpu_features(struct cpui + default: + break; + } ++#endif + } + + static void mce_init_timer(void) +@@ -2064,6 +2075,16 @@ static __init int mce_init_device(void) + register_hotcpu_notifier(&mce_cpu_notifier); + misc_register(&mce_log_device); + ++#ifdef CONFIG_X86_XEN_MCE ++ if (is_initial_xendomain()) { ++ /* Register vIRQ handler for MCE LOG processing */ ++ extern void bind_virq_for_mce(void); ++ ++ printk(KERN_DEBUG "MCE: bind virq for DOM0 logging\n"); ++ bind_virq_for_mce(); ++ } ++#endif ++ + return err; + } + +Index: linux-2.6.32-master/arch/x86/kernel/cpu/mtrr/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/kernel/cpu/mtrr/Makefile ++++ linux-2.6.32-master/arch/x86/kernel/cpu/mtrr/Makefile +@@ -1,3 +1,4 @@ + obj-y := main.o if.o generic.o state.o cleanup.o + obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o + ++obj-$(CONFIG_XEN) := main.o if.o +Index: linux-2.6.32-master/arch/x86/lib/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/lib/Makefile ++++ linux-2.6.32-master/arch/x86/lib/Makefile +@@ -27,3 +27,5 @@ else + lib-y += memmove_64.o memset_64.o + lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o + endif ++ ++lib-$(CONFIG_XEN_SCRUB_PAGES) += scrub.o +Index: linux-2.6.32-master/arch/x86/mm/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/mm/Makefile ++++ linux-2.6.32-master/arch/x86/mm/Makefile +@@ -25,4 +25,6 @@ obj-$(CONFIG_NUMA) += numa.o numa_$(BIT + obj-$(CONFIG_K8_NUMA) += k8topology_64.o + obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o + ++obj-$(CONFIG_XEN) += hypervisor.o ++ + obj-$(CONFIG_MEMTEST) += memtest.o +Index: linux-2.6.32-master/arch/x86/oprofile/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/oprofile/Makefile ++++ linux-2.6.32-master/arch/x86/oprofile/Makefile +@@ -6,7 +6,14 @@ DRIVER_OBJS = $(addprefix ../../../drive + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + ++ifdef CONFIG_XEN ++XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \ ++ xenoprofile.o) ++oprofile-y := $(DRIVER_OBJS) \ ++ $(XENOPROF_COMMON_OBJS) xenoprof.o ++else + oprofile-y := $(DRIVER_OBJS) init.o backtrace.o + 
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ + op_model_ppro.o op_model_p4.o + oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o ++endif +Index: linux-2.6.32-master/arch/x86/pci/Makefile +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/pci/Makefile ++++ linux-2.6.32-master/arch/x86/pci/Makefile +@@ -4,6 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o + obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o + obj-$(CONFIG_PCI_DIRECT) += direct.o + obj-$(CONFIG_PCI_OLPC) += olpc.o ++# pcifront should be after mmconfig.o and direct.o as it should only ++# take over if direct access to the PCI bus is unavailable ++obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o + + obj-y += fixup.o + obj-$(CONFIG_ACPI) += acpi.o +Index: linux-2.6.32-master/arch/x86/power/cpu.c +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/power/cpu.c ++++ linux-2.6.32-master/arch/x86/power/cpu.c +@@ -125,6 +125,7 @@ static void do_fpu_end(void) + + static void fix_processor_context(void) + { ++#ifndef CONFIG_X86_NO_TSS + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + +@@ -137,7 +138,10 @@ static void fix_processor_context(void) + + #ifdef CONFIG_X86_64 + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; ++#endif ++#endif + ++#ifdef CONFIG_X86_64 + syscall_init(); /* This sets MSR_*STAR and related */ + #endif + load_TR_desc(); /* This does ltr */ +Index: linux-2.6.32-master/arch/x86/include/asm/acpi.h +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/include/asm/acpi.h ++++ linux-2.6.32-master/arch/x86/include/asm/acpi.h +@@ -30,6 +30,10 @@ + #include + #include + ++#ifdef CONFIG_XEN ++#include ++#endif ++ + #define COMPILER_DEPENDENT_INT64 long long + #define COMPILER_DEPENDENT_UINT64 unsigned long long + +@@ -120,6 +124,27 @@ extern unsigned long acpi_wakeup_address + /* early initialization routine */ + extern void acpi_reserve_bootmem(void); + ++#ifdef CONFIG_XEN ++static inline int acpi_notify_hypervisor_state(u8 sleep_state, ++ u32 pm1a_cnt_val, ++ u32 pm1b_cnt_val) ++{ ++ struct xen_platform_op op = { ++ .cmd = XENPF_enter_acpi_sleep, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ .u = { ++ .enter_acpi_sleep = { ++ .pm1a_cnt_val = pm1a_cnt_val, ++ .pm1b_cnt_val = pm1b_cnt_val, ++ .sleep_state = sleep_state, ++ }, ++ }, ++ }; ++ ++ return HYPERVISOR_platform_op(&op); ++} ++#endif /* CONFIG_XEN */ ++ + /* + * Check if the CPU can handle C2 and deeper + */ +@@ -152,7 +177,9 @@ static inline void disable_acpi(void) { + + #endif /* !CONFIG_ACPI */ + ++#ifndef CONFIG_XEN + #define ARCH_HAS_POWER_INIT 1 ++#endif + + struct bootnode; + +Index: linux-2.6.32-master/arch/x86/include/asm/apic.h +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/include/asm/apic.h ++++ linux-2.6.32-master/arch/x86/include/asm/apic.h +@@ -15,7 +15,9 @@ + #include + #include + ++#ifndef CONFIG_XEN + #define ARCH_APICTIMER_STOPS_ON_C3 1 ++#endif + + /* + * Debugging macros +Index: linux-2.6.32-master/arch/x86/include/asm/kexec.h +=================================================================== +--- linux-2.6.32-master.orig/arch/x86/include/asm/kexec.h ++++ linux-2.6.32-master/arch/x86/include/asm/kexec.h +@@ -163,6 +163,19 @@ struct kimage_arch { + }; + #endif + ++/* Under Xen we need to work with machine addresses. 
These macros give the ++ * machine address of a certain page to the generic kexec code instead of ++ * the pseudo physical address which would be given by the default macros. ++ */ ++ ++#ifdef CONFIG_XEN ++#define KEXEC_ARCH_HAS_PAGE_MACROS ++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) ++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) ++#define kexec_virt_to_phys(addr) virt_to_machine(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) ++#endif ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_KEXEC_H */ --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-blkif-protocol-fallback-hack +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-blkif-protocol-fallback-hack @@ -0,0 +1,219 @@ +Subject: 32-on-64 blkif protocol negotiation fallback for old guests. +From: kraxel@suse.de +References: 244055 +Patch-mainline: never. + +See the comment below. Oh well. + +--- head-2010-01-04.orig/drivers/xen/Kconfig 2009-12-18 12:27:52.000000000 +0100 ++++ head-2010-01-04/drivers/xen/Kconfig 2009-12-18 12:30:34.000000000 +0100 +@@ -28,6 +28,9 @@ config XEN_PRIVCMD + def_bool y + depends on PROC_FS + ++config XEN_DOMCTL ++ tristate ++ + config XEN_XENBUS_DEV + def_bool y + depends on PROC_FS +@@ -47,6 +50,7 @@ config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND + default XEN_BACKEND ++ select XEN_DOMCTL + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory +@@ -56,6 +60,7 @@ config XEN_BLKDEV_TAP + tristate "Block-device tap backend driver" + depends on XEN_BACKEND + default XEN_BACKEND ++ select XEN_DOMCTL + help + The block tap driver is an alternative to the block back driver + and allows VM block requests to be redirected to userspace through +--- head-2010-01-04.orig/drivers/xen/blkback/xenbus.c 2008-09-15 15:10:36.000000000 +0200 ++++ head-2010-01-04/drivers/xen/blkback/xenbus.c 2008-09-15 15:10:39.000000000 +0200 +@@ -21,6 +21,7 @@ + #include + #include + #include "common.h" ++#include "../core/domctl.h" + + #undef DPRINTK + #define DPRINTK(fmt, args...) 
\ +@@ -488,8 +489,10 @@ static int connect_ring(struct backend_i + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); +- if (err) +- strcpy(protocol, "unspecified, assuming native"); ++ if (err) { ++ strcpy(protocol, "unspecified"); ++ be->blkif->blk_protocol = xen_guest_blkif_protocol(be->blkif->domid); ++ } + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) +--- head-2010-01-04.orig/drivers/xen/blktap/xenbus.c 2008-09-15 15:10:36.000000000 +0200 ++++ head-2010-01-04/drivers/xen/blktap/xenbus.c 2008-09-15 15:10:39.000000000 +0200 +@@ -39,6 +39,7 @@ + #include + #include + #include "common.h" ++#include "../core/domctl.h" + + + struct backend_info +@@ -426,8 +427,10 @@ static int connect_ring(struct backend_i + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); +- if (err) +- strcpy(protocol, "unspecified, assuming native"); ++ if (err) { ++ strcpy(protocol, "unspecified"); ++ be->blkif->blk_protocol = xen_guest_blkif_protocol(be->blkif->domid); ++ } + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) +--- head-2010-01-04.orig/drivers/xen/core/Makefile 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-04/drivers/xen/core/Makefile 2010-01-04 16:17:00.000000000 +0100 +@@ -12,4 +12,7 @@ obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o + obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o + obj-$(CONFIG_SMP) += spinlock.o + obj-$(CONFIG_KEXEC) += machine_kexec.o ++obj-$(CONFIG_XEN_DOMCTL) += domctl.o ++CFLAGS_domctl.o := -D__XEN_PUBLIC_XEN_H__ -D__XEN_PUBLIC_GRANT_TABLE_H__ ++CFLAGS_domctl.o += -D__XEN_TOOLS__ -imacros xen/interface/domctl.h + obj-$(CONFIG_XEN_XENCOMM) += xencomm.o +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-04/drivers/xen/core/domctl.c 2010-01-04 16:15:58.000000000 +0100 +@@ -0,0 +1,120 @@ ++/* ++ * !!! dirty hack alert !!! ++ * ++ * Problem: old guests kernels don't have a "protocol" node ++ * in the frontend xenstore directory, so mixing ++ * 32 and 64bit domains doesn't work. ++ * ++ * Upstream plans to solve this in the tools, by letting them ++ * create a protocol node. Which certainly makes sense. ++ * But it isn't trivial and isn't done yet. Too bad. ++ * ++ * So for the time being we use the get_address_size domctl ++ * hypercall for a pretty good guess. Not nice as the domctl ++ * hypercall isn't supposed to be used by the kernel. Because ++ * we don't want to have dependencies between dom0 kernel and ++ * xen kernel versions. Now we have one. Ouch. ++ */ ++#undef __XEN_PUBLIC_XEN_H__ ++#undef __XEN_PUBLIC_GRANT_TABLE_H__ ++#undef __XEN_TOOLS__ ++#include ++#include ++#include ++#include ++ ++#include "domctl.h" ++ ++/* stuff copied from xen/interface/domctl.h, which we can't ++ * include directly for the reasons outlined above .... 
*/ ++ ++typedef struct xen_domctl_address_size { ++ uint32_t size; ++} xen_domctl_address_size_t; ++ ++typedef __attribute__((aligned(8))) uint64_t uint64_aligned_t; ++ ++union xen_domctl { ++ /* v4: sles10 sp1: xen 3.0.4 + 32-on-64 patches */ ++ struct { ++ uint32_t cmd; ++ uint32_t interface_version; ++ domid_t domain; ++ union { ++ /* left out lots of other struct xen_domctl_foobar */ ++ struct xen_domctl_address_size address_size; ++ uint64_t dummy_align; ++ uint8_t dummy_pad[128]; ++ }; ++ } v4; ++ ++ /* v5: upstream: xen 3.1, v6: upstream: xen 4.0 */ ++ struct { ++ uint32_t cmd; ++ uint32_t interface_version; ++ domid_t domain; ++ union { ++ struct xen_domctl_address_size address_size; ++ uint64_aligned_t dummy_align; ++ uint8_t dummy_pad[128]; ++ }; ++ } v5, v6; ++}; ++ ++/* The actual code comes here */ ++ ++static inline int hypervisor_domctl(void *domctl) ++{ ++ return _hypercall1(int, domctl, domctl); ++} ++ ++int xen_guest_address_size(int domid) ++{ ++ union xen_domctl domctl; ++ int low, ret; ++ ++#define guest_address_size(ver) do { \ ++ memset(&domctl, 0, sizeof(domctl)); \ ++ domctl.v##ver.cmd = XEN_DOMCTL_get_address_size; \ ++ domctl.v##ver.interface_version = low = ver; \ ++ domctl.v##ver.domain = domid; \ ++ ret = hypervisor_domctl(&domctl) ?: domctl.v##ver.address_size.size; \ ++ if (ret == 32 || ret == 64) { \ ++ printk("v" #ver " domctl worked ok: dom%d is %d-bit\n", \ ++ domid, ret); \ ++ return ret; \ ++ } \ ++} while (0) ++ ++ BUILD_BUG_ON(XEN_DOMCTL_INTERFACE_VERSION > 6); ++ guest_address_size(6); ++#if CONFIG_XEN_COMPAT < 0x040000 ++ guest_address_size(5); ++#endif ++#if CONFIG_XEN_COMPAT < 0x030100 ++ guest_address_size(4); ++#endif ++ ++ ret = BITS_PER_LONG; ++ printk("v%d...6 domctls failed, assuming dom%d is native: %d\n", ++ low, domid, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xen_guest_address_size); ++ ++int xen_guest_blkif_protocol(int domid) ++{ ++ int address_size = xen_guest_address_size(domid); ++ ++ if (address_size == BITS_PER_LONG) ++ return BLKIF_PROTOCOL_NATIVE; ++ if (address_size == 32) ++ return BLKIF_PROTOCOL_X86_32; ++ if (address_size == 64) ++ return BLKIF_PROTOCOL_X86_64; ++ return BLKIF_PROTOCOL_NATIVE; ++} ++EXPORT_SYMBOL_GPL(xen_guest_blkif_protocol); ++ ++MODULE_LICENSE("GPL"); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-04/drivers/xen/core/domctl.h 2008-09-15 15:10:39.000000000 +0200 +@@ -0,0 +1,2 @@ ++int xen_guest_address_size(int domid); ++int xen_guest_blkif_protocol(int domid); --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/tmem +++ linux-ec2-2.6.32/debian.ec2/patches.xen/tmem @@ -0,0 +1,1241 @@ +Subject: Transcendent memory ("tmem") for Linux +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 908:baeb818cd2dc) +Patch-mainline: obsolete + +Tmem, when called from a tmem-capable (paravirtualized) guest, makes +use of otherwise unutilized ("fallow") memory to create and manage +pools of pages that can be accessed from the guest either as +"ephemeral" pages or as "persistent" pages. In either case, the pages +are not directly addressible by the guest, only copied to and fro via +the tmem interface. Ephemeral pages are a nice place for a guest to +put recently evicted clean pages that it might need again; these pages +can be reclaimed synchronously by Xen for other guests or other uses. +Persistent pages are a nice place for a guest to put "swap" pages to +avoid sending them to disk. These pages retain data as long as the +guest lives, but count against the guest memory allocation. 
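Stripped of the prose, the interface described above is three copy-based operations — put, get, flush — keyed by a (pool, object, index) handle, where a put may be declined at any time and, for a private ephemeral pool, a successful get is also a flush. Below is a toy userspace model of those semantics under stated assumptions: every name (tmem_demo_*, POOL_CAP, PAGE_SZ) is invented, the real mechanism is a hypercall rather than a library call, and a four-slot table stands in for the "indeterminate size" pool.

    /* Toy model of the handle-based tmem API described above: pages are
     * only copied in and out, never mapped, and a put may be declined. */
    #include <stdio.h>
    #include <string.h>

    #define POOL_CAP 4
    #define PAGE_SZ 16   /* tiny stand-in for a 4 KiB page */

    struct handle { unsigned pool; unsigned long obj; unsigned idx; };
    struct slot   { int used; struct handle h; char data[PAGE_SZ]; };

    static struct slot pool[POOL_CAP];

    static int same(const struct handle *a, const struct handle *b)
    {
        return a->pool == b->pool && a->obj == b->obj && a->idx == b->idx;
    }

    /* Returns 1 on success, 0 if declined ("ephemeral": no guarantees). */
    static int tmem_demo_put(struct handle h, const char *page)
    {
        struct slot *dst = NULL;
        for (int i = 0; i < POOL_CAP; i++) {
            if (pool[i].used && same(&pool[i].h, &h)) { dst = &pool[i]; break; }
            if (!dst && !pool[i].used) dst = &pool[i];
        }
        if (!dst)
            return 0;   /* declined: caller falls back to disk */
        dst->used = 1; dst->h = h;
        memcpy(dst->data, page, PAGE_SZ);
        return 1;
    }

    /* Exclusive get, as for a private ephemeral pool: a hit also flushes. */
    static int tmem_demo_get(struct handle h, char *page)
    {
        for (int i = 0; i < POOL_CAP; i++)
            if (pool[i].used && same(&pool[i].h, &h)) {
                memcpy(page, pool[i].data, PAGE_SZ);
                pool[i].used = 0;
                return 1;
            }
        return 0;   /* miss: caller reads from disk as before */
    }

    int main(void)
    {
        struct handle h = { 0, 42, 7 };   /* (pool, inode, page index) */
        char out[PAGE_SZ];

        tmem_demo_put(h, "hello, tmem!!!!");
        printf("first get:  %d\n", tmem_demo_get(h, out));  /* 1: hit */
        printf("second get: %d\n", tmem_demo_get(h, out));  /* 0: exclusive */
        return 0;
    }

The miss on the second get is the point: exclusive gets keep a private pool trivially coherent, while the guarantee that a failed duplicate put flushes the old data (spelled out further down in precache.c and preswap.c) covers the overwrite case.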
+ +This patch contains the Linux paravirtualization changes to +complement the tmem Xen patch (xen-unstable c/s 19646). It +implements "precache" (ext3 only as of now), "preswap", +and limited "shared precache" (ocfs2 only as of now) support. +CONFIG options are required to turn on +the support (but in this patch they default to "y"). If +the underlying Xen does not have tmem support or has it +turned off, this is sensed early to avoid nearly all +hypercalls. + +Lots of useful prose about tmem can be found at +http://oss.oracle.com/projects/tmem + +Signed-off-by: Dan Magenheimer +Acked-by: jbeulich@novell.com + +--- head-2010-01-18.orig/fs/btrfs/super.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/btrfs/super.c 2009-12-17 21:50:16.000000000 +0100 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include "compat.h" + #include "ctree.h" + #include "disk-io.h" +@@ -387,6 +388,7 @@ static int btrfs_fill_super(struct super + sb->s_root = root_dentry; + + save_mount_options(sb, data); ++ precache_init(sb); + return 0; + + fail_close: +--- head-2010-01-18.orig/fs/btrfs/extent_io.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/btrfs/extent_io.c 2009-12-17 23:28:33.000000000 +0100 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include "extent_io.h" + #include "extent_map.h" + #include "compat.h" +@@ -2015,6 +2016,13 @@ static int __extent_read_full_page(struc + + set_page_extent_mapped(page); + ++ if (!PageUptodate(page)) { ++ if (precache_get(page->mapping, page->index, page) == 1) { ++ BUG_ON(blocksize != PAGE_SIZE); ++ goto out; ++ } ++ } ++ + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + +@@ -2131,6 +2139,7 @@ static int __extent_read_full_page(struc + cur = cur + iosize; + page_offset += iosize; + } ++out: + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); +--- head-2010-01-18.orig/fs/buffer.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/buffer.c 2009-12-04 10:29:25.000000000 +0100 +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + + static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); + +@@ -282,6 +283,11 @@ void invalidate_bdev(struct block_device + + invalidate_bh_lrus(); + invalidate_mapping_pages(mapping, 0, -1); ++ ++ /* 99% of the time, we don't need to flush the precache on the bdev. ++ * But, for the strange corners, lets be cautious ++ */ ++ precache_flush_inode(mapping); + } + EXPORT_SYMBOL(invalidate_bdev); + +--- head-2010-01-18.orig/fs/ext3/super.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/ext3/super.c 2009-12-04 10:29:25.000000000 +0100 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #include + +@@ -1334,6 +1335,7 @@ static int ext3_setup_super(struct super + } else { + printk("internal journal\n"); + } ++ precache_init(sb); + return res; + } + +--- head-2010-01-18.orig/fs/ext4/super.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/ext4/super.c 2010-01-07 09:36:31.000000000 +0100 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + + #include "ext4.h" +@@ -1685,6 +1686,8 @@ static int ext4_setup_super(struct super + EXT4_INODES_PER_GROUP(sb), + sbi->s_mount_opt); + ++ precache_init(sb); ++ + return res; + } + +--- head-2010-01-18.orig/fs/mpage.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/mpage.c 2009-12-04 10:29:25.000000000 +0100 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + /* + * I/O completion handler for multipage BIOs. 
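The btrfs and mpage hunks hook the read path in the same way: immediately before scheduling disk I/O, precache_get() is tried, and a return of 1 means the page was filled from tmem and the read can be skipped. A standalone sketch of that control flow follows; precache_get_demo() and read_from_disk() are stand-ins, not the kernel functions, and the hard-coded miss merely exercises the fallback arm.

    /* Read-path shape added by the hunks above: try the precache first,
     * fall back to disk on a miss. All names here are invented. */
    #include <stdio.h>

    #define PAGE_SZ 16   /* tiny stand-in for a page */

    static int precache_get_demo(unsigned long index, char *page)
    {
        (void)index;
        (void)page;
        return 0;   /* always miss in this sketch; a hit would fill the page */
    }

    static void read_from_disk(unsigned long index, char *page)
    {
        snprintf(page, PAGE_SZ, "disk:%lu", index);
    }

    static void readpage(unsigned long index, char *page)
    {
        if (precache_get_demo(index, page) == 1)
            return;                   /* hit: disk read avoided */
        read_from_disk(index, page);  /* miss: behave as before the patch */
    }

    int main(void)
    {
        char page[PAGE_SZ];
        readpage(3, page);
        printf("%s\n", page);
        return 0;
    }

The asymmetry the hunks preserve is worth noting: a miss costs only the failed lookup on top of the unavoidable disk read, while a hit replaces the read entirely, which is why the hook sits as close to I/O submission as possible.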
+@@ -285,6 +286,13 @@ do_mpage_readpage(struct bio *bio, struc + SetPageMappedToDisk(page); + } + ++ if (fully_mapped && ++ blocks_per_page == 1 && !PageUptodate(page) && ++ precache_get(page->mapping, page->index, page) == 1) { ++ SetPageUptodate(page); ++ goto confused; ++ } ++ + /* + * This page will go to BIO. Do we need to send this BIO off first? + */ +--- head-2010-01-18.orig/fs/ocfs2/super.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/ocfs2/super.c 2010-01-18 15:40:59.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #include + + #define MLOG_MASK_PREFIX ML_SUPER +@@ -2257,6 +2258,7 @@ static int ocfs2_initialize_super(struct + mlog_errno(status); + goto bail; + } ++ shared_precache_init(sb, &di->id2.i_super.s_uuid[0]); + + bail: + mlog_exit(status); +--- head-2010-01-18.orig/fs/reiserfs/super.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/reiserfs/super.c 2009-12-04 10:29:25.000000000 +0100 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -1854,6 +1855,7 @@ static int reiserfs_fill_super(struct su + + init_waitqueue_head(&(sbi->s_wait)); + spin_lock_init(&sbi->bitmap_lock); ++ precache_init(s); + + return (0); + +--- head-2010-01-18.orig/fs/super.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/fs/super.c 2009-12-04 10:29:25.000000000 +0100 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include "internal.h" + +@@ -104,6 +105,9 @@ static struct super_block *alloc_super(s + s->s_qcop = sb_quotactl_ops; + s->s_op = &default_op; + s->s_time_gran = 1000000000; ++#ifdef CONFIG_PRECACHE ++ s->precache_poolid = -1; ++#endif + } + out: + return s; +@@ -194,6 +198,7 @@ void deactivate_super(struct super_block + vfs_dq_off(s, 0); + down_write(&s->s_umount); + fs->kill_sb(s); ++ precache_flush_filesystem(s); + put_filesystem(fs); + put_super(s); + } +@@ -881,6 +886,9 @@ int get_sb_nodev(struct file_system_type + return error; + } + s->s_flags |= MS_ACTIVE; ++#ifdef CONFIG_PRECACHE ++ s->precache_poolid = -2; ++#endif + simple_set_mnt(mnt, s); + return 0; + } +--- head-2010-01-18.orig/include/linux/fs.h 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/include/linux/fs.h 2009-12-04 10:29:25.000000000 +0100 +@@ -1397,6 +1397,9 @@ struct super_block { + /* Granularity of c/m/atime in ns. + Cannot be worse than a second */ + u32 s_time_gran; ++#ifdef CONFIG_PRECACHE ++ u32 precache_poolid; ++#endif + + /* + * Filesystem subtype. 
If non-empty the filesystem type field +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/linux/precache.h 2009-12-04 10:29:25.000000000 +0100 +@@ -0,0 +1,55 @@ ++#ifndef _LINUX_PRECACHE_H ++ ++#include ++#include ++ ++#ifdef CONFIG_PRECACHE ++extern void precache_init(struct super_block *sb); ++extern void shared_precache_init(struct super_block *sb, char *uuid); ++extern int precache_get(struct address_space *mapping, unsigned long index, ++ struct page *empty_page); ++extern int precache_put(struct address_space *mapping, unsigned long index, ++ struct page *page); ++extern int precache_flush(struct address_space *mapping, unsigned long index); ++extern int precache_flush_inode(struct address_space *mapping); ++extern int precache_flush_filesystem(struct super_block *s); ++#else ++static inline void precache_init(struct super_block *sb) ++{ ++} ++ ++static inline void shared_precache_init(struct super_block *sb, char *uuid) ++{ ++} ++ ++static inline int precache_get(struct address_space *mapping, ++ unsigned long index, struct page *empty_page) ++{ ++ return 0; ++} ++ ++static inline int precache_put(struct address_space *mapping, ++ unsigned long index, struct page *page) ++{ ++ return 0; ++} ++ ++static inline int precache_flush(struct address_space *mapping, ++ unsigned long index) ++{ ++ return 0; ++} ++ ++static inline int precache_flush_inode(struct address_space *mapping) ++{ ++ return 0; ++} ++ ++static inline int precache_flush_filesystem(struct super_block *s) ++{ ++ return 0; ++} ++#endif ++ ++#define _LINUX_PRECACHE_H ++#endif /* _LINUX_PRECACHE_H */ +--- head-2010-01-18.orig/include/linux/swap.h 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/include/linux/swap.h 2009-12-04 10:29:25.000000000 +0100 +@@ -178,8 +178,59 @@ struct swap_info_struct { + unsigned int max; + unsigned int inuse_pages; + unsigned int old_block_size; ++#ifdef CONFIG_PRESWAP ++ unsigned long *preswap_map; ++ unsigned int preswap_pages; ++#endif + }; + ++#ifdef CONFIG_PRESWAP ++ ++#include ++extern int preswap_sysctl_handler(struct ctl_table *, int, void __user *, ++ size_t *, loff_t *); ++extern const unsigned long preswap_zero, preswap_infinity; ++ ++extern void preswap_shrink(unsigned long); ++extern int preswap_test(struct swap_info_struct *, unsigned long); ++extern void preswap_init(unsigned); ++extern int preswap_put(struct page *); ++extern int preswap_get(struct page *); ++extern void preswap_flush(unsigned, unsigned long); ++extern void preswap_flush_area(unsigned); ++#else ++static inline void preswap_shrink(unsigned long target_pages) ++{ ++} ++ ++static inline int preswap_test(struct swap_info_struct *sis, unsigned long offset) ++{ ++ return 0; ++} ++ ++static inline void preswap_init(unsigned type) ++{ ++} ++ ++static inline int preswap_put(struct page *page) ++{ ++ return 0; ++} ++ ++static inline int preswap_get(struct page *get) ++{ ++ return 0; ++} ++ ++static inline void preswap_flush(unsigned type, unsigned long offset) ++{ ++} ++ ++static inline void preswap_flush_area(unsigned type) ++{ ++} ++#endif /* CONFIG_PRESWAP */ ++ + struct swap_list_t { + int head; /* head of priority-ordered swapfile list */ + int next; /* swapfile to be used next */ +--- head-2010-01-18.orig/kernel/sysctl.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/kernel/sysctl.c 2010-01-07 09:36:42.000000000 +0100 +@@ -1444,6 +1444,18 @@ static struct ctl_table vm_table[] = { + .mode = 0644, + .proc_handler = &proc_dointvec, + }, ++#ifdef CONFIG_PRESWAP ++ { ++ 
.ctl_name = CTL_UNNUMBERED, ++ .procname = "preswap", ++ .data = NULL, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &preswap_sysctl_handler, ++ .extra1 = (void *)&preswap_zero, ++ .extra2 = (void *)&preswap_infinity, ++ }, ++#endif + #ifdef CONFIG_MEMORY_FAILURE + { + .ctl_name = CTL_UNNUMBERED, +--- head-2010-01-18.orig/mm/Kconfig 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/mm/Kconfig 2010-01-07 09:36:51.000000000 +0100 +@@ -288,3 +288,31 @@ config NOMMU_INITIAL_TRIM_EXCESS + of 1 says that all excess pages should be trimmed. + + See Documentation/nommu-mmap.txt for more information. ++ ++# ++# support for transcendent memory ++# ++config TMEM ++ bool ++ help ++ In a virtualized environment, allows unused and underutilized ++ system physical memory to be made accessible through a narrow ++ well-defined page-copy-based API. If unsure, say Y. ++ ++config PRECACHE ++ bool "Cache clean pages in transcendent memory" ++ depends on XEN ++ select TMEM ++ help ++ Allows the transcendent memory pool to be used to store clean ++ page-cache pages which, under some circumstances, will greatly ++ reduce paging and thus improve performance. If unsure, say Y. ++ ++config PRESWAP ++ bool "Swap pages to transcendent memory" ++ depends on XEN ++ select TMEM ++ help ++ Allows the transcendent memory pool to be used as a pseudo-swap ++ device which, under some circumstances, will greatly reduce ++ swapping and thus improve performance. If unsure, say Y. +--- head-2010-01-18.orig/mm/Makefile 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/mm/Makefile 2009-12-04 10:29:25.000000000 +0100 +@@ -17,6 +17,9 @@ obj-y += init-mm.o + + obj-$(CONFIG_BOUNCE) += bounce.o + obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o ++obj-$(CONFIG_TMEM) += tmem.o ++obj-$(CONFIG_PRESWAP) += preswap.o ++obj-$(CONFIG_PRECACHE) += precache.o + obj-$(CONFIG_HAS_DMA) += dmapool.o + obj-$(CONFIG_HUGETLBFS) += hugetlb.o + obj-$(CONFIG_NUMA) += mempolicy.o +--- head-2010-01-18.orig/mm/filemap.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/mm/filemap.c 2009-12-04 10:29:25.000000000 +0100 +@@ -33,6 +33,7 @@ + #include + #include /* for BUG_ON(!in_atomic()) only */ + #include ++#include + #include /* for page_is_file_cache() */ + #include + #include "internal.h" +@@ -123,6 +124,16 @@ void __remove_from_page_cache(struct pag + { + struct address_space *mapping = page->mapping; + ++ /* ++ * if we're uptodate, flush out into the precache, otherwise ++ * invalidate any existing precache entries. 
We can't leave ++ * stale data around in the precache once our page is gone ++ */ ++ if (PageUptodate(page)) ++ precache_put(page->mapping, page->index, page); ++ else ++ precache_flush(page->mapping, page->index); ++ + radix_tree_delete(&mapping->page_tree, page->index); + page->mapping = NULL; + mapping->nrpages--; +--- head-2010-01-18.orig/mm/page_io.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/mm/page_io.c 2009-12-07 12:08:42.000000000 +0100 +@@ -126,10 +126,23 @@ int swap_writepage(struct page *page, st + ret = -ENOMEM; + goto out; + } ++ ++#ifdef CONFIG_PRECACHE ++ set_page_writeback(page); ++ if (preswap_put(page) == 1) { ++ unlock_page(page); ++ end_page_writeback(page); ++ bio_put(bio); ++ goto out; ++ } ++#endif ++ + if (wbc->sync_mode == WB_SYNC_ALL) + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + count_vm_event(PSWPOUT); ++#ifndef CONFIG_PRECACHE + set_page_writeback(page); ++#endif + trace_swap_out(page); + unlock_page(page); + submit_bio(rw, bio); +@@ -188,6 +201,12 @@ int swap_readpage(struct page *page) + return ret; + } + ++ if (preswap_get(page) == 1) { ++ SetPageUptodate(page); ++ unlock_page(page); ++ goto out; ++ } ++ + bio = get_swap_bio(GFP_KERNEL, page_private(page), page, + end_swap_bio_read); + if (bio == NULL) { +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/mm/precache.c 2009-12-04 10:29:25.000000000 +0100 +@@ -0,0 +1,140 @@ ++/* ++ * linux/mm/precache.c ++ * ++ * Implements "precache" for filesystems/pagecache on top of transcendent ++ * memory ("tmem") API. A filesystem creates an "ephemeral tmem pool" ++ * and retains the returned pool_id in its superblock. Clean pages evicted ++ * from pagecache may be "put" into the pool and associated with a "handle" ++ * consisting of the pool_id, an object (inode) id, and an index (page offset). ++ * Note that the page is copied to tmem; no kernel mappings are changed. ++ * If the page is later needed, the filesystem (or VFS) issues a "get", passing ++ * the same handle and an empty pageframe. If successful, the page is copied ++ * into the pageframe and a disk read is avoided. But since the tmem pool ++ * is of indeterminate size, a "put" page has indeterminate longevity ++ * ("ephemeral"), and the "get" may fail, in which case the filesystem must ++ * read the page from disk as before. Note that the filesystem/pagecache are ++ * responsible for maintaining coherency between the pagecache, precache, ++ * and the disk, for which "flush page" and "flush object" actions are ++ * provided. And when a filesystem is unmounted, it must "destroy" the pool. ++ * ++ * Two types of pools may be created for a precache: "private" or "shared". ++ * For a private pool, a successful "get" always flushes, implementing ++ * exclusive semantics; for a "shared" pool (which is intended for use by ++ * co-resident nodes of a cluster filesystem), the "flush" is not guaranteed. ++ * In either case, a failed "duplicate" put (overwrite) always guarantee ++ * the old data is flushed. ++ * ++ * Note also that multiple accesses to a tmem pool may be concurrent and any ++ * ordering must be guaranteed by the caller. ++ * ++ * Copyright (C) 2008,2009 Dan Magenheimer, Oracle Corp. 
++ */ ++ ++#include ++#include ++#include "tmem.h" ++ ++static int precache_auto_allocate; /* set to 1 to auto_allocate */ ++ ++int precache_put(struct address_space *mapping, unsigned long index, ++ struct page *page) ++{ ++ u32 tmem_pool = mapping->host->i_sb->precache_poolid; ++ u64 obj = (unsigned long) mapping->host->i_ino; ++ u32 ind = (u32) index; ++ unsigned long mfn = pfn_to_mfn(page_to_pfn(page)); ++ int ret; ++ ++ if ((s32)tmem_pool < 0) { ++ if (!precache_auto_allocate) ++ return 0; ++ /* a put on a non-existent precache may auto-allocate one */ ++ ret = tmem_new_pool(0, 0, 0); ++ if (ret < 0) ++ return 0; ++ printk(KERN_INFO ++ "Mapping superblock for s_id=%s to precache_id=%d\n", ++ mapping->host->i_sb->s_id, tmem_pool); ++ mapping->host->i_sb->precache_poolid = tmem_pool; ++ } ++ if (ind != index) ++ return 0; ++ mb(); /* ensure page is quiescent; tmem may address it with an alias */ ++ return tmem_put_page(tmem_pool, obj, ind, mfn); ++} ++ ++int precache_get(struct address_space *mapping, unsigned long index, ++ struct page *empty_page) ++{ ++ u32 tmem_pool = mapping->host->i_sb->precache_poolid; ++ u64 obj = (unsigned long) mapping->host->i_ino; ++ u32 ind = (u32) index; ++ unsigned long mfn = pfn_to_mfn(page_to_pfn(empty_page)); ++ ++ if ((s32)tmem_pool < 0) ++ return 0; ++ if (ind != index) ++ return 0; ++ ++ return tmem_get_page(tmem_pool, obj, ind, mfn); ++} ++EXPORT_SYMBOL(precache_get); ++ ++int precache_flush(struct address_space *mapping, unsigned long index) ++{ ++ u32 tmem_pool = mapping->host->i_sb->precache_poolid; ++ u64 obj = (unsigned long) mapping->host->i_ino; ++ u32 ind = (u32) index; ++ ++ if ((s32)tmem_pool < 0) ++ return 0; ++ if (ind != index) ++ return 0; ++ ++ return tmem_flush_page(tmem_pool, obj, ind); ++} ++EXPORT_SYMBOL(precache_flush); ++ ++int precache_flush_inode(struct address_space *mapping) ++{ ++ u32 tmem_pool = mapping->host->i_sb->precache_poolid; ++ u64 obj = (unsigned long) mapping->host->i_ino; ++ ++ if ((s32)tmem_pool < 0) ++ return 0; ++ ++ return tmem_flush_object(tmem_pool, obj); ++} ++EXPORT_SYMBOL(precache_flush_inode); ++ ++int precache_flush_filesystem(struct super_block *sb) ++{ ++ u32 tmem_pool = sb->precache_poolid; ++ int ret; ++ ++ if ((s32)tmem_pool < 0) ++ return 0; ++ ret = tmem_destroy_pool(tmem_pool); ++ if (!ret) ++ return 0; ++ printk(KERN_INFO ++ "Unmapping superblock for s_id=%s from precache_id=%d\n", ++ sb->s_id, ret); ++ sb->precache_poolid = 0; ++ return 1; ++} ++EXPORT_SYMBOL(precache_flush_filesystem); ++ ++void precache_init(struct super_block *sb) ++{ ++ sb->precache_poolid = tmem_new_pool(0, 0, 0); ++} ++EXPORT_SYMBOL(precache_init); ++ ++void shared_precache_init(struct super_block *sb, char *uuid) ++{ ++ u64 uuid_lo = *(u64 *)uuid; ++ u64 uuid_hi = *(u64 *)(&uuid[8]); ++ sb->precache_poolid = tmem_new_pool(uuid_lo, uuid_hi, TMEM_POOL_SHARED); ++} ++EXPORT_SYMBOL(shared_precache_init); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/mm/preswap.c 2009-12-04 10:29:25.000000000 +0100 +@@ -0,0 +1,184 @@ ++/* ++ * linux/mm/preswap.c ++ * ++ * Implements a fast "preswap" on top of the transcendent memory ("tmem") API. ++ * When a swapdisk is enabled (with swapon), a "private persistent tmem pool" ++ * is created along with a bit-per-page preswap_map. When swapping occurs ++ * and a page is about to be written to disk, a "put" into the pool may first ++ * be attempted by passing the pageframe to be swapped, along with a "handle" ++ * consisting of a pool_id, an object id, and an index. 
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/mm/preswap.c	2009-12-04 10:29:25.000000000 +0100
+@@ -0,0 +1,184 @@
++/*
++ * linux/mm/preswap.c
++ *
++ * Implements a fast "preswap" on top of the transcendent memory ("tmem") API.
++ * When a swapdisk is enabled (with swapon), a "private persistent tmem pool"
++ * is created along with a bit-per-page preswap_map. When swapping occurs
++ * and a page is about to be written to disk, a "put" into the pool may first
++ * be attempted by passing the pageframe to be swapped, along with a "handle"
++ * consisting of a pool_id, an object id, and an index. Since the pool is of
++ * indeterminate size, the "put" may be rejected, in which case the page
++ * is swapped to disk as normal. If the "put" is successful, the page is
++ * copied to tmem and the preswap_map records the success. Later, when
++ * the page needs to be swapped in, the preswap_map is checked and, if set,
++ * the page may be obtained with a "get" operation. Note that the swap
++ * subsystem is responsible for: maintaining coherency between the swapcache,
++ * preswap, and the swapdisk; for evicting stale pages from preswap; and for
++ * emptying preswap when swapoff is performed. The "flush page" and "flush
++ * object" actions are provided for this.
++ *
++ * Note that if a "duplicate put" is performed to overwrite a page and
++ * the "put" operation fails, the page (and old data) is flushed and lost.
++ * Also note that multiple accesses to a tmem pool may be concurrent and
++ * any ordering must be guaranteed by the caller.
++ *
++ * Copyright (C) 2008,2009 Dan Magenheimer, Oracle Corp.
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "tmem.h"
++
++static u32 preswap_poolid = -1; /* if negative, preswap will never call tmem */
++
++const unsigned long preswap_zero = 0, preswap_infinity = ~0UL; /* for sysctl */
++
++/*
++ * Swizzling increases objects per swaptype, increasing tmem concurrency
++ * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
++ */
++#define SWIZ_BITS 4
++#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
++#define oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
++#define iswiz(_ind) (_ind >> SWIZ_BITS)
++
++/*
++ * preswap_map test/set/clear operations (must be atomic)
++ */
++
++int preswap_test(struct swap_info_struct *sis, unsigned long offset)
++{
++	if (!sis->preswap_map)
++		return 0;
++	return test_bit(offset % BITS_PER_LONG,
++		&sis->preswap_map[offset/BITS_PER_LONG]);
++}
++
++static inline void preswap_set(struct swap_info_struct *sis,
++		unsigned long offset)
++{
++	if (!sis->preswap_map)
++		return;
++	set_bit(offset % BITS_PER_LONG,
++		&sis->preswap_map[offset/BITS_PER_LONG]);
++}
++
++static inline void preswap_clear(struct swap_info_struct *sis,
++		unsigned long offset)
++{
++	if (!sis->preswap_map)
++		return;
++	clear_bit(offset % BITS_PER_LONG,
++		&sis->preswap_map[offset/BITS_PER_LONG]);
++}
++
++/*
++ * preswap tmem operations
++ */
++
++/* returns 1 if the page was successfully put into preswap, 0 if the page
++ * was declined, and -ERRNO for a specific error */
++int preswap_put(struct page *page)
++{
++	swp_entry_t entry = { .val = page_private(page), };
++	unsigned type = swp_type(entry);
++	pgoff_t offset = swp_offset(entry);
++	u64 ind64 = (u64)offset;
++	u32 ind = (u32)offset;
++	unsigned long mfn = pfn_to_mfn(page_to_pfn(page));
++	struct swap_info_struct *sis = get_swap_info_struct(type);
++	int dup = 0, ret;
++
++	if ((s32)preswap_poolid < 0)
++		return 0;
++	if (ind64 != ind)
++		return 0;
++	if (preswap_test(sis, offset))
++		dup = 1;
++	mb(); /* ensure page is quiescent; tmem may address it with an alias */
++	ret = tmem_put_page(preswap_poolid, oswiz(type, ind), iswiz(ind), mfn);
++	if (ret == 1) {
++		preswap_set(sis, offset);
++		if (!dup)
++			sis->preswap_pages++;
++	} else if (dup) {
++		/* failed dup put always results in an automatic flush of
++		 * the (older) page from preswap */
++		preswap_clear(sis, offset);
++		sis->preswap_pages--;
++	}
++	return ret;
++}
++
++/* returns 1 if the page was successfully gotten from preswap, 0 if the page
++ * was not present (should never happen!), and -ERRNO for a specific error */
++int preswap_get(struct page *page)
++{
++	swp_entry_t entry = { .val = page_private(page), };
++	unsigned type = swp_type(entry);
++	pgoff_t offset = swp_offset(entry);
++	u64 ind64 = (u64)offset;
++	u32 ind = (u32)offset;
++	unsigned long mfn = pfn_to_mfn(page_to_pfn(page));
++	struct swap_info_struct *sis = get_swap_info_struct(type);
++	int ret;
++
++	if ((s32)preswap_poolid < 0)
++		return 0;
++	if (ind64 != ind)
++		return 0;
++	if (!preswap_test(sis, offset))
++		return 0;
++	ret = tmem_get_page(preswap_poolid, oswiz(type, ind), iswiz(ind), mfn);
++	return ret;
++}
++
++/* flush a single page from preswap */
++void preswap_flush(unsigned type, unsigned long offset)
++{
++	u64 ind64 = (u64)offset;
++	u32 ind = (u32)offset;
++	struct swap_info_struct *sis = get_swap_info_struct(type);
++	int ret = 1;
++
++	if ((s32)preswap_poolid < 0)
++		return;
++	if (ind64 != ind)
++		return;
++	if (preswap_test(sis, offset)) {
++		ret = tmem_flush_page(preswap_poolid,
++			oswiz(type, ind), iswiz(ind));
++		sis->preswap_pages--;
++		preswap_clear(sis, offset);
++	}
++}
++
++/* flush all pages from the passed swaptype */
++void preswap_flush_area(unsigned type)
++{
++	struct swap_info_struct *sis = get_swap_info_struct(type);
++	int ind;
++
++	if ((s32)preswap_poolid < 0)
++		return;
++	for (ind = SWIZ_MASK; ind >= 0; ind--)
++		(void)tmem_flush_object(preswap_poolid, oswiz(type, ind));
++	sis->preswap_pages = 0;
++}
++
++void preswap_init(unsigned type)
++{
++	/* only need one tmem pool for all swap types */
++	if ((s32)preswap_poolid >= 0)
++		return;
++	preswap_poolid = tmem_new_pool(0, 0, TMEM_POOL_PERSIST);
++	if ((s32)preswap_poolid < 0)
++		return;
++}
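The oswiz()/iswiz() swizzle defined above splits a swap offset between the tmem object id and the page index, so 2^SWIZ_BITS consecutive offsets land in distinct objects and can be operated on concurrently. A standalone worked example (editorial sketch, not part of the patch):

/* Round-tripping a (swap type, offset) pair through the swizzle with
 * SWIZ_BITS = 4. Compiles as ordinary userspace C.
 */
#include <assert.h>
#include <stdint.h>

#define SWIZ_BITS 4
#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
#define oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
#define iswiz(_ind) (_ind >> SWIZ_BITS)

int main(void)
{
	uint32_t type = 1, offset = 0x1234;
	uint64_t object = oswiz(type, offset);	/* (1 << 4) | 0x4 = 0x14 */
	uint32_t index = iswiz(offset);		/* 0x1234 >> 4 = 0x123 */

	/* low SWIZ_BITS of the object id plus the index recover the offset */
	assert(((index << SWIZ_BITS) | (object & SWIZ_MASK)) == offset);
	return 0;
}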
+--- head-2010-01-18.orig/mm/swapfile.c	2010-01-18 15:20:21.000000000 +0100
++++ head-2010-01-18/mm/swapfile.c	2009-12-04 10:29:25.000000000 +0100
+@@ -593,6 +593,7 @@ static int swap_entry_free(struct swap_i
+ 		swap_list.next = p - swap_info;
+ 		nr_swap_pages++;
+ 		p->inuse_pages--;
++		preswap_flush(p - swap_info, offset);
+ 	}
+ 	if (!swap_count(count))
+ 		mem_cgroup_uncharge_swap(ent);
+@@ -992,7 +993,7 @@ static int unuse_mm(struct mm_struct *mm
+  * Recycle to start on reaching the end, returning 0 when empty.
+  */
+ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
+-					unsigned int prev)
++					unsigned int prev, unsigned int preswap)
+ {
+ 	unsigned int max = si->max;
+ 	unsigned int i = prev;
+@@ -1018,6 +1019,12 @@ static unsigned int find_next_to_unuse(s
+ 			prev = 0;
+ 			i = 1;
+ 		}
++		if (preswap) {
++			if (preswap_test(si, i))
++				break;
++			else
++				continue;
++		}
+ 		count = si->swap_map[i];
+ 		if (count && swap_count(count) != SWAP_MAP_BAD)
+ 			break;
+@@ -1029,8 +1036,12 @@ static unsigned int find_next_to_unuse(s
+  * We completely avoid races by reading each swap page in advance,
+  * and then search for the process using it. All the necessary
+  * page table adjustments can then be made atomically.
++ *
++ * if the boolean preswap is true, only unuse pages_to_unuse pages;
++ * pages_to_unuse==0 means all pages
+  */
+-static int try_to_unuse(unsigned int type)
++static int try_to_unuse(unsigned int type, unsigned int preswap,
++		unsigned long pages_to_unuse)
+ {
+ 	struct swap_info_struct * si = &swap_info[type];
+ 	struct mm_struct *start_mm;
+@@ -1066,7 +1077,7 @@ static int try_to_unuse(unsigned int typ
+ 	 * one pass through swap_map is enough, but not necessarily:
+ 	 * there are races when an instance of an entry might be missed.
+ 	 */
+-	while ((i = find_next_to_unuse(si, i)) != 0) {
++	while ((i = find_next_to_unuse(si, i, preswap)) != 0) {
+ 		if (signal_pending(current)) {
+ 			retval = -EINTR;
+ 			break;
+@@ -1259,6 +1270,8 @@ retry:
+ 		 * interactive performance.
+ 		 */
+ 		cond_resched();
++		if (preswap && pages_to_unuse && !--pages_to_unuse)
++			break;
+ 	}
+ 
+ 	mmput(start_mm);
+@@ -1602,7 +1615,7 @@ SYSCALL_DEFINE1(swapoff, const char __us
+ 	spin_unlock(&swap_lock);
+ 
+ 	current->flags |= PF_OOM_ORIGIN;
+-	err = try_to_unuse(type);
++	err = try_to_unuse(type, 0, 0);
+ 	current->flags &= ~PF_OOM_ORIGIN;
+ 
+ 	if (err) {
+@@ -1652,9 +1665,14 @@ SYSCALL_DEFINE1(swapoff, const char __us
+ 	p->swap_map = NULL;
+ 	p->flags = 0;
+ 	trace_swap_file_close(swap_file);
++	preswap_flush_area(p - swap_info);
+ 	spin_unlock(&swap_lock);
+ 	mutex_unlock(&swapon_mutex);
+ 	vfree(swap_map);
++#ifdef CONFIG_PRESWAP
++	if (p->preswap_map)
++		vfree(p->preswap_map);
++#endif
+ 	/* Destroy swap account informatin */
+ 	swap_cgroup_swapoff(type);
+ 
+@@ -1808,6 +1826,7 @@ SYSCALL_DEFINE2(swapon, const char __use
+ 	unsigned long maxpages = 1;
+ 	unsigned long swapfilepages;
+ 	unsigned short *swap_map = NULL;
++	unsigned long *preswap_map = NULL;
+ 	struct page *page = NULL;
+ 	struct inode *inode = NULL;
+ 	int did_down = 0;
+@@ -1976,6 +1995,12 @@ SYSCALL_DEFINE2(swapon, const char __use
+ 			swap_map[page_nr] = SWAP_MAP_BAD;
+ 	}
+ 
++#ifdef CONFIG_PRESWAP
++	preswap_map = vmalloc(maxpages / sizeof(long));
++	if (preswap_map)
++		memset(preswap_map, 0, maxpages / sizeof(long));
++#endif
++
+ 	error = swap_cgroup_swapon(type, maxpages);
+ 	if (error)
+ 		goto bad_swap;
+@@ -2018,6 +2043,9 @@ SYSCALL_DEFINE2(swapon, const char __use
+ 	else
+ 		p->prio = --least_priority;
+ 	p->swap_map = swap_map;
++#ifdef CONFIG_PRESWAP
++	p->preswap_map = preswap_map;
++#endif
+ 	p->flags |= SWP_WRITEOK;
+ 	nr_swap_pages += nr_good_pages;
+ 	total_swap_pages += nr_good_pages;
+@@ -2044,6 +2072,7 @@ SYSCALL_DEFINE2(swapon, const char __use
+ 		swap_info[prev].next = p - swap_info;
+ 	}
+ 	trace_swap_file_open(swap_file, name);
++	preswap_init(p - swap_info);
+ 	spin_unlock(&swap_lock);
+ 	mutex_unlock(&swapon_mutex);
+ 	error = 0;
+@@ -2060,6 +2089,7 @@ bad_swap_2:
+ 	p->swap_file = NULL;
+ 	p->flags = 0;
+ 	spin_unlock(&swap_lock);
++	vfree(preswap_map);
+ 	vfree(swap_map);
+ 	if (swap_file)
+ 		filp_close(swap_file, NULL);
+@@ -2268,6 +2298,10 @@ int valid_swaphandles(swp_entry_t entry,
+ 		base++;
+ 
+ 	spin_lock(&swap_lock);
++	if (preswap_test(si, target)) {
++		spin_unlock(&swap_lock);
++		return 0;
++	}
+ 	if (end > si->max)	/* don't go beyond end of map */
+ 		end = si->max;
+ 
+@@ -2278,6 +2312,9 @@ int valid_swaphandles(swp_entry_t entry,
+ 			break;
+ 		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
+ 			break;
++		/* Don't read in preswap pages */
++		if (preswap_test(si, toff))
++			break;
+ 	}
+ 	/* Count contiguous allocated slots below our target */
+ 	for (toff = target; --toff >= base; nr_pages++) {
+@@ -2286,6 +2323,9 @@ int valid_swaphandles(swp_entry_t entry,
+ 			break;
+ 		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
+ 			break;
++		/* Don't read in preswap pages */
++		if (preswap_test(si, toff))
++			break;
+ 	}
+ 	spin_unlock(&swap_lock);
+ 
+@@ -2296,3 +2336,92 @@ int valid_swaphandles(swp_entry_t entry,
+ 	*offset = ++toff;
+ 	return nr_pages? ++nr_pages: 0;
+ }
++
++#ifdef CONFIG_PRESWAP
++/*
++ * preswap infrastructure functions
++ */
++
++/* code structure leveraged from sys_swapoff */
++void preswap_shrink(unsigned long target_pages)
++{
++	struct swap_info_struct *si = NULL;
++	unsigned long total_pages = 0, total_pages_to_unuse;
++	unsigned long pages = 0, unuse_pages = 0;
++	int type;
++	int wrapped = 0;
++
++	do {
++		/*
++		 * we don't want to hold swap_lock while doing a very
++		 * lengthy try_to_unuse, but swap_list may change
++		 * so restart scan from swap_list.head each time
++		 */
++		spin_lock(&swap_lock);
++		total_pages = 0;
++		for (type = swap_list.head; type >= 0; type = si->next) {
++			si = get_swap_info_struct(type);
++			total_pages += si->preswap_pages;
++		}
++		if (total_pages <= target_pages) {
++			spin_unlock(&swap_lock);
++			return;
++		}
++		total_pages_to_unuse = total_pages - target_pages;
++		for (type = swap_list.head; type >= 0; type = si->next) {
++			si = get_swap_info_struct(type);
++			if (total_pages_to_unuse < si->preswap_pages)
++				pages = unuse_pages = total_pages_to_unuse;
++			else {
++				pages = si->preswap_pages;
++				unuse_pages = 0; /* unuse all */
++			}
++			if (security_vm_enough_memory(pages))
++				continue;
++			vm_unacct_memory(pages);
++			break;
++		}
++		spin_unlock(&swap_lock);
++		if (type < 0)
++			return;
++		current->flags |= PF_OOM_ORIGIN;
++		(void)try_to_unuse(type, 1, unuse_pages);
++		current->flags &= ~PF_OOM_ORIGIN;
++		wrapped++;
++	} while (wrapped <= 3);
++}
++
++
++#ifdef CONFIG_SYSCTL
++/* cat /proc/sys/vm/preswap provides the total number of pages in preswap
++ * across all swaptypes. echo N > /proc/sys/vm/preswap attempts to shrink
++ * preswap page usage to N (usually 0) */
++int preswap_sysctl_handler(ctl_table *table, int write,
++	void __user *buffer, size_t *length, loff_t *ppos)
++{
++	unsigned long npages;
++	int type;
++	unsigned long totalpages = 0;
++	struct swap_info_struct *si = NULL;
++
++	/* modeled after hugetlb_sysctl_handler in mm/hugetlb.c */
++	if (!write) {
++		spin_lock(&swap_lock);
++		for (type = swap_list.head; type >= 0; type = si->next) {
++			si = get_swap_info_struct(type);
++			totalpages += si->preswap_pages;
++		}
++		spin_unlock(&swap_lock);
++		npages = totalpages;
++	}
++	table->data = &npages;
++	table->maxlen = sizeof(unsigned long);
++	proc_doulongvec_minmax(table, write, buffer, length, ppos);
++
++	if (write)
++		preswap_shrink(npages);
++
++	return 0;
++}
++#endif
++#endif /* CONFIG_PRESWAP */
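Assuming the handler above is registered as a vm sysctl named preswap (the kernel/sysctl.c hookup is not part of this excerpt), it can be driven from userspace as in this editorial sketch:

/* Editorial sketch: query preswap usage, then ask the kernel to shrink
 * it to zero. Assumes the sysctl appears as /proc/sys/vm/preswap,
 * which is not shown in this excerpt.
 */
#include <stdio.h>

int main(void)
{
	unsigned long pages;
	FILE *f = fopen("/proc/sys/vm/preswap", "r");

	if (!f || fscanf(f, "%lu", &pages) != 1)
		return 1;
	fclose(f);
	printf("preswap holds %lu pages\n", pages);

	f = fopen("/proc/sys/vm/preswap", "w");
	if (!f)
		return 1;
	fprintf(f, "0\n");	/* writing N calls preswap_shrink(N); 0 empties it */
	fclose(f);
	return 0;
}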
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/mm/tmem.h	2009-12-04 10:29:25.000000000 +0100
+@@ -0,0 +1,84 @@
++/*
++ * linux/mm/tmem.h
++ *
++ * Interface to transcendent memory, used by mm/precache.c and mm/preswap.c
++ * Currently implemented on XEN, but may be implemented elsewhere in future.
++ *
++ * Copyright (C) 2008,2009 Dan Magenheimer, Oracle Corp.
++ */
++
++#ifdef CONFIG_XEN
++#include
++
++/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
++#define TMEM_POOL_MIN_PAGESHIFT 12
++#define TMEM_POOL_PAGEORDER (PAGE_SHIFT - TMEM_POOL_MIN_PAGESHIFT)
++
++extern int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, u64 object, u32 index,
++	unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len);
++extern int xen_tmem_new_pool(u32 tmem_cmd, u64 uuid_lo, u64 uuid_hi, u32 flags);
++
++static inline int tmem_put_page(u32 pool_id, u64 object, u32 index,
++	unsigned long gmfn)
++{
++	return xen_tmem_op(TMEM_PUT_PAGE, pool_id, object, index,
++		gmfn, 0, 0, 0);
++}
++
++static inline int tmem_get_page(u32 pool_id, u64 object, u32 index,
++	unsigned long gmfn)
++{
++	return xen_tmem_op(TMEM_GET_PAGE, pool_id, object, index,
++		gmfn, 0, 0, 0);
++}
++
++static inline int tmem_flush_page(u32 pool_id, u64 object, u32 index)
++{
++	return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, object, index,
++		0, 0, 0, 0);
++}
++
++static inline int tmem_flush_object(u32 pool_id, u64 object)
++{
++	return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, object, 0, 0, 0, 0, 0);
++}
++
++static inline int tmem_new_pool(u64 uuid_lo, u64 uuid_hi, u32 flags)
++{
++	BUILD_BUG_ON((TMEM_POOL_PAGEORDER < 0) ||
++		(TMEM_POOL_PAGEORDER >= TMEM_POOL_PAGESIZE_MASK));
++	flags |= TMEM_POOL_PAGEORDER << TMEM_POOL_PAGESIZE_SHIFT;
++	return xen_tmem_new_pool(TMEM_NEW_POOL, uuid_lo, uuid_hi, flags);
++}
++
++static inline int tmem_destroy_pool(u32 pool_id)
++{
++	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, 0, 0, 0, 0, 0, 0);
++}
++#else
++struct tmem_op {
++	u32 cmd;
++	s32 pool_id; /* private > 0; shared < 0; 0 is invalid */
++	union {
++		struct {	/* for cmd == TMEM_NEW_POOL */
++			u64 uuid[2];
++			u32 flags;
++		} new;
++		struct {	/* for cmd == TMEM_CONTROL */
++			u32 subop;
++			u32 cli_id;
++			u32 arg1;
++			u32 arg2;
++			void *buf;
++		} ctrl;
++		struct {
++			u64 object;
++			u32 index;
++			u32 tmem_offset;
++			u32 pfn_offset;
++			u32 len;
++			unsigned long pfn; /* page frame */
++		} gen;
++	} u;
++};
++#endif
+--- head-2010-01-18.orig/mm/truncate.c	2010-01-18 15:20:21.000000000 +0100
++++ head-2010-01-18/mm/truncate.c	2009-12-04 10:29:25.000000000 +0100
+@@ -15,6 +15,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include	/* grr.
try_to_release_page, + do_invalidatepage */ +@@ -50,6 +51,7 @@ void do_invalidatepage(struct page *page + static inline void truncate_partial_page(struct page *page, unsigned partial) + { + zero_user_segment(page, partial, PAGE_CACHE_SIZE); ++ precache_flush(page->mapping, page->index); + if (page_has_private(page)) + do_invalidatepage(page, partial); + } +@@ -107,6 +109,10 @@ truncate_complete_page(struct address_sp + clear_page_mlock(page); + remove_from_page_cache(page); + ClearPageMappedToDisk(page); ++ /* this must be after the remove_from_page_cache which ++ * calls precache_put ++ */ ++ precache_flush(mapping, page->index); + page_cache_release(page); /* pagecache ref */ + return 0; + } +@@ -214,6 +220,7 @@ void truncate_inode_pages_range(struct a + pgoff_t next; + int i; + ++ precache_flush_inode(mapping); + if (mapping->nrpages == 0) + return; + +@@ -287,6 +294,7 @@ void truncate_inode_pages_range(struct a + } + pagevec_release(&pvec); + } ++ precache_flush_inode(mapping); + } + EXPORT_SYMBOL(truncate_inode_pages_range); + +@@ -423,6 +431,7 @@ int invalidate_inode_pages2_range(struct + int did_range_unmap = 0; + int wrapped = 0; + ++ precache_flush_inode(mapping); + pagevec_init(&pvec, 0); + next = start; + while (next <= end && !wrapped && +@@ -479,6 +488,7 @@ int invalidate_inode_pages2_range(struct + pagevec_release(&pvec); + cond_resched(); + } ++ precache_flush_inode(mapping); + return ret; + } + EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-fixup-common +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-fixup-common @@ -0,0 +1,424 @@ +Subject: Fix xen build. +From: jbeulich@novell.com +Patch-mainline: obsolete + +--- sle11sp1-2010-01-20.orig/drivers/acpi/acpica/hwsleep.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/acpi/acpica/hwsleep.c 2009-11-06 10:45:37.000000000 +0100 +@@ -419,6 +419,7 @@ ACPI_EXPORT_SYMBOL(acpi_enter_sleep_stat + * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED + * + ******************************************************************************/ ++#ifndef CONFIG_XEN + acpi_status asmlinkage acpi_enter_sleep_state_s4bios(void) + { + u32 in_value; +@@ -472,6 +473,7 @@ acpi_status asmlinkage acpi_enter_sleep_ + } + + ACPI_EXPORT_SYMBOL(acpi_enter_sleep_state_s4bios) ++#endif + + /******************************************************************************* + * +--- sle11sp1-2010-01-20.orig/drivers/acpi/processor_perflib.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/acpi/processor_perflib.c 2009-12-03 15:24:07.000000000 +0100 +@@ -168,7 +168,7 @@ int acpi_processor_ppc_has_changed(struc + else + #ifdef CONFIG_CPU_FREQ + return cpufreq_update_policy(pr->id); +-#elif CONFIG_PROCESSOR_EXTERNAL_CONTROL ++#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) + return processor_notify_external(pr, + PROCESSOR_PM_CHANGE, PM_TYPE_PERF); + #endif +--- sle11sp1-2010-01-20.orig/drivers/base/cpu.c 2010-01-20 10:20:51.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/base/cpu.c 2009-12-04 10:48:00.000000000 +0100 +@@ -80,7 +80,7 @@ static inline void register_cpu_control( + } + #endif /* CONFIG_HOTPLUG_CPU */ + +-#ifdef CONFIG_KEXEC ++#if defined(CONFIG_KEXEC) && !defined(CONFIG_XEN) + #include + + static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute *attr, +@@ -219,7 +219,7 @@ int __cpuinit register_cpu(struct cpu *c + if (!error) + register_cpu_under_node(num, cpu_to_node(num)); + +-#ifdef CONFIG_KEXEC ++#if defined(CONFIG_KEXEC) && 
!defined(CONFIG_XEN) + if (!error) + error = sysdev_create_file(&cpu->sysdev, &attr_crash_notes); + #endif +--- sle11sp1-2010-01-20.orig/drivers/ide/ide-lib.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/ide/ide-lib.c 2009-11-06 10:45:37.000000000 +0100 +@@ -18,6 +18,16 @@ void ide_toggle_bounce(ide_drive_t *driv + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + ++#ifndef CONFIG_XEN ++ if (!PCI_DMA_BUS_IS_PHYS) { ++ addr = BLK_BOUNCE_ANY; ++ } else if (on && drive->media == ide_disk) { ++ struct device *dev = drive->hwif->dev; ++ ++ if (dev && dev->dma_mask) ++ addr = *dev->dma_mask; ++ } ++#else + if (on && drive->media == ide_disk) { + struct device *dev = drive->hwif->dev; + +@@ -26,6 +36,7 @@ void ide_toggle_bounce(ide_drive_t *driv + else if (dev && dev->dma_mask) + addr = *dev->dma_mask; + } ++#endif + + if (drive->queue) + blk_queue_bounce_limit(drive->queue, addr); +--- sle11sp1-2010-01-20.orig/drivers/oprofile/buffer_sync.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/oprofile/buffer_sync.c 2009-11-06 10:45:37.000000000 +0100 +@@ -46,7 +46,9 @@ static cpumask_var_t marked_cpus; + static DEFINE_SPINLOCK(task_mortuary); + static void process_task_mortuary(void); + ++#ifdef CONFIG_XEN + static int cpu_current_domain[NR_CPUS]; ++#endif + + /* Take ownership of the task struct and place it on the + * list for processing. Only after two full buffer syncs +@@ -158,11 +160,13 @@ static void end_sync(void) + int sync_start(void) + { + int err; ++#ifdef CONFIG_XEN + int i; + + for (i = 0; i < NR_CPUS; i++) { + cpu_current_domain[i] = COORDINATOR_DOMAIN; + } ++#endif + + if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) + return -ENOMEM; +@@ -312,12 +316,14 @@ static void add_cpu_mode_switch(unsigned + } + } + ++#ifdef CONFIG_XEN + static void add_domain_switch(unsigned long domain_id) + { + add_event_entry(ESCAPE_CODE); + add_event_entry(DOMAIN_SWITCH_CODE); + add_event_entry(domain_id); + } ++#endif + + static void + add_user_ctx_switch(struct task_struct const *task, unsigned long cookie) +@@ -540,10 +546,12 @@ void sync_buffer(int cpu) + + add_cpu_switch(cpu); + ++#ifdef CONFIG_XEN + /* We need to assign the first samples in this CPU buffer to the + same domain that we were processing at the last sync_buffer */ + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) + add_domain_switch(cpu_current_domain[cpu]); ++#endif + + op_cpu_buffer_reset(cpu); + available = op_cpu_buffer_entries(cpu); +@@ -553,12 +561,14 @@ void sync_buffer(int cpu) + if (!sample) + break; + ++#ifdef CONFIG_XEN + if (domain_switch) { + cpu_current_domain[cpu] = sample->eip; + add_domain_switch(sample->eip); + domain_switch = 0; + continue; + } ++#endif + + if (is_code(sample->eip)) { + flags = sample->event; +@@ -584,17 +594,21 @@ void sync_buffer(int cpu) + cookie = get_exec_dcookie(mm); + add_user_ctx_switch(new, cookie); + } ++#ifdef CONFIG_XEN + if (flags & DOMAIN_SWITCH) + domain_switch = 1; ++#endif + if (op_cpu_buffer_get_size(&entry)) + add_data(&entry, mm); + continue; + } + ++#ifdef CONFIG_XEN + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { + add_sample_entry(sample->eip, sample->event); + continue; + } ++#endif + + if (state < sb_bt_start) + /* ignore sample */ +@@ -611,9 +625,11 @@ void sync_buffer(int cpu) + } + release_mm(mm); + ++#ifdef CONFIG_XEN + /* We reset domain to COORDINATOR at each CPU switch */ + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) + add_domain_switch(COORDINATOR_DOMAIN); ++#endif + + mark_done(cpu); + +--- 
sle11sp1-2010-01-20.orig/drivers/oprofile/cpu_buffer.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/oprofile/cpu_buffer.c 2009-11-06 10:45:37.000000000 +0100 +@@ -58,7 +58,11 @@ static void wq_sync_buffer(struct work_s + #define DEFAULT_TIMER_EXPIRE (HZ / 10) + static int work_enabled; + ++#ifndef CONFIG_XEN ++#define current_domain COORDINATOR_DOMAIN ++#else + static int32_t current_domain = COORDINATOR_DOMAIN; ++#endif + + unsigned long oprofile_get_cpu_buffer_size(void) + { +@@ -463,6 +467,7 @@ fail: + return; + } + ++#ifdef CONFIG_XEN + int oprofile_add_domain_switch(int32_t domain_id) + { + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; +@@ -481,6 +486,7 @@ int oprofile_add_domain_switch(int32_t d + + return 1; + } ++#endif + + /* + * This serves to avoid cpu buffer overflow, and makes sure +--- sle11sp1-2010-01-20.orig/drivers/oprofile/oprof.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/oprofile/oprof.c 2009-11-06 10:45:37.000000000 +0100 +@@ -39,6 +39,7 @@ static DEFINE_MUTEX(start_mutex); + */ + static int timer = 0; + ++#ifdef CONFIG_XEN + int oprofile_set_active(int active_domains[], unsigned int adomains) + { + int err; +@@ -64,6 +65,7 @@ int oprofile_set_passive(int passive_dom + mutex_unlock(&start_mutex); + return err; + } ++#endif + + int oprofile_setup(void) + { +--- sle11sp1-2010-01-20.orig/drivers/oprofile/oprofile_files.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/oprofile/oprofile_files.c 2009-11-06 10:45:37.000000000 +0100 +@@ -171,6 +171,8 @@ static const struct file_operations dump + .write = dump_write, + }; + ++#ifdef CONFIG_XEN ++ + #define TMPBUFSIZE 512 + + static unsigned int adomains = 0; +@@ -360,6 +362,8 @@ static const struct file_operations pass + .write = pdomain_write, + }; + ++#endif /* CONFIG_XEN */ ++ + void oprofile_create_files(struct super_block *sb, struct dentry *root) + { + /* reinitialize default values */ +@@ -370,8 +374,10 @@ void oprofile_create_files(struct super_ + + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++#ifdef CONFIG_XEN + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); ++#endif + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed); +--- sle11sp1-2010-01-20.orig/drivers/xen/core/smpboot.c 2009-05-19 09:16:41.000000000 +0200 ++++ sle11sp1-2010-01-20/drivers/xen/core/smpboot.c 2009-11-06 10:45:37.000000000 +0100 +@@ -57,7 +57,6 @@ u8 cpu_2_logical_apicid[NR_CPUS] = { [0 + + cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; + cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +-EXPORT_SYMBOL(cpu_core_map); + + #if defined(__i386__) + u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; +--- sle11sp1-2010-01-20.orig/include/linux/mm.h 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/include/linux/mm.h 2009-11-06 10:45:37.000000000 +0100 +@@ -210,6 +210,7 @@ struct vm_operations_struct { + int (*access)(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + ++#ifdef CONFIG_XEN + /* Area-specific function for clearing the PTE at @ptep. Returns the + * original value of @ptep. 
*/ + pte_t (*zap_pte)(struct vm_area_struct *vma, +@@ -217,6 +218,7 @@ struct vm_operations_struct { + + /* called before close() to indicate no more pages should be mapped */ + void (*unmap)(struct vm_area_struct *area); ++#endif + + #ifdef CONFIG_NUMA + /* +--- sle11sp1-2010-01-20.orig/include/linux/oprofile.h 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/include/linux/oprofile.h 2009-11-06 10:45:37.000000000 +0100 +@@ -16,8 +16,9 @@ + #include + #include + #include +- ++#ifdef CONFIG_XEN + #include ++#endif + + /* Each escaped entry is prefixed by ESCAPE_CODE + * then one of the following codes, then the +@@ -55,11 +56,12 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++#ifdef CONFIG_XEN + /* setup active domains with Xen */ + int (*set_active)(int *active_domains, unsigned int adomains); + /* setup passive domains with Xen */ + int (*set_passive)(int *passive_domains, unsigned int pdomains); +- ++#endif + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. */ +--- sle11sp1-2010-01-20.orig/include/linux/page-flags.h 2009-12-16 11:48:10.000000000 +0100 ++++ sle11sp1-2010-01-20/include/linux/page-flags.h 2009-12-16 11:49:19.000000000 +0100 +@@ -112,7 +112,7 @@ enum pageflags { + #endif + #ifdef CONFIG_XEN + PG_foreign, /* Page is owned by foreign allocator. */ +- PG_netback, /* Page is owned by netback */ ++ /* PG_netback, Page is owned by netback */ + PG_blkback, /* Page is owned by blkback */ + #endif + __NR_PAGEFLAGS, +@@ -357,9 +357,11 @@ CLEARPAGEFLAG(Uptodate, uptodate) + #define PageForeignDestructor(_page, order) \ + ((void (*)(struct page *, unsigned int))(_page)->index)(_page, order) + ++#if 0 + #define PageNetback(page) test_bit(PG_netback, &(page)->flags) + #define SetPageNetback(page) set_bit(PG_netback, &(page)->flags) + #define ClearPageNetback(page) clear_bit(PG_netback, &(page)->flags) ++#endif + + #define PageBlkback(page) test_bit(PG_blkback, &(page)->flags) + #define SetPageBlkback(page) set_bit(PG_blkback, &(page)->flags) +--- sle11sp1-2010-01-20.orig/kernel/kexec.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/kernel/kexec.c 2009-11-06 10:45:37.000000000 +0100 +@@ -45,8 +45,10 @@ + #include + #endif + ++#ifndef CONFIG_XEN + /* Per cpu memory for storing cpu states in case of system crash. */ + note_buf_t* crash_notes; ++#endif + int dump_after_notifier; + + /* vmcoreinfo stuff */ +@@ -1168,6 +1170,7 @@ static void final_note(u32 *buf) + memcpy(buf, ¬e, sizeof(note)); + } + ++#ifndef CONFIG_XEN + void crash_save_cpu(struct pt_regs *regs, int cpu) + { + struct elf_prstatus prstatus; +@@ -1193,6 +1196,7 @@ void crash_save_cpu(struct pt_regs *regs + &prstatus, sizeof(prstatus)); + final_note(buf); + } ++#endif + + #ifdef CONFIG_SYSCTL + static ctl_table dump_after_notifier_table[] = { +@@ -1220,6 +1224,7 @@ static ctl_table kexec_sys_table[] = { + + static int __init crash_notes_memory_init(void) + { ++#ifndef CONFIG_XEN + /* Allocate memory for saving cpu registers. 
*/ + crash_notes = alloc_percpu(note_buf_t); + if (!crash_notes) { +@@ -1227,6 +1232,7 @@ static int __init crash_notes_memory_ini + " states failed\n"); + return -ENOMEM; + } ++#endif + #ifdef CONFIG_SYSCTL + register_sysctl_table(kexec_sys_table); + #endif +--- sle11sp1-2010-01-20.orig/mm/memory.c 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/mm/memory.c 2009-12-04 10:48:12.000000000 +0100 +@@ -851,10 +851,12 @@ static unsigned long zap_pte_range(struc + page->index > details->last_index)) + continue; + } ++#ifdef CONFIG_XEN + if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte)) + ptent = vma->vm_ops->zap_pte(vma, addr, pte, + tlb->fullmm); + else ++#endif + ptent = ptep_get_and_clear_full(mm, addr, pte, + tlb->fullmm); + tlb_remove_tlb_entry(tlb, pte, addr); +--- sle11sp1-2010-01-20.orig/mm/mmap.c 2010-01-20 10:22:09.000000000 +0100 ++++ sle11sp1-2010-01-20/mm/mmap.c 2010-01-20 10:22:30.000000000 +0100 +@@ -1845,8 +1845,10 @@ static void unmap_region(struct mm_struc + + static inline void unmap_vma(struct vm_area_struct *vma) + { ++#ifdef CONFIG_XEN + if (unlikely(vma->vm_ops && vma->vm_ops->unmap)) + vma->vm_ops->unmap(vma); ++#endif + } + + /* +@@ -2159,8 +2161,10 @@ void exit_mmap(struct mm_struct *mm) + + arch_exit_mmap(mm); + ++#ifdef CONFIG_XEN + for (vma = mm->mmap; vma; vma = vma->vm_next) + unmap_vma(vma); ++#endif + + vma = mm->mmap; + if (!vma) /* Can happen if dup_mmap() received an OOM */ --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/sfc-external-sram +++ linux-ec2-2.6.32/debian.ec2/patches.xen/sfc-external-sram @@ -0,0 +1,298 @@ +From: Kieran Mansley +Subject: enable access to Falcon's external SRAM +References: bnc#489105 + +Include ability to reference external SRAM on Solarflare Falcon NICs to +allow event queues to be accessed by virtualised guests. 
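For context, the per-VNIC SRAM budget this patch introduces in falcon_dimension_resources() allows each VNIC at least 512 buffer-table entries plus one RX and one TX descriptor cache, at 8 bytes per entry. A standalone worked example (editorial sketch using the patch's constants; the patch's rounding loop also folds res->evq_timer_min into the bound, which is omitted here):

/* Editorial sketch of the SRAM sizing arithmetic used by the patch.
 * Constants from the patch: RX_DC_ENTRIES = 64, TX_DC_ENTRIES = 16,
 * 8 bytes per entry.
 */
#include <stdio.h>

int main(void)
{
	unsigned sram_bytes = 4 * 1024 * 1024;		/* one 4MB bank */
	unsigned vnic_bytes = 512 * 8 + 64 * 8 + 16 * 8; /* 4736 bytes */
	unsigned max_vnics = sram_bytes / vnic_bytes;	/* 885 */
	unsigned n_vnics = 1;

	while (n_vnics < max_vnics)	/* round up to a power of two */
		n_vnics *= 2;
	printf("%u bytes of SRAM -> %u VNICs, rounded to %u\n",
	       sram_bytes, max_vnics, n_vnics);		/* 885 -> 1024 */
	return 0;
}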
+ +Acked-by: jbeulich@novell.com + +--- head-2009-07-28.orig/drivers/net/sfc/falcon.c 2009-07-28 10:05:40.000000000 +0200 ++++ head-2009-07-28/drivers/net/sfc/falcon.c 2009-07-28 10:06:53.000000000 +0200 +@@ -36,6 +36,9 @@ + + /** + * struct falcon_nic_data - Falcon NIC state ++ * @sram_cfg: SRAM configuration value ++ * @tx_dc_base: Base address in SRAM of TX queue descriptor caches ++ * @rx_dc_base: Base address in SRAM of RX queue descriptor caches + * @next_buffer_table: First available buffer table id + * @resources: Resource information for driverlink client + * @pci_dev2: The secondary PCI device if present +@@ -44,6 +47,9 @@ + * @int_error_expire: Time at which error count will be expired + */ + struct falcon_nic_data { ++ int sram_cfg; ++ unsigned tx_dc_base; ++ unsigned rx_dc_base; + #ifndef CONFIG_SFC_DRIVERLINK + unsigned next_buffer_table; + #else +@@ -74,11 +80,11 @@ static int disable_dma_stats; + */ + #define TX_DC_ENTRIES 16 + #define TX_DC_ENTRIES_ORDER 0 +-#define TX_DC_BASE 0x130000 ++#define TX_DC_INTERNAL_BASE 0x130000 + + #define RX_DC_ENTRIES 64 + #define RX_DC_ENTRIES_ORDER 2 +-#define RX_DC_BASE 0x100000 ++#define RX_DC_INTERNAL_BASE 0x100000 + + static const unsigned int + /* "Large" EEPROM device: Atmel AT25640 or similar +@@ -468,9 +474,17 @@ void falcon_push_buffers(struct efx_tx_q + int falcon_probe_tx(struct efx_tx_queue *tx_queue) + { + struct efx_nic *efx = tx_queue->efx; +- return falcon_alloc_special_buffer(efx, &tx_queue->txd, +- FALCON_TXD_RING_SIZE * +- sizeof(efx_qword_t)); ++ int rc = falcon_alloc_special_buffer(efx, &tx_queue->txd, ++ FALCON_TXD_RING_SIZE * ++ sizeof(efx_qword_t)); ++#ifdef CONFIG_SFC_DRIVERLINK ++ if (rc == 0) { ++ struct falcon_nic_data *nic_data = efx->nic_data; ++ nic_data->resources.txq_min = max(nic_data->resources.txq_min, ++ (unsigned)tx_queue->queue + 1); ++ } ++#endif ++ return rc; + } + + void falcon_init_tx(struct efx_tx_queue *tx_queue) +@@ -610,9 +624,17 @@ void falcon_notify_rx_desc(struct efx_rx + int falcon_probe_rx(struct efx_rx_queue *rx_queue) + { + struct efx_nic *efx = rx_queue->efx; +- return falcon_alloc_special_buffer(efx, &rx_queue->rxd, +- FALCON_RXD_RING_SIZE * +- sizeof(efx_qword_t)); ++ int rc = falcon_alloc_special_buffer(efx, &rx_queue->rxd, ++ FALCON_RXD_RING_SIZE * ++ sizeof(efx_qword_t)); ++#ifdef CONFIG_SFC_DRIVERLINK ++ if (rc == 0) { ++ struct falcon_nic_data *nic_data = efx->nic_data; ++ nic_data->resources.rxq_min = max(nic_data->resources.rxq_min, ++ (unsigned)rx_queue->queue + 1); ++ } ++#endif ++ return rc; + } + + void falcon_init_rx(struct efx_rx_queue *rx_queue) +@@ -1120,9 +1142,18 @@ int falcon_probe_eventq(struct efx_chann + { + struct efx_nic *efx = channel->efx; + unsigned int evq_size; ++ int rc; + + evq_size = FALCON_EVQ_SIZE * sizeof(efx_qword_t); +- return falcon_alloc_special_buffer(efx, &channel->eventq, evq_size); ++ rc = falcon_alloc_special_buffer(efx, &channel->eventq, evq_size); ++#ifdef CONFIG_SFC_DRIVERLINK ++ if (rc == 0) { ++ struct falcon_nic_data *nic_data = efx->nic_data; ++ nic_data->resources.evq_int_min = max(nic_data->resources.evq_int_min, ++ (unsigned)channel->channel + 1); ++ } ++#endif ++ return rc; + } + + void falcon_init_eventq(struct efx_channel *channel) +@@ -2618,19 +2649,22 @@ fail5: + */ + static int falcon_reset_sram(struct efx_nic *efx) + { ++ struct falcon_nic_data *nic_data = efx->nic_data; + efx_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker; +- int count; ++ int count, onchip, sram_cfg_val; + + /* Set the SRAM wake/sleep GPIO appropriately. 
*/
++	onchip = (nic_data->sram_cfg == SRM_NB_BSZ_ONCHIP_ONLY);
+ 	falcon_read(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER);
+ 	EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OEN, 1);
+-	EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OUT, 1);
++	EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OUT, onchip);
+ 	falcon_write(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER);
+ 
+ 	/* Initiate SRAM reset */
++	sram_cfg_val = onchip ? 0 : nic_data->sram_cfg;
+ 	EFX_POPULATE_OWORD_2(srm_cfg_reg_ker,
+ 			     SRAM_OOB_BT_INIT_EN, 1,
+-			     SRM_NUM_BANKS_AND_BANK_SIZE, 0);
++			     SRM_NUM_BANKS_AND_BANK_SIZE, sram_cfg_val);
+ 	falcon_write(efx, &srm_cfg_reg_ker, SRM_CFG_REG_KER);
+ 
+ 	/* Wait for SRAM reset to complete */
+@@ -2702,8 +2736,10 @@ static void falcon_remove_spi_devices(st
+ /* Extract non-volatile configuration */
+ static int falcon_probe_nvconfig(struct efx_nic *efx)
+ {
++	struct falcon_nic_data *nic_data = efx->nic_data;
+ 	struct falcon_nvconfig *nvconfig;
+ 	int board_rev;
++	bool onchip_sram;
+ 	int rc;
+ 
+ 	nvconfig = kmalloc(sizeof(*nvconfig), GFP_KERNEL);
+@@ -2716,6 +2752,7 @@ static int falcon_probe_nvconfig(struct
+ 		efx->phy_type = PHY_TYPE_NONE;
+ 		efx->mdio.prtad = MDIO_PRTAD_NONE;
+ 		board_rev = 0;
++		onchip_sram = true;
+ 		rc = 0;
+ 	} else if (rc) {
+ 		goto fail1;
+@@ -2726,6 +2763,13 @@ static int falcon_probe_nvconfig(struct
+ 		efx->phy_type = v2->port0_phy_type;
+ 		efx->mdio.prtad = v2->port0_phy_addr;
+ 		board_rev = le16_to_cpu(v2->board_revision);
++#ifdef CONFIG_SFC_DRIVERLINK
++		onchip_sram = EFX_OWORD_FIELD(nvconfig->nic_stat_reg,
++					      ONCHIP_SRAM);
++#else
++		/* We have no use for external SRAM */
++		onchip_sram = true;
++#endif
+ 
+ 		if (le16_to_cpu(nvconfig->board_struct_ver) >= 3) {
+ 			__le32 fl = v3->spi_device_type[EE_SPI_FLASH];
+@@ -2750,6 +2794,21 @@ static int falcon_probe_nvconfig(struct
+ 
+ 	efx_set_board_info(efx, board_rev);
+ 
++	/* Read the SRAM configuration. The register is initialised
++	 * automatically but may have been reset since boot.
++	 */
++	if (onchip_sram) {
++		nic_data->sram_cfg = SRM_NB_BSZ_ONCHIP_ONLY;
++	} else {
++		nic_data->sram_cfg =
++			EFX_OWORD_FIELD(nvconfig->srm_cfg_reg,
++					SRM_NUM_BANKS_AND_BANK_SIZE);
++		WARN_ON(nic_data->sram_cfg == SRM_NB_BSZ_RESERVED);
++		/* Replace an invalid setting with the smallest default */
++		if (nic_data->sram_cfg == SRM_NB_BSZ_DEFAULT)
++			nic_data->sram_cfg = SRM_NB_BSZ_1BANKS_2M;
++	}
++
+ 	kfree(nvconfig);
+ 	return 0;
+ 
+@@ -2765,9 +2824,9 @@ static int falcon_probe_nvconfig(struct
+  * should live. */
+ static int falcon_dimension_resources(struct efx_nic *efx)
+ {
++	struct falcon_nic_data *nic_data = efx->nic_data;
+ #ifdef CONFIG_SFC_DRIVERLINK
+ 	unsigned internal_dcs_entries;
+-	struct falcon_nic_data *nic_data = efx->nic_data;
+ 	struct efx_dl_falcon_resources *res = &nic_data->resources;
+ 
+ 	/* Fill out the driverlink resource list */
+@@ -2800,16 +2859,64 @@ static int falcon_dimension_resources(st
+ 		break;
+ 	}
+ 
+-	/* Internal SRAM only for now */
+-	res->rxq_lim = internal_dcs_entries / RX_DC_ENTRIES;
+-	res->txq_lim = internal_dcs_entries / TX_DC_ENTRIES;
+-	res->buffer_table_lim = 8192;
++	if (nic_data->sram_cfg == SRM_NB_BSZ_ONCHIP_ONLY) {
++		res->rxq_lim = internal_dcs_entries / RX_DC_ENTRIES;
++		res->txq_lim = internal_dcs_entries / TX_DC_ENTRIES;
++		res->buffer_table_lim = 8192;
++		nic_data->tx_dc_base = TX_DC_INTERNAL_BASE;
++		nic_data->rx_dc_base = RX_DC_INTERNAL_BASE;
++	} else {
++		unsigned sram_bytes, vnic_bytes, max_vnics, n_vnics, dcs;
++
++		/* Determine how much SRAM we have to play with.
We have ++ * to fit buffer table and descriptor caches in. ++ */ ++ switch (nic_data->sram_cfg) { ++ case SRM_NB_BSZ_1BANKS_2M: ++ default: ++ sram_bytes = 2 * 1024 * 1024; ++ break; ++ case SRM_NB_BSZ_1BANKS_4M: ++ case SRM_NB_BSZ_2BANKS_4M: ++ sram_bytes = 4 * 1024 * 1024; ++ break; ++ case SRM_NB_BSZ_1BANKS_8M: ++ case SRM_NB_BSZ_2BANKS_8M: ++ sram_bytes = 8 * 1024 * 1024; ++ break; ++ case SRM_NB_BSZ_2BANKS_16M: ++ sram_bytes = 16 * 1024 * 1024; ++ break; ++ } ++ /* For each VNIC allow at least 512 buffer table entries ++ * and descriptor cache for an rxq and txq. Buffer table ++ * space for evqs and dmaqs is relatively trivial, so not ++ * considered in this calculation. ++ */ ++ vnic_bytes = 512 * 8 + RX_DC_ENTRIES * 8 + TX_DC_ENTRIES * 8; ++ max_vnics = sram_bytes / vnic_bytes; ++ for (n_vnics = 1; n_vnics < res->evq_timer_min + max_vnics;) ++ n_vnics *= 2; ++ res->rxq_lim = n_vnics; ++ res->txq_lim = n_vnics; ++ ++ dcs = n_vnics * TX_DC_ENTRIES * 8; ++ nic_data->tx_dc_base = sram_bytes - dcs; ++ dcs = n_vnics * RX_DC_ENTRIES * 8; ++ nic_data->rx_dc_base = nic_data->tx_dc_base - dcs; ++ res->buffer_table_lim = nic_data->rx_dc_base / 8; ++ } + + if (FALCON_IS_DUAL_FUNC(efx)) + res->flags |= EFX_DL_FALCON_DUAL_FUNC; + + if (EFX_INT_MODE_USE_MSI(efx)) + res->flags |= EFX_DL_FALCON_USE_MSI; ++#else ++ /* We ignore external SRAM */ ++ EFX_BUG_ON_PARANOID(nic_data->sram_cfg != SRM_NB_BSZ_ONCHIP_ONLY); ++ nic_data->tx_dc_base = TX_DC_INTERNAL_BASE; ++ nic_data->rx_dc_base = RX_DC_INTERNAL_BASE; + #endif + + return 0; +@@ -2998,13 +3105,15 @@ int falcon_probe_nic(struct efx_nic *efx + */ + int falcon_init_nic(struct efx_nic *efx) + { ++ struct falcon_nic_data *nic_data = efx->nic_data; + efx_oword_t temp; + unsigned thresh; + int rc; + +- /* Use on-chip SRAM */ ++ /* Use on-chip SRAM if wanted. */ + falcon_read(efx, &temp, NIC_STAT_REG); +- EFX_SET_OWORD_FIELD(temp, ONCHIP_SRAM, 1); ++ EFX_SET_OWORD_FIELD(temp, ONCHIP_SRAM, ++ nic_data->sram_cfg == SRM_NB_BSZ_ONCHIP_ONLY); + falcon_write(efx, &temp, NIC_STAT_REG); + + /* Set the source of the GMAC clock */ +@@ -3023,9 +3132,9 @@ int falcon_init_nic(struct efx_nic *efx) + return rc; + + /* Set positions of descriptor caches in SRAM. */ +- EFX_POPULATE_OWORD_1(temp, SRM_TX_DC_BASE_ADR, TX_DC_BASE / 8); ++ EFX_POPULATE_OWORD_1(temp, SRM_TX_DC_BASE_ADR, nic_data->tx_dc_base / 8); + falcon_write(efx, &temp, SRM_TX_DC_CFG_REG_KER); +- EFX_POPULATE_OWORD_1(temp, SRM_RX_DC_BASE_ADR, RX_DC_BASE / 8); ++ EFX_POPULATE_OWORD_1(temp, SRM_RX_DC_BASE_ADR, nic_data->rx_dc_base / 8); + falcon_write(efx, &temp, SRM_RX_DC_CFG_REG_KER); + + /* Set TX descriptor cache size. 
*/ --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.21 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.21 @@ -0,0 +1,5056 @@ +From: www.kernel.org +Subject: Linux 2.6.21 +Patch-mainline: 2.6.21 + +Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py + +Acked-by: jbeulich@novell.com + +--- head-2010-01-18.orig/arch/x86/Kconfig 2009-11-20 11:00:05.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig 2009-11-06 10:46:41.000000000 +0100 +@@ -69,13 +69,15 @@ config GENERIC_CMOS_UPDATE + + config CLOCKSOURCE_WATCHDOG + def_bool y ++ depends on !X86_XEN + + config GENERIC_CLOCKEVENTS + def_bool y ++ depends on !X86_XEN + + config GENERIC_CLOCKEVENTS_BROADCAST + def_bool y +- depends on X86_64 || (X86_32 && X86_LOCAL_APIC) ++ depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN) + + config LOCKDEP_SUPPORT + def_bool y +--- head-2010-01-18.orig/arch/x86/kernel/Makefile 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/Makefile 2009-11-06 10:46:41.000000000 +0100 +@@ -138,7 +138,7 @@ ifeq ($(CONFIG_X86_64),y) + pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o + endif + +-disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \ +- smpboot_$(BITS).o tsc_$(BITS).o ++disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ ++ smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o + disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o + %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := +--- head-2010-01-18.orig/arch/x86/kernel/apic_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -25,6 +25,8 @@ + #include + #include + #include ++#include ++#include + #include + + #include +@@ -56,83 +58,26 @@ static cpumask_t timer_bcast_ipi; + */ + + /* +- * Debug level ++ * Debug level, exported for io_apic.c + */ + int apic_verbosity; + + #ifndef CONFIG_XEN + static int modern_apic(void) + { +- unsigned int lvr, version; + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && +- boot_cpu_data.x86 >= 0xf) ++ boot_cpu_data.x86 >= 0xf) + return 1; +- lvr = apic_read(APIC_LVR); +- version = GET_APIC_VERSION(lvr); +- return version >= 0x14; ++ return lapic_get_version() >= 0x14; + } + #endif /* !CONFIG_XEN */ + +-/* +- * 'what should we do if we get a hw irq event on an illegal vector'. +- * each architecture has to answer this themselves. +- */ +-void ack_bad_irq(unsigned int irq) +-{ +- printk("unexpected IRQ trap at vector %02x\n", irq); +- /* +- * Currently unexpected vectors happen only on SMP and APIC. +- * We _must_ ack these because every local APIC has only N +- * irq slots per priority level, and a 'hanging, unacked' IRQ +- * holds up an irq slot - in excessive cases (when multiple +- * unexpected vectors occur) that might lock up the APIC +- * completely. +- * But only ack when the APIC is enabled -AK +- */ +- if (cpu_has_apic) +- ack_APIC_irq(); +-} +- + int get_physical_broadcast(void) + { + return 0xff; + } + +-#ifndef CONFIG_XEN +-#ifndef CONFIG_SMP +-static void up_apic_timer_interrupt_call(void) +-{ +- int cpu = smp_processor_id(); +- +- /* +- * the NMI deadlock-detector uses this. 
+- */ +- per_cpu(irq_stat, cpu).apic_timer_irqs++; +- +- smp_local_timer_interrupt(); +-} +-#endif +- +-void smp_send_timer_broadcast_ipi(void) +-{ +- cpumask_t mask; +- +- cpus_and(mask, cpu_online_map, timer_bcast_ipi); +- if (!cpus_empty(mask)) { +-#ifdef CONFIG_SMP +- send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +-#else +- /* +- * We can directly call the apic timer interrupt handler +- * in UP case. Minus all irq related functions +- */ +- up_apic_timer_interrupt_call(); +-#endif +- } +-} +-#endif +- + int setup_profiling_timer(unsigned int multiplier) + { + return -EINVAL; +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -612,7 +612,7 @@ void __init early_cpu_init(void) + struct pt_regs * __devinit idle_regs(struct pt_regs *regs) + { + memset(regs, 0, sizeof(struct pt_regs)); +- regs->xgs = __KERNEL_PDA; ++ regs->xfs = __KERNEL_PDA; + return regs; + } + +@@ -669,12 +669,12 @@ struct i386_pda boot_pda = { + .pcurrent = &init_task, + }; + +-static inline void set_kernel_gs(void) ++static inline void set_kernel_fs(void) + { +- /* Set %gs for this CPU's PDA. Memory clobber is to create a ++ /* Set %fs for this CPU's PDA. Memory clobber is to create a + barrier with respect to any PDA operations, so the compiler + doesn't move any before here. */ +- asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); ++ asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); + } + + /* Initialize the CPU's GDT and PDA. The boot CPU does this for +@@ -732,7 +732,7 @@ void __cpuinit cpu_set_gdt(int cpu) + } + BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); + +- set_kernel_gs(); ++ set_kernel_fs(); + } + + /* Common CPU init for both boot and secondary CPUs */ +@@ -777,8 +777,8 @@ static void __cpuinit _cpu_init(int cpu, + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + #endif + +- /* Clear %fs. */ +- asm volatile ("mov %0, %%fs" : : "r" (0)); ++ /* Clear %gs. 
*/ ++ asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); +--- head-2010-01-18.orig/arch/x86/kernel/e820_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/e820_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + + #ifdef CONFIG_EFI +@@ -157,21 +158,22 @@ static struct resource standard_io_resou + .flags = IORESOURCE_BUSY | IORESOURCE_IO + } }; + +-static int romsignature(const unsigned char *x) ++#define ROMSIGNATURE 0xaa55 ++ ++static int __init romsignature(const unsigned char *rom) + { + unsigned short sig; +- int ret = 0; +- if (probe_kernel_address((const unsigned short *)x, sig) == 0) +- ret = (sig == 0xaa55); +- return ret; ++ ++ return probe_kernel_address((const unsigned short *)rom, sig) == 0 && ++ sig == ROMSIGNATURE; + } + + static int __init romchecksum(unsigned char *rom, unsigned long length) + { +- unsigned char *p, sum = 0; ++ unsigned char sum; + +- for (p = rom; p < rom + length; p++) +- sum += *p; ++ for (sum = 0; length; length--) ++ sum += *rom++; + return sum == 0; + } + +--- head-2010-01-18.orig/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:46:41.000000000 +0100 +@@ -30,7 +30,7 @@ + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es +- * 24(%esp) - %gs ++ * 24(%esp) - %fs + * 28(%esp) - orig_eax + * 2C(%esp) - %eip + * 30(%esp) - %cs +@@ -102,9 +102,9 @@ NMI_MASK = 0x80000000 + + #define SAVE_ALL \ + cld; \ +- pushl %gs; \ ++ pushl %fs; \ + CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET gs, 0;*/\ ++ /*CFI_REL_OFFSET fs, 0;*/\ + pushl %es; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET es, 0;*/\ +@@ -136,7 +136,7 @@ NMI_MASK = 0x80000000 + movl %edx, %ds; \ + movl %edx, %es; \ + movl $(__KERNEL_PDA), %edx; \ +- movl %edx, %gs ++ movl %edx, %fs + + #define RESTORE_INT_REGS \ + popl %ebx; \ +@@ -169,9 +169,9 @@ NMI_MASK = 0x80000000 + 2: popl %es; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE es;*/\ +-3: popl %gs; \ ++3: popl %fs; \ + CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE gs;*/\ ++ /*CFI_RESTORE fs;*/\ + .pushsection .fixup,"ax"; \ + 4: movl $0,(%esp); \ + jmp 1b; \ +@@ -230,6 +230,7 @@ ENTRY(ret_from_fork) + CFI_ADJUST_CFA_OFFSET -4 + jmp syscall_exit + CFI_ENDPROC ++END(ret_from_fork) + + /* + * Return to user mode is not as complex as all this looks, +@@ -261,6 +262,7 @@ ENTRY(resume_userspace) + # int/exception return? 
+ jne work_pending + jmp restore_all ++END(ret_from_exception) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +@@ -275,6 +277,7 @@ need_resched: + jz restore_all + call preempt_schedule_irq + jmp need_resched ++END(resume_kernel) + #endif + CFI_ENDPROC + +@@ -352,16 +355,17 @@ sysenter_past_esp: + movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp + TRACE_IRQS_ON +-1: mov PT_GS(%esp), %gs ++1: mov PT_FS(%esp), %fs + ENABLE_INTERRUPTS_SYSEXIT + CFI_ENDPROC + .pushsection .fixup,"ax" +-2: movl $0,PT_GS(%esp) ++2: movl $0,PT_FS(%esp) + jmp 1b + .section __ex_table,"a" + .align 4 + .long 1b,2b + .popsection ++ENDPROC(sysenter_entry) + + # pv sysenter call handler stub + ENTRY(sysenter_entry_pv) +@@ -533,6 +537,7 @@ hypervisor_iret: + jmp hypercall_page + (__HYPERVISOR_iret * 32) + #endif + CFI_ENDPROC ++ENDPROC(system_call) + + # perform work that needs to be done immediately before resumption + ALIGN +@@ -578,6 +583,7 @@ work_notifysig_v86: + xorl %edx, %edx + call do_notify_resume + jmp resume_userspace_sig ++END(work_pending) + + # perform syscall exit tracing + ALIGN +@@ -593,6 +599,7 @@ syscall_trace_entry: + cmpl $(nr_syscalls), %eax + jnae syscall_call + jmp syscall_exit ++END(syscall_trace_entry) + + # perform syscall exit tracing + ALIGN +@@ -606,6 +613,7 @@ syscall_exit_work: + movl $1, %edx + call do_syscall_trace + jmp resume_userspace ++END(syscall_exit_work) + CFI_ENDPROC + + RING0_INT_FRAME # can't unwind into user space anyway +@@ -616,16 +624,18 @@ syscall_fault: + GET_THREAD_INFO(%ebp) + movl $-EFAULT,PT_EAX(%esp) + jmp resume_userspace ++END(syscall_fault) + + syscall_badsys: + movl $-ENOSYS,PT_EAX(%esp) + jmp resume_userspace ++END(syscall_badsys) + CFI_ENDPROC + + #ifndef CONFIG_XEN + #define FIXUP_ESPFIX_STACK \ + /* since we are on a wrong stack, we cant make it a C code :( */ \ +- movl %gs:PDA_cpu, %ebx; \ ++ movl %fs:PDA_cpu, %ebx; \ + PER_CPU(cpu_gdt_descr, %ebx); \ + movl GDS_address(%ebx), %ebx; \ + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ +@@ -656,9 +666,9 @@ syscall_badsys: + ENTRY(interrupt) + .text + +-vector=0 + ENTRY(irq_entries_start) + RING0_INT_FRAME ++vector=0 + .rept NR_IRQS + ALIGN + .if vector +@@ -667,11 +677,16 @@ ENTRY(irq_entries_start) + 1: pushl $~(vector) + CFI_ADJUST_CFA_OFFSET 4 + jmp common_interrupt +-.data ++ .previous + .long 1b +-.text ++ .text + vector=vector+1 + .endr ++END(irq_entries_start) ++ ++.previous ++END(interrupt) ++.previous + + /* + * the CPU automatically disables interrupts when executing an IRQ vector, +@@ -684,6 +699,7 @@ common_interrupt: + movl %esp,%eax + call do_IRQ + jmp ret_from_intr ++ENDPROC(common_interrupt) + CFI_ENDPROC + + #define BUILD_INTERRUPT(name, nr) \ +@@ -696,10 +712,16 @@ ENTRY(name) \ + movl %esp,%eax; \ + call smp_/**/name; \ + jmp ret_from_intr; \ +- CFI_ENDPROC ++ CFI_ENDPROC; \ ++ENDPROC(name) + + /* The include is where all of the SMP etc. 
interrupts come from */ + #include "entry_arch.h" ++ ++/* This alternate entry is needed because we hijack the apic LVTT */ ++#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) ++BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) ++#endif + #else + #define UNWIND_ESPFIX_STACK + #endif +@@ -710,7 +732,7 @@ KPROBE_ENTRY(page_fault) + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: +- /* the function address is in %gs's slot on the stack */ ++ /* the function address is in %fs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ +@@ -739,20 +761,20 @@ error_code: + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld +- pushl %gs ++ pushl %fs + CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET gs, 0*/ ++ /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PDA), %ecx +- movl %ecx, %gs ++ movl %ecx, %fs + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ +- movl PT_GS(%esp), %edi # get the function address ++ movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart +- mov %ecx, PT_GS(%esp) +- /*CFI_REL_OFFSET gs, ES*/ ++ mov %ecx, PT_FS(%esp) ++ /*CFI_REL_OFFSET fs, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es +@@ -844,7 +866,7 @@ critical_fixup_table: + .byte 6 # pop %eax + .byte 7 # pop %ds + .byte 8 # pop %es +- .byte 9,9 # pop %gs ++ .byte 9,9 # pop %fs + .byte 10,10,10 # add $4,%esp + .byte 11 # iret + .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS +@@ -909,6 +931,7 @@ ENTRY(coprocessor_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(coprocessor_error) + + ENTRY(simd_coprocessor_error) + RING0_INT_FRAME +@@ -918,6 +941,7 @@ ENTRY(simd_coprocessor_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(simd_coprocessor_error) + + ENTRY(device_not_available) + RING0_INT_FRAME +@@ -940,6 +964,7 @@ device_available_emulate: + call math_state_restore + jmp ret_from_exception + CFI_ENDPROC ++END(device_not_available) + + #ifndef CONFIG_XEN + /* +@@ -1101,10 +1126,12 @@ ENTRY(native_iret) + .align 4 + .long 1b,iret_exc + .previous ++END(native_iret) + + ENTRY(native_irq_enable_sysexit) + sti + sysexit ++END(native_irq_enable_sysexit) + #endif + + KPROBE_ENTRY(int3) +@@ -1127,6 +1154,7 @@ ENTRY(overflow) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(overflow) + + ENTRY(bounds) + RING0_INT_FRAME +@@ -1136,6 +1164,7 @@ ENTRY(bounds) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(bounds) + + ENTRY(invalid_op) + RING0_INT_FRAME +@@ -1145,6 +1174,7 @@ ENTRY(invalid_op) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(invalid_op) + + ENTRY(coprocessor_segment_overrun) + RING0_INT_FRAME +@@ -1154,6 +1184,7 @@ ENTRY(coprocessor_segment_overrun) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(coprocessor_segment_overrun) + + ENTRY(invalid_TSS) + RING0_EC_FRAME +@@ -1161,6 +1192,7 @@ ENTRY(invalid_TSS) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(invalid_TSS) + + ENTRY(segment_not_present) + RING0_EC_FRAME +@@ -1168,6 +1200,7 @@ ENTRY(segment_not_present) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(segment_not_present) + + ENTRY(stack_segment) + RING0_EC_FRAME +@@ -1175,6 +1208,7 @@ ENTRY(stack_segment) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(stack_segment) + + KPROBE_ENTRY(general_protection) + RING0_EC_FRAME +@@ -1190,6 +1224,7 @@ 
ENTRY(alignment_check) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(alignment_check) + + ENTRY(divide_error) + RING0_INT_FRAME +@@ -1199,6 +1234,7 @@ ENTRY(divide_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(divide_error) + + #ifdef CONFIG_X86_MCE + ENTRY(machine_check) +@@ -1209,6 +1245,7 @@ ENTRY(machine_check) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(machine_check) + #endif + + #ifndef CONFIG_XEN +@@ -1228,6 +1265,7 @@ ENTRY(fixup_4gb_segment) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(spurious_interrupt_bug) + + ENTRY(kernel_thread_helper) + pushl $0 # fake return address for unwinder +--- head-2010-01-18.orig/arch/x86/kernel/head_32-xen.S 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head_32-xen.S 2009-11-06 10:46:41.000000000 +0100 +@@ -27,6 +27,7 @@ + #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability + #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id + ++.section .text.head,"ax",@progbits + #define VIRT_ENTRY_OFFSET 0x0 + .org VIRT_ENTRY_OFFSET + ENTRY(startup_32) +@@ -60,11 +61,11 @@ ENTRY(startup_32) + + movb $1,X86_HARD_MATH + +- xorl %eax,%eax # Clear FS +- movl %eax,%fs ++ xorl %eax,%eax # Clear GS ++ movl %eax,%gs + + movl $(__KERNEL_PDA),%eax +- mov %eax,%gs ++ mov %eax,%fs + + cld # gcc2 wants the direction flag cleared at all times + +@@ -75,7 +76,7 @@ ENTRY(startup_32) + * Point the GDT at this CPU's PDA. This will be + * cpu_gdt_table and boot_pda. + */ +-setup_pda: ++ENTRY(setup_pda) + /* get the PDA pointer */ + movl $boot_pda, %eax + +--- head-2010-01-18.orig/arch/x86/kernel/io_apic_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/io_apic_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -167,7 +167,7 @@ static inline void io_apic_write(unsigne + */ + static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) + { +- volatile struct io_apic *io_apic = io_apic_base(apic); ++ volatile struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +@@ -392,7 +392,7 @@ static void set_ioapic_affinity_irq(unsi + break; + entry = irq_2_pin + entry->next; + } +- set_native_irq_info(irq, cpumask); ++ irq_desc[irq].affinity = cpumask; + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -531,8 +531,8 @@ static void do_irq_balance(void) + package_index = CPU_TO_PACKAGEINDEX(i); + for (j = 0; j < NR_IRQS; j++) { + unsigned long value_now, delta; +- /* Is this an active IRQ? */ +- if (!irq_desc[j].action) ++ /* Is this an active IRQ or balancing disabled ? 
*/ ++ if (!irq_desc[j].action || irq_balancing_disabled(j)) + continue; + if ( package_index == i ) + IRQ_DELTA(package_index,j) = 0; +@@ -785,7 +785,7 @@ failed: + return 0; + } + +-int __init irqbalance_disable(char *str) ++int __devinit irqbalance_disable(char *str) + { + irqbalance_disabled = 1; + return 1; +@@ -1329,11 +1329,9 @@ static void ioapic_register_intr(int irq + trigger == IOAPIC_LEVEL) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, "fasteoi"); +- else { +- irq_desc[irq].status |= IRQ_DELAYED_DISABLE; ++ else + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); +- } + set_intr_gate(vector, interrupt[irq]); + } + #else +@@ -1407,7 +1405,6 @@ static void __init setup_IO_APIC_irqs(vo + } + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, entry); +- set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + } +@@ -1638,7 +1635,7 @@ void /*__init*/ print_local_APIC(void * + v = apic_read(APIC_LVR); + printk(KERN_INFO "... APIC VERSION: %08x\n", v); + ver = GET_APIC_VERSION(v); +- maxlvt = get_maxlvt(); ++ maxlvt = lapic_get_maxlvt(); + + v = apic_read(APIC_TASKPRI); + printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); +@@ -1976,7 +1973,7 @@ static void __init setup_ioapic_ids_from + #endif + + #ifndef CONFIG_XEN +-static int no_timer_check __initdata; ++int no_timer_check __initdata; + + static int __init notimercheck(char *s) + { +@@ -2369,7 +2366,7 @@ static inline void __init check_timer(vo + + disable_8259A_irq(0); + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, +- "fasteio"); ++ "fasteoi"); + apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + enable_8259A_irq(0); + +@@ -2662,7 +2659,7 @@ static void set_msi_irq_affinity(unsigne + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); +- set_native_irq_info(irq, mask); ++ irq_desc[irq].affinity = mask; + } + #endif /* CONFIG_SMP */ + +@@ -2681,25 +2678,32 @@ static struct irq_chip msi_chip = { + .retrigger = ioapic_retrigger_irq, + }; + +-int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) ++int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) + { + struct msi_msg msg; +- int ret; ++ int irq, ret; ++ irq = create_irq(); ++ if (irq < 0) ++ return irq; ++ ++ set_irq_msi(irq, desc); + ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) ++ if (ret < 0) { ++ destroy_irq(irq); + return ret; ++ } + + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, + "edge"); + +- return 0; ++ return irq; + } + + void arch_teardown_msi_irq(unsigned int irq) + { +- return; ++ destroy_irq(irq); + } + + #endif /* CONFIG_PCI_MSI */ +@@ -2739,7 +2743,7 @@ static void set_ht_irq_affinity(unsigned + dest = cpu_mask_to_apicid(mask); + + target_ht_irq(irq, dest); +- set_native_irq_info(irq, mask); ++ irq_desc[irq].affinity = mask; + } + #endif + +@@ -2947,7 +2951,6 @@ int io_apic_set_pci_routing (int ioapic, + + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(ioapic, pin, entry); +- set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +--- head-2010-01-18.orig/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -10,7 +10,6 @@ + * io_apic.c.) 
+ */ + +-#include <asm/uaccess.h> + #include <linux/module.h> + #include <linux/seq_file.h> + #include <linux/interrupt.h> +@@ -19,19 +18,34 @@ + #include <linux/cpu.h> + #include <linux/delay.h> + ++#include <asm/apic.h> ++#include <asm/uaccess.h> ++ + DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; + EXPORT_PER_CPU_SYMBOL(irq_stat); + +-#ifndef CONFIG_X86_LOCAL_APIC + /* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ + void ack_bad_irq(unsigned int irq) + { +- printk("unexpected IRQ trap at vector %02x\n", irq); +-} ++ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); ++ ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) ++ /* ++ * Currently unexpected vectors happen only on SMP and APIC. ++ * We _must_ ack these because every local APIC has only N ++ * irq slots per priority level, and a 'hanging, unacked' IRQ ++ * holds up an irq slot - in excessive cases (when multiple ++ * unexpected vectors occur) that might lock up the APIC ++ * completely. ++ * But only ack when the APIC is enabled -AK ++ */ ++ if (cpu_has_apic) ++ ack_APIC_irq(); + #endif ++} + + #ifdef CONFIG_4KSTACKS + /* +--- head-2010-01-18.orig/arch/x86/kernel/microcode-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/microcode-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -108,7 +108,7 @@ static ssize_t microcode_write (struct f + return ret; + } + +-static struct file_operations microcode_fops = { ++static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +--- head-2010-01-18.orig/arch/x86/kernel/mpparse_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/mpparse_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -1079,7 +1079,7 @@ int mp_register_gsi(u32 gsi, int trigger + static int gsi_to_irq[MAX_GSI_NUM]; + + /* Don't set up the ACPI SCI because it's already set up */ +- if (acpi_fadt.sci_int == gsi) ++ if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); +@@ -1136,7 +1136,7 @@ int mp_register_gsi(u32 gsi, int trigger + /* + * Don't assign IRQ used by ACPI SCI + */ +- if (gsi == acpi_fadt.sci_int) ++ if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { +--- head-2010-01-18.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -308,7 +308,7 @@ int dma_declare_coherent_memory(struct d + return DMA_MEMORY_IO; + + free1_out: +- kfree(dev->dma_mem->bitmap); ++ kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); +--- head-2010-01-18.orig/arch/x86/kernel/pcspeaker.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pcspeaker.c 2009-11-06 10:46:41.000000000 +0100 +@@ -6,6 +6,11 @@ static __init int add_pcspkr(void) + { + struct platform_device *pd; + ++#ifdef CONFIG_XEN ++ if (!is_initial_xendomain()) ++ return 0; ++#endif ++ + pd = platform_device_register_simple("pcspkr", -1, NULL, 0); + + return IS_ERR(pd) ? 
PTR_ERR(pd) : 0; +--- head-2010-01-18.orig/arch/x86/kernel/process_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include <linux/tick.h> + + #include + #include +@@ -160,6 +161,7 @@ void cpu_idle(void) + + /* endless idle loop with no priority at all */ + while (1) { ++ tick_nohz_stop_sched_tick(); + while (!need_resched()) { + void (*idle)(void); + +@@ -175,6 +177,7 @@ + __get_cpu_var(irq_stat).idle_timestamp = jiffies; + idle(); + } ++ tick_nohz_restart_sched_tick(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); +@@ -247,8 +250,8 @@ void show_regs(struct pt_regs * regs) + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); +- printk(" DS: %04x ES: %04x GS: %04x\n", +- 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); ++ printk(" DS: %04x ES: %04x FS: %04x\n", ++ 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + + cr0 = read_cr0(); + cr2 = read_cr2(); +@@ -279,7 +282,7 @@ int kernel_thread(int (*fn)(void *), voi + + regs.xds = __USER_DS; + regs.xes = __USER_DS; +- regs.xgs = __KERNEL_PDA; ++ regs.xfs = __KERNEL_PDA; + regs.orig_eax = -1; + regs.eip = (unsigned long) kernel_thread_helper; + regs.xcs = __KERNEL_CS | get_kernel_rpl(); +@@ -356,7 +359,7 @@ int copy_thread(int nr, unsigned long cl + + p->thread.eip = (unsigned long) ret_from_fork; + +- savesegment(fs,p->thread.fs); ++ savesegment(gs,p->thread.gs); + + tsk = current; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { +@@ -434,8 +437,8 @@ void dump_thread(struct pt_regs * regs, + dump->regs.eax = regs->eax; + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; +- savesegment(fs,dump->regs.fs); +- dump->regs.gs = regs->xgs; ++ dump->regs.fs = regs->xfs; ++ savesegment(gs,dump->regs.gs); + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; +@@ -637,16 +640,6 @@ struct task_struct fastcall * __switch_t + prefetch(&next->i387.fxsave); + + /* +- * Restore %fs if needed. +- * +- * Glibc normally makes %fs be zero. +- */ +- if (unlikely(next->fs)) +- loadsegment(fs, next->fs); +- +- write_pda(pcurrent, next_p); +- +- /* + * Now maybe handle debug registers + */ + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) +@@ -654,6 +647,15 @@ struct task_struct fastcall * __switch_t + + disable_tsc(prev_p, next_p); + ++ /* ++ * Leave lazy mode, flushing any hypercalls made here. ++ * This must be done before restoring TLS segments so ++ * the GDT and LDT are properly updated, and must be ++ * done before math_state_restore, so the TS bit is up ++ * to date. 
++ */ ++ arch_leave_lazy_cpu_mode(); ++ + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now +@@ -661,6 +663,14 @@ struct task_struct fastcall * __switch_t + if (next_p->fpu_counter > 5) + math_state_restore(); + ++ /* ++ * Restore %gs if needed (which is common) ++ */ ++ if (prev->gs | next->gs) ++ loadsegment(gs, next->gs); ++ ++ write_pda(pcurrent, next_p); ++ + return prev_p; + } + +--- head-2010-01-18.orig/arch/x86/kernel/setup_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -33,7 +33,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -148,7 +147,7 @@ unsigned long saved_videomode; + #define RAMDISK_PROMPT_FLAG 0x8000 + #define RAMDISK_LOAD_FLAG 0x4000 + +-static char command_line[COMMAND_LINE_SIZE]; ++static char __initdata command_line[COMMAND_LINE_SIZE]; + + unsigned char __initdata boot_params[PARAM_SIZE]; + +@@ -649,8 +648,8 @@ void __init setup_arch(char **cmdline_p) + + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) + i = COMMAND_LINE_SIZE; +- memcpy(saved_command_line, xen_start_info->cmd_line, i); +- saved_command_line[i - 1] = '\0'; ++ memcpy(boot_command_line, xen_start_info->cmd_line, i); ++ boot_command_line[i - 1] = '\0'; + parse_early_param(); + + if (user_defined_memmap) { +@@ -658,11 +657,19 @@ void __init setup_arch(char **cmdline_p) + print_memory_map("user"); + } + +- strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); ++ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + max_low_pfn = setup_memory(); + ++#ifdef CONFIG_VMI ++ /* ++ * Must be after max_low_pfn is determined, and before kernel ++ * pagetables are setup. ++ */ ++ vmi_init(); ++#endif ++ + /* + * NOTE: before this point _nobody_ is allowed to allocate + * any memory using the bootmem allocator. Although the +@@ -825,7 +832,6 @@ void __init setup_arch(char **cmdline_p) + conswitchp = &dummy_con; + #endif + } +- tsc_init(); + } + + static int +@@ -835,31 +841,3 @@ xen_panic_event(struct notifier_block *t + /* we're never actually going to get here... */ + return NOTIFY_DONE; + } +- +-static __init int add_pcspkr(void) +-{ +- struct platform_device *pd; +- int ret; +- +- if (!is_initial_xendomain()) +- return 0; +- +- pd = platform_device_alloc("pcspkr", -1); +- if (!pd) +- return -ENOMEM; +- +- ret = platform_device_add(pd); +- if (ret) +- platform_device_put(pd); +- +- return ret; +-} +-device_initcall(add_pcspkr); +- +-/* +- * Local Variables: +- * mode:c +- * c-file-style:"k&r" +- * c-basic-offset:8 +- * End: +- */ +--- head-2010-01-18.orig/arch/x86/kernel/smp_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/smp_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -335,8 +335,7 @@ static void flush_tlb_others(cpumask_t c + /* + * i'm not happy about this global shared spinlock in the + * MM hot path, but we'll see how contended it is. +- * Temporarily this turns IRQs off, so that lockups are +- * detected by the NMI watchdog. ++ * AK: x86-64 has a faster method that could be ported. + */ + spin_lock(&tlbstate_lock); + +@@ -361,7 +360,7 @@ static void flush_tlb_others(cpumask_t c + + while (!cpus_empty(flush_cpumask)) + /* nothing. 
lockup detection does not belong here */ +- mb(); ++ cpu_relax(); + + flush_mm = NULL; + flush_va = 0; +--- head-2010-01-18.orig/arch/x86/kernel/time-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/time-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -75,25 +76,17 @@ + #include + #include + +-#if defined (__i386__) +-#include ++#ifdef CONFIG_X86_32 + #include + DEFINE_SPINLOCK(i8253_lock); + EXPORT_SYMBOL(i8253_lock); +-#endif +- +-#define XEN_SHIFT 22 +- + int pit_latch_buggy; /* extern */ +- +-#if defined(__x86_64__) +-unsigned long vxtime_hz = PIT_TICK_RATE; +-struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ ++#else + volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +-struct timespec __xtime __section_xtime; +-struct timezone __sys_tz __section_sys_tz; + #endif + ++#define XEN_SHIFT 22 ++ + unsigned int cpu_khz; /* Detected as we calibrate the TSC */ + EXPORT_SYMBOL(cpu_khz); + +@@ -113,9 +106,6 @@ static DEFINE_PER_CPU(struct shadow_time + static struct timespec shadow_tv; + static u32 shadow_tv_version; + +-static struct timeval monotonic_tv; +-static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED; +- + /* Keep track of last time we did processing/updating of jiffies and xtime. */ + static u64 processed_system_time; /* System time (ns) at last processing. */ + static DEFINE_PER_CPU(u64, processed_system_time); +@@ -210,7 +200,7 @@ static inline u64 scale_delta(u64 delta, + return product; + } + +-void init_cpu_khz(void) ++static void init_cpu_khz(void) + { + u64 __cpu_khz = 1000000ULL << 32; + struct vcpu_time_info *info = &vcpu_info(0)->time; +@@ -229,16 +219,6 @@ static u64 get_nsec_offset(struct shadow + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); + } + +-#ifdef CONFIG_X86_64 +-static unsigned long get_usec_offset(struct shadow_time_info *shadow) +-{ +- u64 now, delta; +- rdtscll(now); +- delta = now - shadow->tsc_timestamp; +- return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift); +-} +-#endif +- + static void __update_wallclock(time_t sec, long nsec) + { + long wtm_nsec, xtime_nsec; +@@ -351,142 +331,6 @@ void rtc_cmos_write(unsigned char val, u + } + EXPORT_SYMBOL(rtc_cmos_write); + +-#ifdef CONFIG_X86_64 +- +-/* +- * This version of gettimeofday has microsecond resolution +- * and better than microsecond precision on fast x86 machines with TSC. +- */ +-void do_gettimeofday(struct timeval *tv) +-{ +- unsigned long seq; +- unsigned long usec, sec; +- unsigned long flags; +- s64 nsec; +- unsigned int cpu; +- struct shadow_time_info *shadow; +- u32 local_time_version; +- +- cpu = get_cpu(); +- shadow = &per_cpu(shadow_time, cpu); +- +- do { +- local_time_version = shadow->version; +- seq = read_seqbegin(&xtime_lock); +- +- usec = get_usec_offset(shadow); +- +- sec = xtime.tv_sec; +- usec += (xtime.tv_nsec / NSEC_PER_USEC); +- +- nsec = shadow->system_timestamp - processed_system_time; +- __normalize_time(&sec, &nsec); +- usec += (long)nsec / NSEC_PER_USEC; +- +- if (unlikely(!time_values_up_to_date(cpu))) { +- /* +- * We may have blocked for a long time, +- * rendering our calculations invalid +- * (e.g. the time delta may have +- * overflowed). Detect that and recalculate +- * with fresh values. 
+- */ +- get_time_values_from_xen(cpu); +- continue; +- } +- } while (read_seqretry(&xtime_lock, seq) || +- (local_time_version != shadow->version)); +- +- put_cpu(); +- +- while (usec >= USEC_PER_SEC) { +- usec -= USEC_PER_SEC; +- sec++; +- } +- +- spin_lock_irqsave(&monotonic_lock, flags); +- if ((sec > monotonic_tv.tv_sec) || +- ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec))) +- { +- monotonic_tv.tv_sec = sec; +- monotonic_tv.tv_usec = usec; +- } else { +- sec = monotonic_tv.tv_sec; +- usec = monotonic_tv.tv_usec; +- } +- spin_unlock_irqrestore(&monotonic_lock, flags); +- +- tv->tv_sec = sec; +- tv->tv_usec = usec; +-} +- +-EXPORT_SYMBOL(do_gettimeofday); +- +-int do_settimeofday(struct timespec *tv) +-{ +- time_t sec; +- s64 nsec; +- unsigned int cpu; +- struct shadow_time_info *shadow; +- struct xen_platform_op op; +- +- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) +- return -EINVAL; +- +- if (!is_initial_xendomain() && !independent_wallclock) +- return -EPERM; +- +- cpu = get_cpu(); +- shadow = &per_cpu(shadow_time, cpu); +- +- write_seqlock_irq(&xtime_lock); +- +- /* +- * Ensure we don't get blocked for a long time so that our time delta +- * overflows. If that were to happen then our shadow time values would +- * be stale, so we can retry with fresh ones. +- */ +- for (;;) { +- nsec = tv->tv_nsec - get_nsec_offset(shadow); +- if (time_values_up_to_date(cpu)) +- break; +- get_time_values_from_xen(cpu); +- } +- sec = tv->tv_sec; +- __normalize_time(&sec, &nsec); +- +- if (is_initial_xendomain() && !independent_wallclock) { +- op.cmd = XENPF_settime; +- op.u.settime.secs = sec; +- op.u.settime.nsecs = nsec; +- op.u.settime.system_time = shadow->system_timestamp; +- WARN_ON(HYPERVISOR_platform_op(&op)); +- update_wallclock(); +- } else if (independent_wallclock) { +- nsec -= shadow->system_timestamp; +- __normalize_time(&sec, &nsec); +- __update_wallclock(sec, nsec); +- } +- ntp_clear(); +- +- /* Reset monotonic gettimeofday() timeval. */ +- spin_lock(&monotonic_lock); +- monotonic_tv.tv_sec = 0; +- monotonic_tv.tv_usec = 0; +- spin_unlock(&monotonic_lock); +- +- write_sequnlock_irq(&xtime_lock); +- +- put_cpu(); +- +- clock_was_set(); +- return 0; +-} +- +-EXPORT_SYMBOL(do_settimeofday); +- +-#endif +- + static void sync_xen_wallclock(unsigned long dummy); + static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); + static void sync_xen_wallclock(unsigned long dummy) +@@ -535,15 +379,7 @@ static int set_rtc_mmss(unsigned long no + return retval; + } + +-#ifdef CONFIG_X86_64 +-/* monotonic_clock(): returns # of nanoseconds passed since time_init() +- * Note: This function is required to return accurate +- * time even in the absence of multiple timer ticks. 
+- */ +-unsigned long long monotonic_clock(void) +-#else + unsigned long long sched_clock(void) +-#endif + { + unsigned int cpu = get_cpu(); + struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); +@@ -563,21 +399,18 @@ unsigned long long sched_clock(void) + + return time; + } +-#ifdef CONFIG_X86_64 +-EXPORT_SYMBOL(monotonic_clock); +- +-unsigned long long sched_clock(void) +-{ +- return monotonic_clock(); +-} +-#endif + + unsigned long profile_pc(struct pt_regs *regs) + { + unsigned long pc = instruction_pointer(regs); + + #if defined(CONFIG_SMP) || defined(__x86_64__) +- if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++# ifdef __i386__ ++ if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) ++# else ++ if (!user_mode(regs) ++# endif ++ && in_lock_functions(pc)) { + # ifdef CONFIG_FRAME_POINTER + # ifdef __i386__ + return ((unsigned long *)regs->ebp)[1]; +@@ -586,14 +419,11 @@ unsigned long profile_pc(struct pt_regs + # endif + # else + # ifdef __i386__ +- unsigned long *sp; +- if ((regs->xcs & 2) == 0) +- sp = (unsigned long *)®s->esp; +- else +- sp = (unsigned long *)regs->esp; ++ unsigned long *sp = (unsigned long *)®s->esp; + # else + unsigned long *sp = (unsigned long *)regs->rsp; + # endif ++ + /* Return address is either directly at stack pointer + or above a saved eflags. Eflags has bits 22-31 zero, + kernel addresses don't. */ +@@ -747,19 +577,6 @@ irqreturn_t timer_interrupt(int irq, voi + return IRQ_HANDLED; + } + +-#ifndef CONFIG_X86_64 +- +-void tsc_init(void) +-{ +- init_cpu_khz(); +- printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", +- cpu_khz / 1000, cpu_khz % 1000); +- +- use_tsc_delay(); +-} +- +-#include +- + void mark_tsc_unstable(void) + { + #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. 
*/ +@@ -815,21 +632,9 @@ static struct clocksource clocksource_xe + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ + .shift = XEN_SHIFT, +- .is_continuous = 1, ++ .flags = CLOCK_SOURCE_IS_CONTINUOUS, + }; + +-static int __init init_xen_clocksource(void) +-{ +- clocksource_xen.mult = clocksource_khz2mult(cpu_khz, +- clocksource_xen.shift); +- +- return clocksource_register(&clocksource_xen); +-} +- +-module_init(init_xen_clocksource); +- +-#endif +- + static void init_missing_ticks_accounting(unsigned int cpu) + { + struct vcpu_register_runstate_memory_area area; +@@ -850,7 +655,7 @@ static void init_missing_ticks_accountin + } + + /* not static: needed by APM */ +-unsigned long get_cmos_time(void) ++unsigned long read_persistent_clock(void) + { + unsigned long retval; + unsigned long flags; +@@ -863,11 +668,11 @@ unsigned long get_cmos_time(void) + + return retval; + } +-EXPORT_SYMBOL(get_cmos_time); + + static void sync_cmos_clock(unsigned long dummy); + + static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); ++int no_sync_cmos_clock; + + static void sync_cmos_clock(unsigned long dummy) + { +@@ -911,7 +716,8 @@ static void sync_cmos_clock(unsigned lon + + void notify_arch_cmos_timer(void) + { +- mod_timer(&sync_cmos_timer, jiffies + 1); ++ if (!no_sync_cmos_clock) ++ mod_timer(&sync_cmos_timer, jiffies + 1); + mod_timer(&sync_xen_wallclock_timer, jiffies + 1); + } + +@@ -944,29 +750,11 @@ static int time_init_device(void) + + device_initcall(time_init_device); + +-#ifdef CONFIG_HPET_TIMER + extern void (*late_time_init)(void); +-/* Duplicate of time_init() below, with hpet_enable part added */ +-static void __init hpet_time_init(void) +-{ +- struct timespec ts; +- ts.tv_sec = get_cmos_time(); +- ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); +- +- do_settimeofday(&ts); +- +- if ((hpet_enable() >= 0) && hpet_use_timer) { +- printk("Using HPET for base-timer\n"); +- } +- +- do_time_init(); +-} +-#endif + + /* Dynamically-mapped IRQ. */ + DEFINE_PER_CPU(int, timer_irq); + +-extern void (*late_time_init)(void); + static void setup_cpu0_timer_irq(void) + { + per_cpu(timer_irq, 0) = +@@ -974,7 +762,7 @@ static void setup_cpu0_timer_irq(void) + VIRQ_TIMER, + 0, + timer_interrupt, +- IRQF_DISABLED|IRQF_TIMER, ++ IRQF_DISABLED|IRQF_TIMER|IRQF_NOBALANCING, + "timer0", + NULL); + BUG_ON(per_cpu(timer_irq, 0) < 0); +@@ -986,16 +774,9 @@ static struct vcpu_set_periodic_timer xe + + void __init time_init(void) + { +-#ifdef CONFIG_HPET_TIMER +- if (is_hpet_capable()) { +- /* +- * HPET initialization needs to do memory-mapped io. So, let +- * us do a late initialization after mem_init(). 
+- */ +- late_time_init = hpet_time_init; +- return; +- } +-#endif ++ init_cpu_khz(); ++ printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", ++ cpu_khz / 1000, cpu_khz % 1000); + + switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, + &xen_set_periodic_tick)) { +@@ -1014,18 +795,12 @@ void __init time_init(void) + per_cpu(processed_system_time, 0) = processed_system_time; + init_missing_ticks_accounting(0); + +- update_wallclock(); ++ clocksource_register(&clocksource_xen); + +-#ifdef CONFIG_X86_64 +- init_cpu_khz(); +- printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", +- cpu_khz / 1000, cpu_khz % 1000); ++ update_wallclock(); + +- vxtime.mode = VXTIME_TSC; +- vxtime.quot = (1000000L << 32) / vxtime_hz; +- vxtime.tsc_quot = (1000L << 32) / cpu_khz; +- sync_core(); +- rdtscll(vxtime.last_tsc); ++#ifndef CONFIG_X86_64 ++ use_tsc_delay(); + #endif + + /* Cannot request_irq() until kmem is initialised. */ +@@ -1182,7 +957,7 @@ int __cpuinit local_setup_timer(unsigned + irq = bind_virq_to_irqhandler(VIRQ_TIMER, + cpu, + timer_interrupt, +- IRQF_DISABLED|IRQF_TIMER, ++ IRQF_DISABLED|IRQF_TIMER|IRQF_NOBALANCING, + timer_name[cpu], + NULL); + if (irq < 0) +@@ -1271,7 +1046,7 @@ static ctl_table xen_table[] = { + }; + static int __init xen_sysctl_init(void) + { +- (void)register_sysctl_table(xen_table, 0); ++ (void)register_sysctl_table(xen_table); + return 0; + } + __initcall(xen_sysctl_init); +--- head-2010-01-18.orig/arch/x86/kernel/traps_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/traps_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -100,6 +100,7 @@ asmlinkage void fixup_4gb_segment(void); + asmlinkage void machine_check(void); + + int kstack_depth_to_print = 24; ++static unsigned int code_bytes = 64; + ATOMIC_NOTIFIER_HEAD(i386die_chain); + + int register_die_notifier(struct notifier_block *nb) +@@ -297,10 +298,11 @@ void show_registers(struct pt_regs *regs + int i; + int in_kernel = 1; + unsigned long esp; +- unsigned short ss; ++ unsigned short ss, gs; + + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); ++ savesegment(gs, gs); + if (user_mode_vm(regs)) { + in_kernel = 0; + esp = regs->esp; +@@ -319,8 +321,8 @@ void show_registers(struct pt_regs *regs + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); +- printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", +- regs->xds & 0xffff, regs->xes & 0xffff, ss); ++ printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", ++ regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", + TASK_COMM_LEN, current->comm, current->pid, + current_thread_info(), current, current->thread_info); +@@ -330,7 +332,8 @@ void show_registers(struct pt_regs *regs + */ + if (in_kernel) { + u8 *eip; +- int code_bytes = 64; ++ unsigned int code_prologue = code_bytes * 43 / 64; ++ unsigned int code_len = code_bytes; + unsigned char c; + + printk("\n" KERN_EMERG "Stack: "); +@@ -338,14 +341,14 @@ void show_registers(struct pt_regs *regs + + printk(KERN_EMERG "Code: "); + +- eip = (u8 *)regs->eip - 43; ++ eip = (u8 *)regs->eip - code_prologue; + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { + /* try starting at EIP */ + eip = (u8 *)regs->eip; +- code_bytes = 32; ++ code_len = code_len - code_prologue + 1; + } +- for (i = 0; i < code_bytes; i++, eip++) { ++ for (i = 0; i < code_len; i++, eip++) { + 
if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { + printk(" Bad EIP value."); +@@ -1134,3 +1137,13 @@ static int __init kstack_setup(char *s) + return 1; + } + __setup("kstack=", kstack_setup); ++ ++static int __init code_bytes_setup(char *s) ++{ ++ code_bytes = simple_strtoul(s, NULL, 0); ++ if (code_bytes > 8192) ++ code_bytes = 8192; ++ ++ return 1; ++} ++__setup("code_bytes=", code_bytes_setup); +--- head-2010-01-18.orig/arch/x86/mm/fault_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/fault_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struc + } + EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + +-static inline int notify_page_fault(enum die_val val, const char *str, +- struct pt_regs *regs, long err, int trap, int sig) ++static inline int notify_page_fault(struct pt_regs *regs, long err) + { + struct die_args args = { + .regs = regs, +- .str = str, ++ .str = "page fault", + .err = err, +- .trapnr = trap, +- .signr = sig ++ .trapnr = 14, ++ .signr = SIGSEGV + }; +- return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); +-} +- +-/* +- * Unlock any spinlocks which will prevent us from getting the +- * message out +- */ +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- +- if (yes) { +- oops_in_progress = 1; +- return; +- } +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; ++ return atomic_notifier_call_chain(¬ify_page_fault_chain, ++ DIE_PAGE_FAULT, &args); + } + + /* +@@ -476,8 +450,7 @@ fastcall void __kprobes do_page_fault(st + /* Can take a spurious fault if mapping changes R/O -> R/W. */ + if (spurious_fault(regs, address, error_code)) + return; +- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, +- SIGSEGV) == NOTIFY_STOP) ++ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + /* + * Don't take the mm semaphore here. 
If we fixup a prefetch +@@ -486,8 +459,7 @@ fastcall void __kprobes do_page_fault(st + goto bad_area_nosemaphore; + } + +- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, +- SIGSEGV) == NOTIFY_STOP) ++ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + + /* It's safe to allow irq's after cr2 has been saved and the vmalloc +--- head-2010-01-18.orig/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -33,14 +33,16 @@ static void *__kmap_atomic(struct page * + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ BUG_ON(!pte_none(*(kmap_pte-idx))); ++ + if (!PageHighMem(page)) + return page_address(page); + +- idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +- if (!pte_none(*(kmap_pte-idx))) +- BUG(); + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); ++ /*arch_flush_lazy_mmu_mode();*/ + + return (void*) vaddr; + } +@@ -94,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn, + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); ++ /*arch_flush_lazy_mmu_mode();*/ + + return (void*) vaddr; + } +--- head-2010-01-18.orig/arch/x86/mm/init_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -66,6 +66,7 @@ static pmd_t * __init one_md_table_init( + + #ifdef CONFIG_X86_PAE + pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); + make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); +@@ -87,6 +88,7 @@ static pte_t * __init one_page_table_ini + { + if (pmd_none(*pmd)) { + pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); + make_lowmem_page_readonly(page_table, + XENFEAT_writable_page_tables); + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +--- head-2010-01-18.orig/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -149,6 +149,8 @@ void __set_fixmap (enum fixed_addresses + void __init reserve_top_address(unsigned long reserve) + { + BUG_ON(fixmaps > 0); ++ printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", ++ (int)-reserve); + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; + } +@@ -258,6 +260,12 @@ void pgd_ctor(void *pgd, struct kmem_cac + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); ++ ++ /* must happen under lock */ ++ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, ++ __pa(swapper_pg_dir) >> PAGE_SHIFT, ++ USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); ++ + pgd_list_add(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); + } +@@ -268,6 +276,7 @@ void pgd_dtor(void *pgd, struct kmem_cac + { + unsigned long flags; /* can be called from interrupt context */ + ++ paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +@@ -292,6 +301,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + pmd_t *pmd = kmem_cache_alloc(pmd_cache, 
GFP_KERNEL); + if (!pmd) + goto out_oom; ++ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } + return pgd; +@@ -314,6 +324,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + if (!pmd[i]) + goto out_oom; ++ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); + } + + spin_lock_irqsave(&pgd_lock, flags); +@@ -354,12 +365,17 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + + out_oom: + if (HAVE_SHARED_KERNEL_PMD) { +- for (i--; i >= 0; i--) +- kmem_cache_free(pmd_cache, +- (void *)__va(pgd_val(pgd[i])-1)); ++ for (i--; i >= 0; i--) { ++ pgd_t pgdent = pgd[i]; ++ void* pmd = (void *)__va(pgd_val(pgdent)-1); ++ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); ++ kmem_cache_free(pmd_cache, pmd); ++ } + } else { +- for (i--; i >= 0; i--) ++ for (i--; i >= 0; i--) { ++ paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); + kmem_cache_free(pmd_cache, pmd[i]); ++ } + kfree(pmd); + } + kmem_cache_free(pgd_cache, pgd); +@@ -383,7 +399,9 @@ void pgd_free(pgd_t *pgd) + /* in the PAE case user pgd entries are overwritten before usage */ + if (PTRS_PER_PMD > 1) { + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { +- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ pgd_t pgdent = pgd[i]; ++ void* pmd = (void *)__va(pgd_val(pgdent)-1); ++ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); + kmem_cache_free(pmd_cache, pmd); + } + +--- head-2010-01-18.orig/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:46:41.000000000 +0100 +@@ -465,7 +465,7 @@ ia32_sys_call_table: + .quad sys32_vm86_warning /* vm86old */ + .quad compat_sys_wait4 + .quad sys_swapoff /* 115 */ +- .quad sys32_sysinfo ++ .quad compat_sys_sysinfo + .quad sys32_ipc + .quad sys_fsync + .quad stub32_sigreturn +@@ -510,7 +510,7 @@ ia32_sys_call_table: + .quad sys_sched_yield + .quad sys_sched_get_priority_max + .quad sys_sched_get_priority_min /* 160 */ +- .quad sys_sched_rr_get_interval ++ .quad sys32_sched_rr_get_interval + .quad compat_sys_nanosleep + .quad sys_mremap + .quad sys_setresuid16 +@@ -668,4 +668,5 @@ ia32_sys_call_table: + .quad compat_sys_vmsplice + .quad compat_sys_move_pages + .quad sys_getcpu ++ .quad sys_epoll_pwait + ia32_syscall_end: +--- head-2010-01-18.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2008-04-15 09:29:41.000000000 +0200 ++++ head-2010-01-18/arch/x86/kernel/acpi/sleep_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -59,7 +59,7 @@ unsigned long acpi_wakeup_address = 0; + unsigned long acpi_video_flags; + extern char wakeup_start, wakeup_end; + +-extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); ++extern unsigned long acpi_copy_wakeup_routine(unsigned long); + + static pgd_t low_ptr; + +@@ -67,8 +67,10 @@ static void init_low_mapping(void) + { + pgd_t *slot0 = pgd_offset(current->mm, 0UL); + low_ptr = *slot0; ++ /* FIXME: We're playing with the current task's page tables here, which ++ * is potentially dangerous on SMP systems. 
++ */ + set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); +- WARN_ON(num_online_cpus() != 1); + local_flush_tlb(); + } + #endif +--- head-2010-01-18.orig/arch/x86/kernel/e820_64-xen.c 2009-12-04 10:49:03.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/e820_64-xen.c 2009-12-04 10:52:06.000000000 +0100 +@@ -90,6 +90,13 @@ static inline int bad_addr(unsigned long + return 1; + } + ++#ifdef CONFIG_NUMA ++ /* NUMA memory to node map */ ++ if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { ++ *addrp = nodemap_addr + nodemap_size; ++ return 1; ++ } ++#endif + /* XXX ramdisk image here? */ + #else + if (last < (table_end<type != E820_RAM || ++ ei->addr+ei->size <= start || ++ ei->addr >= end) ++ continue; ++ ++ addr = round_up(ei->addr, PAGE_SIZE); ++ if (addr < start) ++ addr = start; ++ ++ last = round_down(ei->addr + ei->size, PAGE_SIZE); ++ if (last >= end) ++ last = end; ++ ++ if (last > addr) ++ ram += last - addr; ++ } ++ return ((end - start) - ram); ++} ++ ++/* + * Mark e820 reserved areas as busy for the resource manager. + */ + void __init e820_reserve_resources(struct e820entry *e820, int nr_map) +@@ -751,7 +789,7 @@ static int __init parse_memmap_opt(char + } + early_param("memmap", parse_memmap_opt); + +-void finish_e820_parsing(void) ++void __init finish_e820_parsing(void) + { + if (userdef) { + printk(KERN_INFO "user-defined physical RAM map:\n"); +--- head-2010-01-18.orig/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:46:41.000000000 +0100 +@@ -629,6 +629,9 @@ END(invalidate_interrupt\num) + ENTRY(call_function_interrupt) + apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt + END(call_function_interrupt) ++ENTRY(irq_move_cleanup_interrupt) ++ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt ++END(irq_move_cleanup_interrupt) + #endif + + ENTRY(apic_timer_interrupt) +--- head-2010-01-18.orig/arch/x86/kernel/head64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -45,8 +45,6 @@ static void __init clear_bss(void) + #define OLD_CL_BASE_ADDR 0x90000 + #define OLD_CL_OFFSET 0x90022 + +-extern char saved_command_line[]; +- + static void __init copy_bootdata(char *real_mode_data) + { + #ifndef CONFIG_XEN +@@ -62,14 +60,14 @@ static void __init copy_bootdata(char *r + new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; + } + command_line = (char *) ((u64)(new_data)); +- memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); ++ memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); + #else + int max_cmdline; + + if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) + max_cmdline = COMMAND_LINE_SIZE; +- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); +- saved_command_line[max_cmdline-1] = '\0'; ++ memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline); ++ boot_command_line[max_cmdline-1] = '\0'; + #endif + } + +--- head-2010-01-18.orig/arch/x86/kernel/io_apic_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/io_apic_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #endif + ++#include + #include + #include + #include +@@ -47,7 +48,20 @@ + #include + #include + +-static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); ++struct irq_cfg { ++#ifndef CONFIG_XEN ++ cpumask_t domain; ++ cpumask_t old_domain; ++#endif ++ unsigned move_cleanup_count; ++ u8 
vector; ++ u8 move_in_progress : 1; ++}; ++ ++/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ ++struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; ++ ++static int assign_irq_vector(int irq, cpumask_t mask); + + #define __apicdebuginit __init + +@@ -89,7 +103,7 @@ int nr_ioapic_registers[MAX_IO_APICS]; + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +-#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS ++#define MAX_PLUS_SHARED_IRQS NR_IRQS + #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + + /* +@@ -262,21 +276,19 @@ static void __target_IO_APIC_irq(unsigne + + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; +- int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); ++ return; + +- vector = assign_irq_vector(irq, mask, &tmp); +- if (vector < 0) ++ if (assign_irq_vector(irq, mask)) + return; + ++ cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + /* +@@ -285,8 +297,8 @@ static void set_ioapic_affinity_irq(unsi + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, vector); +- set_native_irq_info(irq, mask); ++ __target_IO_APIC_irq(irq, dest, cfg->vector); ++ irq_desc[irq].affinity = mask; + spin_unlock_irqrestore(&ioapic_lock, flags); + } + #endif +@@ -332,11 +344,11 @@ static void add_pin_to_irq(unsigned int + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ ++ FINAL; \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ +- FINAL; \ + } + + #define DO_ACTION(name,R,ACTION, FINAL) \ +@@ -669,77 +681,62 @@ static int pin_2_irq(int idx, int apic, + return irq; + } + +-static inline int IO_APIC_irq_trigger(int irq) +-{ +- int apic, idx, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- idx = find_irq_entry(apic,pin,mp_INT); +- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) +- return irq_trigger(idx); +- } +- } +- /* +- * nonexistent IRQs are edge default +- */ +- return 0; +-} +- +-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ +-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; +- +-static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) ++static int __assign_irq_vector(int irq, cpumask_t mask) + { +- int vector; + struct physdev_irq irq_op; ++ struct irq_cfg *cfg; + +- BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); ++ BUG_ON((unsigned)irq >= NR_IRQS); + + if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) + return -EINVAL; + +- cpus_and(*result, mask, cpu_online_map); ++ cfg = &irq_cfg[irq]; ++ ++ if ((cfg->move_in_progress) || cfg->move_cleanup_count) ++ return -EBUSY; + +- if (irq_vector[irq] > 0) +- return irq_vector[irq]; ++ if (cfg->vector) ++ return 0; + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; + +- vector = irq_op.vector; +- irq_vector[irq] = vector; ++ cfg->vector = irq_op.vector; + +- return vector; ++ return 0; + } + +-static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) ++static int assign_irq_vector(int irq, cpumask_t mask) + { +- int vector; ++ int err; + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); +- vector = __assign_irq_vector(irq, mask, result); ++ err = __assign_irq_vector(irq, mask); + spin_unlock_irqrestore(&vector_lock, flags); +- return vector; ++ return err; + } + + #ifndef CONFIG_XEN + static void __clear_irq_vector(int irq) + { ++ struct irq_cfg *cfg; + cpumask_t mask; + int cpu, vector; + +- BUG_ON(!irq_vector[irq]); ++ BUG_ON((unsigned)irq >= NR_IRQS); ++ cfg = &irq_cfg[irq]; ++ BUG_ON(!cfg->vector); + +- vector = irq_vector[irq]; +- cpus_and(mask, irq_domain[irq], cpu_online_map); ++ vector = cfg->vector; ++ cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + +- irq_vector[irq] = 0; +- irq_domain[irq] = CPU_MASK_NONE; ++ cfg->vector = 0; ++ cfg->domain = CPU_MASK_NONE; + } + + void __setup_vector_irq(int cpu) +@@ -749,10 +746,10 @@ void __setup_vector_irq(int cpu) + int irq, vector; + + /* Mark the inuse vectors */ +- for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { +- if (!cpu_isset(cpu, irq_domain[irq])) ++ for (irq = 0; irq < NR_IRQS; ++irq) { ++ if (!cpu_isset(cpu, irq_cfg[irq].domain)) + continue; +- vector = irq_vector[irq]; ++ vector = irq_cfg[irq].vector; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ +@@ -760,41 +757,49 @@ void __setup_vector_irq(int cpu) + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; +- if (!cpu_isset(cpu, irq_domain[irq])) ++ if (!cpu_isset(cpu, irq_cfg[irq].domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } + } + +-extern void (*interrupt[NR_IRQS])(void); +- + static struct irq_chip ioapic_chip; + +-#define IOAPIC_AUTO -1 +-#define IOAPIC_EDGE 0 +-#define IOAPIC_LEVEL 1 +- +-static void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++static void ioapic_register_intr(int irq, unsigned long trigger) + { +- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || +- trigger == IOAPIC_LEVEL) ++ if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, "fasteoi"); +- else { +- irq_desc[irq].status |= IRQ_DELAYED_DISABLE; ++ else + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); +- } + } + #else +-#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq) ++#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq) + #endif /* !CONFIG_XEN */ + +-static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) 
++static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, ++ int trigger, int polarity) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + struct IO_APIC_route_entry entry; +- int vector; +- unsigned long flags; ++ cpumask_t mask; ++ ++ if (!IO_APIC_IRQ(irq)) ++ return; + ++ mask = TARGET_CPUS; ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++#ifndef CONFIG_XEN ++ cpus_and(mask, cfg->domain, mask); ++#endif ++ ++ apic_printk(APIC_VERBOSE,KERN_DEBUG ++ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " ++ "IRQ %d Mode:%i Active:%i)\n", ++ apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, ++ irq, trigger, polarity); + + /* + * add it to the IO-APIC irq-routing table: +@@ -803,41 +808,23 @@ static void __init setup_IO_APIC_irq(int + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; ++ entry.dest = cpu_mask_to_apicid(mask); + entry.mask = 0; /* enable IRQ */ +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); +- +- entry.trigger = irq_trigger(idx); +- entry.polarity = irq_polarity(idx); ++ entry.trigger = trigger; ++ entry.polarity = polarity; ++ entry.vector = cfg->vector; + +- if (irq_trigger(idx)) { +- entry.trigger = 1; ++ /* Mask level triggered irqs. ++ * Use IRQ_DELAYED_DISABLE for edge triggered irqs. ++ */ ++ if (trigger) + entry.mask = 1; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); +- } +- +- if (/* !apic && */ !IO_APIC_IRQ(irq)) +- return; + +- if (IO_APIC_IRQ(irq)) { +- cpumask_t mask; +- vector = assign_irq_vector(irq, TARGET_CPUS, &mask); +- if (vector < 0) +- return; +- +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); +- entry.vector = vector; +- +- ioapic_register_intr(irq, vector, IOAPIC_AUTO); +- if (!apic && (irq < 16)) +- disable_8259A_irq(irq); +- } ++ ioapic_register_intr(irq, trigger); ++ if (irq < 16) ++ disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- set_native_irq_info(irq, TARGET_CPUS); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- + } + + static void __init setup_IO_APIC_irqs(void) +@@ -862,8 +849,8 @@ static void __init setup_IO_APIC_irqs(vo + irq = pin_2_irq(idx, apic, pin); + add_pin_to_irq(irq, apic, pin); + +- setup_IO_APIC_irq(apic, pin, idx, irq); +- ++ setup_IO_APIC_irq(apic, pin, irq, ++ irq_trigger(idx), irq_polarity(idx)); + } + } + +@@ -894,7 +881,7 @@ static void __init setup_ExtINT_IRQ0_pin + */ + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* unmask IRQ now */ +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.polarity = 0; + entry.trigger = 0; +@@ -994,18 +981,17 @@ void __apicdebuginit print_IO_APIC(void) + + printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); + +- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" +- " Stat Dest Deli Vect: \n"); ++ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" ++ " Stat Dmod Deli Vect: \n"); + + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + +- printk(KERN_DEBUG " %02x %03X %02X ", ++ printk(KERN_DEBUG " %02x %03X ", + i, +- entry.dest.logical.logical_dest, +- entry.dest.physical.physical_dest ++ entry.dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", +@@ -1269,8 +1255,7 @@ void disable_IO_APIC(void) + entry.dest_mode = 0; /* Physical */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ + entry.vector = 0; +- entry.dest.physical.physical_dest = +- GET_APIC_ID(apic_read(APIC_ID)); ++ entry.dest = GET_APIC_ID(apic_read(APIC_ID)); + + /* + * Add it to the IO-APIC irq-routing table: +@@ -1355,16 +1340,15 @@ static unsigned int startup_ioapic_irq(u + + static int ioapic_retrigger_irq(unsigned int irq) + { ++ struct irq_cfg *cfg = &irq_cfg[irq]; + cpumask_t mask; +- unsigned vector; + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); +- vector = irq_vector[irq]; + cpus_clear(mask); +- cpu_set(first_cpu(irq_domain[irq]), mask); ++ cpu_set(first_cpu(cfg->domain), mask); + +- send_IPI_mask(mask, vector); ++ send_IPI_mask(mask, cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +@@ -1379,8 +1363,68 @@ static int ioapic_retrigger_irq(unsigned + * races. + */ + ++#ifdef CONFIG_SMP ++asmlinkage void smp_irq_move_cleanup_interrupt(void) ++{ ++ unsigned vector, me; ++ ack_APIC_irq(); ++ exit_idle(); ++ irq_enter(); ++ ++ me = smp_processor_id(); ++ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { ++ unsigned int irq; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ irq = __get_cpu_var(vector_irq)[vector]; ++ if (irq >= NR_IRQS) ++ continue; ++ ++ desc = irq_desc + irq; ++ cfg = irq_cfg + irq; ++ spin_lock(&desc->lock); ++ if (!cfg->move_cleanup_count) ++ goto unlock; ++ ++ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) ++ goto unlock; ++ ++ __get_cpu_var(vector_irq)[vector] = -1; ++ cfg->move_cleanup_count--; ++unlock: ++ spin_unlock(&desc->lock); ++ } ++ ++ irq_exit(); ++} ++ ++static void irq_complete_move(unsigned int irq) ++{ ++ struct irq_cfg *cfg = irq_cfg + irq; ++ unsigned vector, me; ++ ++ if (likely(!cfg->move_in_progress)) ++ return; ++ ++ vector = ~get_irq_regs()->orig_rax; ++ me = smp_processor_id(); ++ if ((vector == cfg->vector) && ++ cpu_isset(smp_processor_id(), cfg->domain)) { ++ cpumask_t cleanup_mask; ++ ++ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); ++ cfg->move_cleanup_count = cpus_weight(cleanup_mask); ++ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ cfg->move_in_progress = 0; ++ } ++} ++#else ++static inline void irq_complete_move(unsigned int irq) {} ++#endif ++ + static void ack_apic_edge(unsigned int irq) + { ++ irq_complete_move(irq); + move_native_irq(irq); + ack_APIC_irq(); + } +@@ -1389,6 +1433,7 @@ static void ack_apic_level(unsigned int + { + int do_unmask_irq = 0; + ++ irq_complete_move(irq); + #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { +@@ -1440,7 +1485,7 @@ static inline void init_IO_APIC_traps(vo + */ + for (irq = 0; irq < NR_IRQS ; irq++) { + int tmp = irq; +- if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { ++ if (IO_APIC_IRQ(tmp) && 
!irq_cfg[tmp].vector) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 +@@ -1538,7 +1583,7 @@ static inline void unlock_ExtINT_logic(v + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ +- entry1.dest.physical.physical_dest = hard_smp_processor_id(); ++ entry1.dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; +@@ -1582,15 +1627,14 @@ static inline void unlock_ExtINT_logic(v + */ + static inline void check_timer(void) + { ++ struct irq_cfg *cfg = irq_cfg + 0; + int apic1, pin1, apic2, pin2; +- int vector; +- cpumask_t mask; + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); +- vector = assign_irq_vector(0, TARGET_CPUS, &mask); ++ assign_irq_vector(0, TARGET_CPUS); + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI +@@ -1610,7 +1654,7 @@ static inline void check_timer(void) + apic2 = ioapic_i8259.apic; + + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", +- vector, apic1, pin1, apic2, pin2); ++ cfg->vector, apic1, pin1, apic2, pin2); + + if (pin1 != -1) { + /* +@@ -1641,7 +1685,7 @@ static inline void check_timer(void) + /* + * legacy devices should be connected to IO APIC #0 + */ +- setup_ExtINT_IRQ0_pin(apic2, pin2, vector); ++ setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); + if (timer_irq_works()) { + apic_printk(APIC_VERBOSE," works.\n"); + nmi_watchdog_default(); +@@ -1666,14 +1710,14 @@ static inline void check_timer(void) + + disable_8259A_irq(0); + irq_desc[0].chip = &lapic_irq_type; +- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ ++ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + apic_printk(APIC_VERBOSE," works.\n"); + return; + } +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_VERBOSE," failed.\n"); + + apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); +@@ -1828,19 +1872,16 @@ int create_irq(void) + /* Allocate an unused irq */ + int irq; + int new; +- int vector = 0; + unsigned long flags; +- cpumask_t mask; + + irq = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for (new = (NR_IRQS - 1); new >= 0; new--) { + if (platform_legacy_irq(new)) + continue; +- if (irq_vector[new] != 0) ++ if (irq_cfg[new].vector != 0) + continue; +- vector = __assign_irq_vector(new, TARGET_CPUS, &mask); +- if (likely(vector > 0)) ++ if (__assign_irq_vector(new, TARGET_CPUS) == 0) + irq = new; + break; + } +@@ -1871,12 +1912,15 @@ void destroy_irq(unsigned int irq) + #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) + static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) + { +- int vector; ++ struct irq_cfg *cfg = irq_cfg + irq; ++ int err; + unsigned dest; + cpumask_t tmp; + +- vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); +- if (vector >= 0) { ++ tmp = TARGET_CPUS; ++ err = assign_irq_vector(irq, tmp); ++ if (!err) { ++ cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg->address_hi = MSI_ADDR_BASE_HI; +@@ -1896,40 +1940,38 @@ static int msi_compose_msg(struct pci_de + ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
+ MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | +- MSI_DATA_VECTOR(vector); ++ MSI_DATA_VECTOR(cfg->vector); + } +- return vector; ++ return err; + } + + #ifdef CONFIG_SMP + static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; +- int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); ++ return; + +- vector = assign_irq_vector(irq, mask, &tmp); +- if (vector < 0) ++ if (assign_irq_vector(irq, mask)) + return; + ++ cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + read_msi_msg(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(vector); ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); +- set_native_irq_info(irq, mask); ++ irq_desc[irq].affinity = mask; + } + #endif /* CONFIG_SMP */ + +@@ -1948,24 +1990,31 @@ static struct irq_chip msi_chip = { + .retrigger = ioapic_retrigger_irq, + }; + +-int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) ++int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) + { + struct msi_msg msg; +- int ret; ++ int irq, ret; ++ irq = create_irq(); ++ if (irq < 0) ++ return irq; ++ ++ set_irq_msi(irq, desc); + ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) ++ if (ret < 0) { ++ destroy_irq(irq); + return ret; ++ } + + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + +- return 0; ++ return irq; + } + + void arch_teardown_msi_irq(unsigned int irq) + { +- return; ++ destroy_irq(irq); + } + + #endif /* CONFIG_PCI_MSI */ +@@ -1993,24 +2042,22 @@ static void target_ht_irq(unsigned int i + + static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp; +- int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); ++ return; + +- vector = assign_irq_vector(irq, mask, &tmp); +- if (vector < 0) ++ if (assign_irq_vector(irq, mask)) + return; + ++ cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + +- target_ht_irq(irq, dest, vector); +- set_native_irq_info(irq, mask); ++ target_ht_irq(irq, dest, cfg->vector); ++ irq_desc[irq].affinity = mask; + } + #endif + +@@ -2027,14 +2074,17 @@ static struct irq_chip ht_irq_chip = { + + int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) + { +- int vector; ++ struct irq_cfg *cfg = irq_cfg + irq; ++ int err; + cpumask_t tmp; + +- vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); +- if (vector >= 0) { ++ tmp = TARGET_CPUS; ++ err = assign_irq_vector(irq, tmp); ++ if (!err) { + struct ht_irq_msg msg; + unsigned dest; + ++ cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +@@ -2042,7 +2092,7 @@ int arch_setup_ht_irq(unsigned int irq, + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | +- HT_IRQ_LOW_VECTOR(vector) | ++ HT_IRQ_LOW_VECTOR(cfg->vector) | + ((INT_DEST_MODE == 0) ? 
+ HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | +@@ -2057,7 +2107,7 @@ int arch_setup_ht_irq(unsigned int irq, + set_irq_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + } +- return vector; ++ return err; + } + #endif /* CONFIG_HT_IRQ */ + +@@ -2082,13 +2132,8 @@ int __init io_apic_get_redir_entries (in + } + + +-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) + { +- struct IO_APIC_route_entry entry; +- unsigned long flags; +- int vector; +- cpumask_t mask; +- + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); +@@ -2101,42 +2146,7 @@ int io_apic_set_pci_routing (int ioapic, + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); + +- +- vector = assign_irq_vector(irq, TARGET_CPUS, &mask); +- if (vector < 0) +- return vector; +- +- /* +- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. +- * Note that we mask (disable) IRQs now -- these get enabled when the +- * corresponding device driver registers for this IRQ. +- */ +- +- memset(&entry,0,sizeof(entry)); +- +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.dest_mode = INT_DEST_MODE; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); +- entry.trigger = edge_level; +- entry.polarity = active_high_low; +- entry.mask = 1; /* Disabled (masked) */ +- entry.vector = vector & 0xff; +- +- apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " +- "IRQ %d Mode:%i Active:%i)\n", ioapic, +- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, +- edge_level, active_high_low); +- +- ioapic_register_intr(irq, entry.vector, edge_level); +- +- if (!ioapic && (irq < 16)) +- disable_8259A_irq(irq); +- +- ioapic_write_entry(ioapic, pin, entry); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- set_native_irq_info(irq, TARGET_CPUS); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + + return 0; + } +@@ -2169,8 +2179,10 @@ void __init setup_ioapic_dest(void) + * when you have too many devices, because at that time only boot + * cpu is online. 
+ */ +- if(!irq_vector[irq]) +- setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); ++ if (!irq_cfg[irq].vector) ++ setup_IO_APIC_irq(ioapic, pin, irq, ++ irq_trigger(irq_entry), ++ irq_polarity(irq_entry)); + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } +--- head-2010-01-18.orig/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + atomic_t irq_err_count; + +@@ -120,9 +121,15 @@ asmlinkage unsigned int do_IRQ(struct pt + + if (likely(irq < NR_IRQS)) + generic_handle_irq(irq); +- else if (printk_ratelimit()) +- printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", +- __func__, smp_processor_id(), irq); ++ else { ++#ifndef CONFIG_XEN ++ if (!disable_apic) ++ ack_APIC_irq(); ++#endif ++ if (printk_ratelimit()) ++ printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", ++ __func__, smp_processor_id(), irq); ++ } + + /*irq_exit();*/ + +--- head-2010-01-18.orig/arch/x86/kernel/mpparse_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/mpparse_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -60,9 +60,9 @@ unsigned long mp_lapic_addr = 0; + /* Processor that is doing the boot up */ + unsigned int boot_cpu_id = -1U; + /* Internal processor count */ +-unsigned int num_processors __initdata = 0; ++unsigned int num_processors __cpuinitdata = 0; + +-unsigned disabled_cpus __initdata; ++unsigned disabled_cpus __cpuinitdata; + + /* Bitmask of physically existing CPUs */ + physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; +@@ -808,7 +808,7 @@ int mp_register_gsi(u32 gsi, int trigger + return gsi; + + /* Don't set up the ACPI SCI because it's already set up */ +- if (acpi_fadt.sci_int == gsi) ++ if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); +--- head-2010-01-18.orig/arch/x86/kernel/process_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -338,14 +338,17 @@ void load_gs_index(unsigned gs) + void flush_thread(void) + { + struct task_struct *tsk = current; +- struct thread_info *t = current_thread_info(); + +- if (t->flags & _TIF_ABI_PENDING) { +- t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); +- if (t->flags & _TIF_IA32) ++ if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { ++ clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); ++ if (test_tsk_thread_flag(tsk, TIF_IA32)) { ++ clear_tsk_thread_flag(tsk, TIF_IA32); ++ } else { ++ set_tsk_thread_flag(tsk, TIF_IA32); + current_thread_info()->status |= TS_COMPAT; ++ } + } +- t->flags &= ~_TIF_DEBUG; ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); + + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; +--- head-2010-01-18.orig/arch/x86/kernel/setup_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(edid_info); + + extern int root_mountflags; + +-char command_line[COMMAND_LINE_SIZE]; ++char __initdata command_line[COMMAND_LINE_SIZE]; + + struct resource standard_io_resources[] = { + { .name = "dma1", .start = 0x00, .end = 0x1f, +@@ -179,134 +179,6 @@ struct resource code_resource = { + .flags = IORESOURCE_RAM, + }; + +-#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM) +- +-static struct resource system_rom_resource = { +- .name = "System ROM", +- .start = 0xf0000, +- .end = 0xfffff, +- .flags = 
IORESOURCE_ROM, +-}; +- +-static struct resource extension_rom_resource = { +- .name = "Extension ROM", +- .start = 0xe0000, +- .end = 0xeffff, +- .flags = IORESOURCE_ROM, +-}; +- +-static struct resource adapter_rom_resources[] = { +- { .name = "Adapter ROM", .start = 0xc8000, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM } +-}; +- +-static struct resource video_rom_resource = { +- .name = "Video ROM", +- .start = 0xc0000, +- .end = 0xc7fff, +- .flags = IORESOURCE_ROM, +-}; +- +-static struct resource video_ram_resource = { +- .name = "Video RAM area", +- .start = 0xa0000, +- .end = 0xbffff, +- .flags = IORESOURCE_RAM, +-}; +- +-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) +- +-static int __init romchecksum(unsigned char *rom, unsigned long length) +-{ +- unsigned char *p, sum = 0; +- +- for (p = rom; p < rom + length; p++) +- sum += *p; +- return sum == 0; +-} +- +-static void __init probe_roms(void) +-{ +- unsigned long start, length, upper; +- unsigned char *rom; +- int i; +- +-#ifdef CONFIG_XEN +- /* Nothing to do if not running in dom0. */ +- if (!is_initial_xendomain()) +- return; +-#endif +- +- /* video rom */ +- upper = adapter_rom_resources[0].start; +- for (start = video_rom_resource.start; start < upper; start += 2048) { +- rom = isa_bus_to_virt(start); +- if (!romsignature(rom)) +- continue; +- +- video_rom_resource.start = start; +- +- /* 0 < length <= 0x7f * 512, historically */ +- length = rom[2] * 512; +- +- /* if checksum okay, trust length byte */ +- if (length && romchecksum(rom, length)) +- video_rom_resource.end = start + length - 1; +- +- request_resource(&iomem_resource, &video_rom_resource); +- break; +- } +- +- start = (video_rom_resource.end + 1 + 2047) & ~2047UL; +- if (start < upper) +- start = upper; +- +- /* system rom */ +- request_resource(&iomem_resource, &system_rom_resource); +- upper = system_rom_resource.start; +- +- /* check for extension rom (ignore length byte!) */ +- rom = isa_bus_to_virt(extension_rom_resource.start); +- if (romsignature(rom)) { +- length = extension_rom_resource.end - extension_rom_resource.start + 1; +- if (romchecksum(rom, length)) { +- request_resource(&iomem_resource, &extension_rom_resource); +- upper = extension_rom_resource.start; +- } +- } +- +- /* check for adapter roms on 2k boundaries */ +- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; +- start += 2048) { +- rom = isa_bus_to_virt(start); +- if (!romsignature(rom)) +- continue; +- +- /* 0 < length <= 0x7f * 512, historically */ +- length = rom[2] * 512; +- +- /* but accept any length that fits if checksum okay */ +- if (!length || start + length > upper || !romchecksum(rom, length)) +- continue; +- +- adapter_rom_resources[i].start = start; +- adapter_rom_resources[i].end = start + length - 1; +- request_resource(&iomem_resource, &adapter_rom_resources[i]); +- +- start = adapter_rom_resources[i++].end & ~2047UL; +- } +-} +- + #ifdef CONFIG_PROC_VMCORE + /* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. 
This option will be passed +@@ -404,7 +276,7 @@ void __init setup_arch(char **cmdline_p) + #ifdef CONFIG_XEN + extern struct e820map machine_e820; + +- printk(KERN_INFO "Command line: %s\n", saved_command_line); ++ printk(KERN_INFO "Command line: %s\n", boot_command_line); + + /* Register a call for panic conditions. */ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); +@@ -431,7 +303,7 @@ void __init setup_arch(char **cmdline_p) + + ARCH_SETUP + #else +- printk(KERN_INFO "Command line: %s\n", saved_command_line); ++ printk(KERN_INFO "Command line: %s\n", boot_command_line); + + ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); + screen_info = SCREEN_INFO; +@@ -462,7 +334,7 @@ void __init setup_arch(char **cmdline_p) + + early_identify_cpu(&boot_cpu_data); + +- strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); ++ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + parse_early_param(); +@@ -532,6 +404,11 @@ void __init setup_arch(char **cmdline_p) + /* reserve ebda region */ + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); ++#ifdef CONFIG_NUMA ++ /* reserve nodemap region */ ++ if (nodemap_addr) ++ reserve_bootmem_generic(nodemap_addr, nodemap_size); ++#endif + + #ifdef CONFIG_SMP + /* +@@ -732,10 +609,8 @@ void __init setup_arch(char **cmdline_p) + #endif + + /* +- * Request address space for all standard RAM and ROM resources +- * and also for regions reported as reserved by the e820. ++ * We trust e820 completely. No explicit ROM probing in memory. + */ +- probe_roms(); + #ifdef CONFIG_XEN + if (is_initial_xendomain()) + e820_reserve_resources(machine_e820.map, machine_e820.nr_map); +@@ -744,8 +619,6 @@ void __init setup_arch(char **cmdline_p) + e820_mark_nosave_regions(); + #endif + +- request_resource(&iomem_resource, &video_ram_resource); +- + { + unsigned i; + /* request I/O space for devices used on all i[345]86 PCs */ +@@ -1324,7 +1197,8 @@ static int show_cpuinfo(struct seq_file + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, +- NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", ++ NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", ++ "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, +@@ -1342,7 +1216,7 @@ static int show_cpuinfo(struct seq_file + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, +- NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, ++ NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ +@@ -1352,8 +1226,10 @@ static int show_cpuinfo(struct seq_file + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* AMD-defined (#2) */ +- "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ++ "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", ++ "altmovcr8", "abm", "sse4a", ++ "misalignsse", "3dnowprefetch", ++ "osvw", "ibs", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; +@@ -1364,6 +1240,9 @@ static int show_cpuinfo(struct seq_file + "ttp", /* thermal trip */ + "tm", + "stc", ++ "100mhzsteps", ++ "hwpstate", ++ NULL, /* tsc invariant mapped to constant_tsc */ + NULL, + 
/* nothing */ /* constant_tsc - moved to flags */ + }; +@@ -1480,26 +1359,3 @@ struct seq_operations cpuinfo_op = { + .stop = c_stop, + .show = show_cpuinfo, + }; +- +-#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) +-#include +-static __init int add_pcspkr(void) +-{ +- struct platform_device *pd; +- int ret; +- +- if (!is_initial_xendomain()) +- return 0; +- +- pd = platform_device_alloc("pcspkr", -1); +- if (!pd) +- return -ENOMEM; +- +- ret = platform_device_add(pd); +- if (ret) +- platform_device_put(pd); +- +- return ret; +-} +-device_initcall(add_pcspkr); +-#endif +--- head-2010-01-18.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/vsyscall_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -34,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -44,56 +46,41 @@ + #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) + #define __syscall_clobber "r11","rcx","memory" + +-int __sysctl_vsyscall __section_sysctl_vsyscall = 1; +-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; ++struct vsyscall_gtod_data_t { ++ seqlock_t lock; ++ int sysctl_enabled; ++ struct timeval wall_time_tv; ++ struct timezone sys_tz; ++ cycle_t offset_base; ++ struct clocksource clock; ++}; + int __vgetcpu_mode __section_vgetcpu_mode; + +-#include +- +-static __always_inline void timeval_normalize(struct timeval * tv) ++struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = + { +- time_t __sec; +- +- __sec = tv->tv_usec / 1000000; +- if (__sec) { +- tv->tv_usec %= 1000000; +- tv->tv_sec += __sec; +- } +-} ++ .lock = SEQLOCK_UNLOCKED, ++ .sysctl_enabled = 1, ++}; + +-static __always_inline void do_vgettimeofday(struct timeval * tv) ++void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) + { +- long sequence, t; +- unsigned long sec, usec; ++ unsigned long flags; + +- do { +- sequence = read_seqbegin(&__xtime_lock); +- +- sec = __xtime.tv_sec; +- usec = __xtime.tv_nsec / 1000; +- +- if (__vxtime.mode != VXTIME_HPET) { +- t = get_cycles_sync(); +- if (t < __vxtime.last_tsc) +- t = __vxtime.last_tsc; +- usec += ((t - __vxtime.last_tsc) * +- __vxtime.tsc_quot) >> 32; +- /* See comment in x86_64 do_gettimeofday. */ +- } else { +- usec += ((readl((void __iomem *) +- fix_to_virt(VSYSCALL_HPET) + 0xf0) - +- __vxtime.last) * __vxtime.quot) >> 32; +- } +- } while (read_seqretry(&__xtime_lock, sequence)); +- +- tv->tv_sec = sec + usec / 1000000; +- tv->tv_usec = usec % 1000000; ++ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); ++ /* copy vsyscall data */ ++ vsyscall_gtod_data.clock = *clock; ++ vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; ++ vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; ++ vsyscall_gtod_data.sys_tz = sys_tz; ++ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + } + +-/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ ++/* RED-PEN may want to readd seq locking, but then the variable should be ++ * write-once. 
++ */ + static __always_inline void do_get_tz(struct timezone * tz) + { +- *tz = __sys_tz; ++ *tz = __vsyscall_gtod_data.sys_tz; + } + + static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) +@@ -101,7 +88,8 @@ static __always_inline int gettimeofday( + int ret; + asm volatile("vsysc2: syscall" + : "=a" (ret) +- : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber ); ++ : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) ++ : __syscall_clobber ); + return ret; + } + +@@ -114,10 +102,44 @@ static __always_inline long time_syscall + return secs; + } + ++static __always_inline void do_vgettimeofday(struct timeval * tv) ++{ ++ cycle_t now, base, mask, cycle_delta; ++ unsigned long seq, mult, shift, nsec_delta; ++ cycle_t (*vread)(void); ++ do { ++ seq = read_seqbegin(&__vsyscall_gtod_data.lock); ++ ++ vread = __vsyscall_gtod_data.clock.vread; ++ if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { ++ gettimeofday(tv,NULL); ++ return; ++ } ++ now = vread(); ++ base = __vsyscall_gtod_data.clock.cycle_last; ++ mask = __vsyscall_gtod_data.clock.mask; ++ mult = __vsyscall_gtod_data.clock.mult; ++ shift = __vsyscall_gtod_data.clock.shift; ++ ++ *tv = __vsyscall_gtod_data.wall_time_tv; ++ ++ } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); ++ ++ /* calculate interval: */ ++ cycle_delta = (now - base) & mask; ++ /* convert to nsecs: */ ++ nsec_delta = (cycle_delta * mult) >> shift; ++ ++ /* convert to usecs and add to timespec: */ ++ tv->tv_usec += nsec_delta / NSEC_PER_USEC; ++ while (tv->tv_usec > USEC_PER_SEC) { ++ tv->tv_sec += 1; ++ tv->tv_usec -= USEC_PER_SEC; ++ } ++} ++ + int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) + { +- if (!__sysctl_vsyscall) +- return gettimeofday(tv,tz); + if (tv) + do_vgettimeofday(tv); + if (tz) +@@ -129,11 +151,11 @@ int __vsyscall(0) vgettimeofday(struct t + * unlikely */ + time_t __vsyscall(1) vtime(time_t *t) + { +- if (!__sysctl_vsyscall) ++ if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) + return time_syscall(t); + else if (t) +- *t = __xtime.tv_sec; +- return __xtime.tv_sec; ++ *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; ++ return __vsyscall_gtod_data.wall_time_tv.tv_sec; + } + + /* Fast way to get current CPU and node. 
+@@ -210,7 +232,7 @@ static int vsyscall_sysctl_change(ctl_ta + ret = -ENOMEM; + goto out; + } +- if (!sysctl_vsyscall) { ++ if (!vsyscall_gtod_data.sysctl_enabled) { + writew(SYSCALL, map1); + writew(SYSCALL, map2); + } else { +@@ -232,16 +254,17 @@ static int vsyscall_sysctl_nostrat(ctl_t + + static ctl_table kernel_table2[] = { + { .ctl_name = 99, .procname = "vsyscall64", +- .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644, ++ .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), ++ .mode = 0644, + .strategy = vsyscall_sysctl_nostrat, + .proc_handler = vsyscall_sysctl_change }, +- { 0, } ++ {} + }; + + static ctl_table kernel_root_table2[] = { + { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, + .child = kernel_table2 }, +- { 0 }, ++ {} + }; + + #endif +@@ -304,14 +327,14 @@ static int __init vsyscall_init(void) + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); + map_vsyscall(); + #ifdef CONFIG_XEN +- sysctl_vsyscall = 0; /* disable vgettimeofay() */ ++ vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofay() */ + if (boot_cpu_has(X86_FEATURE_RDTSCP)) + vgetcpu_mode = VGETCPU_RDTSCP; + else + vgetcpu_mode = VGETCPU_LSL; + #endif + #ifdef CONFIG_SYSCTL +- register_sysctl_table(kernel_root_table2, 0); ++ register_sysctl_table(kernel_root_table2); + #endif + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); + hotcpu_notifier(cpu_vsyscall_notifier, 0); +--- head-2010-01-18.orig/arch/x86/mm/fault_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/fault_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -56,38 +56,17 @@ int unregister_page_fault_notifier(struc + } + EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + +-static inline int notify_page_fault(enum die_val val, const char *str, +- struct pt_regs *regs, long err, int trap, int sig) ++static inline int notify_page_fault(struct pt_regs *regs, long err) + { + struct die_args args = { + .regs = regs, +- .str = str, ++ .str = "page fault", + .err = err, +- .trapnr = trap, +- .signr = sig ++ .trapnr = 14, ++ .signr = SIGSEGV + }; +- return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); +-} +- +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- if (yes) { +- oops_in_progress = 1; +- } else { +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +- } ++ return atomic_notifier_call_chain(¬ify_page_fault_chain, ++ DIE_PAGE_FAULT, &args); + } + + /* Sometimes the CPU reports invalid exceptions on prefetch. +@@ -437,8 +416,7 @@ asmlinkage void __kprobes do_page_fault( + /* Can take a spurious fault if mapping changes R/O -> R/W. */ + if (spurious_fault(regs, address, error_code)) + return; +- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, +- SIGSEGV) == NOTIFY_STOP) ++ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + /* + * Don't take the mm semaphore here. 
If we fixup a prefetch +@@ -447,8 +425,7 @@ asmlinkage void __kprobes do_page_fault( + goto bad_area_nosemaphore; + } + +- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, +- SIGSEGV) == NOTIFY_STOP) ++ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + + if (likely(regs->eflags & X86_EFLAGS_IF)) +--- head-2010-01-18.orig/arch/x86/mm/init_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -1132,20 +1132,30 @@ int kern_addr_valid(unsigned long addr) + extern int exception_trace, page_fault_trace; + + static ctl_table debug_table2[] = { +- { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, +- proc_dointvec }, +- { 0, } ++ { ++ .ctl_name = 99, ++ .procname = "exception-trace", ++ .data = &exception_trace, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec ++ }, ++ {} + }; + + static ctl_table debug_root_table2[] = { +- { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555, +- .child = debug_table2 }, +- { 0 }, ++ { ++ .ctl_name = CTL_DEBUG, ++ .procname = "debug", ++ .mode = 0555, ++ .child = debug_table2 ++ }, ++ {} + }; + + static __init int x8664_sysctl_init(void) + { +- register_sysctl_table(debug_root_table2, 1); ++ register_sysctl_table(debug_root_table2); + return 0; + } + __initcall(x8664_sysctl_init); +--- head-2010-01-18.orig/arch/x86/mm/pageattr_64-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pageattr_64-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -350,8 +350,8 @@ static void flush_kernel_map(void *arg) + void *adr = page_address(pg); + if (cpu_has_clflush) + cache_flush_page(adr); +- __flush_tlb_one(adr); + } ++ __flush_tlb_all(); + } + + static inline void flush_map(struct list_head *l) +@@ -376,6 +376,7 @@ static void revert_page(unsigned long ad + pud_t *pud; + pmd_t *pmd; + pte_t large_pte; ++ unsigned long pfn; + + pgd = pgd_offset_k(address); + BUG_ON(pgd_none(*pgd)); +@@ -383,7 +384,8 @@ static void revert_page(unsigned long ad + BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, address); + BUG_ON(__pmd_val(*pmd) & _PAGE_PSE); +- large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); ++ pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; ++ large_pte = pfn_pte(pfn, ref_prot); + large_pte = pte_mkhuge(large_pte); + set_pte((pte_t *)pmd, large_pte); + } +--- head-2010-01-18.orig/drivers/acpi/processor_extcntl.c 2009-12-04 10:44:41.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/processor_extcntl.c 2009-11-06 10:46:41.000000000 +0100 +@@ -32,9 +32,8 @@ + + #define ACPI_PROCESSOR_COMPONENT 0x01000000 + #define ACPI_PROCESSOR_CLASS "processor" +-#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver" + #define _COMPONENT ACPI_PROCESSOR_COMPONENT +-ACPI_MODULE_NAME("acpi_processor") ++ACPI_MODULE_NAME("processor_extcntl") + + static int processor_extcntl_parse_csd(struct acpi_processor *pr); + static int processor_extcntl_get_performance(struct acpi_processor *pr); +@@ -56,24 +55,17 @@ static int processor_notify_smm(void) + return 0; + + /* Can't write pstate_cnt to smi_cmd if either value is zero */ +- if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) { ++ if (!acpi_gbl_FADT.smi_command || !acpi_gbl_FADT.pstate_control) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n")); + return 0; + } + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n", +- acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd)); ++ 
acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command)); + +- /* FADT v1 doesn't support pstate_cnt, many BIOS vendors use +- * it anyway, so we need to support it... */ +- if (acpi_fadt_is_v1) { +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, +- "Using v1.0 FADT reserved value for pstate_cnt\n")); +- } +- +- status = acpi_os_write_port(acpi_fadt.smi_cmd, +- (u32) acpi_fadt.pstate_cnt, 8); ++ status = acpi_os_write_port(acpi_gbl_FADT.smi_command, ++ acpi_gbl_FADT.pstate_control, 8); + if (ACPI_FAILURE(status)) + return status; + +--- head-2010-01-18.orig/drivers/char/tpm/tpm_xen.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/char/tpm/tpm_xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -481,7 +481,6 @@ static struct xenbus_device_id tpmfront_ + + static struct xenbus_driver tpmfront = { + .name = "vtpm", +- .owner = THIS_MODULE, + .ids = tpmfront_ids, + .probe = tpmfront_probe, + .remove = tpmfront_remove, +@@ -491,9 +490,9 @@ static struct xenbus_driver tpmfront = { + .suspend_cancel = tpmfront_suspend_cancel, + }; + +-static void __init init_tpm_xenbus(void) ++static int __init init_tpm_xenbus(void) + { +- xenbus_register_frontend(&tpmfront); ++ return xenbus_register_frontend(&tpmfront); + } + + static int tpmif_allocate_tx_buffers(struct tpm_private *tp) +--- head-2010-01-18.orig/drivers/pci/msi-xen.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/drivers/pci/msi-xen.c 2009-12-04 10:54:10.000000000 +0100 +@@ -36,8 +36,6 @@ struct msi_dev_list { + struct list_head list; + spinlock_t pirq_list_lock; + struct list_head pirq_list_head; +- /* Used for saving/restoring MSI-X tables */ +- void __iomem *mask_base; + /* Store default pre-assigned irq */ + unsigned int default_irq; + }; +@@ -46,14 +44,38 @@ struct msi_pirq_entry { + struct list_head list; + int pirq; + int entry_nr; +-#ifdef CONFIG_PM +- /* PM save area for MSIX address/data */ +- u32 address_hi_save; +- u32 address_lo_save; +- u32 data_save; +-#endif + }; + ++static void msi_set_enable(struct pci_dev *dev, int enable) ++{ ++ int pos; ++ u16 control; ++ ++ pos = pci_find_capability(dev, PCI_CAP_ID_MSI); ++ if (pos) { ++ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); ++ control &= ~PCI_MSI_FLAGS_ENABLE; ++ if (enable) ++ control |= PCI_MSI_FLAGS_ENABLE; ++ pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); ++ } ++} ++ ++static void msix_set_enable(struct pci_dev *dev, int enable) ++{ ++ int pos; ++ u16 control; ++ ++ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); ++ if (pos) { ++ pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); ++ control &= ~PCI_MSIX_FLAGS_ENABLE; ++ if (enable) ++ control |= PCI_MSIX_FLAGS_ENABLE; ++ pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); ++ } ++} ++ + static struct msi_dev_list *get_msi_dev_pirq_list(struct pci_dev *dev) + { + struct msi_dev_list *msi_dev_list, *ret = NULL; +@@ -163,8 +185,8 @@ static int msi_unmap_pirq(struct pci_dev + int rc; + + unmap.domid = msi_get_dev_owner(dev); +- /* See comments in msi_map_pirq_to_vector, input parameter pirq +- * mean irq number only if the device belongs to dom0 itself. ++ /* See comments in msi_map_vector, input parameter pirq means ++ * irq number only if the device belongs to dom0 itself. + */ + unmap.pirq = (unmap.domid != DOMID_SELF) + ? 
pirq : evtchn_get_xen_pirq(pirq); +@@ -200,8 +222,7 @@ static u64 find_table_base(struct pci_de + /* + * Protected by msi_lock + */ +-static int msi_map_pirq_to_vector(struct pci_dev *dev, int pirq, +- int entry_nr, u64 table_base) ++static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base) + { + struct physdev_map_pirq map_irq; + int rc; +@@ -212,7 +233,7 @@ static int msi_map_pirq_to_vector(struct + map_irq.domid = domid; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; +- map_irq.pirq = pirq < 0 ? -1 : evtchn_get_xen_pirq(pirq); ++ map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; + map_irq.entry_nr = entry_nr; +@@ -223,7 +244,7 @@ static int msi_map_pirq_to_vector(struct + + if (rc < 0) + return rc; +- /* This happens when MSI support is not enabled in Xen. */ ++ /* This happens when MSI support is not enabled in older Xen. */ + if (rc == 0 && map_irq.pirq < 0) + return -ENOSYS; + +@@ -235,236 +256,35 @@ static int msi_map_pirq_to_vector(struct + * to another domain, and will be 'Linux irq' if it belongs to dom0. + */ + return ((domid != DOMID_SELF) ? +- map_irq.pirq : evtchn_map_pirq(pirq, map_irq.pirq)); +-} +- +-static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base) +-{ +- return msi_map_pirq_to_vector(dev, -1, entry_nr, table_base); ++ map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq)); + } + + static int msi_init(void) + { +- static int status = 0; +- +- if (pci_msi_quirk) { +- pci_msi_enable = 0; +- printk(KERN_WARNING "PCI: MSI quirk detected. MSI disabled.\n"); +- status = -EINVAL; +- } +- +- return status; +-} +- +-void pci_scan_msi_device(struct pci_dev *dev) { } +- +-void disable_msi_mode(struct pci_dev *dev, int pos, int type) +-{ +- u16 control; +- +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- if (type == PCI_CAP_ID_MSI) { +- /* Set enabled bits to single MSI & enable MSI_enable bit */ +- msi_disable(control); +- pci_write_config_word(dev, msi_control_reg(pos), control); +- dev->msi_enabled = 0; +- } else { +- msix_disable(control); +- pci_write_config_word(dev, msi_control_reg(pos), control); +- dev->msix_enabled = 0; +- } +- +- pci_intx(dev, 1); /* enable intx */ +-} +- +-static void enable_msi_mode(struct pci_dev *dev, int pos, int type) +-{ +- u16 control; +- +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- if (type == PCI_CAP_ID_MSI) { +- /* Set enabled bits to single MSI & enable MSI_enable bit */ +- msi_enable(control, 1); +- pci_write_config_word(dev, msi_control_reg(pos), control); +- dev->msi_enabled = 1; +- } else { +- msix_enable(control); +- pci_write_config_word(dev, msi_control_reg(pos), control); +- dev->msix_enabled = 1; +- } +- +- pci_intx(dev, 0); /* disable intx */ +-} +- +-#ifdef CONFIG_PM +-int pci_save_msi_state(struct pci_dev *dev) +-{ +- int pos, i = 0; +- u16 control; +- struct pci_cap_saved_state *save_state; +- u32 *cap; +- +- pos = pci_find_capability(dev, PCI_CAP_ID_MSI); +- if (pos <= 0 || dev->no_msi) +- return 0; +- +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- if (!(control & PCI_MSI_FLAGS_ENABLE)) +- return 0; +- +- save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5, +- GFP_KERNEL); +- if (!save_state) { +- printk(KERN_ERR "Out of memory in pci_save_msi_state\n"); +- return -ENOMEM; +- } +- cap = &save_state->data[0]; +- +- pci_read_config_dword(dev, pos, &cap[i++]); +- control = cap[0] >> 16; +- pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]); +- if (control & 
PCI_MSI_FLAGS_64BIT) { +- pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]); +- pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]); +- } else +- pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]); +- if (control & PCI_MSI_FLAGS_MASKBIT) +- pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]); +- save_state->cap_nr = PCI_CAP_ID_MSI; +- pci_add_saved_cap(dev, save_state); + return 0; + } + ++#ifdef CONFIG_PM + void pci_restore_msi_state(struct pci_dev *dev) + { +- int i = 0, pos; +- u16 control; +- struct pci_cap_saved_state *save_state; +- u32 *cap; +- +- save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI); +- pos = pci_find_capability(dev, PCI_CAP_ID_MSI); +- if (!save_state || pos <= 0) +- return; +- cap = &save_state->data[0]; +- +- control = cap[i++] >> 16; +- pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]); +- if (control & PCI_MSI_FLAGS_64BIT) { +- pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]); +- pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]); +- } else +- pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]); +- if (control & PCI_MSI_FLAGS_MASKBIT) +- pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]); +- pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); +- enable_msi_mode(dev, pos, PCI_CAP_ID_MSI); +- pci_remove_saved_cap(save_state); +- kfree(save_state); +-} +- +-int pci_save_msix_state(struct pci_dev *dev) +-{ +- int pos; +- u16 control; +- struct pci_cap_saved_state *save_state; +- unsigned long flags; +- struct msi_dev_list *msi_dev_entry; +- struct msi_pirq_entry *pirq_entry; +- void __iomem *base; +- +- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); +- if (pos <= 0 || dev->no_msi) +- return 0; +- +- /* save the capability */ +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- if (!(control & PCI_MSIX_FLAGS_ENABLE)) +- return 0; +- +- msi_dev_entry = get_msi_dev_pirq_list(dev); +- /* If we failed to map the MSI-X table at pci_enable_msix, +- * We could not support saving them here. 
+- */ +- if (!(base = msi_dev_entry->mask_base)) +- return -ENOMEM; +- +- save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16), +- GFP_KERNEL); +- if (!save_state) { +- printk(KERN_ERR "Out of memory in pci_save_msix_state\n"); +- return -ENOMEM; +- } +- *((u16 *)&save_state->data[0]) = control; +- +- spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags); +- list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) { +- int j; +- +- /* save the table */ +- j = pirq_entry->entry_nr; +- pirq_entry->address_lo_save = +- readl(base + j * PCI_MSIX_ENTRY_SIZE + +- PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); +- pirq_entry->address_hi_save = +- readl(base + j * PCI_MSIX_ENTRY_SIZE + +- PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); +- pirq_entry->data_save = +- readl(base + j * PCI_MSIX_ENTRY_SIZE + +- PCI_MSIX_ENTRY_DATA_OFFSET); +- } +- spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags); +- +- save_state->cap_nr = PCI_CAP_ID_MSIX; +- pci_add_saved_cap(dev, save_state); +- return 0; +-} +- +-void pci_restore_msix_state(struct pci_dev *dev) +-{ +- u16 save; +- int pos, j; +- void __iomem *base; +- struct pci_cap_saved_state *save_state; +- unsigned long flags; +- struct msi_dev_list *msi_dev_entry; +- struct msi_pirq_entry *pirq_entry; +- +- save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX); +- if (!save_state) +- return; +- +- save = *((u16 *)&save_state->data[0]); +- pci_remove_saved_cap(save_state); +- kfree(save_state); ++ int rc; ++ struct physdev_restore_msi restore; + +- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); +- if (pos <= 0) ++ if (!dev->msi_enabled && !dev->msix_enabled) + return; + +- msi_dev_entry = get_msi_dev_pirq_list(dev); +- base = msi_dev_entry->mask_base; +- +- spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags); +- list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) { +- /* route the table */ +- j = pirq_entry->entry_nr; +- writel(pirq_entry->address_lo_save, +- base + j * PCI_MSIX_ENTRY_SIZE + +- PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); +- writel(pirq_entry->address_hi_save, +- base + j * PCI_MSIX_ENTRY_SIZE + +- PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); +- writel(pirq_entry->data_save, +- base + j * PCI_MSIX_ENTRY_SIZE + +- PCI_MSIX_ENTRY_DATA_OFFSET); +- } +- spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags); ++ pci_intx(dev, 0); /* disable intx */ ++ if (dev->msi_enabled) ++ msi_set_enable(dev, 0); ++ if (dev->msix_enabled) ++ msix_set_enable(dev, 0); + +- pci_write_config_word(dev, msi_control_reg(pos), save); +- enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); ++ restore.bus = dev->bus->number; ++ restore.devfn = dev->devfn; ++ rc = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore); ++ WARN(rc && rc != -ENOSYS, "restore_msi -> %d\n", rc); + } +-#endif ++#endif /* CONFIG_PM */ + + /** + * msi_capability_init - configure device's MSI capability structure +@@ -480,6 +300,8 @@ static int msi_capability_init(struct pc + int pos, pirq; + u16 control; + ++ msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ ++ + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + pci_read_config_word(dev, msi_control_reg(pos), &control); + +@@ -488,7 +310,8 @@ static int msi_capability_init(struct pc + return -EBUSY; + + /* Set MSI enabled bits */ +- enable_msi_mode(dev, pos, PCI_CAP_ID_MSI); ++ pci_intx(dev, 0); /* disable intx */ ++ msi_set_enable(dev, 1); + dev->msi_enabled = 1; + + dev->irq = pirq; +@@ -509,25 +332,20 @@ static int msix_capability_init(struct p + struct msix_entry *entries, int nvec) + { + u64 table_base; 
+- u16 control; +- int pirq, i, j, mapped, pos, nr_entries; ++ int pirq, i, j, mapped, pos; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + struct msi_pirq_entry *pirq_entry; + + if (!msi_dev_entry) + return -ENOMEM; + ++ msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ ++ + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + table_base = find_table_base(dev, pos); + if (!table_base) + return -ENODEV; + +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- nr_entries = multi_msix_capable(control); +- if (!msi_dev_entry->mask_base) +- msi_dev_entry->mask_base = +- ioremap_nocache(table_base, nr_entries * PCI_MSIX_ENTRY_SIZE); +- + /* MSI-X Table Initialization */ + for (i = 0; i < nvec; i++) { + mapped = 0; +@@ -566,7 +384,8 @@ static int msix_capability_init(struct p + return avail; + } + +- enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); ++ pci_intx(dev, 0); /* disable intx */ ++ msix_set_enable(dev, 1); + dev->msix_enabled = 1; + + return 0; +@@ -651,17 +470,14 @@ int pci_enable_msi(struct pci_dev* dev) + /* Check whether driver already requested for MSI-X irqs */ + if (dev->msix_enabled) { + printk(KERN_INFO "PCI: %s: Can't enable MSI. " +- "Device already has MSI-X irq assigned\n", +- pci_name(dev)); +- dev->irq = temp; ++ "Device already has MSI-X enabled\n", ++ pci_name(dev)); + return -EINVAL; + } + + status = msi_capability_init(dev); + if ( !status ) + msi_dev_entry->default_irq = temp; +- else +- dev->irq = temp; + + return status; + } +@@ -669,7 +485,6 @@ int pci_enable_msi(struct pci_dev* dev) + extern void pci_frontend_disable_msi(struct pci_dev* dev); + void pci_disable_msi(struct pci_dev* dev) + { +- int pos; + int pirq; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + +@@ -678,13 +493,11 @@ void pci_disable_msi(struct pci_dev* dev + if (!dev) + return; + ++ if (!dev->msi_enabled) ++ return; ++ + #ifdef CONFIG_XEN_PCIDEV_FRONTEND + if (!is_initial_xendomain()) { +- if (!(dev->msi_enabled)) { +- printk(KERN_INFO "PCI: %s: Device did not enabled MSI.\n", +- pci_name(dev)); +- return; +- } + evtchn_map_pirq(dev->irq, 0); + pci_frontend_disable_msi(dev); + dev->irq = msi_dev_entry->default_irq; +@@ -693,23 +506,15 @@ void pci_disable_msi(struct pci_dev* dev + } + #endif + +- pos = pci_find_capability(dev, PCI_CAP_ID_MSI); +- if (!pos) +- return; +- +- if (!(dev->msi_enabled)) { +- printk(KERN_INFO "PCI: %s: Device did not enabled MSI.\n", +- pci_name(dev)); +- return; +- } +- + pirq = dev->irq; + /* Restore dev->irq to its default pin-assertion vector */ + dev->irq = msi_dev_entry->default_irq; + msi_unmap_pirq(dev, pirq); + + /* Disable MSI mode */ +- disable_msi_mode(dev, pos, PCI_CAP_ID_MSI); ++ msi_set_enable(dev, 0); ++ pci_intx(dev, 1); /* enable intx */ ++ dev->msi_enabled = 0; + } + + /** +@@ -804,7 +609,6 @@ int pci_enable_msix(struct pci_dev* dev, + printk(KERN_INFO "PCI: %s: Can't enable MSI-X. 
" + "Device already has an MSI irq assigned\n", + pci_name(dev)); +- dev->irq = temp; + return -EINVAL; + } + +@@ -812,8 +616,6 @@ int pci_enable_msix(struct pci_dev* dev, + + if ( !status ) + msi_dev_entry->default_irq = temp; +- else +- dev->irq = temp; + + return status; + } +@@ -821,18 +623,13 @@ int pci_enable_msix(struct pci_dev* dev, + extern void pci_frontend_disable_msix(struct pci_dev* dev); + void pci_disable_msix(struct pci_dev* dev) + { +- int pos; +- u16 control; +- + if (!pci_msi_enable) + return; + if (!dev) + return; +- if (!dev->msix_enabled) { +- printk(KERN_INFO "PCI: %s: Device did not enabled MSI-X.\n", +- pci_name(dev)); ++ ++ if (!dev->msix_enabled) + return; +- } + + #ifdef CONFIG_XEN_PCIDEV_FRONTEND + if (!is_initial_xendomain()) { +@@ -855,18 +652,12 @@ void pci_disable_msix(struct pci_dev* de + } + #endif + +- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); +- if (!pos) +- return; +- +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- if (!(control & PCI_MSIX_FLAGS_ENABLE)) +- return; +- + msi_remove_pci_irq_vectors(dev); + + /* Disable MSI mode */ +- disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); ++ msix_set_enable(dev, 0); ++ pci_intx(dev, 1); /* enable intx */ ++ dev->msix_enabled = 0; + } + + /** +@@ -898,8 +689,6 @@ void msi_remove_pci_irq_vectors(struct p + kfree(pirq_entry); + } + spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags); +- iounmap(msi_dev_entry->mask_base); +- msi_dev_entry->mask_base = NULL; + dev->irq = msi_dev_entry->default_irq; + } + +--- head-2010-01-18.orig/drivers/xen/balloon/sysfs.c 2009-06-09 15:01:37.000000000 +0200 ++++ head-2010-01-18/drivers/xen/balloon/sysfs.c 2009-11-06 10:46:41.000000000 +0100 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include "common.h" + + #ifdef HAVE_XEN_PLATFORM_COMPAT_H +--- head-2010-01-18.orig/drivers/xen/blkback/xenbus.c 2008-05-08 14:02:04.000000000 +0200 ++++ head-2010-01-18/drivers/xen/blkback/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -527,7 +527,6 @@ static const struct xenbus_device_id blk + + static struct xenbus_driver blkback = { + .name = "vbd", +- .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, +@@ -537,5 +536,6 @@ static struct xenbus_driver blkback = { + + void blkif_xenbus_init(void) + { +- xenbus_register_backend(&blkback); ++ if (xenbus_register_backend(&blkback)) ++ BUG(); + } +--- head-2010-01-18.orig/drivers/xen/blkfront/blkfront.c 2010-01-18 16:17:45.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkfront/blkfront.c 2010-01-18 16:18:07.000000000 +0100 +@@ -911,7 +911,6 @@ MODULE_ALIAS("xen:vbd"); + + static struct xenbus_driver blkfront = { + .name = "vbd", +- .owner = THIS_MODULE, + .ids = blkfront_ids, + .probe = blkfront_probe, + .remove = blkfront_remove, +--- head-2010-01-18.orig/drivers/xen/blktap/xenbus.c 2008-09-15 13:40:15.000000000 +0200 ++++ head-2010-01-18/drivers/xen/blktap/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -465,7 +465,6 @@ static const struct xenbus_device_id blk + + static struct xenbus_driver blktap = { + .name = "tap", +- .owner = THIS_MODULE, + .ids = blktap_ids, + .probe = blktap_probe, + .remove = blktap_remove, +@@ -475,5 +474,6 @@ static struct xenbus_driver blktap = { + + void tap_blkif_xenbus_init(void) + { +- xenbus_register_backend(&blktap); ++ if (xenbus_register_backend(&blktap)) ++ BUG(); + } +--- head-2010-01-18.orig/drivers/xen/core/evtchn.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/evtchn.c 2009-11-06 
10:46:41.000000000 +0100 +@@ -145,7 +145,7 @@ static void bind_evtchn_to_cpu(unsigned + BUG_ON(!test_bit(chn, s->evtchn_mask)); + + if (irq != -1) +- set_native_irq_info(irq, cpumask_of_cpu(cpu)); ++ irq_desc[irq].affinity = cpumask_of_cpu(cpu); + + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); +@@ -158,7 +158,7 @@ static void init_evtchn_cpu_bindings(voi + + /* By default all event channels notify CPU#0. */ + for (i = 0; i < NR_IRQS; i++) +- set_native_irq_info(i, cpumask_of_cpu(0)); ++ irq_desc[i].affinity = cpumask_of_cpu(0); + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +@@ -736,6 +736,7 @@ static struct irq_chip dynirq_chip = { + .name = "Dynamic", + .startup = startup_dynirq, + .shutdown = mask_dynirq, ++ .disable = mask_dynirq, + .mask = mask_dynirq, + .unmask = unmask_dynirq, + .mask_ack = ack_dynirq, +--- head-2010-01-18.orig/drivers/xen/core/smpboot.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/smpboot.c 2009-11-06 10:46:41.000000000 +0100 +@@ -116,7 +116,7 @@ static int __cpuinit xen_smp_intr_init(u + rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, + cpu, + smp_reschedule_interrupt, +- SA_INTERRUPT, ++ IRQF_DISABLED|IRQF_NOBALANCING, + resched_name[cpu], + NULL); + if (rc < 0) +@@ -127,7 +127,7 @@ static int __cpuinit xen_smp_intr_init(u + rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, + cpu, + smp_call_function_interrupt, +- SA_INTERRUPT, ++ IRQF_DISABLED|IRQF_NOBALANCING, + callfunc_name[cpu], + NULL); + if (rc < 0) +@@ -256,7 +256,7 @@ void __init smp_prepare_cpus(unsigned in + { + unsigned int cpu; + struct task_struct *idle; +- int apicid, acpiid; ++ int apicid; + struct vcpu_get_physid cpu_id; + #ifdef __x86_64__ + struct desc_ptr *gdt_descr; +@@ -265,14 +265,8 @@ void __init smp_prepare_cpus(unsigned in + #endif + + apicid = 0; +- if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) { ++ if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); +- acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); +-#ifdef CONFIG_ACPI +- if (acpiid != 0xff) +- x86_acpiid_to_apicid[acpiid] = apicid; +-#endif +- } + boot_cpu_data.apicid = apicid; + cpu_data[0] = boot_cpu_data; + +@@ -328,14 +322,8 @@ void __init smp_prepare_cpus(unsigned in + XENFEAT_writable_descriptor_tables); + + apicid = cpu; +- if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { ++ if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); +- acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); +-#ifdef CONFIG_ACPI +- if (acpiid != 0xff) +- x86_acpiid_to_apicid[acpiid] = apicid; +-#endif +- } + cpu_data[cpu] = boot_cpu_data; + cpu_data[cpu].apicid = apicid; + +--- head-2010-01-18.orig/drivers/xen/fbfront/xenfb.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/drivers/xen/fbfront/xenfb.c 2009-11-06 10:46:41.000000000 +0100 +@@ -857,7 +857,6 @@ MODULE_ALIAS("xen:vfb"); + + static struct xenbus_driver xenfb_driver = { + .name = "vfb", +- .owner = THIS_MODULE, + .ids = xenfb_ids, + .probe = xenfb_probe, + .remove = xenfb_remove, +--- head-2010-01-18.orig/drivers/xen/fbfront/xenkbd.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/fbfront/xenkbd.c 2009-11-06 10:46:41.000000000 +0100 +@@ -323,7 +323,6 @@ MODULE_ALIAS("xen:vkbd"); + + static struct xenbus_driver xenkbd_driver = { + 
.name = "vkbd", +- .owner = THIS_MODULE, + .ids = xenkbd_ids, + .probe = xenkbd_probe, + .remove = xenkbd_remove, +--- head-2010-01-18.orig/drivers/xen/netback/interface.c 2010-01-04 11:56:34.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netback/interface.c 2010-01-04 12:26:37.000000000 +0100 +@@ -121,7 +121,7 @@ static void netbk_get_drvinfo(struct net + struct ethtool_drvinfo *info) + { + strcpy(info->driver, "netbk"); +- strcpy(info->bus_info, dev->class_dev.dev->bus_id); ++ strcpy(info->bus_info, dev->dev.parent->bus_id); + } + + static const struct netif_stat { +--- head-2010-01-18.orig/drivers/xen/netback/xenbus.c 2009-03-18 10:39:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netback/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -439,7 +439,6 @@ static const struct xenbus_device_id net + + static struct xenbus_driver netback = { + .name = "vif", +- .owner = THIS_MODULE, + .ids = netback_ids, + .probe = netback_probe, + .remove = netback_remove, +@@ -450,5 +449,6 @@ static struct xenbus_driver netback = { + + void netif_xenbus_init(void) + { +- xenbus_register_backend(&netback); ++ if (xenbus_register_backend(&netback)) ++ BUG(); + } +--- head-2010-01-18.orig/drivers/xen/netfront/netfront.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netfront/netfront.c 2009-11-06 10:46:41.000000000 +0100 +@@ -1892,20 +1892,19 @@ static struct ethtool_ops network_ethtoo + }; + + #ifdef CONFIG_SYSFS +-static ssize_t show_rxbuf_min(struct class_device *cd, char *buf) ++static ssize_t show_rxbuf_min(struct device *dev, ++ struct device_attribute *attr, char *buf) + { +- struct net_device *netdev = container_of(cd, struct net_device, +- class_dev); +- struct netfront_info *info = netdev_priv(netdev); ++ struct netfront_info *info = netdev_priv(to_net_dev(dev)); + + return sprintf(buf, "%u\n", info->rx_min_target); + } + +-static ssize_t store_rxbuf_min(struct class_device *cd, ++static ssize_t store_rxbuf_min(struct device *dev, ++ struct device_attribute *attr, + const char *buf, size_t len) + { +- struct net_device *netdev = container_of(cd, struct net_device, +- class_dev); ++ struct net_device *netdev = to_net_dev(dev); + struct netfront_info *np = netdev_priv(netdev); + char *endp; + unsigned long target; +@@ -1935,20 +1934,19 @@ static ssize_t store_rxbuf_min(struct cl + return len; + } + +-static ssize_t show_rxbuf_max(struct class_device *cd, char *buf) ++static ssize_t show_rxbuf_max(struct device *dev, ++ struct device_attribute *attr, char *buf) + { +- struct net_device *netdev = container_of(cd, struct net_device, +- class_dev); +- struct netfront_info *info = netdev_priv(netdev); ++ struct netfront_info *info = netdev_priv(to_net_dev(dev)); + + return sprintf(buf, "%u\n", info->rx_max_target); + } + +-static ssize_t store_rxbuf_max(struct class_device *cd, ++static ssize_t store_rxbuf_max(struct device *dev, ++ struct device_attribute *attr, + const char *buf, size_t len) + { +- struct net_device *netdev = container_of(cd, struct net_device, +- class_dev); ++ struct net_device *netdev = to_net_dev(dev); + struct netfront_info *np = netdev_priv(netdev); + char *endp; + unsigned long target; +@@ -1978,16 +1976,15 @@ static ssize_t store_rxbuf_max(struct cl + return len; + } + +-static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf) ++static ssize_t show_rxbuf_cur(struct device *dev, ++ struct device_attribute *attr, char *buf) + { +- struct net_device *netdev = container_of(cd, struct net_device, +- class_dev); +- struct netfront_info *info = 
netdev_priv(netdev); ++ struct netfront_info *info = netdev_priv(to_net_dev(dev)); + + return sprintf(buf, "%u\n", info->rx_target); + } + +-static const struct class_device_attribute xennet_attrs[] = { ++static struct device_attribute xennet_attrs[] = { + __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), + __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), + __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), +@@ -1999,8 +1996,8 @@ static int xennet_sysfs_addif(struct net + int error = 0; + + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { +- error = class_device_create_file(&netdev->class_dev, +- &xennet_attrs[i]); ++ error = device_create_file(&netdev->dev, ++ &xennet_attrs[i]); + if (error) + goto fail; + } +@@ -2008,8 +2005,7 @@ static int xennet_sysfs_addif(struct net + + fail: + while (--i >= 0) +- class_device_remove_file(&netdev->class_dev, +- &xennet_attrs[i]); ++ device_remove_file(&netdev->dev, &xennet_attrs[i]); + return error; + } + +@@ -2017,10 +2013,8 @@ static void xennet_sysfs_delif(struct ne + { + int i; + +- for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { +- class_device_remove_file(&netdev->class_dev, +- &xennet_attrs[i]); +- } ++ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) ++ device_remove_file(&netdev->dev, &xennet_attrs[i]); + } + + #endif /* CONFIG_SYSFS */ +@@ -2186,7 +2180,6 @@ MODULE_ALIAS("xen:vif"); + + static struct xenbus_driver netfront_driver = { + .name = "vif", +- .owner = THIS_MODULE, + .ids = netfront_ids, + .probe = netfront_probe, + .remove = __devexit_p(netfront_remove), +--- head-2010-01-18.orig/drivers/xen/pciback/xenbus.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pciback/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -683,7 +683,6 @@ static const struct xenbus_device_id xen + + static struct xenbus_driver xenbus_pciback_driver = { + .name = "pciback", +- .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pciback_xenbus_probe, + .remove = pciback_xenbus_remove, +--- head-2010-01-18.orig/drivers/xen/pcifront/xenbus.c 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/drivers/xen/pcifront/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -449,7 +449,6 @@ MODULE_ALIAS("xen:pci"); + + static struct xenbus_driver xenbus_pcifront_driver = { + .name = "pcifront", +- .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pcifront_xenbus_probe, + .remove = pcifront_xenbus_remove, +--- head-2010-01-18.orig/drivers/xen/scsiback/xenbus.c 2009-03-18 10:39:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/scsiback/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -360,7 +360,6 @@ static struct xenbus_device_id scsiback_ + + static struct xenbus_driver scsiback = { + .name = "vscsi", +- .owner = THIS_MODULE, + .ids = scsiback_ids, + .probe = scsiback_probe, + .remove = scsiback_remove, +--- head-2010-01-18.orig/drivers/xen/scsifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200 ++++ head-2010-01-18/drivers/xen/scsifront/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -401,7 +401,6 @@ static struct xenbus_device_id scsifront + + static struct xenbus_driver scsifront_driver = { + .name = "vscsi", +- .owner = THIS_MODULE, + .ids = scsifront_ids, + .probe = scsifront_probe, + .remove = scsifront_remove, +--- head-2010-01-18.orig/drivers/xen/tpmback/common.h 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/tpmback/common.h 2009-11-06 10:46:41.000000000 +0100 +@@ -54,11 +54,11 @@ typedef struct tpmif_st { + + void tpmif_disconnect_complete(tpmif_t * tpmif); + tpmif_t 
*tpmif_find(domid_t domid, struct backend_info *bi); +-void tpmif_interface_init(void); ++int tpmif_interface_init(void); + void tpmif_interface_exit(void); + void tpmif_schedule_work(tpmif_t * tpmif); + void tpmif_deschedule_work(tpmif_t * tpmif); +-void tpmif_xenbus_init(void); ++int tpmif_xenbus_init(void); + void tpmif_xenbus_exit(void); + int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); + irqreturn_t tpmif_be_int(int irq, void *dev_id); +--- head-2010-01-18.orig/drivers/xen/tpmback/interface.c 2010-01-04 12:25:38.000000000 +0100 ++++ head-2010-01-18/drivers/xen/tpmback/interface.c 2010-01-04 12:26:29.000000000 +0100 +@@ -160,13 +160,14 @@ void tpmif_disconnect_complete(tpmif_t * + free_tpmif(tpmif); + } + +-void __init tpmif_interface_init(void) ++int __init tpmif_interface_init(void) + { + tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t), + 0, 0, NULL, NULL); ++ return tpmif_cachep ? 0 : -ENOMEM; + } + +-void __exit tpmif_interface_exit(void) ++void tpmif_interface_exit(void) + { + kmem_cache_destroy(tpmif_cachep); + } +--- head-2010-01-18.orig/drivers/xen/tpmback/tpmback.c 2010-01-04 12:23:26.000000000 +0100 ++++ head-2010-01-18/drivers/xen/tpmback/tpmback.c 2010-01-04 12:26:23.000000000 +0100 +@@ -928,22 +928,30 @@ static int __init tpmback_init(void) + spin_lock_init(&tpm_schedule_list_lock); + INIT_LIST_HEAD(&tpm_schedule_list); + +- tpmif_interface_init(); +- tpmif_xenbus_init(); ++ rc = tpmif_interface_init(); ++ if (!rc) { ++ rc = tpmif_xenbus_init(); ++ if (rc) ++ tpmif_interface_exit(); ++ } ++ if (rc) { ++ misc_deregister(&vtpms_miscdevice); ++ return rc; ++ } + + printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); + + return 0; + } +- + module_init(tpmback_init); + +-void __exit tpmback_exit(void) ++static void __exit tpmback_exit(void) + { + vtpm_release_packets(NULL, 0); + tpmif_xenbus_exit(); + tpmif_interface_exit(); + misc_deregister(&vtpms_miscdevice); + } ++module_exit(tpmback_exit) + + MODULE_LICENSE("Dual BSD/GPL"); +--- head-2010-01-18.orig/drivers/xen/tpmback/xenbus.c 2008-03-06 08:54:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/tpmback/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -270,7 +270,6 @@ static const struct xenbus_device_id tpm + + static struct xenbus_driver tpmback = { + .name = "vtpm", +- .owner = THIS_MODULE, + .ids = tpmback_ids, + .probe = tpmback_probe, + .remove = tpmback_remove, +@@ -278,9 +277,9 @@ static struct xenbus_driver tpmback = { + }; + + +-void tpmif_xenbus_init(void) ++int tpmif_xenbus_init(void) + { +- xenbus_register_backend(&tpmback); ++ return xenbus_register_backend(&tpmback); + } + + void tpmif_xenbus_exit(void) +--- head-2010-01-18.orig/drivers/xen/usbback/xenbus.c 2009-11-06 10:23:23.000000000 +0100 ++++ head-2010-01-18/drivers/xen/usbback/xenbus.c 2009-11-06 10:49:41.000000000 +0100 +@@ -320,7 +320,6 @@ static const struct xenbus_device_id usb + + static struct xenbus_driver usbback_driver = { + .name = "vusb", +- .owner = THIS_MODULE, + .ids = usbback_ids, + .probe = usbback_probe, + .otherend_changed = frontend_changed, +--- head-2010-01-18.orig/drivers/xen/usbfront/xenbus.c 2009-10-15 11:45:41.000000000 +0200 ++++ head-2010-01-18/drivers/xen/usbfront/xenbus.c 2009-11-06 10:46:41.000000000 +0100 +@@ -376,7 +376,6 @@ static const struct xenbus_device_id usb + + static struct xenbus_driver usbfront_driver = { + .name = "vusb", +- .owner = THIS_MODULE, + .ids = usbfront_ids, + .probe = usbfront_probe, + .otherend_changed = backend_changed, +--- 
head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe.c 2009-12-04 10:51:20.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe.c 2009-12-04 10:52:26.000000000 +0100 +@@ -375,7 +375,9 @@ static void xenbus_dev_shutdown(struct d + } + + int xenbus_register_driver_common(struct xenbus_driver *drv, +- struct xen_bus_type *bus) ++ struct xen_bus_type *bus, ++ struct module *owner, ++ const char *mod_name) + { + int ret; + +@@ -385,7 +387,10 @@ int xenbus_register_driver_common(struct + drv->driver.name = drv->name; + drv->driver.bus = &bus->bus; + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) +- drv->driver.owner = drv->owner; ++ drv->driver.owner = owner; ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) ++ drv->driver.mod_name = mod_name; + #endif + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + drv->driver.probe = xenbus_dev_probe; +@@ -399,13 +404,15 @@ int xenbus_register_driver_common(struct + return ret; + } + +-int xenbus_register_frontend(struct xenbus_driver *drv) ++int __xenbus_register_frontend(struct xenbus_driver *drv, ++ struct module *owner, const char *mod_name) + { + int ret; + + drv->read_otherend_details = read_backend_details; + +- ret = xenbus_register_driver_common(drv, &xenbus_frontend); ++ ret = xenbus_register_driver_common(drv, &xenbus_frontend, ++ owner, mod_name); + if (ret) + return ret; + +@@ -414,7 +421,7 @@ int xenbus_register_frontend(struct xenb + + return 0; + } +-EXPORT_SYMBOL_GPL(xenbus_register_frontend); ++EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + + void xenbus_unregister_driver(struct xenbus_driver *drv) + { +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe.h 2009-12-04 10:44:50.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe.h 2009-11-06 10:46:41.000000000 +0100 +@@ -69,7 +69,9 @@ extern int xenbus_match(struct device *_ + extern int xenbus_dev_probe(struct device *_dev); + extern int xenbus_dev_remove(struct device *_dev); + extern int xenbus_register_driver_common(struct xenbus_driver *drv, +- struct xen_bus_type *bus); ++ struct xen_bus_type *bus, ++ struct module *owner, ++ const char *mod_name); + extern int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename); +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2008-01-21 11:15:26.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe_backend.c 2009-11-06 10:46:41.000000000 +0100 +@@ -172,13 +172,15 @@ static int xenbus_uevent_backend(struct + return 0; + } + +-int xenbus_register_backend(struct xenbus_driver *drv) ++int __xenbus_register_backend(struct xenbus_driver *drv, ++ struct module *owner, const char *mod_name) + { + drv->read_otherend_details = read_frontend_details; + +- return xenbus_register_driver_common(drv, &xenbus_backend); ++ return xenbus_register_driver_common(drv, &xenbus_backend, ++ owner, mod_name); + } +-EXPORT_SYMBOL_GPL(xenbus_register_backend); ++EXPORT_SYMBOL_GPL(__xenbus_register_backend); + + /* backend/// */ + static int xenbus_probe_backend_unit(const char *dir, +--- head-2010-01-18.orig/drivers/xen/xenoprof/xenoprofile.c 2010-01-07 09:57:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenoprof/xenoprofile.c 2010-01-07 09:58:08.000000000 +0100 +@@ -235,7 +235,7 @@ static int bind_virq(void) + result = bind_virq_to_irqhandler(VIRQ_XENOPROF, + i, + xenoprof_ovf_interrupt, +- SA_INTERRUPT, ++ IRQF_DISABLED|IRQF_NOBALANCING, + "xenoprof", + NULL); + +--- head-2010-01-18.orig/arch/x86/include/asm/i8253.h 2010-01-18 15:20:21.000000000 
+0100 ++++ head-2010-01-18/arch/x86/include/asm/i8253.h 2009-11-06 10:46:41.000000000 +0100 +@@ -8,10 +8,14 @@ + + extern spinlock_t i8253_lock; + ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++ + extern struct clock_event_device *global_clock_event; + + extern void setup_pit_timer(void); + ++#endif ++ + #define inb_pit inb_p + #define outb_pit outb_p + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/desc_32.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/desc_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -21,7 +21,7 @@ struct Xgt_desc_struct { + + extern struct Xgt_desc_struct idt_descr; + DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); +- ++extern struct Xgt_desc_struct early_gdt_descr; + + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) + { +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-06 10:46:41.000000000 +0100 +@@ -160,6 +160,19 @@ static inline void arch_leave_lazy_mmu_m + #define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu)) + #endif + ++#if 0 /* All uses are in places potentially called asynchronously, but ++ * asynchronous code should rather not make use of lazy mode at all. ++ * Therefore, all uses of this function get commented out, proper ++ * detection of asynchronous invocations is added wherever needed, ++ * and this function is disabled to catch any new (improper) uses. ++ */ ++static inline void arch_flush_lazy_mmu_mode(void) ++{ ++ if (arch_use_lazy_mmu_mode()) ++ xen_multicall_flush(false); ++} ++#endif ++ + #else /* !CONFIG_XEN || MODULE */ + + static inline void xen_multicall_flush(bool ignore) {} +@@ -217,7 +230,7 @@ HYPERVISOR_block( + return rc; + } + +-static inline void /*__noreturn*/ ++static inline void __noreturn + HYPERVISOR_shutdown( + unsigned int reason) + { +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irqflags_32.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irqflags_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -108,7 +108,7 @@ sysexit_scrit: /**** START OF SYSEXIT CR + sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ + mov $__KERNEL_PDA, %ecx ; \ + push %esp ; \ +- mov %ecx, %gs ; \ ++ mov %ecx, %fs ; \ + call evtchn_do_upcall ; \ + add $4,%esp ; \ + jmp ret_from_intr +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -27,13 +27,13 @@ static inline void enter_lazy_tlb(struct + static inline void __prepare_arch_switch(void) + { + /* +- * Save away %fs. No need to save %gs, as it was saved on the ++ * Save away %gs. No need to save %fs, as it was saved on the + * stack on entry. No need to save %es and %ds, as those are + * always kernel segments while inside the kernel. 
+ */ +- asm volatile ( "mov %%fs,%0" +- : "=m" (current->thread.fs)); +- asm volatile ( "movl %0,%%fs" ++ asm volatile ( "mov %%gs,%0" ++ : "=m" (current->thread.gs)); ++ asm volatile ( "movl %0,%%gs" + : : "r" (0) ); + } + +@@ -95,7 +95,7 @@ static inline void switch_mm(struct mm_s + } + + #define deactivate_mm(tsk, mm) \ +- asm("movl %0,%%fs": :"r" (0)); ++ asm("movl %0,%%gs": :"r" (0)); + + static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) + { +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgalloc_32.h 2008-07-21 11:00:33.000000000 +0200 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgalloc_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -6,12 +6,23 @@ + #include /* for struct page */ + #include /* for phys_to_virt and page_to_pseudophys */ + +-#define pmd_populate_kernel(mm, pmd, pte) \ +- set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) ++#define paravirt_alloc_pt(pfn) do { } while (0) ++#define paravirt_alloc_pd(pfn) do { } while (0) ++#define paravirt_alloc_pd(pfn) do { } while (0) ++#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) ++#define paravirt_release_pt(pfn) do { } while (0) ++#define paravirt_release_pd(pfn) do { } while (0) ++ ++#define pmd_populate_kernel(mm, pmd, pte) \ ++do { \ ++ paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ ++ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ ++} while (0) + + #define pmd_populate(mm, pmd, pte) \ + do { \ + unsigned long pfn = page_to_pfn(pte); \ ++ paravirt_alloc_pt(pfn); \ + if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \ + if (!PageHighMem(pte)) \ + BUG_ON(HYPERVISOR_update_va_mapping( \ +@@ -42,7 +53,11 @@ static inline void pte_free_kernel(pte_t + + extern void pte_free(struct page *pte); + +-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) ++#define __pte_free_tlb(tlb,pte) \ ++do { \ ++ paravirt_release_pt(page_to_pfn(pte)); \ ++ tlb_remove_page((tlb),(pte)); \ ++} while (0) + + #ifdef CONFIG_X86_PAE + /* +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -275,6 +275,7 @@ static inline pte_t pte_mkhuge(pte_t pte + */ + #define pte_update(mm, addr, ptep) do { } while (0) + #define pte_update_defer(mm, addr, ptep) do { } while (0) ++#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) + + /* + * We only update the dirty/accessed state if we set +@@ -490,12 +491,24 @@ extern pte_t *lookup_address(unsigned lo + #endif + + #if defined(CONFIG_HIGHPTE) +-#define pte_offset_map(dir, address) \ +- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ +- pte_index(address)) +-#define pte_offset_map_nested(dir, address) \ +- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \ +- pte_index(address)) ++#define pte_offset_map(dir, address) \ ++({ \ ++ pte_t *__ptep; \ ++ unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ ++ __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ ++ paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ ++ __ptep = __ptep + pte_index(address); \ ++ __ptep; \ ++}) ++#define pte_offset_map_nested(dir, address) \ ++({ \ ++ pte_t *__ptep; \ ++ unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ ++ __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ ++ paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ ++ __ptep = __ptep + pte_index(address); \ ++ __ptep; \ ++}) + #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) + #define pte_unmap_nested(pte) kunmap_atomic(pte, 
KM_PTE1) + #else +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/processor_32.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/processor_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -431,7 +431,7 @@ struct thread_struct { + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ +- .gs = __KERNEL_PDA, \ ++ .fs = __KERNEL_PDA, \ + } + + /* +@@ -449,8 +449,8 @@ struct thread_struct { + } + + #define start_thread(regs, new_eip, new_esp) do { \ +- __asm__("movl %0,%%fs": :"r" (0)); \ +- regs->xgs = 0; \ ++ __asm__("movl %0,%%gs": :"r" (0)); \ ++ regs->xfs = 0; \ + set_fs(USER_DS); \ + regs->xds = __USER_DS; \ + regs->xes = __USER_DS; \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp_32.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp_32.h 2009-11-06 10:46:41.000000000 +0100 +@@ -52,6 +52,11 @@ extern void cpu_exit_clear(void); + extern void cpu_uninit(void); + #endif + ++#ifndef CONFIG_PARAVIRT ++#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ ++do { } while (0) ++#endif ++ + /* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:46:41.000000000 +0100 +@@ -416,15 +416,6 @@ static inline int pmd_large(pmd_t pte) { + #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + #define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) + +-/* physical address -> PTE */ +-static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) +-{ +- unsigned long pteval; +- pteval = physpage | pgprot_val(pgprot); +- pteval &= __supported_pte_mask; +- return __pte(pteval); +-} +- + /* Change flags of a PTE */ + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) + { +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp_64.h 2009-11-06 10:46:27.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp_64.h 2009-11-06 10:46:41.000000000 +0100 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + extern int disable_apic; + + #ifdef CONFIG_X86_LOCAL_APIC +@@ -73,7 +74,7 @@ extern int __cpu_disable(void); + extern void __cpu_die(unsigned int cpu); + extern void prefill_possible_map(void); + extern unsigned num_processors; +-extern unsigned disabled_cpus; ++extern unsigned __cpuinitdata disabled_cpus; + + #define NO_PROC_ID 0xFF /* No processor magic marker */ + +--- head-2010-01-18.orig/include/xen/xenbus.h 2009-12-04 10:44:50.000000000 +0100 ++++ head-2010-01-18/include/xen/xenbus.h 2009-11-06 10:46:41.000000000 +0100 +@@ -93,8 +93,7 @@ struct xenbus_device_id + + /* A xenbus driver. 
*/ + struct xenbus_driver { +- char *name; +- struct module *owner; ++ const char *name; + const struct xenbus_device_id *ids; + int (*probe)(struct xenbus_device *dev, + const struct xenbus_device_id *id); +@@ -115,8 +114,25 @@ static inline struct xenbus_driver *to_x + return container_of(drv, struct xenbus_driver, driver); + } + +-int xenbus_register_frontend(struct xenbus_driver *drv); +-int xenbus_register_backend(struct xenbus_driver *drv); ++int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, ++ struct module *owner, ++ const char *mod_name); ++ ++static inline int __must_check ++xenbus_register_frontend(struct xenbus_driver *drv) ++{ ++ return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); ++} ++ ++int __must_check __xenbus_register_backend(struct xenbus_driver *drv, ++ struct module *owner, ++ const char *mod_name); ++static inline int __must_check ++xenbus_register_backend(struct xenbus_driver *drv) ++{ ++ return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); ++} ++ + void xenbus_unregister_driver(struct xenbus_driver *drv); + + struct xenbus_transaction +--- head-2010-01-18.orig/lib/swiotlb-xen.c 2009-04-07 13:58:49.000000000 +0200 ++++ head-2010-01-18/lib/swiotlb-xen.c 2009-11-06 10:46:41.000000000 +0100 +@@ -135,8 +135,8 @@ __setup("swiotlb=", setup_io_tlb_npages) + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the PCI DMA API. + */ +-void +-swiotlb_init_with_default_size (size_t default_size) ++void __init ++swiotlb_init_with_default_size(size_t default_size) + { + unsigned long i, bytes; + int rc; +@@ -221,7 +221,7 @@ swiotlb_init_with_default_size (size_t d + dma_bits); + } + +-void ++void __init + swiotlb_init(void) + { + long ram_end; +@@ -457,8 +457,8 @@ swiotlb_full(struct device *dev, size_t + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ +- printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " +- "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); ++ printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at " ++ "device %s\n", size, dev ? 
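The xenbus.h hunk above is the linchpin of the ".owner = THIS_MODULE" removals earlier in this series: registration becomes a static inline wrapper that expands inside each driver, so THIS_MODULE and KBUILD_MODNAME resolve to the driver's own module and the bus core can fill in driver.owner/driver.mod_name on the driver's behalf. A minimal standalone sketch of the capture pattern follows; the bus_driver/__bus_register names are invented for illustration and are not the actual xenbus API.

#include <linux/module.h>

/* Illustrative driver type; the core's out-of-line worker stores
 * owner/mod_name in the underlying struct device_driver before
 * registering it on the bus. */
struct bus_driver {
	const char *name;
	struct module *owner;	/* filled in by the core, not the driver */
	const char *mod_name;
};

int __must_check __bus_register(struct bus_driver *drv,
				struct module *owner, const char *mod_name);

/* Expanded in the driver's own translation unit, so THIS_MODULE and
 * KBUILD_MODNAME name the driver module, not the bus core. */
static inline int __must_check bus_register_driver(struct bus_driver *drv)
{
	return __bus_register(drv, THIS_MODULE, KBUILD_MODNAME);
}

An exported, out-of-line registration function could only ever capture the core's own module; the inline wrapper is what makes the per-driver .owner field redundant.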
dev->bus_id : "?"); + + if (size > io_tlb_overflow && do_panic) { + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) +@@ -602,7 +602,7 @@ swiotlb_map_sg(struct device *hwdev, str + sg[0].dma_length = 0; + return 0; + } +- sg->dma_address = (dma_addr_t)virt_to_bus(map); ++ sg->dma_address = virt_to_bus(map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; +@@ -624,8 +624,7 @@ swiotlb_unmap_sg(struct device *hwdev, s + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) +- unmap_single(hwdev, +- (void *)bus_to_virt(sg->dma_address), ++ unmap_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + else + gnttab_dma_unmap_page(sg->dma_address); +@@ -648,8 +647,7 @@ swiotlb_sync_sg_for_cpu(struct device *h + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) +- sync_single(hwdev, +- (void *)bus_to_virt(sg->dma_address), ++ sync_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } + +@@ -663,8 +661,7 @@ swiotlb_sync_sg_for_device(struct device + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) +- sync_single(hwdev, +- (void *)bus_to_virt(sg->dma_address), ++ sync_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.27-28 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.27-28 @@ -0,0 +1,51 @@ +From: Jiri Slaby +Subject: Linux 2.6.32.28 +Patch-mainline: 2.6.32.28 + +Signed-off-by: Jiri Slaby +Automatically created from "patches.kernel.org/patch-2.6.32.27-28" by xen-port-patches.py + +--- sle11sp1-2011-01-14.orig/arch/x86/include/mach-xen/asm/processor.h 2010-03-17 14:37:31.000000000 +0100 ++++ sle11sp1-2011-01-14/arch/x86/include/mach-xen/asm/processor.h 2011-01-14 12:47:12.000000000 +0100 +@@ -710,31 +710,6 @@ extern unsigned long boot_option_idle_o + extern unsigned long idle_halt; + extern unsigned long idle_nomwait; + +-#ifndef CONFIG_XEN +-/* +- * on systems with caches, caches must be flashed as the absolute +- * last instruction before going into a suspended halt. Otherwise, +- * dirty data can linger in the cache and become stale on resume, +- * leading to strange errors. +- * +- * perform a variety of operations to guarantee that the compiler +- * will not reorder instructions. wbinvd itself is serializing +- * so the processor will not reorder. +- * +- * Systems without cache can just go into halt. 
+- */ +-static inline void wbinvd_halt(void) +-{ +- mb(); +- /* check for clflush to determine if wbinvd is legal */ +- if (cpu_has_clflush) +- asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory"); +- else +- while (1) +- halt(); +-} +-#endif +- + extern void enable_sep_cpu(void); + extern int sysenter_setup(void); + +--- sle11sp1-2011-01-14.orig/arch/x86/kernel/apic/io_apic-xen.c 2010-11-04 13:20:41.000000000 +0100 ++++ sle11sp1-2011-01-14/arch/x86/kernel/apic/io_apic-xen.c 2011-01-14 12:46:27.000000000 +0100 +@@ -3678,6 +3678,7 @@ static int dmar_msi_set_affinity(unsigne + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-sysdev-suspend +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-sysdev-suspend @@ -0,0 +1,506 @@ +From: jbeulich@novell.com +Subject: use base kernel suspend/resume infrastructure +Patch-mainline: obsolete + +... rather than calling just a few functions explicitly. + +--- head-2009-12-16.orig/arch/x86/kernel/time-xen.c 2009-11-23 10:44:50.000000000 +0100 ++++ head-2009-12-16/arch/x86/kernel/time-xen.c 2009-11-23 10:48:29.000000000 +0100 +@@ -70,6 +70,10 @@ static DEFINE_PER_CPU(struct vcpu_runsta + /* Must be signed, as it's compared with s64 quantities which can be -ve. */ + #define NS_PER_TICK (1000000000LL/HZ) + ++static struct vcpu_set_periodic_timer xen_set_periodic_tick = { ++ .period_ns = NS_PER_TICK ++}; ++ + static void __clock_was_set(struct work_struct *unused) + { + clock_was_set(); +@@ -550,6 +554,17 @@ void mark_tsc_unstable(char *reason) + } + EXPORT_SYMBOL_GPL(mark_tsc_unstable); + ++static void init_missing_ticks_accounting(unsigned int cpu) ++{ ++ struct vcpu_runstate_info *runstate = setup_runstate_area(cpu); ++ ++ per_cpu(processed_blocked_time, cpu) = ++ runstate->time[RUNSTATE_blocked]; ++ per_cpu(processed_stolen_time, cpu) = ++ runstate->time[RUNSTATE_runnable] + ++ runstate->time[RUNSTATE_offline]; ++} ++ + static cycle_t cs_last; + + static cycle_t xen_clocksource_read(struct clocksource *cs) +@@ -586,11 +601,32 @@ static cycle_t xen_clocksource_read(stru + #endif + } + ++/* No locking required. Interrupts are disabled on all CPUs. 
*/ + static void xen_clocksource_resume(void) + { +- extern void time_resume(void); ++ unsigned int cpu; ++ ++ init_cpu_khz(); ++ ++ for_each_online_cpu(cpu) { ++ switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu, ++ &xen_set_periodic_tick)) { ++ case 0: ++#if CONFIG_XEN_COMPAT <= 0x030004 ++ case -ENOSYS: ++#endif ++ break; ++ default: ++ BUG(); ++ } ++ get_time_values_from_xen(cpu); ++ per_cpu(processed_system_time, cpu) = ++ per_cpu(shadow_time, 0).system_timestamp; ++ init_missing_ticks_accounting(cpu); ++ } ++ ++ processed_system_time = per_cpu(shadow_time, 0).system_timestamp; + +- time_resume(); + cs_last = local_clock(); + } + +@@ -622,17 +658,6 @@ struct vcpu_runstate_info *setup_runstat + return runstate; + } + +-static void init_missing_ticks_accounting(unsigned int cpu) +-{ +- struct vcpu_runstate_info *runstate = setup_runstate_area(cpu); +- +- per_cpu(processed_blocked_time, cpu) = +- runstate->time[RUNSTATE_blocked]; +- per_cpu(processed_stolen_time, cpu) = +- runstate->time[RUNSTATE_runnable] + +- runstate->time[RUNSTATE_offline]; +-} +- + void xen_read_persistent_clock(struct timespec *ts) + { + const shared_info_t *s = HYPERVISOR_shared_info; +@@ -678,10 +703,6 @@ static void __init setup_cpu0_timer_irq( + BUG_ON(per_cpu(timer_irq, 0) < 0); + } + +-static struct vcpu_set_periodic_timer xen_set_periodic_tick = { +- .period_ns = NS_PER_TICK +-}; +- + void __init time_init(void) + { + init_cpu_khz(); +@@ -806,35 +827,6 @@ void xen_halt(void) + } + EXPORT_SYMBOL(xen_halt); + +-/* No locking required. Interrupts are disabled on all CPUs. */ +-void time_resume(void) +-{ +- unsigned int cpu; +- +- init_cpu_khz(); +- +- for_each_online_cpu(cpu) { +- switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu, +- &xen_set_periodic_tick)) { +- case 0: +-#if CONFIG_XEN_COMPAT <= 0x030004 +- case -ENOSYS: +-#endif +- break; +- default: +- BUG(); +- } +- get_time_values_from_xen(cpu); +- per_cpu(processed_system_time, cpu) = +- per_cpu(shadow_time, 0).system_timestamp; +- init_missing_ticks_accounting(cpu); +- } +- +- processed_system_time = per_cpu(shadow_time, 0).system_timestamp; +- +- update_wallclock(); +-} +- + #ifdef CONFIG_SMP + static char timer_name[NR_CPUS][15]; + +--- head-2009-12-16.orig/drivers/xen/core/evtchn.c 2009-11-06 11:04:38.000000000 +0100 ++++ head-2009-12-16/drivers/xen/core/evtchn.c 2009-11-06 11:10:15.000000000 +0100 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1109,10 +1110,21 @@ static void restore_cpu_ipis(unsigned in + } + } + +-void irq_resume(void) ++static int evtchn_resume(struct sys_device *dev) + { + unsigned int cpu, irq, evtchn; + struct irq_cfg *cfg; ++ struct evtchn_status status; ++ ++ /* Avoid doing anything in the 'suspend cancelled' case. 
*/ ++ status.dom = DOMID_SELF; ++ status.port = evtchn_from_irq(percpu_read(virq_to_irq[VIRQ_TIMER])); ++ if (HYPERVISOR_event_channel_op(EVTCHNOP_status, &status)) ++ BUG(); ++ if (status.status == EVTCHNSTAT_virq ++ && status.vcpu == smp_processor_id() ++ && status.u.virq == VIRQ_TIMER) ++ return 0; + + init_evtchn_cpu_bindings(); + +@@ -1148,7 +1160,32 @@ void irq_resume(void) + restore_cpu_ipis(cpu); + } + ++ return 0; ++} ++ ++static struct sysdev_class evtchn_sysclass = { ++ .name = "evtchn", ++ .resume = evtchn_resume, ++}; ++ ++static struct sys_device device_evtchn = { ++ .id = 0, ++ .cls = &evtchn_sysclass, ++}; ++ ++static int __init evtchn_register(void) ++{ ++ int err; ++ ++ if (is_initial_xendomain()) ++ return 0; ++ ++ err = sysdev_class_register(&evtchn_sysclass); ++ if (!err) ++ err = sysdev_register(&device_evtchn); ++ return err; + } ++core_initcall(evtchn_register); + #endif + + int __init arch_early_irq_init(void) +--- head-2009-12-16.orig/drivers/xen/core/gnttab.c 2009-12-15 09:24:56.000000000 +0100 ++++ head-2009-12-16/drivers/xen/core/gnttab.c 2009-12-15 09:28:00.000000000 +0100 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -707,23 +708,37 @@ EXPORT_SYMBOL(gnttab_post_map_adjust); + + #endif /* __HAVE_ARCH_PTE_SPECIAL */ + +-int gnttab_resume(void) ++static int gnttab_resume(struct sys_device *dev) + { + if (max_nr_grant_frames() < nr_grant_frames) + return -ENOSYS; + return gnttab_map(0, nr_grant_frames - 1); + } ++#define gnttab_resume() gnttab_resume(NULL) + + #ifdef CONFIG_PM_SLEEP +-int gnttab_suspend(void) +-{ + #ifdef CONFIG_X86 ++static int gnttab_suspend(struct sys_device *dev, pm_message_t state) ++{ + apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_grant_frames, + unmap_pte_fn, NULL); +-#endif + return 0; + } ++#else ++#define gnttab_suspend NULL ++#endif ++ ++static struct sysdev_class gnttab_sysclass = { ++ .name = "gnttab", ++ .resume = gnttab_resume, ++ .suspend = gnttab_suspend, ++}; ++ ++static struct sys_device device_gnttab = { ++ .id = 0, ++ .cls = &gnttab_sysclass, ++}; + #endif + + #else /* !CONFIG_XEN */ +@@ -803,6 +818,17 @@ int __devinit gnttab_init(void) + if (!is_running_on_xen()) + return -ENODEV; + ++#if defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP) ++ if (!is_initial_xendomain()) { ++ int err = sysdev_class_register(&gnttab_sysclass); ++ ++ if (!err) ++ err = sysdev_register(&device_gnttab); ++ if (err) ++ return err; ++ } ++#endif ++ + nr_grant_frames = 1; + boot_max_nr_grant_frames = __max_nr_grant_frames(); + +--- head-2009-12-16.orig/drivers/xen/core/machine_reboot.c 2009-12-18 13:34:27.000000000 +0100 ++++ head-2009-12-16/drivers/xen/core/machine_reboot.c 2009-12-18 14:19:13.000000000 +0100 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include "../../base/base.h" + + #if defined(__i386__) || defined(__x86_64__) + #include +@@ -145,47 +146,28 @@ struct suspend { + static int take_machine_down(void *_suspend) + { + struct suspend *suspend = _suspend; +- int suspend_cancelled, err; +- extern void time_resume(void); ++ int suspend_cancelled; + +- if (suspend->fast_suspend) { +- BUG_ON(!irqs_disabled()); +- } else { +- BUG_ON(irqs_disabled()); +- +- for (;;) { +- err = smp_suspend(); +- if (err) +- return err; +- +- xenbus_suspend(); +- preempt_disable(); +- +- if (num_online_cpus() == 1) +- break; +- +- preempt_enable(); +- xenbus_suspend_cancel(); +- } +- +- local_irq_disable(); +- } ++ BUG_ON(!irqs_disabled()); + + mm_pin_all(); +- 
gnttab_suspend(); +- pre_suspend(); +- +- /* +- * This hypercall returns 1 if suspend was cancelled or the domain was +- * merely checkpointed, and 0 if it is resuming in a new domain. +- */ +- suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); ++ suspend_cancelled = sysdev_suspend(PMSG_SUSPEND); ++ if (!suspend_cancelled) { ++ pre_suspend(); + ++ /* ++ * This hypercall returns 1 if suspend was cancelled or the domain was ++ * merely checkpointed, and 0 if it is resuming in a new domain. ++ */ ++ suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); ++ } else ++ BUG_ON(suspend_cancelled > 0); + suspend->resume_notifier(suspend_cancelled); +- post_suspend(suspend_cancelled); +- gnttab_resume(); ++ if (suspend_cancelled >= 0) { ++ post_suspend(suspend_cancelled); ++ sysdev_resume(); ++ } + if (!suspend_cancelled) { +- irq_resume(); + #ifdef __x86_64__ + /* + * Older versions of Xen do not save/restore the user %cr3. +@@ -197,10 +179,6 @@ static int take_machine_down(void *_susp + current->active_mm->pgd))); + #endif + } +- time_resume(); +- +- if (!suspend->fast_suspend) +- local_irq_enable(); + + return suspend_cancelled; + } +@@ -208,8 +186,14 @@ static int take_machine_down(void *_susp + int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)) + { + int err, suspend_cancelled; ++ const char *what; + struct suspend suspend; + ++#define _check(fn, args...) ({ \ ++ what = #fn; \ ++ err = (fn)(args); \ ++}) ++ + BUG_ON(smp_processor_id() != 0); + BUG_ON(in_interrupt()); + +@@ -225,41 +209,91 @@ int __xen_suspend(int fast_suspend, void + if (num_possible_cpus() == 1) + fast_suspend = 0; + +- if (fast_suspend) { +- err = stop_machine_create(); +- if (err) +- return err; ++ if (fast_suspend && _check(stop_machine_create)) { ++ printk(KERN_ERR "%s() failed: %d\n", what, err); ++ return err; + } + + suspend.fast_suspend = fast_suspend; + suspend.resume_notifier = resume_notifier; + ++ if (_check(dpm_suspend_start, PMSG_SUSPEND)) { ++ if (fast_suspend) ++ stop_machine_destroy(); ++ printk(KERN_ERR "%s() failed: %d\n", what, err); ++ return err; ++ } ++ + if (fast_suspend) { + xenbus_suspend(); ++ ++ if (_check(dpm_suspend_noirq, PMSG_SUSPEND)) { ++ xenbus_suspend_cancel(); ++ dpm_resume_end(PMSG_RESUME); ++ stop_machine_destroy(); ++ printk(KERN_ERR "%s() failed: %d\n", what, err); ++ return err; ++ } ++ + err = stop_machine(take_machine_down, &suspend, + &cpumask_of_cpu(0)); + if (err < 0) + xenbus_suspend_cancel(); + } else { ++ BUG_ON(irqs_disabled()); ++ ++ for (;;) { ++ xenbus_suspend(); ++ ++ if (!_check(dpm_suspend_noirq, PMSG_SUSPEND) ++ && _check(smp_suspend)) ++ dpm_resume_noirq(PMSG_RESUME); ++ if (err) { ++ xenbus_suspend_cancel(); ++ dpm_resume_end(PMSG_RESUME); ++ printk(KERN_ERR "%s() failed: %d\n", ++ what, err); ++ return err; ++ } ++ ++ preempt_disable(); ++ ++ if (num_online_cpus() == 1) ++ break; ++ ++ preempt_enable(); ++ ++ dpm_resume_noirq(PMSG_RESUME); ++ ++ xenbus_suspend_cancel(); ++ } ++ ++ local_irq_disable(); + err = take_machine_down(&suspend); ++ local_irq_enable(); + } + +- if (err < 0) +- return err; ++ dpm_resume_noirq(PMSG_RESUME); + +- suspend_cancelled = err; +- if (!suspend_cancelled) { +- xencons_resume(); +- xenbus_resume(); +- } else { +- xenbus_suspend_cancel(); ++ if (err >= 0) { ++ suspend_cancelled = err; ++ if (!suspend_cancelled) { ++ xencons_resume(); ++ xenbus_resume(); ++ } else { ++ xenbus_suspend_cancel(); ++ err = 0; ++ } ++ ++ if (!fast_suspend) ++ smp_resume(); + } + +- if (!fast_suspend) +- 
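The machine_reboot.c rewrite above replaces hand-ordered calls (time_resume(), irq_resume(), gnttab_suspend()/gnttab_resume()) with the base kernel's suspend machinery: dpm_suspend_start()/dpm_suspend_noirq() before the hypercall, sysdev_suspend()/sysdev_resume() around it, and the resume counterparts afterwards, with evtchn and gnttab hooking in as sysdev classes. As a reference point, here is a minimal sketch of the 2.6.32-era sysdev registration pattern the series adopts; the foo_* names are placeholders, not code from the patch.

#include <linux/init.h>
#include <linux/sysdev.h>

/* Invoked via sysdev_suspend(), late in suspend with IRQs disabled. */
static int foo_suspend(struct sys_device *dev, pm_message_t state)
{
	/* quiesce per-subsystem state here */
	return 0;
}

/* Invoked via sysdev_resume(), before interrupts are re-enabled. */
static int foo_resume(struct sys_device *dev)
{
	/* re-establish per-subsystem state here */
	return 0;
}

static struct sysdev_class foo_sysclass = {
	.name		= "foo",
	.suspend	= foo_suspend,
	.resume		= foo_resume,
};

static struct sys_device foo_sysdev = {
	.id	= 0,
	.cls	= &foo_sysclass,
};

static int __init foo_pm_register(void)
{
	int err = sysdev_class_register(&foo_sysclass);

	if (!err)
		err = sysdev_register(&foo_sysdev);
	return err;
}
core_initcall(foo_pm_register);

Because sysdev callbacks already run with interrupts off on a single CPU, the explicit ordering that take_machine_down() used to enforce by hand can be left to the PM core.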
smp_resume(); +- else ++ dpm_resume_end(PMSG_RESUME); ++ ++ if (fast_suspend) + stop_machine_destroy(); + +- return 0; ++ return err; + } + #endif +--- head-2009-12-16.orig/include/xen/evtchn.h 2009-12-18 10:10:04.000000000 +0100 ++++ head-2009-12-16/include/xen/evtchn.h 2009-12-18 10:13:12.000000000 +0100 +@@ -107,7 +107,9 @@ int bind_ipi_to_irqhandler( + */ + void unbind_from_irqhandler(unsigned int irq, void *dev_id); + ++#ifndef CONFIG_XEN + void irq_resume(void); ++#endif + + /* Entry point for notifications into Linux subsystems. */ + asmlinkage void evtchn_do_upcall(struct pt_regs *regs); +--- head-2009-12-16.orig/include/xen/gnttab.h 2008-11-04 11:13:10.000000000 +0100 ++++ head-2009-12-16/include/xen/gnttab.h 2009-11-06 11:10:15.000000000 +0100 +@@ -110,8 +110,9 @@ static inline void __gnttab_dma_unmap_pa + + void gnttab_reset_grant_page(struct page *page); + +-int gnttab_suspend(void); ++#ifndef CONFIG_XEN + int gnttab_resume(void); ++#endif + + void *arch_gnttab_alloc_shared(unsigned long *frames); + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-x86_64-pgd-pin +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-x86_64-pgd-pin @@ -0,0 +1,111 @@ +From: jbeulich@novell.com +Subject: make pinning of pgd pairs transparent to callers +Patch-mainline: obsolete + +--- head-2010-01-04.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:53:45.000000000 +0100 ++++ head-2010-01-04/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:55:40.000000000 +0100 +@@ -114,8 +114,8 @@ void xen_l1_entry_update(pte_t *ptr, pte + void xen_l2_entry_update(pmd_t *ptr, pmd_t val); + void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */ + void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */ +-void xen_pgd_pin(unsigned long ptr); +-void xen_pgd_unpin(unsigned long ptr); ++void xen_pgd_pin(pgd_t *); ++void xen_pgd_unpin(pgd_t *); + + void xen_init_pgd_pin(void); + +--- head-2010-01-04.orig/arch/x86/mm/hypervisor.c 2010-01-05 16:47:18.000000000 +0100 ++++ head-2010-01-04/arch/x86/mm/hypervisor.c 2010-01-05 16:47:51.000000000 +0100 +@@ -624,26 +624,38 @@ EXPORT_SYMBOL_GPL(xen_invlpg_mask); + + #endif /* CONFIG_SMP */ + +-void xen_pgd_pin(unsigned long ptr) +-{ +- struct mmuext_op op; + #ifdef CONFIG_X86_64 +- op.cmd = MMUEXT_PIN_L4_TABLE; +-#elif defined(CONFIG_X86_PAE) +- op.cmd = MMUEXT_PIN_L3_TABLE; ++#define NR_PGD_PIN_OPS 2 + #else +- op.cmd = MMUEXT_PIN_L2_TABLE; ++#define NR_PGD_PIN_OPS 1 + #endif +- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); +- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); ++ ++void xen_pgd_pin(pgd_t *pgd) ++{ ++ struct mmuext_op op[NR_PGD_PIN_OPS]; ++ ++ op[0].cmd = MMUEXT_PIN_L3_TABLE; ++ op[0].arg1.mfn = virt_to_mfn(pgd); ++#ifdef CONFIG_X86_64 ++ op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE; ++ op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd)); ++#endif ++ if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) ++ BUG(); + } + +-void xen_pgd_unpin(unsigned long ptr) ++void xen_pgd_unpin(pgd_t *pgd) + { +- struct mmuext_op op; +- op.cmd = MMUEXT_UNPIN_TABLE; +- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); +- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); ++ struct mmuext_op op[NR_PGD_PIN_OPS]; ++ ++ op[0].cmd = MMUEXT_UNPIN_TABLE; ++ op[0].arg1.mfn = virt_to_mfn(pgd); ++#ifdef CONFIG_X86_64 ++ op[1].cmd = MMUEXT_UNPIN_TABLE; ++ op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd)); ++#endif ++ if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) ++ BUG(); + } + + void xen_set_ldt(const 
void *ptr, unsigned int ents) +--- head-2010-01-04.orig/arch/x86/mm/init_64-xen.c 2009-11-06 11:12:01.000000000 +0100 ++++ head-2010-01-04/arch/x86/mm/init_64-xen.c 2009-10-13 17:25:37.000000000 +0200 +@@ -747,10 +747,8 @@ void __init xen_init_pt(void) + early_make_page_readonly(level1_fixmap_pgt, + XENFEAT_writable_page_tables); + +- if (!xen_feature(XENFEAT_writable_page_tables)) { +- xen_pgd_pin(__pa_symbol(init_level4_pgt)); +- xen_pgd_pin(__pa_symbol(__user_pgd(init_level4_pgt))); +- } ++ if (!xen_feature(XENFEAT_writable_page_tables)) ++ xen_pgd_pin(init_level4_pgt); + } + + void __init xen_finish_init_mapping(void) +--- head-2010-01-04.orig/arch/x86/mm/pgtable-xen.c 2009-10-14 14:19:25.000000000 +0200 ++++ head-2010-01-04/arch/x86/mm/pgtable-xen.c 2009-10-14 14:20:47.000000000 +0200 +@@ -346,19 +346,13 @@ static void __pgd_pin(pgd_t *pgd) + { + pgd_walk(pgd, PAGE_KERNEL_RO); + kmap_flush_unused(); +- xen_pgd_pin(__pa(pgd)); /* kernel */ +-#ifdef CONFIG_X86_64 +- xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */ +-#endif ++ xen_pgd_pin(pgd); + SetPagePinned(virt_to_page(pgd)); + } + + static void __pgd_unpin(pgd_t *pgd) + { +- xen_pgd_unpin(__pa(pgd)); +-#ifdef CONFIG_X86_64 +- xen_pgd_unpin(__pa(__user_pgd(pgd))); +-#endif ++ xen_pgd_unpin(pgd); + pgd_walk(pgd, PAGE_KERNEL); + ClearPagePinned(virt_to_page(pgd)); + } --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-x86-machphys-prediction +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-x86-machphys-prediction @@ -0,0 +1,204 @@ +From: jbeulich@novell.com +Subject: properly predict phys<->mach translations +Patch-mainline: obsolete + +--- head-2009-07-28.orig/arch/x86/include/mach-xen/asm/maddr_32.h 2009-07-28 12:14:16.000000000 +0200 ++++ head-2009-07-28/arch/x86/include/mach-xen/asm/maddr_32.h 2009-07-29 10:56:35.000000000 +0200 +@@ -30,17 +30,19 @@ extern unsigned int machine_to_phys_or + + static inline unsigned long pfn_to_mfn(unsigned long pfn) + { +- if (xen_feature(XENFEAT_auto_translated_physmap)) ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return pfn; +- BUG_ON(max_mapnr && pfn >= max_mapnr); ++ if (likely(max_mapnr)) ++ BUG_ON(pfn >= max_mapnr); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; + } + + static inline int phys_to_machine_mapping_valid(unsigned long pfn) + { +- if (xen_feature(XENFEAT_auto_translated_physmap)) ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return 1; +- BUG_ON(max_mapnr && pfn >= max_mapnr); ++ if (likely(max_mapnr)) ++ BUG_ON(pfn >= max_mapnr); + return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + } + +@@ -48,7 +50,7 @@ static inline unsigned long mfn_to_pfn(u + { + unsigned long pfn; + +- if (xen_feature(XENFEAT_auto_translated_physmap)) ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return mfn; + + if (unlikely((mfn >> machine_to_phys_order) != 0)) +@@ -95,17 +97,18 @@ static inline unsigned long mfn_to_pfn(u + static inline unsigned long mfn_to_local_pfn(unsigned long mfn) + { + unsigned long pfn = mfn_to_pfn(mfn); +- if ((pfn < max_mapnr) +- && !xen_feature(XENFEAT_auto_translated_physmap) +- && (phys_to_machine_mapping[pfn] != mfn)) ++ if (likely(pfn < max_mapnr) ++ && likely(!xen_feature(XENFEAT_auto_translated_physmap)) ++ && unlikely(phys_to_machine_mapping[pfn] != mfn)) + return max_mapnr; /* force !pfn_valid() */ + return pfn; + } + + static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) + { +- BUG_ON(max_mapnr && pfn >= max_mapnr); +- if (xen_feature(XENFEAT_auto_translated_physmap)) 
{ ++ if (likely(max_mapnr)) ++ BUG_ON(pfn >= max_mapnr); ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } +--- head-2009-07-28.orig/arch/x86/include/mach-xen/asm/maddr_64.h 2009-07-28 12:14:16.000000000 +0200 ++++ head-2009-07-28/arch/x86/include/mach-xen/asm/maddr_64.h 2009-07-29 10:56:35.000000000 +0200 +@@ -25,17 +25,19 @@ extern unsigned int machine_to_phys_or + + static inline unsigned long pfn_to_mfn(unsigned long pfn) + { +- if (xen_feature(XENFEAT_auto_translated_physmap)) ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return pfn; +- BUG_ON(max_mapnr && pfn >= max_mapnr); ++ if (likely(max_mapnr)) ++ BUG_ON(pfn >= max_mapnr); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; + } + + static inline int phys_to_machine_mapping_valid(unsigned long pfn) + { +- if (xen_feature(XENFEAT_auto_translated_physmap)) ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return 1; +- BUG_ON(max_mapnr && pfn >= max_mapnr); ++ if (likely(max_mapnr)) ++ BUG_ON(pfn >= max_mapnr); + return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + } + +@@ -43,7 +45,7 @@ static inline unsigned long mfn_to_pfn(u + { + unsigned long pfn; + +- if (xen_feature(XENFEAT_auto_translated_physmap)) ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return mfn; + + if (unlikely((mfn >> machine_to_phys_order) != 0)) +@@ -90,17 +92,18 @@ static inline unsigned long mfn_to_pfn(u + static inline unsigned long mfn_to_local_pfn(unsigned long mfn) + { + unsigned long pfn = mfn_to_pfn(mfn); +- if ((pfn < max_mapnr) +- && !xen_feature(XENFEAT_auto_translated_physmap) +- && (phys_to_machine_mapping[pfn] != mfn)) ++ if (likely(pfn < max_mapnr) ++ && likely(!xen_feature(XENFEAT_auto_translated_physmap)) ++ && unlikely(phys_to_machine_mapping[pfn] != mfn)) + return max_mapnr; /* force !pfn_valid() */ + return pfn; + } + + static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) + { +- BUG_ON(max_mapnr && pfn >= max_mapnr); +- if (xen_feature(XENFEAT_auto_translated_physmap)) { ++ if (likely(max_mapnr)) ++ BUG_ON(pfn >= max_mapnr); ++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } +--- head-2009-07-28.orig/arch/x86/include/mach-xen/asm/pgtable_types.h 2009-07-28 13:14:11.000000000 +0200 ++++ head-2009-07-28/arch/x86/include/mach-xen/asm/pgtable_types.h 2009-07-29 10:56:35.000000000 +0200 +@@ -207,7 +207,7 @@ typedef struct { pgdval_t pgd; } pgd_t; + #define __pgd_ma(x) ((pgd_t) { (x) } ) + static inline pgd_t xen_make_pgd(pgdval_t val) + { +- if (val & _PAGE_PRESENT) ++ if (likely(val & _PAGE_PRESENT)) + val = pte_phys_to_machine(val); + return (pgd_t) { val }; + } +@@ -217,10 +217,10 @@ static inline pgdval_t xen_pgd_val(pgd_t + { + pgdval_t ret = __pgd_val(pgd); + #if PAGETABLE_LEVELS == 2 && CONFIG_XEN_COMPAT <= 0x030002 +- if (ret) ++ if (likely(ret)) + ret = machine_to_phys(ret) | _PAGE_PRESENT; + #else +- if (ret & _PAGE_PRESENT) ++ if (likely(ret & _PAGE_PRESENT)) + ret = pte_machine_to_phys(ret); + #endif + return ret; +@@ -237,7 +237,7 @@ typedef struct { pudval_t pud; } pud_t; + #define __pud_ma(x) ((pud_t) { (x) } ) + static inline pud_t xen_make_pud(pudval_t val) + { +- if (val & _PAGE_PRESENT) ++ if (likely(val & _PAGE_PRESENT)) + val = pte_phys_to_machine(val); + return (pud_t) { val }; + } +@@ -246,7 +246,7 @@ static inline pud_t xen_make_pud(pudval_ + static inline pudval_t 
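The maddr_32.h/maddr_64.h hunks above annotate the hot pfn/mfn translation helpers with likely()/unlikely(), marking the auto-translated-physmap and out-of-range paths as cold. These macros wrap gcc's __builtin_expect() so the predicted branch becomes the straight-line fall-through. A tiny self-contained illustration follows (not code from the patch; translate() is a made-up helper, and the kernel already provides these macros in linux/compiler.h):

#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

/* Hot-path lookup: hint that the table exists and the index is in
 * range, so the compiler lays that path out as the fall-through. */
static inline unsigned long translate(const unsigned long *p2m,
				      unsigned long pfn, unsigned long max)
{
	if (unlikely(!p2m))		/* rare: auto-translated guest */
		return pfn;
	if (likely(pfn < max))		/* common: in-range lookup */
		return p2m[pfn];
	return ~0UL;			/* cold: out-of-range */
}

The same reasoning explains why the patch turns BUG_ON(max_mapnr && pfn >= max_mapnr) into an if (likely(max_mapnr)) guard around the BUG_ON: the check is unchanged, but the compiler now knows which side is cold.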
xen_pud_val(pud_t pud) + { + pudval_t ret = __pud_val(pud); +- if (ret & _PAGE_PRESENT) ++ if (likely(ret & _PAGE_PRESENT)) + ret = pte_machine_to_phys(ret); + return ret; + } +@@ -266,7 +266,7 @@ typedef struct { pmdval_t pmd; } pmd_t; + #define __pmd_ma(x) ((pmd_t) { (x) } ) + static inline pmd_t xen_make_pmd(pmdval_t val) + { +- if (val & _PAGE_PRESENT) ++ if (likely(val & _PAGE_PRESENT)) + val = pte_phys_to_machine(val); + return (pmd_t) { val }; + } +@@ -276,10 +276,10 @@ static inline pmdval_t xen_pmd_val(pmd_t + { + pmdval_t ret = __pmd_val(pmd); + #if CONFIG_XEN_COMPAT <= 0x030002 +- if (ret) ++ if (likely(ret)) + ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; + #else +- if (ret & _PAGE_PRESENT) ++ if (likely(ret & _PAGE_PRESENT)) + ret = pte_machine_to_phys(ret); + #endif + return ret; +@@ -308,7 +308,7 @@ static inline pmdval_t pmd_flags(pmd_t p + #define __pte_ma(x) ((pte_t) { .pte = (x) } ) + static inline pte_t xen_make_pte(pteval_t val) + { +- if ((val & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT) ++ if (likely((val & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT)) + val = pte_phys_to_machine(val); + return (pte_t) { .pte = val }; + } +@@ -317,7 +317,7 @@ static inline pte_t xen_make_pte(pteval_ + static inline pteval_t xen_pte_val(pte_t pte) + { + pteval_t ret = __pte_val(pte); +- if ((pte.pte_low & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT) ++ if (likely((pte.pte_low & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT)) + ret = pte_machine_to_phys(ret); + return ret; + } --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.30 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.30 @@ -0,0 +1,18468 @@ +From: Linux Kernel Mailing List +Subject: Linux 2.6.30 +Patch-mainline: 2.6.30 + + This patch contains the differences between 2.6.29 and 2.6.30. + +Acked-by: Jeff Mahoney +Automatically created from "patches.kernel.org/patch-2.6.30" by xen-port-patches.py + +--- head-2010-01-18.orig/arch/ia64/include/asm/xen/hypervisor.h 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/ia64/include/asm/xen/hypervisor.h 2009-11-06 10:52:02.000000000 +0100 +@@ -34,10 +34,12 @@ + #define _ASM_IA64_XEN_HYPERVISOR_H + + #include ++#ifdef CONFIG_PARAVIRT_XEN + #include + #include /* to compile feature.c */ + #include /* to comiple xen-netfront.c */ + #include ++#endif + + /* xen_domain_type is set before executing any C code by early_xen_setup */ + enum xen_domain_type { +--- head-2010-01-18.orig/arch/ia64/kernel/vmlinux.lds.S 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/arch/ia64/kernel/vmlinux.lds.S 2009-11-06 10:52:02.000000000 +0100 +@@ -176,7 +176,7 @@ SECTIONS + __start_gate_section = .; + *(.data.gate) + __stop_gate_section = .; +-#ifdef CONFIG_XEN ++#ifdef CONFIG_PARAVIRT_XEN + . 
= ALIGN(PAGE_SIZE); + __xen_start_gate_section = .; + *(.data.gate.xen) +--- head-2010-01-18.orig/arch/x86/Kconfig 2009-11-20 11:02:51.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig 2009-11-20 11:03:01.000000000 +0100 +@@ -47,8 +47,8 @@ config X86 + select USER_STACKTRACE_SUPPORT + select HAVE_DMA_API_DEBUG + select HAVE_KERNEL_GZIP +- select HAVE_KERNEL_BZIP2 +- select HAVE_KERNEL_LZMA ++ select HAVE_KERNEL_BZIP2 if !XEN ++ select HAVE_KERNEL_LZMA if !XEN + select HAVE_ARCH_KMEMCHECK + + config OUTPUT_FORMAT +@@ -325,11 +325,11 @@ config X86_XEN + + config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" +- depends on X86_32 && SMP ++ depends on X86_32 && SMP && !XEN + ---help--- + This option is needed for the systems that have more than 8 CPUs + +-if X86_32 ++if X86_32 && !XEN + config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y +@@ -360,7 +360,7 @@ config X86_64_XEN + help + This option will compile a kernel compatible with Xen hypervisor + +-if X86_64 ++if X86_64 && !XEN + config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y +@@ -804,7 +804,7 @@ config MAXSMP + + config NR_CPUS + int "Maximum number of CPUs" if SMP && !MAXSMP +- range 2 8 if SMP && X86_32 && !X86_BIGSMP ++ range 2 8 if SMP && X86_32 && !X86_BIGSMP && !X86_XEN + range 2 512 if SMP && !MAXSMP + default "1" if !SMP + default "4096" if MAXSMP +@@ -878,10 +878,6 @@ config X86_VISWS_APIC + def_bool y + depends on X86_32 && X86_VISWS + +-config X86_XEN_GENAPIC +- def_bool y +- depends on X86_64_XEN +- + config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n +--- head-2010-01-18.orig/arch/x86/Makefile 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -111,10 +111,6 @@ endif + # prevent gcc from generating any FP code by mistake + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) + +-# Xen subarch support +-mflags-$(CONFIG_XEN) := -Iarch/x86/include/mach-xen +-mcore-$(CONFIG_XEN) := arch/x86/mach-xen/ +- + KBUILD_CFLAGS += $(mflags-y) + KBUILD_AFLAGS += $(mflags-y) + +@@ -186,10 +182,10 @@ bzImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ ++endif + + $(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ +-endif + + PHONY += install + install: +--- head-2010-01-18.orig/arch/x86/boot/Makefile 2009-12-04 10:44:45.000000000 +0100 ++++ head-2010-01-18/arch/x86/boot/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -204,6 +204,12 @@ $(obj)/vmlinux-stripped: OBJCOPYFLAGS := + $(obj)/vmlinux-stripped: vmlinux FORCE + $(call if_changed,objcopy) + ++ifndef CONFIG_XEN ++bzImage := bzImage ++else ++bzImage := vmlinuz ++endif ++ + install: +- sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ ++ sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/$(bzImage) \ + System.map "$(INSTALL_PATH)" +--- head-2010-01-18.orig/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:52:02.000000000 +0100 +@@ -502,7 +502,7 @@ ia32_sys_call_table: + .quad sys32_olduname + .quad sys_umask /* 60 */ + .quad sys_chroot +- .quad sys32_ustat ++ .quad compat_sys_ustat + .quad sys_dup2 + .quad sys_getppid + .quad sys_getpgrp /* 65 */ +@@ -773,4 +773,6 @@ ia32_sys_call_table: + .quad sys_dup3 /* 
330 */ + .quad sys_pipe2 + .quad sys_inotify_init1 ++ .quad compat_sys_preadv ++ .quad compat_sys_pwritev + ia32_syscall_end: +--- head-2010-01-18.orig/arch/x86/include/asm/kexec.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/asm/kexec.h 2009-11-06 10:52:02.000000000 +0100 +@@ -21,8 +21,14 @@ + # define PA_CONTROL_PAGE 0 + # define VA_CONTROL_PAGE 1 + # define PA_TABLE_PAGE 2 ++# ifndef CONFIG_XEN + # define PA_SWAP_PAGE 3 + # define PAGES_NR 4 ++# else /* CONFIG_XEN, see comment above ++# define VA_TABLE_PAGE 3 */ ++# define PA_SWAP_PAGE 4 ++# define PAGES_NR 5 ++# endif /* CONFIG_XEN */ + #endif + + # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +--- head-2010-01-18.orig/arch/x86/include/asm/page_64_types.h 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/asm/page_64_types.h 2009-11-06 10:52:02.000000000 +0100 +@@ -69,7 +69,15 @@ extern void init_extra_mapping_wb(unsign + #endif /* !__ASSEMBLY__ */ + + #ifdef CONFIG_FLATMEM ++/* ++ * While max_pfn is not exported, max_mapnr never gets initialized for non-Xen ++ * other than for hotplugged memory. ++ */ ++#ifndef CONFIG_XEN + #define pfn_valid(pfn) ((pfn) < max_pfn) ++#else ++#define pfn_valid(pfn) ((pfn) < max_mapnr) ++#endif + #endif + + #endif /* _ASM_X86_PAGE_64_DEFS_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:52:02.000000000 +0100 +@@ -39,7 +39,7 @@ extern gate_desc idt_table[]; + struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; + } __attribute__((aligned(PAGE_SIZE))); +-DECLARE_PER_CPU(struct gdt_page, gdt_page); ++DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); + + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) + { +@@ -91,7 +91,6 @@ static inline int desc_empty(const void + #define store_gdt(dtr) native_store_gdt(dtr) + #define store_idt(dtr) native_store_idt(dtr) + #define store_tr(tr) (tr = native_store_tr()) +-#define store_ldt(ldt) asm("sldt %0":"=m" (ldt)) + + #define load_TLS(t, cpu) native_load_tls(t, cpu) + #define set_ldt native_set_ldt +@@ -111,6 +110,8 @@ static inline void paravirt_free_ldt(str + { + } + ++#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) ++ + static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) + { +@@ -251,6 +252,8 @@ static inline void native_load_tls(struc + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; + } + #else ++#include ++ + #define load_TLS(t, cpu) xen_load_tls(t, cpu) + #define set_ldt xen_set_ldt + +@@ -265,8 +268,9 @@ static inline void xen_load_tls(struct t + struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN; + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) +- if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), +- *(u64 *)&t->tls_array[i])) ++ if (HYPERVISOR_update_descriptor( ++ arbitrary_virt_to_machine(&gdt[i]), ++ *(u64 *)&t->tls_array[i])) + BUG(); + } + #endif +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap.h 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/fixmap.h 2009-11-06 10:52:02.000000000 +0100 +@@ -1,11 +1,154 @@ ++/* ++ * fixmap.h: compile-time virtual memory allocation ++ * ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file "COPYING" in the main directory of this archive ++ * for more details. 
++ * ++ * Copyright (C) 1998 Ingo Molnar ++ * ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 ++ */ ++ + #ifndef _ASM_X86_FIXMAP_H + #define _ASM_X86_FIXMAP_H + ++#ifndef __ASSEMBLY__ ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_32 ++#include ++#include ++#else ++#include ++#endif ++ ++/* ++ * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall ++ * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. ++ * Because of this, FIXADDR_TOP x86 integration was left as later work. ++ */ ++#ifdef CONFIG_X86_32 ++/* used by vmalloc.c, vsyscall.lds.S. ++ * ++ * Leave one empty page between vmalloc'ed areas and ++ * the start of the fixmap. ++ */ ++extern unsigned long __FIXADDR_TOP; ++#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) ++ ++#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) ++#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) ++#else ++#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) ++ ++/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ ++#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) ++#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) ++#endif ++ ++ ++/* ++ * Here we define all the compile-time 'special' virtual ++ * addresses. The point is to have a constant address at ++ * compile time, but to set the physical address only ++ * in the boot process. ++ * for x86_32: We allocate these special addresses ++ * from the end of virtual memory (0xfffff000) backwards. ++ * Also this lets us do fail-safe vmalloc(), we ++ * can guarantee that these special addresses and ++ * vmalloc()-ed addresses never overlap. ++ * ++ * These 'compile-time allocated' memory buffers are ++ * fixed-size 4k pages (or larger if used with an increment ++ * higher than 1). Use set_fixmap(idx,phys) to associate ++ * physical memory with fixmap indices. ++ * ++ * TLB entries of such buffers will not be flushed across ++ * task switches. 
++ */ ++enum fixed_addresses { + #ifdef CONFIG_X86_32 +-# include "fixmap_32.h" ++ FIX_HOLE, ++ FIX_VDSO, + #else +-# include "fixmap_64.h" ++ VSYSCALL_LAST_PAGE, ++ VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE ++ + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, ++ VSYSCALL_HPET, ++#endif ++ FIX_DBGP_BASE, ++ FIX_EARLYCON_MEM_BASE, ++#ifdef CONFIG_X86_LOCAL_APIC ++ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ + #endif ++#ifndef CONFIG_XEN ++#ifdef CONFIG_X86_IO_APIC ++ FIX_IO_APIC_BASE_0, ++ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, ++#endif ++#else ++ FIX_SHARED_INFO, ++#define NR_FIX_ISAMAPS 256 ++ FIX_ISAMAP_END, ++ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, ++#endif ++#ifdef CONFIG_X86_VISWS_APIC ++ FIX_CO_CPU, /* Cobalt timer */ ++ FIX_CO_APIC, /* Cobalt APIC Redirection Table */ ++ FIX_LI_PCIA, /* Lithium PCI Bridge A */ ++ FIX_LI_PCIB, /* Lithium PCI Bridge B */ ++#endif ++#ifdef CONFIG_X86_F00F_BUG ++ FIX_F00F_IDT, /* Virtual mapping for IDT */ ++#endif ++#ifdef CONFIG_X86_CYCLONE_TIMER ++ FIX_CYCLONE_TIMER, /*cyclone timer register*/ ++#endif ++#ifdef CONFIG_X86_32 ++ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ ++ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, ++#ifdef CONFIG_PCI_MMCONFIG ++ FIX_PCIE_MCFG, ++#endif ++#endif ++#ifdef CONFIG_PARAVIRT ++ FIX_PARAVIRT_BOOTMAP, ++#endif ++ FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */ ++ FIX_TEXT_POKE1, ++ __end_of_permanent_fixed_addresses, ++#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT ++ FIX_OHCI1394_BASE, ++#endif ++ /* ++ * 256 temporary boot-time mappings, used by early_ioremap(), ++ * before ioremap() is functional. ++ * ++ * We round it up to the next 256 pages boundary so that we ++ * can have a single pgd entry and a single pte table: ++ */ ++#define NR_FIX_BTMAPS 64 ++#define FIX_BTMAPS_SLOTS 4 ++ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - ++ (__end_of_permanent_fixed_addresses & 255), ++ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, ++#ifdef CONFIG_X86_32 ++ FIX_WP_TEST, ++#endif ++ __end_of_fixed_addresses ++}; ++ ++ ++extern void reserve_top_address(unsigned long reserve); ++ ++#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) ++#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) ++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) ++#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) + + extern int fixmaps_set; + +@@ -13,10 +156,10 @@ extern pte_t *kmap_pte; + extern pgprot_t kmap_prot; + extern pte_t *pkmap_page_table; + +-void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t); ++void xen_set_fixmap(enum fixed_addresses, phys_addr_t, pgprot_t); + + static inline void __set_fixmap(enum fixed_addresses idx, +- maddr_t phys, pgprot_t flags) ++ phys_addr_t phys, pgprot_t flags) + { + xen_set_fixmap(idx, phys, flags); + } +@@ -65,4 +208,5 @@ static inline unsigned long virt_to_fix( + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); + } ++#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_FIXMAP_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2009-11-06 10:51:47.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,125 +0,0 @@ +-/* +- * fixmap.h: compile-time virtual memory allocation +- * +- * This file is subject to the terms and conditions of the GNU General Public +- * License. See the file "COPYING" in the main directory of this archive +- * for more details. 
+- * +- * Copyright (C) 1998 Ingo Molnar +- * +- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 +- */ +- +-#ifndef _ASM_X86_FIXMAP_32_H +-#define _ASM_X86_FIXMAP_32_H +- +-/* used by vmalloc.c, vsyscall.lds.S. +- * +- * Leave one empty page between vmalloc'ed areas and +- * the start of the fixmap. +- */ +-extern unsigned long __FIXADDR_TOP; +-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +- +-#ifndef __ASSEMBLY__ +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * Here we define all the compile-time 'special' virtual +- * addresses. The point is to have a constant address at +- * compile time, but to set the physical address only +- * in the boot process. We allocate these special addresses +- * from the end of virtual memory (0xfffff000) backwards. +- * Also this lets us do fail-safe vmalloc(), we +- * can guarantee that these special addresses and +- * vmalloc()-ed addresses never overlap. +- * +- * these 'compile-time allocated' memory buffers are +- * fixed-size 4k pages. (or larger if used with an increment +- * highger than 1) use fixmap_set(idx,phys) to associate +- * physical memory with fixmap indices. +- * +- * TLB entries of such buffers will not be flushed across +- * task switches. +- */ +-enum fixed_addresses { +- FIX_HOLE, +- FIX_VDSO, +- FIX_DBGP_BASE, +- FIX_EARLYCON_MEM_BASE, +-#ifdef CONFIG_X86_LOCAL_APIC +- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +-#endif +-#ifndef CONFIG_XEN +-#ifdef CONFIG_X86_IO_APIC +- FIX_IO_APIC_BASE_0, +- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +-#endif +-#else +- FIX_SHARED_INFO, +-#define NR_FIX_ISAMAPS 256 +- FIX_ISAMAP_END, +- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, +-#endif +-#ifdef CONFIG_X86_VISWS_APIC +- FIX_CO_CPU, /* Cobalt timer */ +- FIX_CO_APIC, /* Cobalt APIC Redirection Table */ +- FIX_LI_PCIA, /* Lithium PCI Bridge A */ +- FIX_LI_PCIB, /* Lithium PCI Bridge B */ +-#endif +-#ifdef CONFIG_X86_F00F_BUG +- FIX_F00F_IDT, /* Virtual mapping for IDT */ +-#endif +-#ifdef CONFIG_X86_CYCLONE_TIMER +- FIX_CYCLONE_TIMER, /*cyclone timer register*/ +-#endif +- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ +- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +-#ifdef CONFIG_PCI_MMCONFIG +- FIX_PCIE_MCFG, +-#endif +-#ifdef CONFIG_PARAVIRT +- FIX_PARAVIRT_BOOTMAP, +-#endif +- __end_of_permanent_fixed_addresses, +- /* +- * 256 temporary boot-time mappings, used by early_ioremap(), +- * before ioremap() is functional. 
+- * +- * We round it up to the next 256 pages boundary so that we +- * can have a single pgd entry and a single pte table: +- */ +-#define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_SLOTS 4 +- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - +- (__end_of_permanent_fixed_addresses & 255), +- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +- FIX_WP_TEST, +-#ifdef CONFIG_ACPI +- FIX_ACPI_BEGIN, +- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +-#endif +-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT +- FIX_OHCI1394_BASE, +-#endif +- __end_of_fixed_addresses +-}; +- +-extern void reserve_top_address(unsigned long reserve); +- +- +-#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) +- +-#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +-#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +-#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +-#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) +- +-#endif /* !__ASSEMBLY__ */ +-#endif /* _ASM_X86_FIXMAP_32_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2009-11-06 10:51:55.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,90 +0,0 @@ +-/* +- * fixmap.h: compile-time virtual memory allocation +- * +- * This file is subject to the terms and conditions of the GNU General Public +- * License. See the file "COPYING" in the main directory of this archive +- * for more details. +- * +- * Copyright (C) 1998 Ingo Molnar +- */ +- +-#ifndef _ASM_X86_FIXMAP_64_H +-#define _ASM_X86_FIXMAP_64_H +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * Here we define all the compile-time 'special' virtual +- * addresses. The point is to have a constant address at +- * compile time, but to set the physical address only +- * in the boot process. +- * +- * These 'compile-time allocated' memory buffers are +- * fixed-size 4k pages (or larger if used with an increment +- * higher than 1). Use set_fixmap(idx,phys) to associate +- * physical memory with fixmap indices. +- * +- * TLB entries of such buffers will not be flushed across +- * task switches. +- */ +- +-enum fixed_addresses { +- VSYSCALL_LAST_PAGE, +- VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE +- + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, +- VSYSCALL_HPET, +- FIX_DBGP_BASE, +- FIX_EARLYCON_MEM_BASE, +-#ifdef CONFIG_X86_LOCAL_APIC +- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +-#endif +-#ifndef CONFIG_XEN +- FIX_IO_APIC_BASE_0, +- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +-#else +-#define NR_FIX_ISAMAPS 256 +- FIX_ISAMAP_END, +- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, +-#endif +-#ifdef CONFIG_PARAVIRT +- FIX_PARAVIRT_BOOTMAP, +-#else +- FIX_SHARED_INFO, +-#endif +- __end_of_permanent_fixed_addresses, +-#ifdef CONFIG_ACPI +- FIX_ACPI_BEGIN, +- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +-#endif +-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT +- FIX_OHCI1394_BASE, +-#endif +- /* +- * 256 temporary boot-time mappings, used by early_ioremap(), +- * before ioremap() is functional. 
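Note: the boot-time mapping block in both deleted headers, and in the merged fixmap.h above, uses the same round-up-to-the-next-256-pages expression so the temporary early_ioremap() slots share a single pgd entry and pte table. A stand-alone check of that arithmetic; the __end_of_permanent_fixed_addresses value is an assumed example:

    #include <stdio.h>

    #define NR_FIX_BTMAPS    64
    #define FIX_BTMAPS_SLOTS  4

    int main(void)
    {
        /* Assumed example; the real value depends on config options. */
        unsigned int end_of_permanent = 517;

        /* Same expression as FIX_BTMAP_END in the header: round up to
         * the next multiple of 256 pages. */
        unsigned int btmap_end =
            end_of_permanent + 256 - (end_of_permanent & 255);
        unsigned int btmap_begin =
            btmap_end + NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS - 1;

        printf("FIX_BTMAP_END   = %u\n", btmap_end);   /* 768 */
        printf("FIX_BTMAP_BEGIN = %u\n", btmap_begin); /* 1023 */
        return 0;
    }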
+- * +- * We round it up to the next 256 pages boundary so that we +- * can have a single pgd entry and a single pte table: +- */ +-#define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_SLOTS 4 +- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - +- (__end_of_permanent_fixed_addresses & 255), +- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +- __end_of_fixed_addresses +-}; +- +-#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) +-#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +- +-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +-#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +- +-#endif /* _ASM_X86_FIXMAP_64_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:52:02.000000000 +0100 +@@ -63,6 +63,7 @@ void *kmap_atomic(struct page *page, enu + void *kmap_atomic_pte(struct page *page, enum km_type type); + void kunmap_atomic(void *kvaddr, enum km_type type); + void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); ++void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); + struct page *kmap_atomic_to_page(void *ptr); + + #define kmap_atomic_pte(page, type) \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:42:20.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:43:12.000000000 +0100 +@@ -46,7 +46,7 @@ + #include + #include + #include +-#include ++#include + + extern shared_info_t *HYPERVISOR_shared_info; + +@@ -153,20 +153,16 @@ int __must_check xen_multi_mmuext_op(str + #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE + static inline void arch_enter_lazy_mmu_mode(void) + { +- __get_cpu_var(xen_lazy_mmu) = true; ++ percpu_write(xen_lazy_mmu, true); + } + + static inline void arch_leave_lazy_mmu_mode(void) + { +- __get_cpu_var(xen_lazy_mmu) = false; ++ percpu_write(xen_lazy_mmu, false); + xen_multicall_flush(false); + } + +-#if defined(CONFIG_X86_32) +-#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu)) +-#elif !defined(arch_use_lazy_mmu_mode) +-#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu)) +-#endif ++#define arch_use_lazy_mmu_mode() unlikely(percpu_read(xen_lazy_mmu)) + + #if 0 /* All uses are in places potentially called asynchronously, but + * asynchronous code should rather not make use of lazy mode at all. +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/io.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/io.h 2009-11-06 10:52:02.000000000 +0100 +@@ -5,6 +5,10 @@ + + #include + #include ++#include ++#ifdef __KERNEL__ ++#include ++#endif + + #define build_mmio_read(name, size, type, reg, barrier) \ + static inline type name(const volatile void __iomem *addr) \ +@@ -82,6 +86,101 @@ static inline void writeq(__u64 val, vol + + #define native_io_delay xen_io_delay + ++/** ++ * virt_to_phys - map virtual addresses to physical ++ * @address: address to remap ++ * ++ * The returned physical address is the physical (CPU) mapping for ++ * the memory address given. It is only valid to use this function on ++ * addresses directly mapped or allocated via kmalloc. ++ * ++ * This function does not give bus mappings for DMA transfers. 
In ++ * almost all conceivable cases a device driver should not be using ++ * this function ++ */ ++ ++static inline phys_addr_t virt_to_phys(volatile void *address) ++{ ++ return __pa(address); ++} ++ ++/** ++ * phys_to_virt - map physical address to virtual ++ * @address: address to remap ++ * ++ * The returned virtual address is a current CPU mapping for ++ * the memory address given. It is only valid to use this function on ++ * addresses that have a kernel mapping ++ * ++ * This function does not handle bus mappings for DMA transfers. In ++ * almost all conceivable cases a device driver should not be using ++ * this function ++ */ ++ ++static inline void *phys_to_virt(phys_addr_t address) ++{ ++ return __va(address); ++} ++ ++/* ++ * Change "struct page" to physical address. ++ */ ++#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) ++#undef page_to_phys ++#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page))) ++#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page))) ++ ++/* ++ * ISA I/O bus memory addresses are 1:1 with the physical address. ++ * However, we truncate the address to unsigned int to avoid undesirable ++ * promotions in legacy drivers. ++ */ ++#define isa_virt_to_bus(_x) ({ \ ++ unsigned long _va_ = (unsigned long)(_x); \ ++ _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \ ++ ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \ ++ : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); }) ++#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x)) ++ ++/* ++ * However PCI ones are not necessarily 1:1 and therefore these interfaces ++ * are forbidden in portable PCI drivers. ++ * ++ * Allow them on x86 for legacy drivers, though. ++ */ ++#define virt_to_bus(_x) phys_to_machine(__pa(_x)) ++#define bus_to_virt(_x) __va(machine_to_phys(_x)) ++ ++/** ++ * ioremap - map bus memory into CPU space ++ * @offset: bus address of the memory ++ * @size: size of the resource to map ++ * ++ * ioremap performs a platform specific sequence of operations to ++ * make bus memory CPU accessible via the readb/readw/readl/writeb/ ++ * writew/writel functions and the other mmio helpers. The returned ++ * address is not guaranteed to be usable directly as a virtual ++ * address. ++ * ++ * If the area you are trying to map is a PCI BAR you should have a ++ * look at pci_iomap().
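Note: virt_to_phys()/phys_to_virt() above are thin wrappers over the direct-map conversion done by __pa()/__va(). A user-space toy model of that offset relationship; the PAGE_OFFSET value is an arbitrary stand-in, and the extra Xen pseudo-physical/machine translation (phys_to_machine() and friends) is deliberately left out:

    #include <stdio.h>

    /* Toy stand-in for the kernel's direct-mapping base. */
    #define PAGE_OFFSET 0xffff880000000000UL

    typedef unsigned long phys_addr_t;

    static phys_addr_t virt_to_phys(unsigned long vaddr)
    {
        return vaddr - PAGE_OFFSET;  /* what __pa() does on the direct map */
    }

    static unsigned long phys_to_virt(phys_addr_t paddr)
    {
        return paddr + PAGE_OFFSET;  /* what __va() does */
    }

    int main(void)
    {
        phys_addr_t pa = 0x1000;
        unsigned long va = phys_to_virt(pa);
        printf("pa %#lx <-> va %#lx <-> pa %#lx\n",
               (unsigned long)pa, va, (unsigned long)virt_to_phys(va));
        return 0;
    }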
++ */ ++extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); ++extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); ++extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, ++ unsigned long prot_val); ++ ++/* ++ * The default ioremap() behavior is non-cached: ++ */ ++static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) ++{ ++ return ioremap_nocache(offset, size); ++} ++ ++extern void iounmap(volatile void __iomem *addr); ++ ++ + #ifdef CONFIG_X86_32 + # include "../../asm/io_32.h" + #else +@@ -93,11 +192,6 @@ static inline void writeq(__u64 val, vol + /* We will be supplying our own /dev/mem implementation */ + #define ARCH_HAS_DEV_MEM + +-#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +-#undef page_to_phys +-#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page))) +-#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page))) +- + #define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \ + (unsigned long)(bv)->bv_offset) + +@@ -106,23 +200,7 @@ static inline void writeq(__u64 val, vol + && bvec_to_pseudophys(vec1) + (vec1)->bv_len \ + == bvec_to_pseudophys(vec2)) + +-#undef virt_to_bus +-#undef bus_to_virt +-#define virt_to_bus(_x) phys_to_machine(__pa(_x)) +-#define bus_to_virt(_x) __va(machine_to_phys(_x)) +- +-#include +- + #undef __ISA_IO_base +-#undef isa_virt_to_bus +-#undef isa_page_to_bus +-#undef isa_bus_to_virt +-#define isa_virt_to_bus(_x) ({ \ +- unsigned long _va_ = (unsigned long)(_x); \ +- _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \ +- ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \ +- : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); }) +-#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x)) + + #endif + +@@ -131,7 +209,7 @@ extern void unxlate_dev_mem_ptr(unsigned + + extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size, + unsigned long prot_val); +-extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); ++extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); + + /* + * early_ioremap() and early_iounmap() are for temporary early boot-time +@@ -140,10 +218,12 @@ extern void __iomem *ioremap_wc(unsigned + */ + extern void early_ioremap_init(void); + extern void early_ioremap_reset(void); +-extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); +-extern void __iomem *early_memremap(unsigned long offset, unsigned long size); ++extern void __iomem *early_ioremap(resource_size_t phys_addr, ++ unsigned long size); ++extern void __iomem *early_memremap(resource_size_t phys_addr, ++ unsigned long size); + extern void early_iounmap(void __iomem *addr, unsigned long size); +-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + ++#define IO_SPACE_LIMIT 0xffff + + #endif /* _ASM_X86_IO_H */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/ipi.h 2009-11-06 10:52:02.000000000 +0100 +@@ -0,0 +1,13 @@ ++#ifndef _ASM_X86_IPI_H ++#define _ASM_X86_IPI_H ++ ++#include ++#include ++ ++void xen_send_IPI_mask(const struct cpumask *, int vector); ++void xen_send_IPI_mask_allbutself(const struct cpumask *, int vector); ++void xen_send_IPI_allbutself(int vector); ++void xen_send_IPI_all(int vector); ++void xen_send_IPI_self(int vector); ++ ++#endif /* _ASM_X86_IPI_H */ +--- 
head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:52:02.000000000 +0100 +@@ -94,7 +94,7 @@ static inline void halt(void) + + #ifdef CONFIG_X86_64 + # define __REG_si %rsi +-# define __CPU_num %gs:pda_cpunumber ++# define __CPU_num PER_CPU_VAR(cpu_number) + #else + # define __REG_si %esi + # define __CPU_num TI_cpu(%ebp) +@@ -130,6 +130,7 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT + mov $__KERNEL_PERCPU, %ecx ; \ + push %esp ; \ + mov %ecx, %fs ; \ ++ SET_KERNEL_GS %ecx ; \ + call evtchn_do_upcall ; \ + add $4,%esp ; \ + jmp ret_from_intr +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-12-22 13:20:34.000000000 +0100 +@@ -2,29 +2,46 @@ + #define _ASM_X86_IRQ_VECTORS_H + + #ifdef CONFIG_X86_32 +-# define SYSCALL_VECTOR 0x80 ++# define SYSCALL_VECTOR 0x80 + #else +-# define IA32_SYSCALL_VECTOR 0x80 ++# define IA32_SYSCALL_VECTOR 0x80 + #endif + +-#define RESCHEDULE_VECTOR 0 +-#define CALL_FUNCTION_VECTOR 1 +-#define CALL_FUNC_SINGLE_VECTOR 2 +-#define SPIN_UNLOCK_VECTOR 3 +-#define NR_IPIS 4 ++#define RESCHEDULE_VECTOR 0 ++#define CALL_FUNCTION_VECTOR 1 ++#define CALL_FUNC_SINGLE_VECTOR 2 ++#define SPIN_UNLOCK_VECTOR 3 ++#define NR_IPIS 4 + + /* + * The maximum number of vectors supported by i386 processors + * is limited to 256. For processors other than i386, NR_VECTORS + * should be changed accordingly. + */ +-#define NR_VECTORS 256 ++#define NR_VECTORS 256 + +-#define FIRST_VM86_IRQ 3 +-#define LAST_VM86_IRQ 15 +-#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) ++#define FIRST_VM86_IRQ 3 ++#define LAST_VM86_IRQ 15 + +-#define NR_IRQS_LEGACY 16 ++#ifndef __ASSEMBLY__ ++static inline int invalid_vm86_irq(int irq) ++{ ++ return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; ++} ++#endif ++ ++/* ++ * Size the maximum number of interrupts. ++ * ++ * If the irq_desc[] array has a sparse layout, we can size things ++ * generously - it scales up linearly with the maximum number of CPUs, ++ * and the maximum number of IO-APICs, whichever is higher. ++ * ++ * In other cases we size more conservatively, to not create too large ++ * static arrays. ++ */ ++ ++#define NR_IRQS_LEGACY 16 + + /* + * The flat IRQ space is divided into two regions: +@@ -35,21 +52,41 @@ + * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These + * are bound using the provided bind/unbind functions. + */ ++#define PIRQ_BASE 0 + +-#define PIRQ_BASE 0 +-#if defined(NR_CPUS) && defined(MAX_IO_APICS) +-# if !defined(CONFIG_SPARSE_IRQ) && NR_CPUS < MAX_IO_APICS +-# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS) +-# elif defined(CONFIG_SPARSE_IRQ) && 8 * NR_CPUS > 32 * MAX_IO_APICS +-# define NR_PIRQS (NR_VECTORS + 8 * NR_CPUS) ++#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) ++#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) ++ ++#ifdef CONFIG_X86_IO_APIC ++# if !defined(NR_CPUS) || !defined(MAX_IO_APICS) ++/* nothing */ ++# elif defined(CONFIG_SPARSE_IRQ) ++# define NR_PIRQS \ ++ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? 
\ ++ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ ++ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) ++# elif NR_CPUS < MAX_IO_APICS ++# define NR_PIRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) + # else +-# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS) ++# define NR_PIRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) + # endif ++#elif defined(CONFIG_XEN_PCIDEV_FRONTEND) ++# define NR_PIRQS (NR_VECTORS + CPU_VECTOR_LIMIT) ++#else /* !CONFIG_X86_IO_APIC: */ ++# define NR_PIRQS NR_IRQS_LEGACY ++#endif ++ ++#ifndef __ASSEMBLY__ ++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SPARSE_IRQ) ++extern int nr_pirqs; ++#else ++# define nr_pirqs NR_PIRQS ++#endif + #endif + +-#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) +-#define NR_DYNIRQS 256 ++#define DYNIRQ_BASE (PIRQ_BASE + nr_pirqs) ++#define NR_DYNIRQS 256 + +-#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) ++#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) + + #endif /* _ASM_X86_IRQ_VECTORS_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:52:02.000000000 +0100 +@@ -26,11 +26,117 @@ static inline void xen_activate_mm(struc + int init_new_context(struct task_struct *tsk, struct mm_struct *mm); + void destroy_context(struct mm_struct *mm); + ++ ++static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) ++{ ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ ++ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) ++ percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); ++#endif ++} ++ ++#define prepare_arch_switch(next) __prepare_arch_switch() ++ ++static inline void __prepare_arch_switch(void) ++{ + #ifdef CONFIG_X86_32 +-# include "mmu_context_32.h" ++ /* ++ * Save away %gs. No need to save %fs, as it was saved on the ++ * stack on entry. No need to save %es and %ds, as those are ++ * always kernel segments while inside the kernel. ++ */ ++ lazy_save_gs(current->thread.gs); ++ lazy_load_gs(__KERNEL_STACK_CANARY); + #else +-# include "mmu_context_64.h" ++ /* ++ * Save away %es, %ds, %fs and %gs. Must happen before reload ++ * of cr3/ldt (i.e., not in __switch_to). 
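Note: to make the NR_PIRQS sizing from irq_vectors.h above concrete, here is the CONFIG_SPARSE_IRQ branch evaluated for one assumed configuration; the NR_CPUS and MAX_IO_APICS values are made up for the example:

    #include <stdio.h>

    int main(void)
    {
        /* Assumed configuration, not values from this patch. */
        int nr_cpus = 64, max_io_apics = 128, nr_vectors = 256;

        int cpu_limit     =  8 * nr_cpus;        /* CPU_VECTOR_LIMIT     */
        int io_apic_limit = 32 * max_io_apics;   /* IO_APIC_VECTOR_LIMIT */

        /* Sparse-IRQ case: scale with whichever limit is higher. */
        int nr_pirqs = nr_vectors +
            (cpu_limit > io_apic_limit ? cpu_limit : io_apic_limit);

        printf("NR_PIRQS = %d\n", nr_pirqs);       /* 256 + 4096 = 4352 */
        printf("NR_IRQS  = %d\n", nr_pirqs + 256); /* plus NR_DYNIRQS   */
        return 0;
    }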
++ */ ++ __asm__ __volatile__ ( ++ "mov %%es,%0 ; mov %%ds,%1 ; mov %%fs,%2 ; mov %%gs,%3" ++ : "=m" (current->thread.es), ++ "=m" (current->thread.ds), ++ "=m" (current->thread.fsindex), ++ "=m" (current->thread.gsindex) ); ++ ++ if (current->thread.ds) ++ __asm__ __volatile__ ( "movl %0,%%ds" : : "r" (0) ); ++ ++ if (current->thread.es) ++ __asm__ __volatile__ ( "movl %0,%%es" : : "r" (0) ); ++ ++ if (current->thread.fsindex) { ++ __asm__ __volatile__ ( "movl %0,%%fs" : : "r" (0) ); ++ current->thread.fs = 0; ++ } ++ ++ if (current->thread.gsindex) { ++ load_gs_index(0); ++ current->thread.gs = 0; ++ } ++#endif ++} ++ ++static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, ++ struct task_struct *tsk) ++{ ++ unsigned cpu = smp_processor_id(); ++ struct mmuext_op _op[2 + (sizeof(long) > 4)], *op = _op; ++ ++ if (likely(prev != next)) { ++ BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && ++ !PagePinned(virt_to_page(next->pgd))); ++ ++ /* stop flush ipis for the previous mm */ ++ cpu_clear(cpu, prev->cpu_vm_mask); ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ ++ percpu_write(cpu_tlbstate.state, TLBSTATE_OK); ++ percpu_write(cpu_tlbstate.active_mm, next); + #endif ++ cpu_set(cpu, next->cpu_vm_mask); ++ ++ /* Re-load page tables: load_cr3(next->pgd) */ ++ op->cmd = MMUEXT_NEW_BASEPTR; ++ op->arg1.mfn = virt_to_mfn(next->pgd); ++ op++; ++ ++ /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */ ++#ifdef CONFIG_X86_64 ++ op->cmd = MMUEXT_NEW_USER_BASEPTR; ++ op->arg1.mfn = virt_to_mfn(__user_pgd(next->pgd)); ++ op++; ++#endif ++ ++ /* ++ * load the LDT, if the LDT is different: ++ */ ++ if (unlikely(prev->context.ldt != next->context.ldt)) { ++ /* load_LDT_nolock(&next->context) */ ++ op->cmd = MMUEXT_SET_LDT; ++ op->arg1.linear_addr = (unsigned long)next->context.ldt; ++ op->arg2.nr_ents = next->context.size; ++ op++; ++ } ++ ++ BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); ++ } ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ ++ else { ++ percpu_write(cpu_tlbstate.state, TLBSTATE_OK); ++ BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); ++ ++ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { ++ /* We were in lazy tlb mode and leave_mm disabled ++ * tlb flush IPI delivery. We must reload CR3 ++ * to make sure to use no freed page tables. 
++ */ ++ load_cr3(next->pgd); ++ xen_new_user_pt(__pa(__user_pgd(next->pgd))); ++ load_LDT_nolock(&next->context); ++ } ++ } ++#endif ++} + + #define activate_mm(prev, next) \ + do { \ +@@ -38,5 +144,17 @@ do { \ + switch_mm((prev), (next), NULL); \ + } while (0); + ++#ifdef CONFIG_X86_32 ++#define deactivate_mm(tsk, mm) \ ++do { \ ++ lazy_load_gs(0); \ ++} while (0) ++#else ++#define deactivate_mm(tsk, mm) \ ++do { \ ++ load_gs_index(0); \ ++ loadsegment(fs, 0); \ ++} while (0) ++#endif + + #endif /* _ASM_X86_MMU_CONTEXT_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:51:55.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,83 +0,0 @@ +-#ifndef _ASM_X86_MMU_CONTEXT_32_H +-#define _ASM_X86_MMU_CONTEXT_32_H +- +-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +-{ +-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ +- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) +- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); +-#endif +-} +- +-#define prepare_arch_switch(next) __prepare_arch_switch() +- +-static inline void __prepare_arch_switch(void) +-{ +- /* +- * Save away %gs. No need to save %fs, as it was saved on the +- * stack on entry. No need to save %es and %ds, as those are +- * always kernel segments while inside the kernel. +- */ +- asm volatile ( "mov %%gs,%0" +- : "=m" (current->thread.gs)); +- asm volatile ( "movl %0,%%gs" +- : : "r" (0) ); +-} +- +-static inline void switch_mm(struct mm_struct *prev, +- struct mm_struct *next, +- struct task_struct *tsk) +-{ +- int cpu = smp_processor_id(); +- struct mmuext_op _op[2], *op = _op; +- +- if (likely(prev != next)) { +- BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && +- !PagePinned(virt_to_page(next->pgd))); +- +- /* stop flush ipis for the previous mm */ +- cpu_clear(cpu, prev->cpu_vm_mask); +-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ +- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); +- x86_write_percpu(cpu_tlbstate.active_mm, next); +-#endif +- cpu_set(cpu, next->cpu_vm_mask); +- +- /* Re-load page tables: load_cr3(next->pgd) */ +- op->cmd = MMUEXT_NEW_BASEPTR; +- op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT); +- op++; +- +- /* +- * load the LDT, if the LDT is different: +- */ +- if (unlikely(prev->context.ldt != next->context.ldt)) { +- /* load_LDT_nolock(&next->context, cpu) */ +- op->cmd = MMUEXT_SET_LDT; +- op->arg1.linear_addr = (unsigned long)next->context.ldt; +- op->arg2.nr_ents = next->context.size; +- op++; +- } +- +- BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); +- } +-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ +- else { +- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); +- BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); +- +- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { +- /* We were in lazy tlb mode and leave_mm disabled +- * tlb flush IPI delivery. We must reload %cr3. 
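Note: the consolidated switch_mm() above accumulates up to three mmuext operations (new base pointer, new user base pointer on 64-bit, LDT switch) in a local array and submits them with a single HYPERVISOR_mmuext_op() call. A toy model of that accumulate-then-submit pattern; the command names are echoed as strings and nothing here is Xen's real ABI:

    #include <stdio.h>

    struct op { const char *cmd; unsigned long arg; };

    static int issue_batch(const struct op *ops, int count)
    {
        for (int i = 0; i < count; i++)
            printf("op %d: %s(%#lx)\n", i, ops[i].cmd, ops[i].arg);
        return 0;   /* the kernel BUG_ON()s a real failure instead */
    }

    int main(void)
    {
        struct op batch[3], *op = batch;
        int ldt_changed = 1;   /* assumed for the example */

        op->cmd = "MMUEXT_NEW_BASEPTR";      op->arg = 0x1234; op++;
        op->cmd = "MMUEXT_NEW_USER_BASEPTR"; op->arg = 0x5678; op++;
        if (ldt_changed) {
            op->cmd = "MMUEXT_SET_LDT";      op->arg = 0x9abc; op++;
        }
        /* One submission covers however many ops accumulated. */
        return issue_batch(batch, (int)(op - batch));
    }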
+- */ +- load_cr3(next->pgd); +- load_LDT_nolock(&next->context); +- } +- } +-#endif +-} +- +-#define deactivate_mm(tsk, mm) \ +- asm("movl %0,%%gs": :"r" (0)); +- +-#endif /* _ASM_X86_MMU_CONTEXT_32_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h 2009-11-06 10:51:47.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,106 +0,0 @@ +-#ifndef _ASM_X86_MMU_CONTEXT_64_H +-#define _ASM_X86_MMU_CONTEXT_64_H +- +-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +-{ +-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) +- if (read_pda(mmu_state) == TLBSTATE_OK) +- write_pda(mmu_state, TLBSTATE_LAZY); +-#endif +-} +- +-#define prepare_arch_switch(next) __prepare_arch_switch() +- +-static inline void __prepare_arch_switch(void) +-{ +- /* +- * Save away %es, %ds, %fs and %gs. Must happen before reload +- * of cr3/ldt (i.e., not in __switch_to). +- */ +- __asm__ __volatile__ ( +- "mov %%es,%0 ; mov %%ds,%1 ; mov %%fs,%2 ; mov %%gs,%3" +- : "=m" (current->thread.es), +- "=m" (current->thread.ds), +- "=m" (current->thread.fsindex), +- "=m" (current->thread.gsindex) ); +- +- if (current->thread.ds) +- __asm__ __volatile__ ( "movl %0,%%ds" : : "r" (0) ); +- +- if (current->thread.es) +- __asm__ __volatile__ ( "movl %0,%%es" : : "r" (0) ); +- +- if (current->thread.fsindex) { +- __asm__ __volatile__ ( "movl %0,%%fs" : : "r" (0) ); +- current->thread.fs = 0; +- } +- +- if (current->thread.gsindex) { +- load_gs_index(0); +- current->thread.gs = 0; +- } +-} +- +-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, +- struct task_struct *tsk) +-{ +- unsigned cpu = smp_processor_id(); +- struct mmuext_op _op[3], *op = _op; +- +- if (likely(prev != next)) { +- BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && +- !PagePinned(virt_to_page(next->pgd))); +- +- /* stop flush ipis for the previous mm */ +- cpu_clear(cpu, prev->cpu_vm_mask); +-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) +- write_pda(mmu_state, TLBSTATE_OK); +- write_pda(active_mm, next); +-#endif +- cpu_set(cpu, next->cpu_vm_mask); +- +- /* load_cr3(next->pgd) */ +- op->cmd = MMUEXT_NEW_BASEPTR; +- op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT); +- op++; +- +- /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */ +- op->cmd = MMUEXT_NEW_USER_BASEPTR; +- op->arg1.mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT); +- op++; +- +- if (unlikely(next->context.ldt != prev->context.ldt)) { +- /* load_LDT_nolock(&next->context) */ +- op->cmd = MMUEXT_SET_LDT; +- op->arg1.linear_addr = (unsigned long)next->context.ldt; +- op->arg2.nr_ents = next->context.size; +- op++; +- } +- +- BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); +- } +-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) +- else { +- write_pda(mmu_state, TLBSTATE_OK); +- if (read_pda(active_mm) != next) +- BUG(); +- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { +- /* We were in lazy tlb mode and leave_mm disabled +- * tlb flush IPI delivery. We must reload CR3 +- * to make sure to use no freed page tables. 
+- */ +- load_cr3(next->pgd); +- xen_new_user_pt(__pa(__user_pgd(next->pgd))); +- load_LDT_nolock(&next->context); +- } +- } +-#endif +-} +- +-#define deactivate_mm(tsk, mm) \ +-do { \ +- load_gs_index(0); \ +- asm volatile("movl %0,%%fs"::"r"(0)); \ +-} while (0) +- +-#endif /* _ASM_X86_MMU_CONTEXT_64_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:52:02.000000000 +0100 +@@ -41,7 +41,6 @@ static inline int pci_proc_domain(struct + return pci_domain_nr(bus); + } + +-extern void pci_iommu_alloc(void); + + /* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes +@@ -92,12 +91,44 @@ static inline void early_quirks(void) { + + extern void pci_iommu_alloc(void); + +-#endif /* __KERNEL__ */ ++/* MSI arch hooks */ ++#define arch_setup_msi_irqs arch_setup_msi_irqs ++#define arch_teardown_msi_irqs arch_teardown_msi_irqs ++ ++#define PCI_DMA_BUS_IS_PHYS 0 ++ ++#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) || defined(CONFIG_SWIOTLB) ++ ++#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ ++ dma_addr_t ADDR_NAME; ++#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ ++ __u32 LEN_NAME; ++#define pci_unmap_addr(PTR, ADDR_NAME) \ ++ ((PTR)->ADDR_NAME) ++#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ ++ (((PTR)->ADDR_NAME) = (VAL)) ++#define pci_unmap_len(PTR, LEN_NAME) \ ++ ((PTR)->LEN_NAME) ++#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ ++ (((PTR)->LEN_NAME) = (VAL)) + +-#ifdef CONFIG_X86_32 +-# include "pci_32.h" + #else +-# include "../../asm/pci_64.h" ++ ++#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0]; ++#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0]; ++#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME) ++#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ ++ do { break; } while (pci_unmap_addr(PTR, ADDR_NAME)) ++#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME) ++#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ ++ do { break; } while (pci_unmap_len(PTR, LEN_NAME)) ++ ++#endif ++ ++#endif /* __KERNEL__ */ ++ ++#ifdef CONFIG_X86_64 ++#include "../../asm/pci_64.h" + #endif + + /* implement the pci_ DMA API in terms of the generic device dma_ one */ +@@ -115,11 +146,6 @@ static inline int __pcibus_to_node(const + return sd->node; + } + +-static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) +-{ +- return node_to_cpumask(__pcibus_to_node(bus)); +-} +- + static inline const struct cpumask * + cpumask_of_pcibus(const struct pci_bus *bus) + { +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-20 11:15:54.000000000 +0100 +@@ -1,178 +1,9 @@ + #ifndef _ASM_X86_PGTABLE_H + #define _ASM_X86_PGTABLE_H + +-#define FIRST_USER_ADDRESS 0 ++#include + +-#define _PAGE_BIT_PRESENT 0 /* is present */ +-#define _PAGE_BIT_RW 1 /* writeable */ +-#define _PAGE_BIT_USER 2 /* userspace addressable */ +-#define _PAGE_BIT_PWT 3 /* page write through */ +-#define _PAGE_BIT_PCD 4 /* page cache disabled */ +-#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ +-#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ +-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ +-#define _PAGE_BIT_PAT 7 /* on 4KB pages */ +-#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ 
+-#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ +-#define _PAGE_BIT_UNUSED3 11 +-#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +-#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +- +-/* If _PAGE_BIT_PRESENT is clear, we use these: */ +-/* - if the user mapped it with PROT_NONE; pte_present gives true */ +-#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL +-/* - set: nonlinear file mapping, saved PTE; unset:swap */ +-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY +- +-#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +-#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +-#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +-#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +-#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +-#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +-#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +-#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +-#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) +-#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +-#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +-#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) +-#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +-#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +-#define __HAVE_ARCH_PTE_SPECIAL +- +-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +-#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +-#else +-#define _PAGE_NX (_AT(pteval_t, 0)) +-#endif +- +-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) +-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +- +-#ifndef __ASSEMBLY__ +-#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002 +-extern unsigned int __kernel_page_user; +-#else +-#define __kernel_page_user 0 +-#endif +-#endif +- +-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_DIRTY) +-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ +- _PAGE_DIRTY | __kernel_page_user) +- +-/* Set of bits not changed in pte_modify */ +-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \ +- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) +- +-/* +- * PAT settings are part of the hypervisor interface, which sets the +- * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]). 
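Note: the _PAGE_CACHE_* combinations being moved out of this header select a PAT entry through the PWT, PCD and PAT page-table bits, and the comment above fixes the hypervisor's PAT MSR layout at 0x050100070406 (WB, WT, UC-, UC, WC, WP). A small check that decodes each combination to its PAT index; the bit positions are copied from the header:

    #include <stdio.h>

    /* The three page-table bits that pick a PAT entry (4K pages). */
    #define _PAGE_PWT (1 << 3)
    #define _PAGE_PCD (1 << 4)
    #define _PAGE_PAT (1 << 7)

    int main(void)
    {
        struct { const char *mode; int bits; } cache[] = {
            { "WB      ", 0 },
            { "WT      ", _PAGE_PWT },
            { "UC_MINUS", _PAGE_PCD },
            { "UC      ", _PAGE_PCD | _PAGE_PWT },
            { "WC      ", _PAGE_PAT },
            { "WP      ", _PAGE_PAT | _PAGE_PWT },
        };
        /* PAT index = PAT:PCD:PWT, so the list decodes to 0..5,
         * matching the WB, WT, UC-, UC, WC, WP order of the MSR. */
        for (unsigned i = 0; i < sizeof(cache) / sizeof(cache[0]); i++)
            printf("%s -> PAT index %d\n", cache[i].mode,
                   (!!(cache[i].bits & _PAGE_PAT) << 2) |
                   (!!(cache[i].bits & _PAGE_PCD) << 1) |
                    !!(cache[i].bits & _PAGE_PWT));
        return 0;
    }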
+- */ +-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT) +-#define _PAGE_CACHE_WB (0) +-#define _PAGE_CACHE_WT (_PAGE_PWT) +-#define _PAGE_CACHE_WC (_PAGE_PAT) +-#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT) +-#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) +-#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) +- +-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_NX) +- +-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ +- _PAGE_USER | _PAGE_ACCESSED) +-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_NX) +-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED) +-#define PAGE_COPY PAGE_COPY_NOEXEC +-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_NX) +-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED) +- +-#define __PAGE_KERNEL_EXEC \ +- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user) +-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) +- +-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) +-#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) +-#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) +-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) +-#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) +-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +-#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +-#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) +-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) +- +-#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) +-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) +-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) +-#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) +- +-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) +-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) +-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) +-#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) +-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) +-#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) +-#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) +-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) +-#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) +-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) +-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) +-#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) +- +-#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) +-#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) +-#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) +-#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) +- +-/* xwr */ +-#define __P000 PAGE_NONE +-#define __P001 PAGE_READONLY +-#define __P010 PAGE_COPY +-#define __P011 PAGE_COPY +-#define __P100 PAGE_READONLY_EXEC +-#define __P101 PAGE_READONLY_EXEC +-#define __P110 PAGE_COPY_EXEC +-#define __P111 PAGE_COPY_EXEC +- +-#define __S000 PAGE_NONE +-#define __S001 
PAGE_READONLY +-#define __S010 PAGE_SHARED +-#define __S011 PAGE_SHARED +-#define __S100 PAGE_READONLY_EXEC +-#define __S101 PAGE_READONLY_EXEC +-#define __S110 PAGE_SHARED_EXEC +-#define __S111 PAGE_SHARED_EXEC +- +-/* +- * early identity mapping pte attrib macros. +- */ +-#ifdef CONFIG_X86_64 +-#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC +-#else +-/* +- * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection +- * bits are combined, this will alow user to access the high address mapped +- * VDSO in the presence of CONFIG_COMPAT_VDSO +- */ +-#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ +-#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +-#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ +-#endif ++#include + + /* + * Macro to mark a page protection value as UC- +@@ -184,9 +15,6 @@ extern unsigned int __kernel_page_user; + + #ifndef __ASSEMBLY__ + +-#define pgprot_writecombine pgprot_writecombine +-extern pgprot_t pgprot_writecombine(pgprot_t prot); +- + /* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. +@@ -197,6 +25,59 @@ extern unsigned long empty_zero_page[PAG + extern spinlock_t pgd_lock; + extern struct list_head pgd_list; + ++#define set_pte(ptep, pte) xen_set_pte(ptep, pte) ++#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) ++ ++#define set_pte_atomic(ptep, pte) \ ++ xen_set_pte_atomic(ptep, pte) ++ ++#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) ++ ++#ifndef __PAGETABLE_PUD_FOLDED ++#define set_pgd(pgdp, pgd) xen_set_pgd(pgdp, pgd) ++#define pgd_clear(pgd) xen_pgd_clear(pgd) ++#endif ++ ++#ifndef set_pud ++# define set_pud(pudp, pud) xen_set_pud(pudp, pud) ++#endif ++ ++#ifndef __PAGETABLE_PMD_FOLDED ++#define pud_clear(pud) xen_pud_clear(pud) ++#endif ++ ++#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) ++#define pmd_clear(pmd) xen_pmd_clear(pmd) ++ ++#define pte_update(mm, addr, ptep) do { } while (0) ++#define pte_update_defer(mm, addr, ptep) do { } while (0) ++ ++static inline void __init paravirt_pagetable_setup_start(pgd_t *base) ++{ ++ xen_pagetable_setup_start(base); ++} ++ ++static inline void __init paravirt_pagetable_setup_done(pgd_t *base) ++{ ++ xen_pagetable_setup_done(base); ++} ++ ++#define pgd_val(x) xen_pgd_val(x) ++#define __pgd(x) xen_make_pgd(x) ++ ++#ifndef __PAGETABLE_PUD_FOLDED ++#define pud_val(x) xen_pud_val(x) ++#define __pud(x) xen_make_pud(x) ++#endif ++ ++#ifndef __PAGETABLE_PMD_FOLDED ++#define pmd_val(x) xen_pmd_val(x) ++#define __pmd(x) xen_make_pmd(x) ++#endif ++ ++#define pte_val(x) xen_pte_val(x) ++#define __pte(x) xen_make_pte(x) ++ + /* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
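Note: the next hunk rewrites the pte_mk*() helpers in terms of two primitives, pte_set_flags() and pte_clear_flags(). A user-space sketch of that pattern; the real versions go through __pte_val()/__pte_ma() and therefore Xen machine addresses, which this toy ignores:

    #include <stdio.h>

    typedef unsigned long pteval_t;
    typedef struct { pteval_t pte; } pte_t;

    #define _PAGE_RW    (1UL << 1)
    #define _PAGE_DIRTY (1UL << 6)

    /* The two primitives; every pte_mk*() helper is sugar over them. */
    static pte_t pte_set_flags(pte_t pte, pteval_t set)
    {
        return (pte_t){ pte.pte | set };
    }

    static pte_t pte_clear_flags(pte_t pte, pteval_t clear)
    {
        return (pte_t){ pte.pte & ~clear };
    }

    static pte_t pte_mkdirty(pte_t pte)   { return pte_set_flags(pte, _PAGE_DIRTY); }
    static pte_t pte_wrprotect(pte_t pte) { return pte_clear_flags(pte, _PAGE_RW); }

    int main(void)
    {
        pte_t pte = { _PAGE_RW };
        pte = pte_mkdirty(pte);
        pte = pte_wrprotect(pte);
        printf("pte flags: %#lx\n", pte.pte);   /* 0x40: dirty, not RW */
        return 0;
    }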
+@@ -252,53 +133,67 @@ static inline int pte_special(pte_t pte) + + static inline int pmd_large(pmd_t pte) + { +- return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == ++ return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); + } + ++static inline pte_t pte_set_flags(pte_t pte, pteval_t set) ++{ ++ pteval_t v = __pte_val(pte); ++ ++ return __pte_ma(v | set); ++} ++ ++static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) ++{ ++ pteval_t v = __pte_val(pte); ++ ++ return __pte_ma(v & ~clear); ++} ++ + static inline pte_t pte_mkclean(pte_t pte) + { +- return __pte_ma(__pte_val(pte) & ~_PAGE_DIRTY); ++ return pte_clear_flags(pte, _PAGE_DIRTY); + } + + static inline pte_t pte_mkold(pte_t pte) + { +- return __pte_ma(__pte_val(pte) & ~_PAGE_ACCESSED); ++ return pte_clear_flags(pte, _PAGE_ACCESSED); + } + + static inline pte_t pte_wrprotect(pte_t pte) + { +- return __pte_ma(__pte_val(pte) & ~_PAGE_RW); ++ return pte_clear_flags(pte, _PAGE_RW); + } + + static inline pte_t pte_mkexec(pte_t pte) + { +- return __pte_ma(__pte_val(pte) & ~_PAGE_NX); ++ return pte_clear_flags(pte, _PAGE_NX); + } + + static inline pte_t pte_mkdirty(pte_t pte) + { +- return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); ++ return pte_set_flags(pte, _PAGE_DIRTY); + } + + static inline pte_t pte_mkyoung(pte_t pte) + { +- return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); ++ return pte_set_flags(pte, _PAGE_ACCESSED); + } + + static inline pte_t pte_mkwrite(pte_t pte) + { +- return __pte_ma(__pte_val(pte) | _PAGE_RW); ++ return pte_set_flags(pte, _PAGE_RW); + } + + static inline pte_t pte_mkhuge(pte_t pte) + { +- return __pte_ma(__pte_val(pte) | _PAGE_PSE); ++ return pte_set_flags(pte, _PAGE_PSE); + } + + static inline pte_t pte_clrhuge(pte_t pte) + { +- return __pte_ma(__pte_val(pte) & ~_PAGE_PSE); ++ return pte_clear_flags(pte, _PAGE_PSE); + } + + static inline pte_t pte_mkglobal(pte_t pte) +@@ -313,11 +208,9 @@ static inline pte_t pte_clrglobal(pte_t + + static inline pte_t pte_mkspecial(pte_t pte) + { +- return __pte_ma(__pte_val(pte) | _PAGE_SPECIAL); ++ return pte_set_flags(pte, _PAGE_SPECIAL); + } + +-extern pteval_t __supported_pte_mask; +- + /* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. +@@ -391,68 +284,208 @@ static inline int is_new_memtype_allowed + return 1; + } + +-#ifndef __ASSEMBLY__ +-#ifndef CONFIG_XEN +-/* Indicate that x86 has its own track and untrack pfn vma functions */ +-#define __HAVE_PFNMAP_TRACKING +-#endif ++pmd_t *populate_extra_pmd(unsigned long vaddr); ++pte_t *populate_extra_pte(unsigned long vaddr); ++#endif /* __ASSEMBLY__ */ + +-#define __HAVE_PHYS_MEM_ACCESS_PROT +-struct file; +-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, +- unsigned long size, pgprot_t vma_prot); +-int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, +- unsigned long size, pgprot_t *vma_prot); ++#ifdef CONFIG_X86_32 ++# include "pgtable_32.h" ++#else ++# include "pgtable_64.h" + #endif + +-/* Install a pte for a particular vaddr in kernel space. 
*/ +-void set_pte_vaddr(unsigned long vaddr, pte_t pte); ++#ifndef __ASSEMBLY__ ++#include + +-#ifndef CONFIG_XEN +-extern void native_pagetable_setup_start(pgd_t *base); +-extern void native_pagetable_setup_done(pgd_t *base); ++static inline int pte_none(pte_t pte) ++{ ++ return !pte.pte; ++} ++ ++#define __HAVE_ARCH_PTE_SAME ++static inline int pte_same(pte_t a, pte_t b) ++{ ++ return a.pte == b.pte; ++} ++ ++static inline int pte_present(pte_t a) ++{ ++ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); ++} ++ ++static inline int pmd_present(pmd_t pmd) ++{ ++#if CONFIG_XEN_COMPAT <= 0x030002 ++/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. ++ can temporarily clear it. */ ++ return __pmd_val(pmd) != 0; + #else +-static inline void xen_pagetable_setup_start(pgd_t *base) {} +-static inline void xen_pagetable_setup_done(pgd_t *base) {} ++ return pmd_flags(pmd) & _PAGE_PRESENT; + #endif ++} + +-struct seq_file; +-extern void arch_report_meminfo(struct seq_file *m); ++static inline int pmd_none(pmd_t pmd) ++{ ++ /* Only check low word on 32-bit platforms, since it might be ++ out of sync with upper half. */ ++ return (unsigned long)__pmd_val(pmd) == 0; ++} + +-#define set_pte(ptep, pte) xen_set_pte(ptep, pte) +-#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) ++static inline unsigned long pmd_page_vaddr(pmd_t pmd) ++{ ++ return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); ++} + +-#define set_pte_atomic(ptep, pte) \ +- xen_set_pte_atomic(ptep, pte) ++/* ++ * Currently stuck as a macro due to indirect forward reference to ++ * linux/mmzone.h's __section_mem_map_addr() definition: ++ */ ++#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) + +-#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) ++/* ++ * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] ++ * ++ * this macro returns the index of the entry in the pmd page which would ++ * control the given virtual address ++ */ ++static inline unsigned pmd_index(unsigned long address) ++{ ++ return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); ++} + +-#ifndef __PAGETABLE_PUD_FOLDED +-#define set_pgd(pgdp, pgd) xen_set_pgd(pgdp, pgd) +-#define pgd_clear(pgd) xen_pgd_clear(pgd) +-#endif ++/* ++ * Conversion functions: convert a page and protection to a page entry, ++ * and a page entry and page directory to the page they refer to. 
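Note: pmd_index() above and pte_index() just below both pull a table slot out of a virtual address with the same shift-and-mask scheme. A stand-alone sketch of that arithmetic; the constants assume 4 KB pages and the 512-entry tables defined elsewhere in this patch:

    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PMD_SHIFT    21            /* one pmd entry maps 2 MB */
    #define PTRS_PER_PMD 512
    #define PTRS_PER_PTE 512

    static unsigned pmd_index(unsigned long address)
    {
        return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
    }

    static unsigned pte_index(unsigned long address)
    {
        return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
    }

    int main(void)
    {
        unsigned long addr = 0x7f0012345678UL;   /* arbitrary example */
        printf("addr %#lx: pmd slot %u, pte slot %u\n",
               addr, pmd_index(addr), pte_index(addr));
        return 0;
    }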
++ * ++ * (Currently stuck as a macro because of indirect forward reference ++ * to linux/mm.h:page_to_nid()) ++ */ ++#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +-#ifndef set_pud +-# define set_pud(pudp, pud) xen_set_pud(pudp, pud) +-#endif ++/* ++ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] ++ * ++ * this function returns the index of the entry in the pte page which would ++ * control the given virtual address ++ */ ++static inline unsigned pte_index(unsigned long address) ++{ ++ return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); ++} + +-#ifndef __PAGETABLE_PMD_FOLDED +-#define pud_clear(pud) xen_pud_clear(pud) ++static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) ++{ ++ return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); ++} ++ ++static inline int pmd_bad(pmd_t pmd) ++{ ++#if CONFIG_XEN_COMPAT <= 0x030002 ++ return (pmd_flags(pmd) & ~_PAGE_USER & ~_PAGE_PRESENT) ++ != (_KERNPG_TABLE & ~_PAGE_PRESENT); ++#else ++ return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + #endif ++} + +-#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) +-#define pmd_clear(pmd) xen_pmd_clear(pmd) ++static inline unsigned long pages_to_mb(unsigned long npg) ++{ ++ return npg >> (20 - PAGE_SHIFT); ++} + +-#define pte_update(mm, addr, ptep) do { } while (0) +-#define pte_update_defer(mm, addr, ptep) do { } while (0) ++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ ++ direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO) + +-#endif /* __ASSEMBLY__ */ ++#if PAGETABLE_LEVELS > 2 ++static inline int pud_none(pud_t pud) ++{ ++ return __pud_val(pud) == 0; ++} + +-#ifdef CONFIG_X86_32 +-# include "pgtable_32.h" ++static inline int pud_present(pud_t pud) ++{ ++ return pud_flags(pud) & _PAGE_PRESENT; ++} ++ ++static inline unsigned long pud_page_vaddr(pud_t pud) ++{ ++ return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); ++} ++ ++/* ++ * Currently stuck as a macro due to indirect forward reference to ++ * linux/mmzone.h's __section_mem_map_addr() definition: ++ */ ++#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) ++ ++/* Find an entry in the second-level page table.. */ ++static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) ++{ ++ return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); ++} ++ ++static inline unsigned long pmd_pfn(pmd_t pmd) ++{ ++ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; ++} ++ ++static inline int pud_large(pud_t pud) ++{ ++ return (__pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == ++ (_PAGE_PSE | _PAGE_PRESENT); ++} ++ ++static inline int pud_bad(pud_t pud) ++{ ++ return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; ++} + #else +-# include "pgtable_64.h" +-#endif ++static inline int pud_large(pud_t pud) ++{ ++ return 0; ++} ++#endif /* PAGETABLE_LEVELS > 2 */ ++ ++#if PAGETABLE_LEVELS > 3 ++static inline int pgd_present(pgd_t pgd) ++{ ++ return pgd_flags(pgd) & _PAGE_PRESENT; ++} ++ ++static inline unsigned long pgd_page_vaddr(pgd_t pgd) ++{ ++ return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); ++} ++ ++/* ++ * Currently stuck as a macro due to indirect forward reference to ++ * linux/mmzone.h's __section_mem_map_addr() definition: ++ */ ++#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) ++ ++/* to find an entry in a page-table-directory. 
*/ ++static inline unsigned pud_index(unsigned long address) ++{ ++ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); ++} ++ ++static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) ++{ ++ return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); ++} ++ ++static inline int pgd_bad(pgd_t pgd) ++{ ++ return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; ++} ++ ++static inline int pgd_none(pgd_t pgd) ++{ ++ return !__pgd_val(pgd); ++} ++#endif /* PAGETABLE_LEVELS > 3 */ ++ ++#endif /* __ASSEMBLY__ */ + + /* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] +@@ -479,28 +512,6 @@ extern void arch_report_meminfo(struct s + + #ifndef __ASSEMBLY__ + +-enum { +- PG_LEVEL_NONE, +- PG_LEVEL_4K, +- PG_LEVEL_2M, +- PG_LEVEL_1G, +- PG_LEVEL_NUM +-}; +- +-#ifdef CONFIG_PROC_FS +-extern void update_page_count(int level, unsigned long pages); +-#else +-static inline void update_page_count(int level, unsigned long pages) { } +-#endif +- +-/* +- * Helper function that returns the kernel pagetable entry controlling +- * the virtual address 'address'. NULL means no pagetable entry present. +- * NOTE: the return type is pte_t but if the pmd is PSE then we return it +- * as a pte too. +- */ +-extern pte_t *lookup_address(unsigned long address, unsigned int *level); +- + /* local pte updates need not use xchg for locking */ + static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) + { +@@ -633,15 +644,18 @@ static inline void clone_pgd_range(pgd_t + memcpy(dst, src, count * sizeof(pgd_t)); + } + +-#define arbitrary_virt_to_machine(va) \ ++#define arbitrary_virt_to_mfn(va) \ + ({ \ + unsigned int __lvl; \ + pte_t *__ptep = lookup_address((unsigned long)(va), &__lvl); \ + BUG_ON(!__ptep || __lvl != PG_LEVEL_4K || !pte_present(*__ptep));\ +- (((maddr_t)pte_mfn(*__ptep) << PAGE_SHIFT) \ +- | ((unsigned long)(va) & (PAGE_SIZE - 1))); \ ++ pte_mfn(*__ptep); \ + }) + ++#define arbitrary_virt_to_machine(va) \ ++ (((maddr_t)arbitrary_virt_to_mfn(va) << PAGE_SHIFT) \ ++ | ((unsigned long)(va) & (PAGE_SIZE - 1))) ++ + #ifdef CONFIG_HIGHPTE + #include + struct page *kmap_atomic_to_page(void *); +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:52:02.000000000 +0100 +@@ -20,21 +20,6 @@ + __FILE__, __LINE__, &(e), __pgd_val(e), \ + (pgd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT) + +-static inline int pud_none(pud_t pud) +-{ +- return __pud_val(pud) == 0; +- +-} +-static inline int pud_bad(pud_t pud) +-{ +- return (__pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; +-} +- +-static inline int pud_present(pud_t pud) +-{ +- return __pud_val(pud) & _PAGE_PRESENT; +-} +- + /* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is +@@ -102,15 +87,6 @@ static inline void pud_clear(pud_t *pudp + xen_tlb_flush(); + } + +-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) +- +-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) +- +- +-/* Find an entry in the second-level page table.. 
*/ +-#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ +- pmd_index(address)) +- + #ifdef CONFIG_SMP + static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res) + { +@@ -127,17 +103,6 @@ static inline pte_t xen_ptep_get_and_cle + #define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) + #endif + +-#define __HAVE_ARCH_PTE_SAME +-static inline int pte_same(pte_t a, pte_t b) +-{ +- return a.pte_low == b.pte_low && a.pte_high == b.pte_high; +-} +- +-static inline int pte_none(pte_t pte) +-{ +- return !(pte.pte_low | pte.pte_high); +-} +- + #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ + ((_pte).pte_high << (32-PAGE_SHIFT))) + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2009-11-06 10:51:47.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,24 +0,0 @@ +-#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H +-#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H +- +-#define SHARED_KERNEL_PMD 0 +- +-/* +- * PGDIR_SHIFT determines what a top-level page table entry can map +- */ +-#define PGDIR_SHIFT 30 +-#define PTRS_PER_PGD 4 +- +-/* +- * PMD_SHIFT determines the size of the area a middle-level +- * page table can map +- */ +-#define PMD_SHIFT 21 +-#define PTRS_PER_PMD 512 +- +-/* +- * entries per page directory level +- */ +-#define PTRS_PER_PTE 512 +- +-#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable-3level_types.h 2009-11-06 10:52:02.000000000 +0100 +@@ -0,0 +1,44 @@ ++#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H ++#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H ++ ++#ifndef __ASSEMBLY__ ++#include ++ ++typedef u64 pteval_t; ++typedef u64 pmdval_t; ++typedef u64 pudval_t; ++typedef u64 pgdval_t; ++typedef u64 pgprotval_t; ++ ++typedef union { ++ struct { ++ unsigned long pte_low, pte_high; ++ }; ++ pteval_t pte; ++} pte_t; ++#endif /* !__ASSEMBLY__ */ ++ ++#define SHARED_KERNEL_PMD 0 ++ ++#define PAGETABLE_LEVELS 3 ++ ++/* ++ * PGDIR_SHIFT determines what a top-level page table entry can map ++ */ ++#define PGDIR_SHIFT 30 ++#define PTRS_PER_PGD 4 ++ ++/* ++ * PMD_SHIFT determines the size of the area a middle-level ++ * page table can map ++ */ ++#define PMD_SHIFT 21 ++#define PTRS_PER_PMD 512 ++ ++/* ++ * entries per page directory level ++ */ ++#define PTRS_PER_PTE 512 ++ ++ ++#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:52:02.000000000 +0100 +@@ -1,6 +1,8 @@ + #ifndef _ASM_X86_PGTABLE_32_H + #define _ASM_X86_PGTABLE_32_H + ++#include ++ + /* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page +@@ -31,47 +33,6 @@ void paging_init(void); + + extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); + +-/* +- * The Linux x86 paging architecture is 'compile-time dual-mode', it +- * implements both the traditional 2-level x86 page tables and the +- * newer 3-level PAE-mode page tables. 
+- */ +-#ifdef CONFIG_X86_PAE +-# include +-# define PMD_SIZE (1UL << PMD_SHIFT) +-# define PMD_MASK (~(PMD_SIZE - 1)) +-#else +-# include +-#endif +- +-#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +-#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +- +-/* Just any arbitrary offset to the start of the vmalloc VM area: the +- * current 8MB value just means that there will be a 8MB "hole" after the +- * physical memory until the kernel virtual memory starts. That means that +- * any out-of-bounds memory accesses will hopefully be caught. +- * The vmalloc() routines leaves a hole of 4kB between each vmalloced +- * area for the same reason. ;) +- */ +-#define VMALLOC_OFFSET (8 * 1024 * 1024) +-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +-#ifdef CONFIG_X86_PAE +-#define LAST_PKMAP 512 +-#else +-#define LAST_PKMAP 1024 +-#endif +- +-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ +- & PMD_MASK) +- +-#ifdef CONFIG_HIGHMEM +-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +-#else +-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +-#endif +- +-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + + /* + * Define this if things work differently on an i386 and an i486: +@@ -80,66 +41,12 @@ extern void set_pmd_pfn(unsigned long, u + */ + #undef TEST_ACCESS_OK + +-/* The boot page tables (all created as a single array) */ +-extern unsigned long pg0[]; +- +-#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) +- +-/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ +-#define pmd_none(x) (!(unsigned long)__pmd_val(x)) +-#if CONFIG_XEN_COMPAT <= 0x030002 +-/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. +- can temporarily clear it. */ +-#define pmd_present(x) (__pmd_val(x)) +-#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) +-#else +-#define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) +-#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +-#endif +- +- +-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +- + #ifdef CONFIG_X86_PAE + # include + #else + # include + #endif + +-/* +- * Conversion functions: convert a page and protection to a page entry, +- * and a page entry and page directory to the page they refer to. 
+- */ +-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +- +- +-static inline int pud_large(pud_t pud) { return 0; } +- +-/* +- * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] +- * +- * this macro returns the index of the entry in the pmd page which would +- * control the given virtual address +- */ +-#define pmd_index(address) \ +- (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +- +-/* +- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] +- * +- * this macro returns the index of the entry in the pte page which would +- * control the given virtual address +- */ +-#define pte_index(address) \ +- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +-#define pte_offset_kernel(dir, address) \ +- ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) +- +-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) +- +-#define pmd_page_vaddr(pmd) \ +- ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) +- + #if defined(CONFIG_HIGHPTE) + #define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ +@@ -185,7 +92,4 @@ void make_lowmem_page_writable(void *va, + #define kern_addr_valid(kaddr) (0) + #endif + +-#define io_remap_pfn_range(vma, from, pfn, size, prot) \ +- direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO) +- + #endif /* _ASM_X86_PGTABLE_32_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:52:02.000000000 +0100 +@@ -2,6 +2,8 @@ + #define _ASM_X86_PGTABLE_64_H + + #include ++#include ++ + #ifndef __ASSEMBLY__ + + /* +@@ -12,12 +14,12 @@ + #include + #include + #include +-#include + + #ifdef CONFIG_XEN + extern pud_t level3_user_pgt[512]; + + extern void xen_init_pt(void); ++extern void xen_switch_pt(void); + #endif + + extern pud_t level3_kernel_pgt[512]; +@@ -33,39 +35,13 @@ extern void paging_init(void); + + #endif /* !__ASSEMBLY__ */ + +-#define SHARED_KERNEL_PMD 0 +- +-/* +- * PGDIR_SHIFT determines what a top-level page table entry can map +- */ +-#define PGDIR_SHIFT 39 +-#define PTRS_PER_PGD 512 +- +-/* +- * 3rd level page +- */ +-#define PUD_SHIFT 30 +-#define PTRS_PER_PUD 512 +- +-/* +- * PMD_SHIFT determines the size of the area a middle-level +- * page table can map +- */ +-#define PMD_SHIFT 21 +-#define PTRS_PER_PMD 512 +- +-/* +- * entries per page directory level +- */ +-#define PTRS_PER_PTE 512 +- + #ifndef __ASSEMBLY__ + + #define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \ + __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e)) + #define pmd_ERROR(e) \ +- printk("%s:%d: bad pmd %p(%016lx pfn %010Lx).\n", \ ++ printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \ + __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e)) + #define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016lx pfn %010Lx).\n", \ +@@ -76,9 +52,6 @@ extern void paging_init(void); + __FILE__, __LINE__, &(e), __pgd_val(e), \ + (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT) + +-#define pgd_none(x) (!__pgd_val(x)) +-#define pud_none(x) (!__pud_val(x)) +- + struct mm_struct; + + void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); +@@ -138,48 +111,6 @@ static inline void xen_pgd_clear(pgd_t * + xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0)); + } + +-#define pte_same(a, b) ((a).pte == (b).pte) +- +-#endif /* !__ASSEMBLY__ */ +- +-#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +-#define PMD_MASK (~(PMD_SIZE - 1)) +-#define 
PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +-#define PUD_MASK (~(PUD_SIZE - 1)) +-#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +-#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +- +-#define MAX_PHYSMEM_BITS 43 +-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +-#define VMALLOC_START _AC(0xffffc20000000000, UL) +-#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) +-#define VMEMMAP_START _AC(0xffffe20000000000, UL) +-#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +-#define MODULES_END _AC(0xffffffffff000000, UL) +-#define MODULES_LEN (MODULES_END - MODULES_VADDR) +- +-#ifndef __ASSEMBLY__ +- +-static inline int pgd_bad(pgd_t pgd) +-{ +- return (__pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +-} +- +-static inline int pud_bad(pud_t pud) +-{ +- return (__pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +-} +- +-static inline int pmd_bad(pmd_t pmd) +-{ +- return (__pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +-} +- +-#define pte_none(x) (!(x).pte) +-#define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE)) +- +-#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ +- + #define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT) + + /* +@@ -190,47 +121,12 @@ static inline int pmd_bad(pmd_t pmd) + /* + * Level 4 access. + */ +-#define pgd_page_vaddr(pgd) \ +- ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) +-#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) +-#define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT) + static inline int pgd_large(pgd_t pgd) { return 0; } + #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) + + /* PUD - Level3 access */ +-/* to find an entry in a page-table-directory. */ +-#define pud_page_vaddr(pud) \ +- ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK)) +-#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) +-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +-#define pud_offset(pgd, address) \ +- ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) +-#define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT) +- +-static inline int pud_large(pud_t pte) +-{ +- return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == +- (_PAGE_PSE | _PAGE_PRESENT); +-} + + /* PMD - Level 2 access */ +-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) +-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) +- +-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +-#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ +- pmd_index(address)) +-#define pmd_none(x) (!__pmd_val(x)) +-#if CONFIG_XEN_COMPAT <= 0x030002 +-/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. +- can temporarily clear it. */ +-#define pmd_present(x) (__pmd_val(x)) +-#else +-#define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) +-#endif +-#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) +-#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) +- + #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) + #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ + _PAGE_FILE }) +@@ -238,13 +134,6 @@ static inline int pud_large(pud_t pte) + + /* PTE - Level 1 access. 
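The pgd/pud/pmd/pte index macros consolidated away in this hunk are plain shift-and-mask arithmetic. A standalone worked example using the 4-level constants this header family defines (PGDIR_SHIFT 39, PUD_SHIFT 30, PMD_SHIFT 21, PAGE_SHIFT 12, 512 entries per level):

#include <stdio.h>
#include <stdint.h>

#define PGDIR_SHIFT 39
#define PUD_SHIFT   30
#define PMD_SHIFT   21
#define PAGE_SHIFT  12
#define PTRS        512  /* PTRS_PER_{PGD,PUD,PMD,PTE} */

int main(void)
{
	uint64_t addr = 0xffffc20000001000ULL;  /* VMALLOC_START + one page */
	printf("pgd=%llu pud=%llu pmd=%llu pte=%llu\n",
	       (unsigned long long)((addr >> PGDIR_SHIFT) & (PTRS - 1)),
	       (unsigned long long)((addr >> PUD_SHIFT) & (PTRS - 1)),
	       (unsigned long long)((addr >> PMD_SHIFT) & (PTRS - 1)),
	       (unsigned long long)((addr >> PAGE_SHIFT) & (PTRS - 1)));
	return 0;
}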
 */ + +-/* page, protection -> pte */ +-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot)) +- +-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ +- pte_index((address))) +- + /* x86-64 always has all page tables mapped. */ + #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) + #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) +@@ -278,9 +167,6 @@ static inline int pud_large(pud_t pte) + extern int kern_addr_valid(unsigned long addr); + extern void cleanup_highmap(void); + +-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ +- direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO) +- + #define HAVE_ARCH_UNMAPPED_AREA + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_64_types.h 2009-11-06 10:52:02.000000000 +0100 +@@ -0,0 +1,63 @@ ++#ifndef _ASM_X86_PGTABLE_64_DEFS_H ++#define _ASM_X86_PGTABLE_64_DEFS_H ++ ++#ifndef __ASSEMBLY__ ++#include <linux/types.h> ++ ++/* ++ * These are used to make use of C type-checking.. ++ */ ++typedef unsigned long pteval_t; ++typedef unsigned long pmdval_t; ++typedef unsigned long pudval_t; ++typedef unsigned long pgdval_t; ++typedef unsigned long pgprotval_t; ++ ++typedef union { pteval_t pte; unsigned int pte_low; } pte_t; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#define SHARED_KERNEL_PMD 0 ++#define PAGETABLE_LEVELS 4 ++ ++/* ++ * PGDIR_SHIFT determines what a top-level page table entry can map ++ */ ++#define PGDIR_SHIFT 39 ++#define PTRS_PER_PGD 512 ++ ++/* ++ * 3rd level page ++ */ ++#define PUD_SHIFT 30 ++#define PTRS_PER_PUD 512 ++ ++/* ++ * PMD_SHIFT determines the size of the area a middle-level ++ * page table can map ++ */ ++#define PMD_SHIFT 21 ++#define PTRS_PER_PMD 512 ++ ++/* ++ * entries per page directory level ++ */ ++#define PTRS_PER_PTE 512 ++ ++#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) ++#define PMD_MASK (~(PMD_SIZE - 1)) ++#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) ++#define PUD_MASK (~(PUD_SIZE - 1)) ++#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) ++#define PGDIR_MASK (~(PGDIR_SIZE - 1)) ++ ++#define MAX_PHYSMEM_BITS 43 ++#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) ++#define VMALLOC_START _AC(0xffffc20000000000, UL) ++#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) ++#define VMEMMAP_START _AC(0xffffe20000000000, UL) ++#define MODULES_VADDR _AC(0xffffffffa0000000, UL) ++#define MODULES_END _AC(0xffffffffff000000, UL) ++#define MODULES_LEN (MODULES_END - MODULES_VADDR) ++ ++#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_types.h 2009-11-06 10:52:02.000000000 +0100 +@@ -0,0 +1,388 @@ ++#ifndef _ASM_X86_PGTABLE_DEFS_H ++#define _ASM_X86_PGTABLE_DEFS_H ++ ++#include <linux/const.h> ++#include <asm/page_types.h> ++ ++#define FIRST_USER_ADDRESS 0 ++ ++#define _PAGE_BIT_PRESENT 0 /* is present */ ++#define _PAGE_BIT_RW 1 /* writeable */ ++#define _PAGE_BIT_USER 2 /* userspace addressable */ ++#define _PAGE_BIT_PWT 3 /* page write through */ ++#define _PAGE_BIT_PCD 4 /* page cache disabled */ ++#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ ++#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ ++#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ ++#define _PAGE_BIT_PAT 7 /* on 4KB pages */ ++#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ ++#define 
_PAGE_BIT_UNUSED1 9 /* available for programmer */ ++#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ ++#define _PAGE_BIT_UNUSED3 11 ++#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ ++#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ ++ ++/* If _PAGE_BIT_PRESENT is clear, we use these: */ ++/* - if the user mapped it with PROT_NONE; pte_present gives true */ ++#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL ++/* - set: nonlinear file mapping, saved PTE; unset:swap */ ++#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY ++ ++#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) ++#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) ++#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) ++#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) ++#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) ++#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) ++#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) ++#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) ++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) ++#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) ++#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) ++#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) ++#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) ++#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) ++#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) ++#define __HAVE_ARCH_PTE_SPECIAL ++ ++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) ++#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) ++#else ++#define _PAGE_NX (_AT(pteval_t, 0)) ++#endif ++ ++#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) ++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) ++ ++#ifndef __ASSEMBLY__ ++#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002 ++extern unsigned int __kernel_page_user; ++#else ++#define __kernel_page_user 0 ++#endif ++#endif ++ ++#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_DIRTY) ++#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ ++ _PAGE_DIRTY | __kernel_page_user) ++ ++/* Set of bits not changed in pte_modify */ ++#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \ ++ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) ++ ++/* ++ * PAT settings are part of the hypervisor interface, which sets the ++ * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]). 
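The MSR layout quoted above can be sanity-checked with a standalone decoder; each PAT entry is one byte, and the type encodings used below are the architectural ones (0 UC, 1 WC, 4 WT, 5 WP, 6 WB, 7 UC-):

#include <stdio.h>
#include <stdint.h>

static const char *pat_type(unsigned v)
{
	switch (v & 7) {
	case 0: return "UC";
	case 1: return "WC";
	case 4: return "WT";
	case 5: return "WP";
	case 6: return "WB";
	case 7: return "UC-";
	default: return "reserved";
	}
}

int main(void)
{
	uint64_t pat = 0x050100070406ULL;  /* hypervisor PAT value from the comment */
	int i;

	for (i = 0; i < 8; i++)  /* prints WB, WT, UC-, UC, WC, WP, UC, UC */
		printf("PAT%d = %s\n", i, pat_type((unsigned)(pat >> (8 * i))));
	return 0;
}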
++ */ ++#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT) ++#define _PAGE_CACHE_WB (0) ++#define _PAGE_CACHE_WT (_PAGE_PWT) ++#define _PAGE_CACHE_WC (_PAGE_PAT) ++#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT) ++#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) ++#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) ++ ++#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) ++#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_NX) ++ ++#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ ++ _PAGE_USER | _PAGE_ACCESSED) ++#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_NX) ++#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED) ++#define PAGE_COPY PAGE_COPY_NOEXEC ++#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_NX) ++#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED) ++ ++#define __PAGE_KERNEL_EXEC \ ++ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user) ++#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) ++ ++#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) ++#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) ++#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) ++#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) ++#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) ++#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) ++#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) ++#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) ++ ++#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) ++ ++#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) ++#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) ++#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) ++#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) ++#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) ++#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) ++#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) ++#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) ++#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) ++#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) ++#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) ++#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) ++#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) ++ ++#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) ++#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) ++#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) ++#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) ++ ++/* xwr */ ++#define __P000 PAGE_NONE ++#define __P001 PAGE_READONLY ++#define __P010 PAGE_COPY ++#define __P011 PAGE_COPY ++#define __P100 PAGE_READONLY_EXEC ++#define __P101 PAGE_READONLY_EXEC ++#define __P110 PAGE_COPY_EXEC ++#define __P111 PAGE_COPY_EXEC ++ ++#define __S000 PAGE_NONE ++#define __S001 
PAGE_READONLY ++#define __S010 PAGE_SHARED ++#define __S011 PAGE_SHARED ++#define __S100 PAGE_READONLY_EXEC ++#define __S101 PAGE_READONLY_EXEC ++#define __S110 PAGE_SHARED_EXEC ++#define __S111 PAGE_SHARED_EXEC ++ ++/* ++ * early identity mapping pte attrib macros. ++ */ ++#ifdef CONFIG_X86_64 ++#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC ++#else ++/* ++ * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection ++ * bits are combined, this will allow the user to access the high address mapped ++ * VDSO in the presence of CONFIG_COMPAT_VDSO ++ */ ++#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ ++#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ ++#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ ++#endif ++ ++#ifdef CONFIG_X86_32 ++# include <asm/pgtable_32_types.h> ++#else ++# include "pgtable_64_types.h" ++#endif ++ ++#ifndef __ASSEMBLY__ ++ ++#include <linux/types.h> ++ ++/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ ++#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) ++ ++/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ ++#define PTE_FLAGS_MASK (~PTE_PFN_MASK) ++ ++typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; ++ ++#include <asm/maddr.h> ++ ++typedef struct { pgdval_t pgd; } pgd_t; ++ ++#define __pgd_ma(x) ((pgd_t) { (x) } ) ++static inline pgd_t xen_make_pgd(pgdval_t val) ++{ ++ if (val & _PAGE_PRESENT) ++ val = pte_phys_to_machine(val); ++ return (pgd_t) { val }; ++} ++ ++#define __pgd_val(x) ((x).pgd) ++static inline pgdval_t xen_pgd_val(pgd_t pgd) ++{ ++ pgdval_t ret = __pgd_val(pgd); ++#if PAGETABLE_LEVELS == 2 && CONFIG_XEN_COMPAT <= 0x030002 ++ if (ret) ++ ret = machine_to_phys(ret) | _PAGE_PRESENT; ++#else ++ if (ret & _PAGE_PRESENT) ++ ret = pte_machine_to_phys(ret); ++#endif ++ return ret; ++} ++ ++static inline pgdval_t pgd_flags(pgd_t pgd) ++{ ++ return __pgd_val(pgd) & PTE_FLAGS_MASK; ++} ++ ++#if PAGETABLE_LEVELS > 3 ++typedef struct { pudval_t pud; } pud_t; ++ ++#define __pud_ma(x) ((pud_t) { (x) } ) ++static inline pud_t xen_make_pud(pudval_t val) ++{ ++ if (val & _PAGE_PRESENT) ++ val = pte_phys_to_machine(val); ++ return (pud_t) { val }; ++} ++ ++#define __pud_val(x) ((x).pud) ++static inline pudval_t xen_pud_val(pud_t pud) ++{ ++ pudval_t ret = __pud_val(pud); ++ if (ret & _PAGE_PRESENT) ++ ret = pte_machine_to_phys(ret); ++ return ret; ++} ++#else ++#include <asm-generic/pgtable-nopud.h> ++ ++#define __pud_val(x) __pgd_val((x).pgd) ++static inline pudval_t xen_pud_val(pud_t pud) ++{ ++ return xen_pgd_val(pud.pgd); ++} ++#endif ++ ++#if PAGETABLE_LEVELS > 2 ++typedef struct { pmdval_t pmd; } pmd_t; ++ ++#define __pmd_ma(x) ((pmd_t) { (x) } ) ++static inline pmd_t xen_make_pmd(pmdval_t val) ++{ ++ if (val & _PAGE_PRESENT) ++ val = pte_phys_to_machine(val); ++ return (pmd_t) { val }; ++} ++ ++#define __pmd_val(x) ((x).pmd) ++static inline pmdval_t xen_pmd_val(pmd_t pmd) ++{ ++ pmdval_t ret = __pmd_val(pmd); ++#if CONFIG_XEN_COMPAT <= 0x030002 ++ if (ret) ++ ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; ++#else ++ if (ret & _PAGE_PRESENT) ++ ret = pte_machine_to_phys(ret); ++#endif ++ return ret; ++} ++#else ++#include <asm-generic/pgtable-nopmd.h> ++ ++#define __pmd_ma(x) ((pmd_t) { .pud.pgd = __pgd_ma(x) } ) ++#define __pmd_val(x) __pgd_val((x).pud.pgd) ++static inline pmdval_t xen_pmd_val(pmd_t pmd) ++{ ++ return xen_pgd_val(pmd.pud.pgd); ++} ++#endif ++ ++static inline pudval_t pud_flags(pud_t pud) ++{ ++ return __pud_val(pud) & PTE_FLAGS_MASK; ++} ++ ++static inline pmdval_t pmd_flags(pmd_t pmd) ++{ ++ return __pmd_val(pmd) & PTE_FLAGS_MASK; ++} ++ ++#define 
__pte_ma(x) ((pte_t) { .pte = (x) } ) ++static inline pte_t xen_make_pte(pteval_t val) ++{ ++ if ((val & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT) ++ val = pte_phys_to_machine(val); ++ return (pte_t) { .pte = val }; ++} ++ ++#define __pte_val(x) ((x).pte) ++static inline pteval_t xen_pte_val(pte_t pte) ++{ ++ pteval_t ret = __pte_val(pte); ++ if ((pte.pte_low & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT) ++ ret = pte_machine_to_phys(ret); ++ return ret; ++} ++ ++static inline pteval_t pte_flags(pte_t pte) ++{ ++ return __pte_val(pte) & PTE_FLAGS_MASK; ++} ++ ++#define pgprot_val(x) ((x).pgprot) ++#define __pgprot(x) ((pgprot_t) { (x) } ) ++ ++ ++typedef struct page *pgtable_t; ++ ++extern pteval_t __supported_pte_mask; ++extern int nx_enabled; ++extern void set_nx(void); ++ ++#define pgprot_writecombine pgprot_writecombine ++extern pgprot_t pgprot_writecombine(pgprot_t prot); ++ ++#ifndef CONFIG_XEN ++/* Indicate that x86 has its own track and untrack pfn vma functions */ ++#define __HAVE_PFNMAP_TRACKING ++#endif ++ ++#define __HAVE_PHYS_MEM_ACCESS_PROT ++struct file; ++pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, ++ unsigned long size, pgprot_t vma_prot); ++int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, ++ unsigned long size, pgprot_t *vma_prot); ++ ++/* Install a pte for a particular vaddr in kernel space. */ ++void set_pte_vaddr(unsigned long vaddr, pte_t pte); ++ ++#ifndef CONFIG_XEN ++extern void native_pagetable_setup_start(pgd_t *base); ++extern void native_pagetable_setup_done(pgd_t *base); ++#else ++static inline void xen_pagetable_setup_start(pgd_t *base) {} ++static inline void xen_pagetable_setup_done(pgd_t *base) {} ++#endif ++ ++struct seq_file; ++extern void arch_report_meminfo(struct seq_file *m); ++ ++enum { ++ PG_LEVEL_NONE, ++ PG_LEVEL_4K, ++ PG_LEVEL_2M, ++ PG_LEVEL_1G, ++ PG_LEVEL_NUM ++}; ++ ++#ifdef CONFIG_PROC_FS ++extern void update_page_count(int level, unsigned long pages); ++#else ++static inline void update_page_count(int level, unsigned long pages) { } ++#endif ++ ++/* ++ * Helper function that returns the kernel pagetable entry controlling ++ * the virtual address 'address'. NULL means no pagetable entry present. ++ * NOTE: the return type is pte_t but if the pmd is PSE then we return it ++ * as a pte too. 
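A minimal sketch of how a caller might consume lookup_address() together with the PG_LEVEL_* enum above; mapping_size() is a hypothetical helper, not part of this patch:

/* Report the size of the mapping backing a kernel virtual address. */
static unsigned long mapping_size(unsigned long vaddr)
{
	unsigned int level;
	pte_t *pte = lookup_address(vaddr, &level);

	if (!pte || !(pte_flags(*pte) & _PAGE_PRESENT))
		return 0;
	switch (level) {
	case PG_LEVEL_4K: return 4UL << 10;
	case PG_LEVEL_2M: return 2UL << 20;  /* "pte" is really a PSE pmd */
	case PG_LEVEL_1G: return 1UL << 30;
	}
	return 0;
}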
++ */ ++extern pte_t *lookup_address(unsigned long address, unsigned int *level); ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_PGTABLE_DEFS_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:52:02.000000000 +0100 +@@ -16,6 +16,7 @@ struct mm_struct; + #include + #include + #include ++#include + #include + #include + #include +@@ -74,10 +75,10 @@ struct cpuinfo_x86 { + char pad0; + #else + /* Number of 4K pages in DTLB/ITLB combined(in pages): */ +- int x86_tlbsize; ++ int x86_tlbsize; ++#endif + __u8 x86_virt_bits; + __u8 x86_phys_bits; +-#endif + /* CPUID returned core id bits: */ + __u8 x86_coreid_bits; + /* Max extended CPUID function supported: */ +@@ -92,9 +93,9 @@ struct cpuinfo_x86 { + int x86_cache_alignment; /* In bytes */ + int x86_power; + unsigned long loops_per_jiffy; +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) + /* cpus sharing the last level cache: */ +- cpumask_t llc_shared_map; ++ cpumask_var_t llc_shared_map; + #endif + /* cpuid returned max cores value: */ + u16 x86_max_cores; +@@ -138,7 +139,7 @@ extern struct cpuinfo_x86 new_cpu_data; + extern __u32 cleared_cpu_caps[NCAPINTS]; + + #ifdef CONFIG_SMP +-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); ++DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); + #define cpu_data(cpu) per_cpu(cpu_info, cpu) + #define current_cpu_data __get_cpu_var(cpu_info) + #else +@@ -251,7 +252,6 @@ struct x86_hw_tss { + #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) + #define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) + #define INVALID_IO_BITMAP_OFFSET 0x8000 +-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 + + #ifndef CONFIG_X86_NO_TSS + struct tss_struct { +@@ -267,11 +267,6 @@ struct tss_struct { + * be within the limit. + */ + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +- /* +- * Cache the current maximum and the last task that used the bitmap: +- */ +- unsigned long io_bitmap_max; +- struct thread_struct *io_bitmap_owner; + + /* + * .. and then another 0x100 bytes for the emergency kernel stack: +@@ -280,7 +275,7 @@ struct tss_struct { + + } ____cacheline_aligned; + +-DECLARE_PER_CPU(struct tss_struct, init_tss); ++DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); + + /* + * Save the original ist values for checking stack pointers during debugging +@@ -363,6 +358,11 @@ struct i387_soft_struct { + u32 entry_eip; + }; + ++struct ymmh_struct { ++ /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ ++ u32 ymmh_space[64]; ++}; ++ + struct xsave_hdr_struct { + u64 xstate_bv; + u64 reserved1[2]; +@@ -372,6 +372,7 @@ struct xsave_hdr_struct { + struct xsave_struct { + struct i387_fxsave_struct i387; + struct xsave_hdr_struct xsave_hdr; ++ struct ymmh_struct ymmh; + /* new processor state extensions will go here */ + } __attribute__ ((packed, aligned (64))); + +@@ -382,11 +383,37 @@ union thread_xstate { + struct xsave_struct xsave; + }; + +-#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS) ++#ifdef CONFIG_X86_64 ++#ifndef CONFIG_X86_NO_TSS + DECLARE_PER_CPU(struct orig_ist, orig_ist); + #endif + +-extern void print_cpu_info(struct cpuinfo_x86 *); ++union irq_stack_union { ++ char irq_stack[IRQ_STACK_SIZE]; ++ /* ++ * GCC hardcodes the stack canary as %gs:40. Since the ++ * irq_stack is the object at %gs:0, we reserve the bottom ++ * 48 bytes of the irq stack for the canary. 
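The 40-byte reservation can be verified with a standalone model of the union declared below (IRQ_STACK_SIZE is assumed to be 16384 here; the anonymous struct needs GNU C or C11):

#include <assert.h>
#include <stddef.h>

union irq_stack_union {
	char irq_stack[16384];  /* assumed IRQ_STACK_SIZE */
	struct {
		char gs_base[40];            /* %gs:0 .. %gs:39 */
		unsigned long stack_canary;  /* lands at %gs:40 */
	};
};

int main(void)
{
	/* GCC hardcodes the canary at %gs:40, so this must hold: */
	assert(offsetof(union irq_stack_union, stack_canary) == 40);
	return 0;
}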
++ */ ++ struct { ++ char gs_base[40]; ++ unsigned long stack_canary; ++ }; ++}; ++ ++DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); ++DECLARE_INIT_PER_CPU(irq_stack_union); ++ ++DECLARE_PER_CPU(char *, irq_stack_ptr); ++DECLARE_PER_CPU(unsigned int, irq_count); ++extern unsigned long kernel_eflags; ++extern asmlinkage void ignore_sysret(void); ++#else /* X86_64 */ ++#ifdef CONFIG_CC_STACKPROTECTOR ++DECLARE_PER_CPU(unsigned long, stack_canary); ++#endif ++#endif /* X86_64 */ ++ + extern unsigned int xstate_size; + extern void free_thread_xstate(struct task_struct *); + extern struct kmem_cache *task_xstate_cachep; +@@ -660,6 +687,7 @@ static inline void __sti_mwait(unsigned + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + + extern void select_idle_routine(const struct cpuinfo_x86 *c); ++extern void init_c1e_mask(void); + + extern unsigned long boot_option_idle_override; + extern unsigned long idle_halt; +@@ -697,9 +725,9 @@ extern int sysenter_setup(void); + extern struct desc_ptr early_gdt_descr; + + extern void cpu_set_gdt(int); +-extern void switch_to_new_gdt(void); ++extern void switch_to_new_gdt(int); ++extern void load_percpu_segment(int); + extern void cpu_init(void); +-extern void init_gdt(int cpu); + + static inline unsigned long get_debugctlmsr(void) + { +@@ -784,6 +812,7 @@ static inline void spin_lock_prefetch(co + * User space process size: 3GB (default). + */ + #define TASK_SIZE PAGE_OFFSET ++#define TASK_SIZE_MAX TASK_SIZE + #define STACK_TOP TASK_SIZE + #define STACK_TOP_MAX STACK_TOP + +@@ -843,7 +872,7 @@ extern unsigned long thread_saved_pc(str + /* + * User space process size. 47bits minus one guard page. + */ +-#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) ++#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) + + /* This decides where the kernel will search for a free chunk of vm + * space during mmap's. +@@ -852,12 +881,12 @@ extern unsigned long thread_saved_pc(str + 0xc0000000 : 0xFFFFe000) + + #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ +- IA32_PAGE_OFFSET : TASK_SIZE64) ++ IA32_PAGE_OFFSET : TASK_SIZE_MAX) + #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? 
\ +- IA32_PAGE_OFFSET : TASK_SIZE64) ++ IA32_PAGE_OFFSET : TASK_SIZE_MAX) + + #define STACK_TOP TASK_SIZE +-#define STACK_TOP_MAX TASK_SIZE64 ++#define STACK_TOP_MAX TASK_SIZE_MAX + + #define INIT_THREAD { \ + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:14:58.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:15:20.000000000 +0100 +@@ -15,53 +15,25 @@ + # include <asm/io_apic.h> + # endif + #endif +-#include <asm/pda.h> + #include <asm/thread_info.h> +- +-#ifdef CONFIG_X86_64 +- +-#define cpu_callin_mask cpu_possible_mask +-#define cpu_callout_mask cpu_possible_mask +-extern cpumask_var_t cpu_initialized_mask; +-extern cpumask_var_t cpu_sibling_setup_mask; +- +-#else /* CONFIG_X86_32 */ +- +-#define cpu_callin_map cpu_possible_map +-#define cpu_callout_map cpu_possible_map +-extern cpumask_t cpu_initialized; +-extern cpumask_t cpu_sibling_setup_map; +- +-#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +-#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +-#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +-#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) +- +-#endif /* CONFIG_X86_32 */ +- +-extern void (*mtrr_hook)(void); +-extern void zap_low_mappings(void); +- +-extern int __cpuinit get_local_pda(int cpu); ++#include <asm/cpumask.h> + + extern int smp_num_siblings; + extern unsigned int num_processors; + +-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +-DECLARE_PER_CPU(cpumask_t, cpu_core_map); ++DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); ++DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); + DECLARE_PER_CPU(u16, cpu_llc_id); +-#ifdef CONFIG_X86_32 + DECLARE_PER_CPU(int, cpu_number); +-#endif + + static inline struct cpumask *cpu_sibling_mask(int cpu) + { +- return &per_cpu(cpu_sibling_map, cpu); ++ return per_cpu(cpu_sibling_map, cpu); + } + + static inline struct cpumask *cpu_core_mask(int cpu) + { +- return &per_cpu(cpu_core_map, cpu); ++ return per_cpu(cpu_core_map, cpu); + } + + DECLARE_PER_CPU(u16, x86_cpu_to_apicid); +@@ -149,9 +121,10 @@ static inline void arch_send_call_functi + smp_ops.send_call_func_single_ipi(cpu); + } + +-static inline void arch_send_call_function_ipi(cpumask_t mask) ++#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask ++static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) + { +- smp_ops.send_call_func_ipi(&mask); ++ smp_ops.send_call_func_ipi(mask); + } + + void cpu_disable_common(void); +@@ -176,14 +149,12 @@ void xen_send_call_func_single_ipi(int c + #define smp_send_stop xen_smp_send_stop + #define smp_send_reschedule xen_smp_send_reschedule + #define arch_send_call_function_single_ipi xen_send_call_func_single_ipi +-#define arch_send_call_function_ipi(m) xen_send_call_func_ipi(&(m)) ++#define arch_send_call_function_ipi_mask xen_send_call_func_ipi + + void play_dead(void); + + #endif /* CONFIG_XEN */ + +-extern void prefill_possible_map(void); +- + void smp_store_cpu_info(int id); + #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) + +@@ -192,10 +163,6 @@ static inline int num_booting_cpus(void) + { + return cpumask_weight(cpu_callout_mask); + } +-#else +-static inline void prefill_possible_map(void) +-{ +-} + #endif /* CONFIG_SMP */ + + extern unsigned disabled_cpus __cpuinitdata; +@@ -206,11 +173,11 @@ extern unsigned disabled_cpus __cpuinitd + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. 
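With this change, 32- and 64-bit builds both read the CPU number from the same per-CPU variable, as the #defines just below show. A rough userspace analogy, with threads standing in for CPUs and TLS standing in for the segment-based per-CPU area (illustrative only):

#include <stdio.h>
#include <pthread.h>

static __thread int cpu_number;  /* analogue of the per-CPU cpu_number */
#define raw_smp_processor_id() (cpu_number)

static void *worker(void *arg)
{
	cpu_number = (int)(long)arg;  /* set once at "CPU bring-up" */
	printf("running on %d\n", raw_smp_processor_id());
	return NULL;
}

int main(void)
{
	pthread_t t[2];
	long i;

	for (i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	return 0;
}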
+ */ +-#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) ++#define raw_smp_processor_id() (percpu_read(cpu_number)) + #define safe_smp_processor_id() smp_processor_id() + + #elif defined(CONFIG_X86_64_SMP) +-#define raw_smp_processor_id() read_pda(cpunumber) ++#define raw_smp_processor_id() (percpu_read(cpu_number)) + + #define stack_smp_processor_id() \ + ({ \ +@@ -220,10 +187,6 @@ extern unsigned disabled_cpus __cpuinitd + }) + #define safe_smp_processor_id() smp_processor_id() + +-#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ +-#define cpu_physical_id(cpu) boot_cpu_physical_apicid +-#define safe_smp_processor_id() 0 +-#define stack_smp_processor_id() 0 + #endif + + #ifdef CONFIG_X86_LOCAL_APIC +@@ -235,28 +198,9 @@ static inline int logical_smp_processor_ + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); + } + +-#include +-static inline unsigned int read_apic_id(void) +-{ +- unsigned int reg; +- +- reg = *(u32 *)(APIC_BASE + APIC_ID); +- +- return GET_APIC_ID(reg); +-} + #endif + +- +-# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) + extern int hard_smp_processor_id(void); +-# else +-#include +-static inline int hard_smp_processor_id(void) +-{ +- /* we don't want to mark this access volatile - bad code generation */ +- return read_apic_id(); +-} +-# endif /* APIC_DEFINITION */ + + #else /* CONFIG_X86_LOCAL_APIC */ + +@@ -266,11 +210,5 @@ static inline int hard_smp_processor_id( + + #endif /* CONFIG_X86_LOCAL_APIC */ + +-#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +-extern unsigned char boot_cpu_id; +-#else +-#define boot_cpu_id 0 +-#endif +- + #endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SMP_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/spinlock.h 2009-11-17 15:30:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/spinlock.h 2009-11-17 15:30:53.000000000 +0100 +@@ -250,40 +250,18 @@ static __always_inline void __ticket_spi + static inline int xen_spinlock_init(unsigned int cpu) { return 0; } + static inline void xen_spinlock_cleanup(unsigned int cpu) {} + +-/* +- * Define virtualization-friendly old-style lock byte lock, for use in +- * pv_lock_ops if desired. +- * +- * This differs from the pre-2.6.24 spinlock by always using xchgb +- * rather than decb to take the lock; this allows it to use a +- * zero-initialized lock structure. It also maintains a 1-byte +- * contention counter, so that we can implement +- * __byte_spin_is_contended. 
+- */ +-struct __byte_spinlock { +- s8 lock; +-#if NR_CPUS < 256 +- s8 spinners; +-#else +-#error NR_CPUS >= 256 support not implemented +-#endif +-}; +- + static inline int __byte_spin_is_locked(raw_spinlock_t *lock) + { +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- return bl->lock != 0; ++ return lock->lock != 0; + } + + static inline int __byte_spin_is_contended(raw_spinlock_t *lock) + { +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- return bl->spinners != 0; ++ return lock->spinners != 0; + } + + static inline void __byte_spin_lock(raw_spinlock_t *lock) + { +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + s8 val = 1; + + asm("1: xchgb %1, %0\n" +@@ -296,27 +274,25 @@ static inline void __byte_spin_lock(raw_ + " " LOCK_PREFIX "decb %2\n" + " jmp 1b\n" + "3:" +- : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); ++ : "+m" (lock->lock), "+q" (val), "+m" (lock->spinners): : "memory"); + } + + #define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock) + + static inline int __byte_spin_trylock(raw_spinlock_t *lock) + { +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + u8 old = 1; + + asm("xchgb %1,%0" +- : "+m" (bl->lock), "+q" (old) : : "memory"); ++ : "+m" (lock->lock), "+q" (old) : : "memory"); + + return old == 0; + } + + static inline void __byte_spin_unlock(raw_spinlock_t *lock) + { +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + smp_wmb(); +- bl->lock = 0; ++ lock->lock = 0; + } + + #define __raw_spin(n) __byte_spin_##n +@@ -417,8 +393,7 @@ static inline int __raw_read_trylock(raw + { + atomic_t *count = (atomic_t *)lock; + +- atomic_dec(count); +- if (atomic_read(count) >= 0) ++ if (atomic_dec_return(count) >= 0) + return 1; + atomic_inc(count); + return 0; +@@ -445,6 +420,9 @@ static inline void __raw_write_unlock(ra + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); + } + ++#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) ++#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) ++ + #define _raw_spin_relax(lock) cpu_relax() + #define _raw_read_relax(lock) cpu_relax() + #define _raw_write_relax(lock) cpu_relax() +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/spinlock_types.h 2010-01-18 16:48:59.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/spinlock_types.h 2010-01-18 16:52:32.000000000 +0100 +@@ -24,6 +24,20 @@ typedef union { + # define TICKET_SHIFT 16 + u16 cur, seq; + #endif ++#else ++/* ++ * This differs from the pre-2.6.24 spinlock by always using xchgb ++ * rather than decb to take the lock; this allows it to use a ++ * zero-initialized lock structure. It also maintains a 1-byte ++ * contention counter, so that we can implement ++ * __byte_spin_is_contended. 
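A standalone sketch of that xchg-based byte lock, with GCC __sync builtins standing in for the inline-asm xchgb/decb sequence used in the functions above (illustrative, not the kernel's implementation):

struct byte_lock {
	volatile unsigned char lock;
	volatile unsigned char spinners;  /* 1-byte contention counter */
};

static void byte_spin_lock(struct byte_lock *bl)
{
	while (__sync_lock_test_and_set(&bl->lock, 1)) {  /* xchgb */
		__sync_fetch_and_add(&bl->spinners, 1);
		while (bl->lock)
			;  /* spin until the holder releases */
		__sync_fetch_and_sub(&bl->spinners, 1);
	}
}

static int byte_spin_trylock(struct byte_lock *bl)
{
	return __sync_lock_test_and_set(&bl->lock, 1) == 0;
}

static void byte_spin_unlock(struct byte_lock *bl)
{
	__sync_lock_release(&bl->lock);  /* release-store of 0 */
}

A zero-initialized struct byte_lock is unlocked, which is exactly the property the comment calls out as the advantage over the old decb scheme.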
++ */ ++ u8 lock; ++#if CONFIG_NR_CPUS < 256 ++ u8 spinners; ++#else ++# error NR_CPUS >= 256 not implemented ++#endif + #endif + }; + } raw_spinlock_t; +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:52:02.000000000 +0100 +@@ -21,9 +21,24 @@ + struct task_struct; /* one of the stranger aspects of C forward declarations */ + struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + + #ifdef CONFIG_X86_32 + ++#ifdef CONFIG_CC_STACKPROTECTOR ++#define __switch_canary \ ++ "movl %P[task_canary](%[next]), %%ebx\n\t" \ ++ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" ++#define __switch_canary_oparam \ ++ , [stack_canary] "=m" (per_cpu_var(stack_canary)) ++#define __switch_canary_iparam \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) ++#else /* CC_STACKPROTECTOR */ ++#define __switch_canary ++#define __switch_canary_oparam ++#define __switch_canary_iparam ++#endif /* CC_STACKPROTECTOR */ ++ + /* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. +@@ -45,6 +60,7 @@ do { \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ + "pushl %[next_ip]\n\t" /* restore EIP */ \ ++ __switch_canary \ + "jmp __switch_to\n" /* regparm call */ \ + "1:\t" \ + "popl %%ebp\n\t" /* restore EBP */ \ +@@ -59,6 +75,8 @@ do { \ + "=b" (ebx), "=c" (ecx), "=d" (edx), \ + "=S" (esi), "=D" (edi) \ + \ ++ __switch_canary_oparam \ ++ \ + /* input parameters: */ \ + : [next_sp] "m" (next->thread.sp), \ + [next_ip] "m" (next->thread.ip), \ +@@ -67,6 +85,8 @@ do { \ + [prev] "a" (prev), \ + [next] "d" (next) \ + \ ++ __switch_canary_iparam \ ++ \ + : /* reloaded segment registers */ \ + "memory"); \ + } while (0) +@@ -87,27 +107,44 @@ do { \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + ++#ifdef CONFIG_CC_STACKPROTECTOR ++#define __switch_canary \ ++ "movq %P[task_canary](%%rsi),%%r8\n\t" \ ++ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" ++#define __switch_canary_oparam \ ++ , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) ++#define __switch_canary_iparam \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) ++#else /* CC_STACKPROTECTOR */ ++#define __switch_canary ++#define __switch_canary_oparam ++#define __switch_canary_iparam ++#endif /* CC_STACKPROTECTOR */ ++ + /* Save restore flags to clear handle leaking NT */ + #define switch_to(prev, next, last) \ +- asm volatile(SAVE_CONTEXT \ ++ asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ +- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ ++ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ ++ __switch_canary \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ +- LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ +- "jc ret_from_fork\n\t" \ ++ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ ++ "jnz ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ ++ __switch_canary_oparam \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct 
thread_info, flags)), \ +- [tif_fork] "i" (TIF_FORK), \ ++ [_tif_fork] "i" (_TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ +- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ ++ [current_task] "m" (per_cpu_var(current_task)) \ ++ __switch_canary_iparam \ + : "memory", "cc" __EXTRA_CLOBBER) + #endif + +@@ -166,6 +203,25 @@ extern void xen_load_gs_index(unsigned); + #define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + ++/* ++ * x86_32 user gs accessors. ++ */ ++#ifdef CONFIG_X86_32 ++#ifdef CONFIG_X86_32_LAZY_GS ++#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) ++#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) ++#define task_user_gs(tsk) ((tsk)->thread.gs) ++#define lazy_save_gs(v) savesegment(gs, (v)) ++#define lazy_load_gs(v) loadsegment(gs, (v)) ++#else /* X86_32_LAZY_GS */ ++#define get_user_gs(regs) (u16)((regs)->gs) ++#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) ++#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) ++#define lazy_save_gs(v) do { } while (0) ++#define lazy_load_gs(v) do { } while (0) ++#endif /* X86_32_LAZY_GS */ ++#endif /* X86_32 */ ++ + static inline unsigned long get_limit(unsigned long segment) + { + unsigned long __limit; +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/tlbflush.h 2009-11-06 10:52:02.000000000 +0100 +@@ -86,21 +86,20 @@ static inline void flush_tlb_range(struc + flush_tlb_mm(vma->vm_mm); + } + ++#ifndef CONFIG_XEN + #define TLBSTATE_OK 1 + #define TLBSTATE_LAZY 2 + +-#ifdef CONFIG_X86_32 + struct tlb_state { + struct mm_struct *active_mm; + int state; +- char __cacheline_padding[L1_CACHE_BYTES-8]; + }; +-DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); ++DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); + +-void reset_lazy_tlbstate(void); +-#else + static inline void reset_lazy_tlbstate(void) + { ++ percpu_write(cpu_tlbstate.state, 0); ++ percpu_write(cpu_tlbstate.active_mm, &init_mm); + } + #endif + +@@ -112,4 +111,6 @@ static inline void flush_tlb_kernel_rang + flush_tlb_all(); + } + ++extern void zap_low_mappings(void); ++ + #endif /* _ASM_X86_TLBFLUSH_H */ +--- head-2010-01-18.orig/arch/x86/kernel/Makefile 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -122,7 +122,6 @@ obj-$(CONFIG_X86_XEN) += fixup.o + ### + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) +- obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o + obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o + obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o + obj-$(CONFIG_AUDIT) += audit_64.o +@@ -134,11 +133,10 @@ ifeq ($(CONFIG_X86_64),y) + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-y += vsmp_64.o + +- obj-$(CONFIG_XEN) += nmi.o + time_64-$(CONFIG_XEN) += time_32.o + endif + +-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o genx2apic_%.o \ +- hpet.o i8253.o i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o \ +- smpboot.o tlb_$(BITS).o tsc.o tsc_sync.o uv_%.o vsmp_64.o ++disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \ ++ i8259.o irqinit_$(BITS).o pci-swiotlb.o reboot.o smpboot.o tsc.o \ ++ tsc_sync.o uv_%.o vsmp_64.o + disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o +--- head-2010-01-18.orig/arch/x86/kernel/acpi/boot.c 2009-11-06 10:51:42.000000000 +0100 ++++ 
head-2010-01-18/arch/x86/kernel/acpi/boot.c 2009-11-06 10:52:02.000000000 +0100 +@@ -112,11 +112,6 @@ char *__init __acpi_map_table(unsigned l + if (!phys || !size) + return NULL; + +-#ifdef CONFIG_XEN +- if (phys + size <= (NR_FIX_ISAMAPS << PAGE_SHIFT)) +- return isa_bus_to_virt(phys); +-#endif +- + return early_ioremap(phys, size); + } + void __init __acpi_unmap_table(char *map, unsigned long size) +@@ -148,8 +143,10 @@ static int __init acpi_parse_madt(struct + madt->address); + } + ++#ifndef CONFIG_XEN + default_acpi_madt_oem_check(madt->header.oem_id, + madt->header.oem_table_id); ++#endif + + return 0; + } +--- head-2010-01-18.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/acpi/sleep-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -104,6 +104,7 @@ int acpi_save_state_mem(void) + stack_start.sp = temp_stack + sizeof(temp_stack); + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); ++ initial_gs = per_cpu_offset(smp_processor_id()); + #endif + initial_code = (unsigned long)wakeup_long64; + saved_magic = 0x123456789abcdef0; +--- head-2010-01-18.orig/arch/x86/kernel/apic/Makefile 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -17,3 +17,10 @@ obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o + obj-$(CONFIG_X86_NUMAQ) += numaq_32.o + obj-$(CONFIG_X86_ES7000) += es7000_32.o + obj-$(CONFIG_X86_SUMMIT) += summit_32.o ++ ++obj-$(CONFIG_XEN) += nmi.o ++ ++probe_64-$(CONFIG_XEN) := probe_32.o ++ ++disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o ++disabled-obj-$(filter-out $(CONFIG_SMP),$(CONFIG_XEN)) += ipi.o +--- head-2010-01-18.orig/arch/x86/kernel/apic/apic-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/apic-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -4,11 +4,20 @@ + + #include + #include ++#include + + #include + #include + #include + ++unsigned int num_processors; ++ ++/* ++ * Map cpu index to physical APIC ID ++ */ ++DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; ++EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); ++ + /* + * Debug level, exported for io_apic.c + */ +--- head-2010-01-18.orig/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -1,7 +1,7 @@ + /* + * Intel IO-APIC support for multi-Pentium hosts. + * +- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * + * Many thanks to Stig Venaas for trying out countless experimental + * patches and reporting/debugging problems patiently! 
+@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -61,9 +62,7 @@ + #include + #include + +-#include +-#include +-#include ++#include + + #ifdef CONFIG_XEN + #include +@@ -97,11 +96,11 @@ static DEFINE_SPINLOCK(vector_lock); + int nr_ioapic_registers[MAX_IO_APICS]; + + /* I/O APIC entries */ +-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; ++struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; + int nr_ioapics; + + /* MP IRQ source entries */ +-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; + + /* # of MP IRQ source entries */ + int mp_irq_entries; +@@ -114,10 +113,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BU + + int skip_ioapic_setup; + ++void arch_disable_smp_support(void) ++{ ++#ifdef CONFIG_PCI ++ noioapicquirk = 1; ++ noioapicreroute = -1; ++#endif ++ skip_ioapic_setup = 1; ++} ++ + static int __init parse_noapic(char *str) + { + /* disable IO-APIC */ +- disable_ioapic_setup(); ++ arch_disable_smp_support(); + return 0; + } + early_param("noapic", parse_noapic); +@@ -372,7 +380,7 @@ set_extra_move_desc(struct irq_desc *des + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ +- if (!cpumask_intersects(&desc->affinity, mask)) ++ if (!cpumask_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } + } +@@ -397,12 +405,20 @@ struct io_apic { + unsigned int index; + unsigned int unused[3]; + unsigned int data; ++ unsigned int unused2[11]; ++ unsigned int eoi; + }; + + static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + { + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) +- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); ++ + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); ++} ++ ++static inline void io_apic_eoi(unsigned int apic, unsigned int vector) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(vector, &io_apic->eoi); + } + #endif /* CONFIG_XEN */ + +@@ -416,7 +432,7 @@ static inline unsigned int io_apic_read( + struct physdev_apic apic_op; + int ret; + +- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; ++ apic_op.apic_physbase = mp_ioapics[apic].apicaddr; + apic_op.reg = reg; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); + if (ret) +@@ -434,7 +450,7 @@ static inline void io_apic_write(unsigne + #else + struct physdev_apic apic_op; + +- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; ++ apic_op.apic_physbase = mp_ioapics[apic].apicaddr; + apic_op.reg = reg; + apic_op.value = value; + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); +@@ -522,7 +538,7 @@ __ioapic_write_entry(int apic, int pin, + io_apic_write(apic, 0x10 + 2*pin, eu.w1); + } + +-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) + { + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); +@@ -558,11 +574,11 @@ static void send_cleanup_vector(struct i + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) +- send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); ++ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); +- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); 
+ } + cfg->move_in_progress = 0; +@@ -583,16 +599,12 @@ static void __target_IO_APIC_irq(unsigne + + apic = entry->apic; + pin = entry->pin; +-#ifdef CONFIG_INTR_REMAP + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); +-#else +- io_apic_write(apic, 0x11 + pin*2, dest); +-#endif + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; +@@ -607,8 +619,9 @@ static int + assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); + + /* +- * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid +- * of that, or returns BAD_APICID and leaves desc->affinity untouched. ++ * Either sets desc->affinity to a valid value, and returns ++ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and ++ * leaves desc->affinity untouched. + */ + static unsigned int + set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +@@ -624,9 +637,12 @@ set_desc_affinity(struct irq_desc *desc, + if (assign_irq_vector(irq, cfg, mask)) + return BAD_APICID; + +- cpumask_and(&desc->affinity, cfg->domain, mask); ++ /* check that before desc->affinity gets updated */ + set_extra_move_desc(desc, mask); +- return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); ++ ++ cpumask_copy(desc->affinity, mask); ++ ++ return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + } + + static void +@@ -840,23 +856,6 @@ static void clear_IO_APIC (void) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); + } +- +-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +-void send_IPI_self(int vector) +-{ +- unsigned int cfg; +- +- /* +- * Wait for idle. +- */ +- apic_wait_icr_idle(); +- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-} +-#endif /* !CONFIG_SMP && CONFIG_X86_32*/ + #else + #define add_pin_to_irq_cpu(cfg, cpu, apic, pin) + #endif /* CONFIG_XEN */ +@@ -868,8 +867,9 @@ void send_IPI_self(int vector) + */ + + #define MAX_PIRQS 8 +-static int pirq_entries [MAX_PIRQS]; +-static int pirqs_enabled; ++static int pirq_entries[MAX_PIRQS] = { ++ [0 ... 
MAX_PIRQS - 1] = -1 ++}; + + static int __init ioapic_pirq_setup(char *str) + { +@@ -878,10 +878,6 @@ static int __init ioapic_pirq_setup(char + + get_options(str, ARRAY_SIZE(ints), ints); + +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; +- +- pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, KERN_INFO + "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; +@@ -903,75 +899,106 @@ __setup("pirq=", ioapic_pirq_setup); + #endif /* CONFIG_X86_32 */ + + #ifdef CONFIG_INTR_REMAP +-/* I/O APIC RTE contents at the OS boot up */ +-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; ++struct IO_APIC_route_entry **alloc_ioapic_entries(void) ++{ ++ int apic; ++ struct IO_APIC_route_entry **ioapic_entries; ++ ++ ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, ++ GFP_ATOMIC); ++ if (!ioapic_entries) ++ return 0; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ioapic_entries[apic] = ++ kzalloc(sizeof(struct IO_APIC_route_entry) * ++ nr_ioapic_registers[apic], GFP_ATOMIC); ++ if (!ioapic_entries[apic]) ++ goto nomem; ++ } ++ ++ return ioapic_entries; ++ ++nomem: ++ while (--apic >= 0) ++ kfree(ioapic_entries[apic]); ++ kfree(ioapic_entries); ++ ++ return 0; ++} + + /* +- * Saves and masks all the unmasked IO-APIC RTE's ++ * Saves all the IO-APIC RTE's + */ +-int save_mask_IO_APIC_setup(void) ++int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) + { +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; + int apic, pin; + +- /* +- * The number of IO-APIC IRQ registers (== #pins): +- */ ++ if (!ioapic_entries) ++ return -ENOMEM; ++ + for (apic = 0; apic < nr_ioapics; apic++) { +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(apic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ if (!ioapic_entries[apic]) ++ return -ENOMEM; ++ ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ ioapic_entries[apic][pin] = ++ ioapic_read_entry(apic, pin); + } + ++ return 0; ++} ++ ++/* ++ * Mask all IO APIC entries. ++ */ ++void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) ++{ ++ int apic, pin; ++ ++ if (!ioapic_entries) ++ return; ++ + for (apic = 0; apic < nr_ioapics; apic++) { +- early_ioapic_entries[apic] = +- kzalloc(sizeof(struct IO_APIC_route_entry) * +- nr_ioapic_registers[apic], GFP_KERNEL); +- if (!early_ioapic_entries[apic]) +- goto nomem; +- } ++ if (!ioapic_entries[apic]) ++ break; + +- for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + +- entry = early_ioapic_entries[apic][pin] = +- ioapic_read_entry(apic, pin); ++ entry = ioapic_entries[apic][pin]; + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } +- +- return 0; +- +-nomem: +- while (apic >= 0) +- kfree(early_ioapic_entries[apic--]); +- memset(early_ioapic_entries, 0, +- ARRAY_SIZE(early_ioapic_entries)); +- +- return -ENOMEM; ++ } + } + +-void restore_IO_APIC_setup(void) ++/* ++ * Restore IO APIC entries which were saved in ioapic_entries. 
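Taken together, the helpers in this hunk are meant to bracket a reconfiguration of the interrupt-remapping hardware. A hypothetical caller (error handling abbreviated), showing the intended pairing of the functions defined above and below:

struct IO_APIC_route_entry **entries = alloc_ioapic_entries();

if (entries && !save_IO_APIC_setup(entries)) {
	mask_IO_APIC_setup(entries);  /* quiesce the IO-APICs */
	/* ... enable or disable interrupt remapping here ... */
	restore_IO_APIC_setup(entries);
}
if (entries)
	free_ioapic_entries(entries);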
++ */ ++int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) + { + int apic, pin; + ++ if (!ioapic_entries) ++ return -ENOMEM; ++ + for (apic = 0; apic < nr_ioapics; apic++) { +- if (!early_ioapic_entries[apic]) +- break; ++ if (!ioapic_entries[apic]) ++ return -ENOMEM; ++ + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, +- early_ioapic_entries[apic][pin]); +- kfree(early_ioapic_entries[apic]); +- early_ioapic_entries[apic] = NULL; ++ ioapic_entries[apic][pin]); + } ++ return 0; + } + +-void reinit_intr_remapped_IO_APIC(int intr_remapping) ++void reinit_intr_remapped_IO_APIC(int intr_remapping, ++ struct IO_APIC_route_entry **ioapic_entries) ++ + { + /* + * for now plain restore of previous settings. +@@ -980,7 +1007,17 @@ void reinit_intr_remapped_IO_APIC(int in + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ +- restore_IO_APIC_setup(); ++ restore_IO_APIC_setup(ioapic_entries); ++} ++ ++void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) ++{ ++ int apic; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ kfree(ioapic_entries[apic]); ++ ++ kfree(ioapic_entries); + } + #endif + +@@ -992,10 +1029,10 @@ static int find_irq_entry(int apic, int + int i; + + for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == type && +- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) && +- mp_irqs[i].mp_dstirq == pin) ++ if (mp_irqs[i].irqtype == type && ++ (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || ++ mp_irqs[i].dstapic == MP_APIC_ALL) && ++ mp_irqs[i].dstirq == pin) + return i; + + return -1; +@@ -1010,13 +1047,13 @@ static int __init find_isa_irq_pin(int i + int i; + + for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; ++ int lbus = mp_irqs[i].srcbus; + + if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) + +- return mp_irqs[i].mp_dstirq; ++ return mp_irqs[i].dstirq; + } + return -1; + } +@@ -1026,17 +1063,17 @@ static int __init find_isa_irq_apic(int + int i; + + for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; ++ int lbus = mp_irqs[i].srcbus; + + if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) + break; + } + if (i < mp_irq_entries) { + int apic; + for(apic = 0; apic < nr_ioapics; apic++) { +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) ++ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) + return apic; + } + } +@@ -1062,23 +1099,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; ++ int lbus = mp_irqs[i].srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) ++ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || ++ mp_irqs[i].dstapic == MP_APIC_ALL) + break; + + if (!test_bit(lbus, mp_bus_not_pci) && +- !mp_irqs[i].mp_irqtype && ++ !mp_irqs[i].irqtype && + (bus == lbus) && +- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { +- int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); ++ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { ++ int irq = pin_2_irq(i, apic, 
mp_irqs[i].dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + +- if (pin == (mp_irqs[i].mp_srcbusirq & 3)) ++ if (pin == (mp_irqs[i].srcbusirq & 3)) + return irq; + /* + * Use the first all-but-pin matching entry as a +@@ -1121,7 +1158,7 @@ static int EISA_ELCR(unsigned int irq) + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) + #define default_EISA_polarity(idx) default_ISA_polarity(idx) + + /* PCI interrupts are always polarity one level triggered, +@@ -1138,13 +1175,13 @@ static int EISA_ELCR(unsigned int irq) + + static int MPBIOS_polarity(int idx) + { +- int bus = mp_irqs[idx].mp_srcbus; ++ int bus = mp_irqs[idx].srcbus; + int polarity; + + /* + * Determine IRQ line polarity (high active or low active): + */ +- switch (mp_irqs[idx].mp_irqflag & 3) ++ switch (mp_irqs[idx].irqflag & 3) + { + case 0: /* conforms, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) +@@ -1180,13 +1217,13 @@ static int MPBIOS_polarity(int idx) + + static int MPBIOS_trigger(int idx) + { +- int bus = mp_irqs[idx].mp_srcbus; ++ int bus = mp_irqs[idx].srcbus; + int trigger; + + /* + * Determine IRQ trigger mode (edge or level sensitive): + */ +- switch ((mp_irqs[idx].mp_irqflag>>2) & 3) ++ switch ((mp_irqs[idx].irqflag>>2) & 3) + { + case 0: /* conforms, ie. bus-type dependent */ + if (test_bit(bus, mp_bus_not_pci)) +@@ -1264,16 +1301,16 @@ int (*ioapic_renumber_irq)(int ioapic, i + static int pin_2_irq(int idx, int apic, int pin) + { + int irq, i; +- int bus = mp_irqs[idx].mp_srcbus; ++ int bus = mp_irqs[idx].srcbus; + + /* + * Debugging check, we are in big trouble if this message pops up! 
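The polarity and trigger switches below decode two 2-bit fields of the MP-table irqflag word: bits 1:0 are polarity, bits 3:2 are trigger mode, and 0 means "conforms to the bus". A standalone decoder for those fields:

#include <stdio.h>

static const char *pol[]  = { "conforms", "active high", "reserved", "active low" };
static const char *trig[] = { "conforms", "edge", "reserved", "level" };

int main(void)
{
	unsigned irqflag = 0x0f;  /* example: active low, level triggered */

	printf("polarity=%s trigger=%s\n",
	       pol[irqflag & 3], trig[(irqflag >> 2) & 3]);
	return 0;
}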
+ */
+- if (mp_irqs[idx].mp_dstirq != pin)
++ if (mp_irqs[idx].dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ if (test_bit(bus, mp_bus_not_pci)) {
+- irq = mp_irqs[idx].mp_srcbusirq;
++ irq = mp_irqs[idx].srcbusirq;
+ } else {
+ /*
+ * PCI IRQs are mapped in order
+@@ -1366,7 +1403,7 @@ __assign_irq_vector(int irq, struct irq_
+ int new_cpu;
+ int vector, offset;
+
+- vector_allocation_domain(cpu, tmp_mask);
++ apic->vector_allocation_domain(cpu, tmp_mask);
+
+ vector = current_vector;
+ offset = current_offset;
+@@ -1476,9 +1513,7 @@ void __setup_vector_irq(int cpu)
+ }
+
+ static struct irq_chip ioapic_chip;
+-#ifdef CONFIG_INTR_REMAP
+ static struct irq_chip ir_ioapic_chip;
+-#endif
+
+ #define IOAPIC_AUTO -1
+ #define IOAPIC_EDGE 0
+@@ -1517,7 +1552,6 @@ static void ioapic_register_intr(int irq
+ else
+ desc->status &= ~IRQ_LEVEL;
+
+-#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ desc->status |= IRQ_MOVE_PCNTXT;
+ if (trigger)
+@@ -1529,7 +1563,7 @@ static void ioapic_register_intr(int irq
+ handle_edge_irq, "edge");
+ return;
+ }
+-#endif
++
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+@@ -1544,37 +1578,44 @@ static void ioapic_register_intr(int irq
+ #define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq)
+ #endif
+
+-static int setup_ioapic_entry(int apic, int irq,
+- struct IO_APIC_route_entry *entry,
+- unsigned int destination, int trigger,
+- int polarity, int vector)
++int setup_ioapic_entry(int apic_id, int irq,
++ struct IO_APIC_route_entry *entry,
++ unsigned int destination, int trigger,
++ int polarity, int vector, int pin)
+ {
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(entry,0,sizeof(*entry));
+
+-#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled) {
+- struct intel_iommu *iommu = map_ioapic_to_ir(apic);
++#ifndef CONFIG_XEN
++ struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
+ struct irte irte;
+ struct IR_IO_APIC_route_entry *ir_entry =
+ (struct IR_IO_APIC_route_entry *) entry;
+ int index;
+
+ if (!iommu)
+- panic("No mapping iommu for ioapic %d\n", apic);
++ panic("No mapping iommu for ioapic %d\n", apic_id);
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+- panic("Failed to allocate IRTE for ioapic %d\n", apic);
++ panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
+
+ memset(&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+- irte.dst_mode = INT_DEST_MODE;
+- irte.trigger_mode = trigger;
+- irte.dlvry_mode = INT_DELIVERY_MODE;
++ irte.dst_mode = apic->irq_dest_mode;
++ /*
++ * Trigger mode in the IRTE will always be edge, and the
++ * actual level or edge trigger will be set up in the IO-APIC
++ * RTE. This will help simplify level triggered irq migration.
++ * For more details, see the comments above explaining IO-APIC
++ * irq migration in the presence of interrupt-remapping.
++ */
++ irte.trigger_mode = 0;
++ irte.dlvry_mode = apic->irq_delivery_mode;
+ irte.vector = vector;
+ irte.dest_id = IRTE_DEST(destination);
+
+@@ -1584,18 +1625,22 @@ static int setup_ioapic_entry(int apic,
+ ir_entry->zero = 0;
+ ir_entry->format = 1;
+ ir_entry->index = (index & 0x7fff);
+- } else
++ /*
++ * IO-APIC RTE will be configured with virtual vector.
++ * irq handler will do the explicit EOI to the io-apic.
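++ *
++ * (Illustrative sketch only, not part of this patch: the explicit
++ * EOI is assumed to go through an io_apic_eoi() helper of roughly
++ * this shape, writing the RTE's virtual vector, i.e. the pin
++ * number, into the IO-APIC's directed-EOI register:
++ *
++ *	static void io_apic_eoi(unsigned int apic, unsigned int vector)
++ *	{
++ *		struct io_apic __iomem *io_apic = io_apic_base(apic);
++ *		writel(vector, &io_apic->eoi);
++ *	}
++ *
++ * io_apic_base() and the eoi register layout are assumptions here.)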
++ */ ++ ir_entry->vector = pin; + #endif +- { +- entry->delivery_mode = INT_DELIVERY_MODE; +- entry->dest_mode = INT_DEST_MODE; ++ } else { ++ entry->delivery_mode = apic->irq_delivery_mode; ++ entry->dest_mode = apic->irq_dest_mode; + entry->dest = destination; ++ entry->vector = vector; + } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; +- entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. +@@ -1605,7 +1650,7 @@ static int setup_ioapic_entry(int apic, + return 0; + } + +-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, ++static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, + int trigger, int polarity) + { + struct irq_cfg *cfg; +@@ -1617,26 +1662,26 @@ static void setup_IO_APIC_irq(int apic, + + cfg = desc->chip_data; + +- if (assign_irq_vector(irq, cfg, TARGET_CPUS)) ++ if (assign_irq_vector(irq, cfg, apic->target_cpus())) + return; + + #ifndef CONFIG_XEN +- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + #else +- dest = cpu_mask_to_apicid(TARGET_CPUS); ++ dest = 0; /* meaningless */ + #endif + + apic_printk(APIC_VERBOSE,KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", +- apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, ++ apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, + irq, trigger, polarity); + + +- if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, +- dest, trigger, polarity, cfg->vector)) { ++ if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, ++ dest, trigger, polarity, cfg->vector, pin)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", +- mp_ioapics[apic].mp_apicid, pin); ++ mp_ioapics[apic_id].apicid, pin); + __clear_irq_vector(irq, cfg); + return; + } +@@ -1645,12 +1690,12 @@ static void setup_IO_APIC_irq(int apic, + if (irq < NR_IRQS_LEGACY) + disable_8259A_irq(irq); + +- ioapic_write_entry(apic, pin, entry); ++ ioapic_write_entry(apic_id, pin, entry); + } + + static void __init setup_IO_APIC_irqs(void) + { +- int apic, pin, idx, irq; ++ int apic_id, pin, idx, irq; + int notcon = 0; + struct irq_desc *desc; + struct irq_cfg *cfg; +@@ -1658,21 +1703,19 @@ static void __init setup_IO_APIC_irqs(vo + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { + +- idx = find_irq_entry(apic, pin, mp_INT); ++ idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) { + if (!notcon) { + notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", +- mp_ioapics[apic].mp_apicid, +- pin); ++ mp_ioapics[apic_id].apicid, pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", +- mp_ioapics[apic].mp_apicid, +- pin); ++ mp_ioapics[apic_id].apicid, pin); + continue; + } + if (notcon) { +@@ -1681,23 +1724,30 @@ static void __init setup_IO_APIC_irqs(vo + notcon = 0; + } + +- irq = pin_2_irq(idx, apic, pin); ++ irq = pin_2_irq(idx, apic_id, pin); ++ + #if defined(CONFIG_XEN) +- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) ++ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) + continue; +-#elif defined(CONFIG_X86_32) +- if (multi_timer_check(apic, irq)) ++#else ++ /* ++ * Skip the timer 
IRQ if there's a quirk handler
++ * installed and if it returns 1:
++ */
++ if (apic->multi_timer_check &&
++ apic->multi_timer_check(apic_id, irq))
+ continue;
+ #endif
++
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
+ }
+ cfg = desc->chip_data;
+- add_pin_to_irq_cpu(cfg, cpu, apic, pin);
++ add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+
+- setup_IO_APIC_irq(apic, pin, irq, desc,
++ setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
+ }
+ }
+@@ -1711,15 +1761,13 @@ static void __init setup_IO_APIC_irqs(vo
+ /*
+ * Set up the timer pin, possibly with the 8259A-master behind.
+ */
+-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
++static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
+ int vector)
+ {
+ struct IO_APIC_route_entry entry;
+
+-#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ return;
+-#endif
+
+ memset(&entry, 0, sizeof(entry));
+
+@@ -1727,10 +1775,10 @@ static void __init setup_timer_IRQ0_pin(
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+- entry.dest_mode = INT_DEST_MODE;
+- entry.mask = 1; /* mask IRQ now */
+- entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
+- entry.delivery_mode = INT_DELIVERY_MODE;
++ entry.dest_mode = apic->irq_dest_mode;
++ entry.mask = 0; /* don't mask IRQ for edge */
++ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
++ entry.delivery_mode = apic->irq_delivery_mode;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+@@ -1744,7 +1792,7 @@ static void __init setup_timer_IRQ0_pin(
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+- ioapic_write_entry(apic, pin, entry);
++ ioapic_write_entry(apic_id, pin, entry);
+ }
+
+
+@@ -1766,7 +1814,7 @@ __apicdebuginit(void) print_IO_APIC(void
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
++ mp_ioapics[i].apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+@@ -1786,7 +1834,7 @@ __apicdebuginit(void) print_IO_APIC(void
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
++ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+@@ -2050,13 +2098,6 @@ void __init enable_IO_APIC(void)
+ int apic;
+ unsigned long flags;
+
+-#ifdef CONFIG_X86_32
+- int i;
+- if (!pirqs_enabled)
+- for (i = 0; i < MAX_PIRQS; i++)
+- pirq_entries[i] = -1;
+-#endif
+-
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+@@ -2129,8 +2170,13 @@ void disable_IO_APIC(void)
+ * If the i8259 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrupts can be delivered.
++ *
++ * With interrupt-remapping, for now we will use virtual wire A mode,
++ * as virtual wire B is a little more complex (we need to configure both
++ * the IOAPIC RTE as well as the interrupt-remapping table entry).
++ * As this gets called during crash dump, keep this simple for now.
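++ *
++ * (Sketch for illustration, with assumed field values rather than
++ * code from this hunk: virtual wire A amounts to programming the
++ * RTE so the 8259A output passes through untouched, roughly
++ *
++ *	entry.dest_mode = 0;
++ *	entry.trigger = 0;
++ *	entry.polarity = 0;
++ *	entry.delivery_mode = dest_ExtINT;
++ *	entry.dest = read_apic_id();
++ *
++ * which is what the code below writes when the i8259 pin is known.)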
+ */ +- if (ioapic_i8259.pin != -1) { ++ if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { + struct IO_APIC_route_entry entry; + + memset(&entry, 0, sizeof(entry)); +@@ -2150,7 +2196,10 @@ void disable_IO_APIC(void) + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); + } + +- disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++ /* ++ * Use virtual wire A mode when interrupt remapping is enabled. ++ */ ++ disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); + } + + #ifdef CONFIG_X86_32 +@@ -2165,7 +2214,7 @@ static void __init setup_ioapic_ids_from + { + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; +- int apic; ++ int apic_id; + int i; + unsigned char old_id; + unsigned long flags; +@@ -2184,26 +2233,26 @@ static void __init setup_ioapic_ids_from + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. + */ +- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); ++ phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ +- for (apic = 0; apic < nr_ioapics; apic++) { ++ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); ++ reg_00.raw = io_apic_read(apic_id, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + +- old_id = mp_ioapics[apic].mp_apicid; ++ old_id = mp_ioapics[apic_id].apicid; + +- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { ++ if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", +- apic, mp_ioapics[apic].mp_apicid); ++ apic_id, mp_ioapics[apic_id].apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.bits.ID); +- mp_ioapics[apic].mp_apicid = reg_00.bits.ID; ++ mp_ioapics[apic_id].apicid = reg_00.bits.ID; + } + + /* +@@ -2211,10 +2260,10 @@ static void __init setup_ioapic_ids_from + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ +- if (check_apicid_used(phys_id_present_map, +- mp_ioapics[apic].mp_apicid)) { ++ if (apic->check_apicid_used(phys_id_present_map, ++ mp_ioapics[apic_id].apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", +- apic, mp_ioapics[apic].mp_apicid); ++ apic_id, mp_ioapics[apic_id].apicid); + for (i = 0; i < get_physical_broadcast(); i++) + if (!physid_isset(i, phys_id_present_map)) + break; +@@ -2223,13 +2272,13 @@ static void __init setup_ioapic_ids_from + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + physid_set(i, phys_id_present_map); +- mp_ioapics[apic].mp_apicid = i; ++ mp_ioapics[apic_id].apicid = i; + } else { + physid_mask_t tmp; +- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); ++ tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic_printk(APIC_VERBOSE, "Setting %d in the " + "phys_id_present_map\n", +- mp_ioapics[apic].mp_apicid); ++ mp_ioapics[apic_id].apicid); + physids_or(phys_id_present_map, phys_id_present_map, tmp); + } + +@@ -2238,11 +2287,11 @@ static void __init setup_ioapic_ids_from + * We need to adjust the IRQ routing table + * if the ID changed. 
+ */ +- if (old_id != mp_ioapics[apic].mp_apicid) ++ if (old_id != mp_ioapics[apic_id].apicid) + for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_dstapic == old_id) +- mp_irqs[i].mp_dstapic +- = mp_ioapics[apic].mp_apicid; ++ if (mp_irqs[i].dstapic == old_id) ++ mp_irqs[i].dstapic ++ = mp_ioapics[apic_id].apicid; + + /* + * Read the right value from the MPC table and +@@ -2250,20 +2299,20 @@ static void __init setup_ioapic_ids_from + */ + apic_printk(APIC_VERBOSE, KERN_INFO + "...changing IO-APIC physical APIC ID to %d ...", +- mp_ioapics[apic].mp_apicid); ++ mp_ioapics[apic_id].apicid); + +- reg_00.bits.ID = mp_ioapics[apic].mp_apicid; ++ reg_00.bits.ID = mp_ioapics[apic_id].apicid; + spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0, reg_00.raw); ++ io_apic_write(apic_id, 0, reg_00.raw); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); ++ reg_00.raw = io_apic_read(apic_id, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); +- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) ++ if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE, " ok.\n"); +@@ -2366,7 +2415,7 @@ static int ioapic_retrigger_irq(unsigned + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); +- send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); ++ apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +@@ -2374,7 +2423,7 @@ static int ioapic_retrigger_irq(unsigned + #else + static int ioapic_retrigger_irq(unsigned int irq) + { +- send_IPI_self(irq_cfg(irq)->vector); ++ apic->send_IPI_self(irq_cfg(irq)->vector); + + return 1; + } +@@ -2392,37 +2441,24 @@ static int ioapic_retrigger_irq(unsigned + #ifdef CONFIG_SMP + + #ifdef CONFIG_INTR_REMAP +-static void ir_irq_migration(struct work_struct *work); +- +-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + + /* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * +- * For edge triggered, irq migration is a simple atomic update(of vector +- * and cpu destination) of IRTE and flush the hardware cache. +- * +- * For level triggered, we need to modify the io-apic RTE aswell with the update +- * vector information, along with modifying IRTE with vector and destination. +- * So irq migration for level triggered is little bit more complex compared to +- * edge triggered migration. But the good news is, we use the same algorithm +- * for level triggered migration as we have today, only difference being, +- * we now initiate the irq migration from process context instead of the +- * interrupt context. ++ * For both level and edge triggered, irq migration is a simple atomic ++ * update(of vector and cpu destination) of IRTE and flush the hardware cache. + * +- * In future, when we do a directed EOI (combined with cpu EOI broadcast +- * suppression) to the IO-APIC, level triggered irq migration will also be +- * as simple as edge triggered migration and we can do the irq migration +- * with a simple atomic update to IO-APIC RTE. ++ * For level triggered, we eliminate the io-apic RTE modification (with the ++ * updated vector information), by using a virtual vector (io-apic pin number). ++ * Real vector that is used for interrupting cpu will be coming from ++ * the interrupt-remapping table entry. 
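++ *
++ * (Illustration, mirroring the code below: migration now reduces to
++ *
++ *	irte.vector  = cfg->vector;
++ *	irte.dest_id = IRTE_DEST(dest);
++ *	modify_irte(irq, &irte);
++ *
++ * where modify_irte() is the assumed IRTE-update helper; one table
++ * write plus a hardware cache flush, for edge and level alike.)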
+ */ + static void + migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) + { + struct irq_cfg *cfg; + struct irte irte; +- int modify_ioapic_rte; + unsigned int dest; +- unsigned long flags; + unsigned int irq; + + if (!cpumask_intersects(mask, cpu_online_mask)) +@@ -2438,14 +2474,7 @@ migrate_ioapic_irq_desc(struct irq_desc + + set_extra_move_desc(desc, mask); + +- dest = cpu_mask_to_apicid_and(cfg->domain, mask); +- +- modify_ioapic_rte = desc->status & IRQ_LEVEL; +- if (modify_ioapic_rte) { +- spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, cfg); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- } ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); +@@ -2458,61 +2487,7 @@ migrate_ioapic_irq_desc(struct irq_desc + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + +- cpumask_copy(&desc->affinity, mask); +-} +- +-static int migrate_irq_remapped_level_desc(struct irq_desc *desc) +-{ +- int ret = -1; +- struct irq_cfg *cfg = desc->chip_data; +- +- mask_IO_APIC_irq_desc(desc); +- +- if (io_apic_level_ack_pending(cfg)) { +- /* +- * Interrupt in progress. Migrating irq now will change the +- * vector information in the IO-APIC RTE and that will confuse +- * the EOI broadcast performed by cpu. +- * So, delay the irq migration to the next instance. +- */ +- schedule_delayed_work(&ir_migration_work, 1); +- goto unmask; +- } +- +- /* everthing is clear. we have right of way */ +- migrate_ioapic_irq_desc(desc, &desc->pending_mask); +- +- ret = 0; +- desc->status &= ~IRQ_MOVE_PENDING; +- cpumask_clear(&desc->pending_mask); +- +-unmask: +- unmask_IO_APIC_irq_desc(desc); +- +- return ret; +-} +- +-static void ir_irq_migration(struct work_struct *work) +-{ +- unsigned int irq; +- struct irq_desc *desc; +- +- for_each_irq_desc(irq, desc) { +- if (desc->status & IRQ_MOVE_PENDING) { +- unsigned long flags; +- +- spin_lock_irqsave(&desc->lock, flags); +- if (!desc->chip->set_affinity || +- !(desc->status & IRQ_MOVE_PENDING)) { +- desc->status &= ~IRQ_MOVE_PENDING; +- spin_unlock_irqrestore(&desc->lock, flags); +- continue; +- } +- +- desc->chip->set_affinity(irq, &desc->pending_mask); +- spin_unlock_irqrestore(&desc->lock, flags); +- } +- } ++ cpumask_copy(desc->affinity, mask); + } + + /* +@@ -2521,13 +2496,6 @@ static void ir_irq_migration(struct work + static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, + const struct cpumask *mask) + { +- if (desc->status & IRQ_LEVEL) { +- desc->status |= IRQ_MOVE_PENDING; +- cpumask_copy(&desc->pending_mask, mask); +- migrate_irq_remapped_level_desc(desc); +- return; +- } +- + migrate_ioapic_irq_desc(desc, mask); + } + static void set_ir_ioapic_affinity_irq(unsigned int irq, +@@ -2537,6 +2505,11 @@ static void set_ir_ioapic_affinity_irq(u + + set_ir_ioapic_affinity_irq_desc(desc, mask); + } ++#else ++static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, ++ const struct cpumask *mask) ++{ ++} + #endif + + asmlinkage void smp_irq_move_cleanup_interrupt(void) +@@ -2550,6 +2523,7 @@ asmlinkage void smp_irq_move_cleanup_int + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; ++ unsigned int irr; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; +@@ -2569,6 +2543,18 @@ asmlinkage void smp_irq_move_cleanup_int + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + goto unlock; + ++ irr = 
apic_read(APIC_IRR + (vector / 32 * 0x10));
++ /*
++ * Check if the vector that needs to be cleaned up is
++ * registered at the cpu's IRR. If so, then this is not
++ * the best time to clean it up. Let's clean it up in the
++ * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
++ * to myself.
++ */
++ if (irr & (1 << (vector % 32))) {
++ apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
++ goto unlock;
++ }
+ __get_cpu_var(vector_irq)[vector] = -1;
+ cfg->move_cleanup_count--;
+ unlock:
+@@ -2591,7 +2577,7 @@ static void irq_complete_move(struct irq
+
+ /* domain has not changed, but affinity did */
+ me = smp_processor_id();
+- if (cpu_isset(me, desc->affinity)) {
++ if (cpumask_test_cpu(me, desc->affinity)) {
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+@@ -2617,17 +2603,51 @@ static void irq_complete_move(struct irq
+ static inline void irq_complete_move(struct irq_desc **descp) {}
+ #endif
+
+-#ifdef CONFIG_INTR_REMAP
++static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
++{
++ int apic, pin;
++ struct irq_pin_list *entry;
++
++ entry = cfg->irq_2_pin;
++ for (;;) {
++
++ if (!entry)
++ break;
++
++ apic = entry->apic;
++ pin = entry->pin;
++ io_apic_eoi(apic, pin);
++ entry = entry->next;
++ }
++}
++
++static void
++eoi_ioapic_irq(struct irq_desc *desc)
++{
++ struct irq_cfg *cfg;
++ unsigned long flags;
++ unsigned int irq;
++
++ irq = desc->irq;
++ cfg = desc->chip_data;
++
++ spin_lock_irqsave(&ioapic_lock, flags);
++ __eoi_ioapic_irq(irq, cfg);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++}
++
++#ifdef CONFIG_X86_X2APIC
+ static void ack_x2apic_level(unsigned int irq)
+ {
++ struct irq_desc *desc = irq_to_desc(irq);
+ ack_x2APIC_irq();
++ eoi_ioapic_irq(desc);
+ }
+
+ static void ack_x2apic_edge(unsigned int irq)
+ {
+ ack_x2APIC_irq();
+ }
+-
+ #endif
+
+ static void ack_apic_edge(unsigned int irq)
+@@ -2693,6 +2713,9 @@ static void ack_apic_level(unsigned int
+ */
+ ack_APIC_irq();
+
++ if (irq_remapped(irq))
++ eoi_ioapic_irq(desc);
++
+ /* Now we can move and re-enable the irq */
+ if (unlikely(do_unmask_irq)) {
+ /* Only migrate the irq if the ack has been received.
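For reference, the IRR test added in the cleanup interrupt above can be read in isolation as the following minimal sketch (a restatement of the hunk's logic, not additional patch content; APIC_IRR is architecturally eight 32-bit words spaced 0x10 apart, so a vector's pending bit is located like this):

	unsigned int irr = apic_read(APIC_IRR + (vector / 32) * 0x10);

	if (irr & (1U << (vector % 32))) {
		/* The vector is still pending on this CPU, so cleaning it
		 * up now would be premature; defer by re-sending the
		 * cleanup IPI to ourselves and retrying later. */
		apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
	}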
+@@ -2738,6 +2761,26 @@ static void ack_apic_level(unsigned int
+ #endif
+ }
+
++#ifdef CONFIG_INTR_REMAP
++static void ir_ack_apic_edge(unsigned int irq)
++{
++#ifdef CONFIG_X86_X2APIC
++ if (x2apic_enabled())
++ return ack_x2apic_edge(irq);
++#endif
++ return ack_apic_edge(irq);
++}
++
++static void ir_ack_apic_level(unsigned int irq)
++{
++#ifdef CONFIG_X86_X2APIC
++ if (x2apic_enabled())
++ return ack_x2apic_level(irq);
++#endif
++ return ack_apic_level(irq);
++}
++#endif /* CONFIG_INTR_REMAP */
++
+ static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+@@ -2751,20 +2794,20 @@ static struct irq_chip ioapic_chip __rea
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+-#ifdef CONFIG_INTR_REMAP
+ static struct irq_chip ir_ioapic_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+- .ack = ack_x2apic_edge,
+- .eoi = ack_x2apic_level,
++#ifdef CONFIG_INTR_REMAP
++ .ack = ir_ack_apic_edge,
++ .eoi = ir_ack_apic_level,
+ #ifdef CONFIG_SMP
+ .set_affinity = set_ir_ioapic_affinity_irq,
+ #endif
++#endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+-#endif
+ #endif /* CONFIG_XEN */
+
+ static inline void init_IO_APIC_traps(void)
+@@ -2786,7 +2829,7 @@ static inline void init_IO_APIC_traps(vo
+ */
+ for_each_irq_desc(irq, desc) {
+ #ifdef CONFIG_XEN
+- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS)
++ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
+ continue;
+ #endif
+ cfg = desc->chip_data;
+@@ -2948,19 +2991,15 @@ static inline void __init check_timer(vo
+ int cpu = boot_cpu_id;
+ int apic1, pin1, apic2, pin2;
+ unsigned long flags;
+- unsigned int ver;
+ int no_pin1 = 0;
+
+ local_irq_save(flags);
+
+- ver = apic_read(APIC_LVR);
+- ver = GET_APIC_VERSION(ver);
+-
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+- assign_irq_vector(0, cfg, TARGET_CPUS);
++ assign_irq_vector(0, cfg, apic->target_cpus());
+
+ /*
+ * As IRQ0 is to be enabled in the 8259A, the virtual
+@@ -2974,7 +3013,13 @@ static inline void __init check_timer(vo
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ #ifdef CONFIG_X86_32
+- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
++ {
++ unsigned int ver;
++
++ ver = apic_read(APIC_LVR);
++ ver = GET_APIC_VERSION(ver);
++ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
++ }
+ #endif
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+@@ -2994,10 +3039,8 @@ static inline void __init check_timer(vo
+ * 8259A.
+ */
+ if (pin1 == -1) {
+-#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ panic("BIOS bug: timer not connected to IO-APIC");
+-#endif
+ pin1 = pin2;
+ apic1 = apic2;
+ no_pin1 = 1;
+@@ -3013,8 +3056,17 @@ static inline void __init check_timer(vo
+ if (no_pin1) {
+ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
++ } else {
++ /* for edge trigger, setup_IO_APIC_irq already
++ * leaves it unmasked,
++ * so we only need to unmask if it is level-triggered.
++ * Do we really have a level-triggered timer?
++ */ ++ int idx; ++ idx = find_irq_entry(apic1, pin1, mp_INT); ++ if (idx != -1 && irq_trigger(idx)) ++ unmask_IO_APIC_irq_desc(desc); + } +- unmask_IO_APIC_irq_desc(desc); + if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); +@@ -3024,10 +3076,9 @@ static inline void __init check_timer(vo + clear_IO_APIC_pin(0, pin1); + goto out; + } +-#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); +-#endif ++ local_irq_disable(); + clear_IO_APIC_pin(apic1, pin1); + if (!no_pin1) + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " +@@ -3042,7 +3093,6 @@ static inline void __init check_timer(vo + */ + replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); +- unmask_IO_APIC_irq_desc(desc); + enable_8259A_irq(0); + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); +@@ -3057,6 +3107,7 @@ static inline void __init check_timer(vo + /* + * Cleanup, just in case ... + */ ++ local_irq_disable(); + disable_8259A_irq(0); + clear_IO_APIC_pin(apic2, pin2); + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); +@@ -3082,6 +3133,7 @@ static inline void __init check_timer(vo + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + goto out; + } ++ local_irq_disable(); + disable_8259A_irq(0); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); +@@ -3099,6 +3151,7 @@ static inline void __init check_timer(vo + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + goto out; + } ++ local_irq_disable(); + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " + "report. 
Then try booting with the 'noapic' option.\n"); +@@ -3131,7 +3184,7 @@ out: + void __init setup_IO_APIC(void) + { + +-#if defined(CONFIG_X86_32) || defined(CONFIG_XEN) ++#ifdef CONFIG_XEN + enable_IO_APIC(); + #else + /* +@@ -3213,8 +3266,8 @@ static int ioapic_resume(struct sys_devi + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(dev->id, 0); +- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { +- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; ++ if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { ++ reg_00.bits.ID = mp_ioapics[dev->id].apicid; + io_apic_write(dev->id, 0, reg_00.raw); + } + spin_unlock_irqrestore(&ioapic_lock, flags); +@@ -3264,6 +3317,7 @@ static int __init ioapic_init_sysfs(void + + device_initcall(ioapic_init_sysfs); + ++static int nr_irqs_gsi = NR_IRQS_LEGACY; + /* + * Dynamic irq allocate and deallocation + */ +@@ -3278,11 +3332,11 @@ unsigned int create_irq_nr(unsigned int + struct irq_desc *desc_new = NULL; + + irq = 0; +- spin_lock_irqsave(&vector_lock, flags); +- for (new = irq_want; new < NR_IRQS; new++) { +- if (platform_legacy_irq(new)) +- continue; ++ if (irq_want < nr_irqs_gsi) ++ irq_want = nr_irqs_gsi; + ++ spin_lock_irqsave(&vector_lock, flags); ++ for (new = irq_want; new < nr_irqs; new++) { + desc_new = irq_to_desc_alloc_cpu(new, cpu); + if (!desc_new) { + printk(KERN_INFO "can not get irq_desc for %d\n", new); +@@ -3292,7 +3346,7 @@ unsigned int create_irq_nr(unsigned int + + if (cfg_new->vector != 0) + continue; +- if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) ++ if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) + irq = new; + break; + } +@@ -3307,7 +3361,6 @@ unsigned int create_irq_nr(unsigned int + return irq; + } + +-static int nr_irqs_gsi = NR_IRQS_LEGACY; + int create_irq(void) + { + unsigned int irq_want; +@@ -3336,9 +3389,7 @@ void destroy_irq(unsigned int irq) + if (desc) + desc->chip_data = cfg; + +-#ifdef CONFIG_INTR_REMAP + free_irte(irq); +-#endif + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq, cfg); + spin_unlock_irqrestore(&vector_lock, flags); +@@ -3355,14 +3406,16 @@ static int msi_compose_msg(struct pci_de + int err; + unsigned dest; + ++ if (disable_apic) ++ return -ENXIO; ++ + cfg = irq_cfg(irq); +- err = assign_irq_vector(irq, cfg, TARGET_CPUS); ++ err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + +- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + +-#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; +@@ -3374,9 +3427,9 @@ static int msi_compose_msg(struct pci_de + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; +- irte.dst_mode = INT_DEST_MODE; ++ irte.dst_mode = apic->irq_dest_mode; + irte.trigger_mode = 0; /* edge */ +- irte.dlvry_mode = INT_DELIVERY_MODE; ++ irte.dlvry_mode = apic->irq_delivery_mode; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + +@@ -3388,16 +3441,19 @@ static int msi_compose_msg(struct pci_de + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); +- } else +-#endif +- { +- msg->address_hi = MSI_ADDR_BASE_HI; ++ } else { ++ if (x2apic_enabled()) ++ msg->address_hi = MSI_ADDR_BASE_HI | ++ MSI_ADDR_EXT_DEST_ID(dest); ++ else ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ + msg->address_lo = + MSI_ADDR_BASE_LO | +- ((INT_DEST_MODE == 0) ? ++ ((apic->irq_dest_mode == 0) ? 
+ MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); +@@ -3405,7 +3461,7 @@ static int msi_compose_msg(struct pci_de + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); +@@ -3491,15 +3547,16 @@ static struct irq_chip msi_chip = { + .retrigger = ioapic_retrigger_irq, + }; + +-#ifdef CONFIG_INTR_REMAP + static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, +- .ack = ack_x2apic_edge, ++#ifdef CONFIG_INTR_REMAP ++ .ack = ir_ack_apic_edge, + #ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, + #endif ++#endif + .retrigger = ioapic_retrigger_irq, + }; + +@@ -3529,7 +3586,6 @@ static int msi_alloc_irte(struct pci_dev + } + return index; + } +-#endif + + static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) + { +@@ -3543,7 +3599,6 @@ static int setup_msi_irq(struct pci_dev + set_irq_msi(irq, msidesc); + write_msi_msg(irq, &msg); + +-#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_to_desc(irq); + /* +@@ -3552,7 +3607,6 @@ static int setup_msi_irq(struct pci_dev + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +-#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); +@@ -3560,60 +3614,26 @@ static int setup_msi_irq(struct pci_dev + return 0; + } + +-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) +-{ +- unsigned int irq; +- int ret; +- unsigned int irq_want; +- +- irq_want = nr_irqs_gsi; +- irq = create_irq_nr(irq_want); +- if (irq == 0) +- return -1; +- +-#ifdef CONFIG_INTR_REMAP +- if (!intr_remapping_enabled) +- goto no_ir; +- +- ret = msi_alloc_irte(dev, irq, 1); +- if (ret < 0) +- goto error; +-no_ir: +-#endif +- ret = setup_msi_irq(dev, msidesc, irq); +- if (ret < 0) { +- destroy_irq(irq); +- return ret; +- } +- return 0; +- +-#ifdef CONFIG_INTR_REMAP +-error: +- destroy_irq(irq); +- return ret; +-#endif +-} +- + int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) + { + unsigned int irq; + int ret, sub_handle; + struct msi_desc *msidesc; + unsigned int irq_want; +- +-#ifdef CONFIG_INTR_REMAP +- struct intel_iommu *iommu = 0; ++ struct intel_iommu *iommu = NULL; + int index = 0; +-#endif ++ ++ /* x86 doesn't support multiple MSI yet */ ++ if (type == PCI_CAP_ID_MSI && nvec > 1) ++ return 1; + + irq_want = nr_irqs_gsi; + sub_handle = 0; + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want); +- irq_want++; + if (irq == 0) + return -1; +-#ifdef CONFIG_INTR_REMAP ++ irq_want = irq + 1; + if (!intr_remapping_enabled) + goto no_ir; + +@@ -3641,7 +3661,6 @@ int arch_setup_msi_irqs(struct pci_dev * + set_irte_irq(irq, iommu, index, sub_handle); + } + no_ir: +-#endif + ret = setup_msi_irq(dev, msidesc, irq); + if (ret < 0) + goto error; +@@ -3659,7 +3678,7 @@ void arch_teardown_msi_irq(unsigned int + destroy_irq(irq); + } + +-#ifdef CONFIG_DMAR ++#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) + #ifdef CONFIG_SMP + static void 
dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) + { +@@ -3740,7 +3759,7 @@ static void hpet_msi_set_affinity(unsign + + #endif /* CONFIG_SMP */ + +-struct irq_chip hpet_msi_type = { ++static struct irq_chip hpet_msi_type = { + .name = "HPET_MSI", + .unmask = hpet_msi_unmask, + .mask = hpet_msi_mask, +@@ -3755,12 +3774,14 @@ int arch_setup_hpet_msi(unsigned int irq + { + int ret; + struct msi_msg msg; ++ struct irq_desc *desc = irq_to_desc(irq); + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + + hpet_msi_write(irq, &msg); ++ desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, + "edge"); + +@@ -3823,13 +3844,17 @@ int arch_setup_ht_irq(unsigned int irq, + struct irq_cfg *cfg; + int err; + ++ if (disable_apic) ++ return -ENXIO; ++ + cfg = irq_cfg(irq); +- err = assign_irq_vector(irq, cfg, TARGET_CPUS); ++ err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (!err) { + struct ht_irq_msg msg; + unsigned dest; + +- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, ++ apic->target_cpus()); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + +@@ -3837,11 +3862,11 @@ int arch_setup_ht_irq(unsigned int irq, + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | +- ((INT_DEST_MODE == 0) ? ++ ((apic->irq_dest_mode == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ ((apic->irq_delivery_mode != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; +@@ -3857,7 +3882,7 @@ int arch_setup_ht_irq(unsigned int irq, + } + #endif /* CONFIG_HT_IRQ */ + +-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) ++#ifdef CONFIG_X86_UV + /* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. 
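For reference, the delivery-mode ternaries in the HT hunk above collapse as follows when apic->irq_dest_mode == 0 and the delivery mode is not dest_LowestPrio (a sketch with those values assumed, using only the HT_IRQ_* flags already present in the code):

	msg.address_lo = HT_IRQ_LOW_BASE |
			 HT_IRQ_LOW_DEST_ID(dest) |
			 HT_IRQ_LOW_VECTOR(cfg->vector) |
			 HT_IRQ_LOW_DM_PHYSICAL |
			 HT_IRQ_LOW_RQEOI_EDGE |
			 HT_IRQ_LOW_MT_FIXED |
			 HT_IRQ_LOW_IRQ_MASKED;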
+@@ -3889,12 +3914,12 @@ int arch_enable_uv_irq(char *irq_name, u + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->vector = cfg->vector; +- entry->delivery_mode = INT_DELIVERY_MODE; +- entry->dest_mode = INT_DEST_MODE; ++ entry->delivery_mode = apic->irq_delivery_mode; ++ entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; +- entry->dest = cpu_mask_to_apicid(eligible_cpu); ++ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +@@ -3957,7 +3982,29 @@ void __init probe_nr_irqs_gsi(void) + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); + } ++ ++#ifdef CONFIG_SPARSE_IRQ ++int __init arch_probe_nr_irqs(void) ++{ ++ int nr; ++ ++ if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) ++ nr_irqs = NR_VECTORS * nr_cpu_ids; ++ ++ nr = nr_irqs_gsi + 8 * nr_cpu_ids; ++#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) ++ /* ++ * for MSI and HT dyn irq ++ */ ++ nr += nr_irqs_gsi * 16; + #endif ++ if (nr < nr_irqs) ++ nr_irqs = nr; ++ ++ return 0; ++} ++#endif ++#endif /* CONFIG_XEN */ + + /* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration +@@ -3985,7 +4032,7 @@ int __init io_apic_get_unique_id(int ioa + */ + + if (physids_empty(apic_id_map)) +- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); ++ apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); +@@ -4001,10 +4048,10 @@ int __init io_apic_get_unique_id(int ioa + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. 
+ */ +- if (check_apicid_used(apic_id_map, apic_id)) { ++ if (apic->check_apicid_used(apic_id_map, apic_id)) { + + for (i = 0; i < get_physical_broadcast(); i++) { +- if (!check_apicid_used(apic_id_map, i)) ++ if (!apic->check_apicid_used(apic_id_map, i)) + break; + } + +@@ -4017,7 +4064,7 @@ int __init io_apic_get_unique_id(int ioa + apic_id = i; + } + +- tmp = apicid_to_cpu_present(apic_id); ++ tmp = apic->apicid_to_cpu_present(apic_id); + physids_or(apic_id_map, apic_id_map, tmp); + + if (reg_00.bits.ID != apic_id) { +@@ -4062,7 +4109,7 @@ int io_apic_set_pci_routing (int ioapic, + int cpu = boot_cpu_id; + + #ifdef CONFIG_XEN +- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) { ++ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n", + ioapic, irq); + return -EINVAL; +@@ -4103,8 +4150,8 @@ int acpi_get_override_irq(int bus_irq, i + return -1; + + for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == mp_INT && +- mp_irqs[i].mp_srcbusirq == bus_irq) ++ if (mp_irqs[i].irqtype == mp_INT && ++ mp_irqs[i].srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; +@@ -4120,7 +4167,7 @@ int acpi_get_override_irq(int bus_irq, i + /* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online +- * so mask in all cases should simply be TARGET_CPUS ++ * so mask in all cases should simply be apic->target_cpus() + */ + #ifdef CONFIG_SMP + void __init setup_ioapic_dest(void) +@@ -4159,15 +4206,13 @@ void __init setup_ioapic_dest(void) + */ + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) +- mask = &desc->affinity; ++ mask = desc->affinity; + else +- mask = TARGET_CPUS; ++ mask = apic->target_cpus(); + +-#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq_desc(desc, mask); + else +-#endif + set_ioapic_affinity_irq_desc(desc, mask); + } + +@@ -4220,7 +4265,7 @@ void __init ioapic_init_mappings(void) + ioapic_res = ioapic_setup_resources(); + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { +- ioapic_phys = mp_ioapics[i].mp_apicaddr; ++ ioapic_phys = mp_ioapics[i].apicaddr; + #ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR +@@ -4260,9 +4305,12 @@ static int __init ioapic_insert_resource + struct resource *r = ioapic_resources; + + if (!r) { +- printk(KERN_ERR +- "IO APIC resources could be not be allocated.\n"); +- return -1; ++ if (nr_ioapics > 0) { ++ printk(KERN_ERR ++ "IO APIC resources couldn't be allocated.\n"); ++ return -1; ++ } ++ return 0; + } + + for (i = 0; i < nr_ioapics; i++) { +--- head-2010-01-18.orig/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -17,38 +17,8 @@ + #include + #include + #include ++#include + +-#ifdef CONFIG_X86_32 +-#ifndef CONFIG_XEN +-#include +-#include +- +-/* +- * the following functions deal with sending IPIs between CPUs. +- * +- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. 
+- */ +- +-static inline int __prepare_ICR(unsigned int shortcut, int vector) +-{ +- unsigned int icr = shortcut | APIC_DEST_LOGICAL; +- +- switch (vector) { +- default: +- icr |= APIC_DM_FIXED | vector; +- break; +- case NMI_VECTOR: +- icr |= APIC_DM_NMI; +- break; +- } +- return icr; +-} +- +-static inline int __prepare_ICR2(unsigned int mask) +-{ +- return SET_APIC_DEST_FIELD(mask); +-} +-#else + #include + + DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); +@@ -59,36 +29,10 @@ static inline void __send_IPI_one(unsign + BUG_ON(irq < 0); + notify_remote_via_irq(irq); + } +-#endif + +-void __send_IPI_shortcut(unsigned int shortcut, int vector) ++static void __send_IPI_shortcut(unsigned int shortcut, int vector) + { +-#ifndef CONFIG_XEN +- /* +- * Subtle. In the case of the 'never do double writes' workaround +- * we have to lock out interrupts to be safe. As we don't care +- * of the value read we use an atomic rmw access to avoid costly +- * cli/sti. Otherwise we use an even cheaper single atomic write +- * to the APIC. +- */ +- unsigned int cfg; +- +- /* +- * Wait for idle. +- */ +- apic_wait_icr_idle(); +- +- /* +- * No need to touch the target chip field +- */ +- cfg = __prepare_ICR(shortcut, vector); +- +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-#else +- int cpu; ++ unsigned int cpu; + + switch (shortcut) { + case APIC_DEST_SELF: +@@ -99,149 +43,53 @@ void __send_IPI_shortcut(unsigned int sh + if (cpu != smp_processor_id()) + __send_IPI_one(cpu, vector); + break; ++ case APIC_DEST_ALLINC: ++ for_each_online_cpu(cpu) ++ __send_IPI_one(cpu, vector); ++ break; + default: + printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, + vector); + break; + } +-#endif + } + +-void send_IPI_self(int vector) ++void xen_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) + { +- __send_IPI_shortcut(APIC_DEST_SELF, vector); +-} +- +-#ifndef CONFIG_XEN +-/* +- * This is used to send an IPI with no shorthand notation (the destination is +- * specified in bits 56 to 63 of the ICR). +- */ +-static inline void __send_IPI_dest_field(unsigned long mask, int vector) +-{ +- unsigned long cfg; +- +- /* +- * Wait for idle. +- */ +- if (unlikely(vector == NMI_VECTOR)) +- safe_apic_wait_icr_idle(); +- else +- apic_wait_icr_idle(); +- +- /* +- * prepare target chip field +- */ +- cfg = __prepare_ICR2(mask); +- apic_write(APIC_ICR2, cfg); +- +- /* +- * program the ICR +- */ +- cfg = __prepare_ICR(0, vector); +- +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-} +-#endif +- +-/* +- * This is only used on smaller machines. +- */ +-void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +-{ +-#ifndef CONFIG_XEN +- unsigned long mask = cpumask_bits(cpumask)[0]; +-#else + unsigned int cpu; +-#endif + unsigned long flags; + + local_irq_save(flags); +-#ifndef CONFIG_XEN +- WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); +- __send_IPI_dest_field(mask, vector); +-#else + WARN_ON(!cpumask_subset(cpumask, cpu_online_mask)); + for_each_cpu_and(cpu, cpumask, cpu_online_mask) +- __send_IPI_one(cpu, vector); +-#endif ++ if (cpu != smp_processor_id()) ++ __send_IPI_one(cpu, vector); + local_irq_restore(flags); + } + +-void send_IPI_mask_sequence(const struct cpumask *mask, int vector) ++void xen_send_IPI_mask(const struct cpumask *cpumask, int vector) + { +-#ifndef CONFIG_XEN ++ unsigned int cpu; + unsigned long flags; +- unsigned int query_cpu; +- +- /* +- * Hack. 
The clustered APIC addressing mode doesn't allow us to send +- * to an arbitrary mask, so I do a unicasts to each CPU instead. This +- * should be modified to do 1 message per cluster ID - mbligh +- */ + + local_irq_save(flags); +- for_each_cpu(query_cpu, mask) +- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); ++ WARN_ON(!cpumask_subset(cpumask, cpu_online_mask)); ++ for_each_cpu_and(cpu, cpumask, cpu_online_mask) ++ __send_IPI_one(cpu, vector); + local_irq_restore(flags); +-#else +- send_IPI_mask_bitmask(mask, vector); +-#endif + } + +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) ++void xen_send_IPI_allbutself(int vector) + { +- unsigned long flags; +- unsigned int query_cpu; +- unsigned int this_cpu = smp_processor_id(); +- +- /* See Hack comment above */ +- +- local_irq_save(flags); +-#ifndef CONFIG_XEN +- for_each_cpu(query_cpu, mask) +- if (query_cpu != this_cpu) +- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), +- vector); +-#else +- WARN_ON(!cpumask_subset(mask, cpu_online_mask)); +- for_each_cpu_and(query_cpu, mask, cpu_online_mask) +- if (query_cpu != this_cpu) +- __send_IPI_one(query_cpu, vector); +-#endif +- local_irq_restore(flags); ++ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); + } + +-#ifndef CONFIG_XEN +-/* must come after the send_IPI functions above for inlining */ +-static int convert_apicid_to_cpu(int apic_id) +-{ +- int i; +- +- for_each_possible_cpu(i) { +- if (per_cpu(x86_cpu_to_apicid, i) == apic_id) +- return i; +- } +- return -1; ++void xen_send_IPI_all(int vector) ++{ ++ __send_IPI_shortcut(APIC_DEST_ALLINC, vector); + } + +-int safe_smp_processor_id(void) ++void xen_send_IPI_self(int vector) + { +- int apicid, cpuid; +- +- if (!boot_cpu_has(X86_FEATURE_APIC)) +- return 0; +- +- apicid = hard_smp_processor_id(); +- if (apicid == BAD_APICID) +- return 0; +- +- cpuid = convert_apicid_to_cpu(apicid); +- +- return cpuid >= 0 ? cpuid : 0; ++ __send_IPI_shortcut(APIC_DEST_SELF, vector); + } +-#endif +-#endif +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/kernel/apic/probe_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -0,0 +1,69 @@ ++/* ++ * Default generic APIC driver. This handles up to 8 CPUs. ++ * ++ * Copyright 2003 Andi Kleen, SuSE Labs. ++ * Subject to the GNU Public License, v.2 ++ * ++ * Generic x86 APIC driver probe layer. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int xen_phys_pkg_id(int cpuid_apic, int index_msb) ++{ ++ return cpuid_apic; ++} ++ ++static struct apic apic_xen = { ++ ++ .name = "default", ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ /* logical delivery broadcast to all CPUs: */ ++ .irq_dest_mode = 1, ++ ++ .target_cpus = default_target_cpus, ++ ++ .phys_pkg_id = xen_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++#ifdef CONFIG_SMP ++ .send_IPI_mask = xen_send_IPI_mask, ++ .send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself, ++ .send_IPI_allbutself = xen_send_IPI_allbutself, ++ .send_IPI_all = xen_send_IPI_all, ++ .send_IPI_self = xen_send_IPI_self, ++#endif ++}; ++ ++struct apic *apic = &apic_xen; ++EXPORT_SYMBOL_GPL(apic); +--- head-2010-01-18.orig/arch/x86/kernel/asm-offsets_32.c 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/asm-offsets_32.c 2009-11-06 10:52:02.000000000 +0100 +@@ -115,6 +115,11 @@ void foo(void) + + OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + ++#ifdef CONFIG_XEN ++ BLANK(); ++ OFFSET(XEN_START_mfn_list, start_info, mfn_list); ++#endif ++ + #ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -1,101 +1,94 @@ +-#include +-#include +-#include +-#include + #include ++#include + #include ++#include + #include +-#include +-#include ++#include ++#include + #include ++#include ++#include ++#include + #include +-#include +-#include +-#include +-#include +-#include ++#include ++ ++#include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include + #include ++#include + #include +-#include +-#include + #include ++ + #ifdef CONFIG_X86_LOCAL_APIC +-#include +-#include +-#include +-#include +-#elif defined(CONFIG_X86_64_XEN) +-#include ++#include + #endif + +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- + #ifdef CONFIG_XEN +-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_LOCAL_APIC) +-#define phys_pkg_id(a,b) a +-#endif + #include + #endif + + #include "cpu.h" + +-#ifdef CONFIG_X86_64 +- + /* all of these masks are initialized in setup_cpu_local_masks() */ ++cpumask_var_t cpu_initialized_mask; + #ifndef CONFIG_XEN +-cpumask_var_t cpu_callin_mask; + cpumask_var_t cpu_callout_mask; ++cpumask_var_t cpu_callin_mask; + #endif +-cpumask_var_t cpu_initialized_mask; + + /* representing cpus for which sibling maps can be computed */ + cpumask_var_t cpu_sibling_setup_mask; + +-#else /* CONFIG_X86_32 */ +- ++/* correctly size the local cpu masks */ ++void __init setup_cpu_local_masks(void) ++{ ++ alloc_bootmem_cpumask_var(&cpu_initialized_mask); + #ifndef CONFIG_XEN +-cpumask_t cpu_callin_map; +-cpumask_t cpu_callout_map; ++ alloc_bootmem_cpumask_var(&cpu_callin_mask); ++ alloc_bootmem_cpumask_var(&cpu_callout_mask); + #endif +-cpumask_t cpu_initialized; +-cpumask_t cpu_sibling_setup_map; +- +-#endif /* CONFIG_X86_32 */ +- ++ 
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); ++} + +-static struct cpu_dev *this_cpu __cpuinitdata; ++static const struct cpu_dev *this_cpu __cpuinitdata; + ++DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + #ifdef CONFIG_X86_64 +-/* We need valid kernel segments for data and code in long mode too +- * IRET will check the segment types kkeil 2000/10/28 +- * Also sysret mandates a special GDT layout +- */ +-/* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) */ +-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, +- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +-} }; ++ /* ++ * We need valid kernel segments for data and code in long mode too ++ * IRET will check the segment types kkeil 2000/10/28 ++ * Also sysret mandates a special GDT layout ++ * ++ * TLS descriptors are currently at a different place compared to i386. ++ * Hopefully nobody expects them at a fixed place (Wine?) ++ */ ++ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, ++ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, ++ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, ++ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, ++ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, ++ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + #else +-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, ++ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, ++ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, ++ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, ++ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + #ifndef CONFIG_XEN + /* + * Segments used for calling PnP BIOS have byte granularity. +@@ -103,33 +96,41 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ +- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, ++ [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ +- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, ++ [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. 
+ */ + /* 32-bit code */ +- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, ++ [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ +- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, ++ [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ +- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, ++ [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + +- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, ++ [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + #endif +- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +-} }; ++ [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, ++ GDT_STACK_CANARY_INIT + #endif ++} }; + EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + ++static int __init x86_xsave_setup(char *s) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_XSAVE); ++ return 1; ++} ++__setup("noxsave", x86_xsave_setup); ++ + #ifdef CONFIG_X86_32 + static int cachesize_override __cpuinitdata = -1; + static int disable_x86_serial_nr __cpuinitdata = 1; +@@ -168,16 +169,17 @@ static inline int flag_is_changeable_p(u + * the CPUID. Add "volatile" to not allow gcc to + * optimize the subsequent calls to this function. + */ +- asm volatile ("pushfl\n\t" +- "pushfl\n\t" +- "popl %0\n\t" +- "movl %0,%1\n\t" +- "xorl %2,%0\n\t" +- "pushl %0\n\t" +- "popfl\n\t" +- "pushfl\n\t" +- "popl %0\n\t" +- "popfl\n\t" ++ asm volatile ("pushfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "movl %0, %1 \n\t" ++ "xorl %2, %0 \n\t" ++ "pushl %0 \n\t" ++ "popfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "popfl \n\t" ++ + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + +@@ -192,18 +194,22 @@ static int __cpuinit have_cpuid_p(void) + + static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) + { +- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { +- /* Disable processor serial number */ +- unsigned long lo, hi; +- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); +- lo |= 0x200000; +- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); +- printk(KERN_NOTICE "CPU serial number disabled.\n"); +- clear_cpu_cap(c, X86_FEATURE_PN); ++ unsigned long lo, hi; + +- /* Disabling the serial number may affect the cpuid level */ +- c->cpuid_level = cpuid_eax(0); +- } ++ if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) ++ return; ++ ++ /* Disable processor serial number: */ ++ ++ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ ++ printk(KERN_NOTICE "CPU serial number disabled.\n"); ++ clear_cpu_cap(c, X86_FEATURE_PN); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); + } + + static int __init x86_serial_nr_setup(char *s) +@@ -228,16 +234,64 @@ static inline void squash_the_stupid_ser + #endif + + /* ++ * Some CPU features depend on higher CPUID levels, which may not always ++ * be available due to CPUID level capping or broken virtualization ++ * software. Add those features to this table to auto-disable them. 
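++ *
++ * (Reading aid, not new code: each entry pairs a feature bit with
++ * the lowest CPUID level at which it can be trusted, e.g.
++ *
++ *	{ X86_FEATURE_MWAIT, 0x00000005 },
++ *
++ * so MWAIT is force-cleared whenever the reported level is capped
++ * below 5.)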
++ */ ++struct cpuid_dependent_feature { ++ u32 feature; ++ u32 level; ++}; ++ ++static const struct cpuid_dependent_feature __cpuinitconst ++cpuid_dependent_features[] = { ++ { X86_FEATURE_MWAIT, 0x00000005 }, ++ { X86_FEATURE_DCA, 0x00000009 }, ++ { X86_FEATURE_XSAVE, 0x0000000d }, ++ { 0, 0 } ++}; ++ ++static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) ++{ ++ const struct cpuid_dependent_feature *df; ++ ++ for (df = cpuid_dependent_features; df->feature; df++) { ++ ++ if (!cpu_has(c, df->feature)) ++ continue; ++ /* ++ * Note: cpuid_level is set to -1 if unavailable, but ++ * extended_extended_level is set to 0 if unavailable ++ * and the legitimate extended levels are all negative ++ * when signed; hence the weird messing around with ++ * signs here... ++ */ ++ if (!((s32)df->level < 0 ? ++ (u32)df->level > (u32)c->extended_cpuid_level : ++ (s32)df->level > (s32)c->cpuid_level)) ++ continue; ++ ++ clear_cpu_cap(c, df->feature); ++ if (!warn) ++ continue; ++ ++ printk(KERN_WARNING ++ "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", ++ x86_cap_flags[df->feature], df->level); ++ } ++} ++ ++/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; +- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used +- * ++ * in particular, if CPUID levels 0x80000002..4 are supported, this ++ * isn't used + */ + + /* Look up CPU names by table lookup. */ +-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) + { +- struct cpu_model_info *info; ++ const struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ +@@ -257,32 +311,52 @@ static char __cpuinit *table_lookup_mode + + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + +-/* Current gdt points %fs at the "master" per-cpu area: after this, +- * it's on the real one. */ +-void switch_to_new_gdt(void) ++void load_percpu_segment(int cpu) ++{ ++#ifdef CONFIG_X86_32 ++ loadsegment(fs, __KERNEL_PERCPU); ++#else ++ loadsegment(gs, 0); ++#ifndef CONFIG_XEN ++ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); ++#else ++ if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, ++ (unsigned long)per_cpu(irq_stack_union.gs_base, cpu))) ++ BUG(); ++#endif ++#endif ++ load_stack_canary_segment(); ++} ++ ++/* ++ * Current gdt points %fs at the "master" per-cpu area: after this, ++ * it's on the real one. 
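The sign games in filter_cpuid_features() deserve a standalone illustration. Basic CPUID levels are small positive numbers (with -1 meaning no CPUID at all), while extended levels all start at 0x80000000 and are therefore negative as s32; the sign of the wanted level selects both which field to compare against and how. A sketch with two test cases (XSAVE needs basic leaf 0xd, per the table above):

#include <stdio.h>
#include <stdint.h>

static int level_missing(int32_t cpuid_level, uint32_t ext_level,
			 uint32_t need)
{
	if ((int32_t)need < 0)			/* 0x80000000+ range */
		return need > ext_level;	/* unsigned compare */
	return (int32_t)need > cpuid_level;	/* basic range; -1 == none */
}

int main(void)
{
	/* capped VM reports cpuid_level 0xa: XSAVE must be filtered */
	printf("%d\n", level_missing(0xa, 0x80000008, 0x0000000d)); /* 1 */
	/* full leaf 0xd available: keep the feature */
	printf("%d\n", level_missing(0xd, 0x80000008, 0x0000000d)); /* 0 */
	return 0;
}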
++ */ ++void switch_to_new_gdt(int cpu) + { + struct desc_ptr gdt_descr; + unsigned long va, frames[16]; + int f; + +- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); ++ gdt_descr.address = (long)get_cpu_gdt_table(cpu); + gdt_descr.size = GDT_SIZE - 1; + + for (va = gdt_descr.address, f = 0; + va < gdt_descr.address + gdt_descr.size; + va += PAGE_SIZE, f++) { +- frames[f] = virt_to_mfn(va); +- make_lowmem_page_readonly( +- (void *)va, XENFEAT_writable_descriptor_tables); ++ frames[f] = arbitrary_virt_to_mfn(va); ++ make_page_readonly((void *)va, ++ XENFEAT_writable_descriptor_tables); + } + if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8)) + BUG(); +-#ifdef CONFIG_X86_32 +- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +-#endif ++ ++ /* Reload the per-cpu base */ ++ ++ load_percpu_segment(cpu); + } + +-static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; ++static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; + + static void __cpuinit default_init(struct cpuinfo_x86 *c) + { +@@ -301,7 +375,7 @@ static void __cpuinit default_init(struc + #endif + } + +-static struct cpu_dev __cpuinitdata default_cpu = { ++static const struct cpu_dev __cpuinitconst default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, +@@ -315,22 +389,24 @@ static void __cpuinit get_model_name(str + if (c->extended_cpuid_level < 0x80000004) + return; + +- v = (unsigned int *) c->x86_model_id; ++ v = (unsigned int *)c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + +- /* Intel chips right-justify this string for some dumb reason; +- undo that brain damage */ ++ /* ++ * Intel chips right-justify this string for some dumb reason; ++ * undo that brain damage: ++ */ + p = q = &c->x86_model_id[0]; + while (*p == ' ') +- p++; ++ p++; + if (p != q) { +- while (*p) +- *q++ = *p++; +- while (q <= &c->x86_model_id[48]) +- *q++ = '\0'; /* Zero-pad the rest */ ++ while (*p) ++ *q++ = *p++; ++ while (q <= &c->x86_model_id[48]) ++ *q++ = '\0'; /* Zero-pad the rest */ + } + } + +@@ -399,36 +475,30 @@ void __cpuinit detect_ht(struct cpuinfo_ + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); +- } else if (smp_num_siblings > 1) { ++ goto out; ++ } + +- if (smp_num_siblings > nr_cpu_ids) { +- printk(KERN_WARNING "CPU: Unsupported number of siblings %d", +- smp_num_siblings); +- smp_num_siblings = 1; +- return; +- } ++ if (smp_num_siblings <= 1) ++ goto out; + +- index_msb = get_count_order(smp_num_siblings); +-#ifdef CONFIG_X86_64 +- c->phys_proc_id = phys_pkg_id(index_msb); +-#else +- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +-#endif ++ if (smp_num_siblings > nr_cpu_ids) { ++ pr_warning("CPU: Unsupported number of siblings %d", ++ smp_num_siblings); ++ smp_num_siblings = 1; ++ return; ++ } + +- smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ index_msb = get_count_order(smp_num_siblings); ++ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); + +- index_msb = get_count_order(smp_num_siblings); ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; + +- core_bits = get_count_order(c->x86_max_cores); ++ index_msb = get_count_order(smp_num_siblings); + +-#ifdef CONFIG_X86_64 +- c->cpu_core_id = phys_pkg_id(index_msb) & +- ((1 << core_bits) - 1); +-#else +- c->cpu_core_id = phys_pkg_id(c->initial_apicid, 
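The brand-string cleanup in get_model_name() above is self-contained enough to lift out; a host-side sketch with a stand-in 49-byte buffer shows the shift-left-and-zero-pad behaviour:

#include <stdio.h>
#include <string.h>

static void unjustify(char *id)	/* id is a 49-byte buffer */
{
	char *p, *q;

	p = q = id;
	while (*p == ' ')
		p++;
	if (p != q) {
		while (*p)
			*q++ = *p++;
		while (q <= &id[48])
			*q++ = '\0';	/* zero-pad the rest */
	}
}

int main(void)
{
	char brand[49];

	memset(brand, 0, sizeof(brand));
	strcpy(brand, "      Intel(R) Pentium(R) 4 CPU 3.00GHz");
	unjustify(brand);
	printf("[%s]\n", brand);	/* leading spaces gone */
	return 0;
}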
index_msb) & +- ((1 << core_bits) - 1); +-#endif +- } ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ++ ((1 << core_bits) - 1); + + out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { +@@ -443,8 +513,8 @@ out: + static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) + { + char *v = c->x86_vendor_id; +- int i; + static int printed; ++ int i; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (!cpu_devs[i]) +@@ -453,6 +523,7 @@ static void __cpuinit get_cpu_vendor(str + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { ++ + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; +@@ -461,7 +532,9 @@ static void __cpuinit get_cpu_vendor(str + + if (!printed) { + printed++; +- printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); ++ printk(KERN_ERR ++ "CPU: vendor_id '%s' unknown, using generic init.\n", v); ++ + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + +@@ -481,14 +554,17 @@ void __cpuinit cpu_detect(struct cpuinfo + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; ++ + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; ++ + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xf) << 4; ++ + if (cap0 & (1<<19)) { + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; +@@ -504,6 +580,7 @@ static void __cpuinit get_cpu_cap(struct + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; ++ + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; +@@ -512,6 +589,7 @@ static void __cpuinit get_cpu_cap(struct + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; ++ + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); +@@ -519,13 +597,15 @@ static void __cpuinit get_cpu_cap(struct + } + } + +-#ifdef CONFIG_X86_64 + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } ++#ifdef CONFIG_X86_32 ++ else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) ++ c->x86_phys_bits = 36; + #endif + + if (c->extended_cpuid_level >= 0x80000007) +@@ -572,8 +652,12 @@ static void __init early_identify_cpu(st + { + #ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; + #else + c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; + #endif + c->x86_cache_alignment = c->x86_clflush_size; + +@@ -596,21 +680,20 @@ static void __init early_identify_cpu(st + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); + +- validate_pat_support(c); +- + #ifdef CONFIG_SMP + c->cpu_index = boot_cpu_id; + #endif ++ filter_cpuid_features(c, false); + } + + void __init early_cpu_init(void) + { +- struct cpu_dev **cdev; ++ const struct cpu_dev *const *cdev; + int count = 0; + +- printk("KERNEL supported cpus:\n"); ++ printk(KERN_INFO "KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { +- struct cpu_dev *cpudev = 
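get_cpu_vendor() above matches c->x86_vendor_id against each registered cpu_dev's c_ident strings. The 12-byte vendor id itself is the CPUID leaf 0 output glued together in ebx/edx/ecx order; a user-space sketch using GCC's <cpuid.h>:

#include <stdio.h>
#include <string.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return 1;

	memcpy(vendor + 0, &ebx, 4);
	memcpy(vendor + 4, &edx, 4);	/* note: edx before ecx */
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';

	printf("vendor_id '%s', max basic leaf %#x\n", vendor, eax);
	return 0;
}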
*cdev; ++ const struct cpu_dev *cpudev = *cdev; + unsigned int j; + + if (count >= X86_VENDOR_NUM) +@@ -621,7 +704,7 @@ void __init early_cpu_init(void) + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; +- printk(" %s %s\n", cpudev->c_vendor, ++ printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } +@@ -663,7 +746,7 @@ static void __cpuinit generic_identify(s + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; + #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) + # ifdef CONFIG_X86_HT +- c->apicid = phys_pkg_id(c->initial_apicid, 0); ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); + # else + c->apicid = c->initial_apicid; + # endif +@@ -697,9 +780,13 @@ static void __cpuinit identify_cpu(struc + c->x86_coreid_bits = 0; + #ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; + #else + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; + #endif + c->x86_cache_alignment = c->x86_clflush_size; + memset(&c->x86_capability, 0, sizeof c->x86_capability); +@@ -712,7 +799,7 @@ static void __cpuinit identify_cpu(struc + this_cpu->c_identify(c); + + #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) +- c->apicid = phys_pkg_id(0); ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); + #endif + + /* +@@ -732,13 +819,16 @@ static void __cpuinit identify_cpu(struc + squash_the_stupid_serial_number(c); + + /* +- * The vendor-specific functions might have changed features. Now +- * we do "generic changes." ++ * The vendor-specific functions might have changed features. ++ * Now we do "generic changes." + */ + ++ /* Filter out anything that depends on CPUID levels we don't have */ ++ filter_cpuid_features(c, true); ++ + /* If the model name is still unset, do table lookup. 
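cpu_detect(), shown a little earlier, derives family/model/stepping from CPUID leaf 1, folding in the extended family and extended model bit-fields exactly as below. The same decode as a runnable user-space sketch:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int family, model, stepping;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	family   = (eax >> 8) & 0xf;
	model    = (eax >> 4) & 0xf;
	stepping = eax & 0xf;

	if (family == 0xf)
		family += (eax >> 20) & 0xff;		/* extended family */
	if (family >= 0x6)
		model += ((eax >> 16) & 0xf) << 4;	/* extended model */

	printf("family %u model %u stepping %u\n", family, model, stepping);
	return 0;
}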
*/ + if (!c->x86_model_id[0]) { +- char *p; ++ const char *p; + p = table_lookup_model(c); + if (p) + strcpy(c->x86_model_id, p); +@@ -794,6 +884,7 @@ static void vgetcpu_set_mode(void) + void __init identify_boot_cpu(void) + { + identify_cpu(&boot_cpu_data); ++ init_c1e_mask(); + #ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); +@@ -813,11 +904,11 @@ void __cpuinit identify_secondary_cpu(st + } + + struct msr_range { +- unsigned min; +- unsigned max; ++ unsigned min; ++ unsigned max; + }; + +-static struct msr_range msr_range_array[] __cpuinitdata = { ++static const struct msr_range msr_range_array[] __cpuinitconst = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, +@@ -826,14 +917,15 @@ static struct msr_range msr_range_array[ + + static void __cpuinit print_cpu_msr(void) + { ++ unsigned index_min, index_max; + unsigned index; + u64 val; + int i; +- unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; ++ + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; +@@ -843,6 +935,7 @@ static void __cpuinit print_cpu_msr(void + } + + static int show_msr __cpuinitdata; ++ + static __init int setup_show_msr(char *arg) + { + int num; +@@ -864,12 +957,14 @@ __setup("noclflush", setup_noclflush); + + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) + { +- char *vendor = NULL; ++ const char *vendor = NULL; + +- if (c->x86_vendor < X86_VENDOR_NUM) ++ if (c->x86_vendor < X86_VENDOR_NUM) { + vendor = this_cpu->c_vendor; +- else if (c->cpuid_level >= 0) +- vendor = c->x86_vendor_id; ++ } else { ++ if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ } + + if (vendor && !strstr(c->x86_model_id, vendor)) + printk(KERN_CONT "%s ", vendor); +@@ -896,87 +991,57 @@ void __cpuinit print_cpu_info(struct cpu + static __init int setup_disablecpuid(char *arg) + { + int bit; ++ + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; ++ + return 1; + } + __setup("clearcpuid=", setup_disablecpuid); + + #ifdef CONFIG_X86_64 +-struct x8664_pda **_cpu_pda __read_mostly; +-EXPORT_SYMBOL(_cpu_pda); +- + #ifndef CONFIG_X86_NO_IDT + struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + #endif + +-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; ++DEFINE_PER_CPU_FIRST(union irq_stack_union, ++ irq_stack_union) __aligned(PAGE_SIZE); + +-static void __ref switch_pt(int cpu) ++void xen_switch_pt(void) + { + #ifdef CONFIG_XEN +- if (cpu == 0) +- xen_init_pt(); + xen_pt_switch(__pa_symbol(init_level4_pgt)); + xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); + #endif + } + +-void __cpuinit pda_init(int cpu) +-{ +- struct x8664_pda *pda = cpu_pda(cpu); ++DEFINE_PER_CPU(char *, irq_stack_ptr) = ++ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; + +- /* Setup up data that may be needed in __get_free_pages early */ +- loadsegment(fs, 0); +- loadsegment(gs, 0); +-#ifndef CONFIG_XEN +- /* Memory clobbers used to order PDA accessed */ +- mb(); +- wrmsrl(MSR_GS_BASE, pda); +- mb(); +-#else +- if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, +- (unsigned long)pda)) +- BUG(); +-#endif +- +- pda->cpunumber = cpu; +- pda->irqcount = -1; +- pda->kernelstack = (unsigned long)stack_thread_info() - +- PDA_STACKOFFSET + THREAD_SIZE; +- pda->active_mm = &init_mm; +- pda->mmu_state = 0; +- +- if (cpu == 0) { +- /* others are 
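print_cpu_msr() above walks fixed MSR index ranges with rdmsrl_amd_safe(), silently skipping unimplemented registers. A rough user-space counterpart (an assumption-laden sketch: it needs the msr driver loaded and root privileges) reads MSRs through /dev/cpu/0/msr, where pread()'s file offset selects the MSR index:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);
	uint64_t val;
	unsigned int index;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (index = 0x00000000; index < 0x00000010; index++) {
		if (pread(fd, &val, sizeof(val), index) != sizeof(val))
			continue;	/* unimplemented MSR */
		printf(" MSR%08x: %016llx\n", index,
		       (unsigned long long)val);
	}
	close(fd);
	return 0;
}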
initialized in smpboot.c */ +- pda->pcurrent = &init_task; +- pda->irqstackptr = boot_cpu_stack; +- pda->irqstackptr += IRQSTACKSIZE - 64; +- } else { +- if (!pda->irqstackptr) { +- pda->irqstackptr = (char *) +- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); +- if (!pda->irqstackptr) +- panic("cannot allocate irqstack for cpu %d", +- cpu); +- pda->irqstackptr += IRQSTACKSIZE - 64; +- } ++DEFINE_PER_CPU(unsigned long, kernel_stack) = ++ (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; ++EXPORT_PER_CPU_SYMBOL(kernel_stack); + +- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) +- pda->nodenumber = cpu_to_node(cpu); +- } +- +- switch_pt(cpu); +-} ++DEFINE_PER_CPU(unsigned int, irq_count) = -1; + + #ifndef CONFIG_X86_NO_TSS +-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + +- DEBUG_STKSZ] __page_aligned_bss; +-#endif ++/* ++ * Special IST stacks which the CPU switches to when it calls ++ * an IST-marked descriptor entry. Up to 7 stacks (hardware ++ * limit), all of them are 4K, except the debug stack which ++ * is 8K. ++ */ ++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, ++ [DEBUG_STACK - 1] = DEBUG_STKSZ ++}; + +-extern asmlinkage void ignore_sysret(void); ++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks ++ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) ++ __aligned(PAGE_SIZE); ++#endif + + void __cpuinit syscall_init(void) + { +@@ -1020,16 +1085,38 @@ unsigned long kernel_eflags; + DEFINE_PER_CPU(struct orig_ist, orig_ist); + #endif + +-#else ++#else /* CONFIG_X86_64 */ + +-/* Make sure %fs is initialized properly in idle threads */ ++#ifdef CONFIG_CC_STACKPROTECTOR ++DEFINE_PER_CPU(unsigned long, stack_canary); ++#endif ++ ++/* Make sure %fs and %gs are initialized properly in idle threads */ + struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) + { + memset(regs, 0, sizeof(struct pt_regs)); + regs->fs = __KERNEL_PERCPU; ++ regs->gs = __KERNEL_STACK_CANARY; ++ + return regs; + } +-#endif ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Clear all 6 debug registers: ++ */ ++static void clear_all_debug_regs(void) ++{ ++ int i; ++ ++ for (i = 0; i < 8; i++) { ++ /* Ignore db4, db5 */ ++ if ((i == 4) || (i == 5)) ++ continue; ++ ++ set_debugreg(0, i); ++ } ++} + + /* + * cpu_init() initializes state that is per-CPU. 
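The per-CPU exception_stacks block above replaces the old GFP_ATOMIC allocations: all IST stacks are carved from one page-aligned array, every stack EXCEPTION_STKSZ bytes except the larger debug stack. A sketch of the same carving arithmetic; the four constants here are illustrative stand-ins, not the kernel's actual values:

#include <stdio.h>

#define N_EXCEPTION_STACKS	5	/* stand-in */
#define DEBUG_STACK		4	/* stand-in 1-based IST slot */
#define EXCEPTION_STKSZ		4096
#define DEBUG_STKSZ		8192

static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
	[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, /* GCC range init */
	[DEBUG_STACK - 1] = DEBUG_STKSZ
};

static char estacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];

int main(void)
{
	char *p = estacks;
	int v;

	for (v = 0; v < N_EXCEPTION_STACKS; v++) {
		p += exception_stack_sizes[v];
		/* ist[v] points at the top (highest address) of stack v */
		printf("ist[%d] = estacks + %ld\n", v, (long)(p - estacks));
	}
	return 0;
}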
Some data is already +@@ -1039,24 +1126,31 @@ struct pt_regs * __cpuinit idle_regs(str + * A lot of state is already set up in PDA init for 64 bit + */ + #ifdef CONFIG_X86_64 ++ + void __cpuinit cpu_init(void) + { +- int cpu = stack_smp_processor_id(); + #ifndef CONFIG_X86_NO_TSS +- struct tss_struct *t = &per_cpu(init_tss, cpu); +- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); ++ struct orig_ist *orig_ist; ++ struct tss_struct *t; + unsigned long v; +- char *estacks = NULL; + int i; + #endif + struct task_struct *me; ++ int cpu; + ++ cpu = stack_smp_processor_id(); + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) +- pda_init(cpu); ++ xen_switch_pt(); + #ifndef CONFIG_X86_NO_TSS +- else +- estacks = boot_exception_stacks; ++ t = &per_cpu(init_tss, cpu); ++ orig_ist = &per_cpu(orig_ist, cpu); ++#endif ++ ++#ifdef CONFIG_NUMA ++ if (cpu != 0 && percpu_read(node_number) == 0 && ++ cpu_to_node(cpu) != NUMA_NO_NODE) ++ percpu_write(node_number, cpu_to_node(cpu)); + #endif + + me = current; +@@ -1073,7 +1167,9 @@ void __cpuinit cpu_init(void) + * and set up the GDT descriptor: + */ + +- switch_to_new_gdt(); ++ switch_to_new_gdt(cpu); ++ loadsegment(fs, 0); ++ + #ifndef CONFIG_X86_NO_IDT + load_idt((const struct desc_ptr *)&idt_descr); + #endif +@@ -1086,8 +1182,8 @@ void __cpuinit cpu_init(void) + barrier(); + + check_efer(); +-#ifndef CONFIG_XEN +- if (cpu != 0 && x2apic) ++#ifdef CONFIG_X86_LOCAL_APIC ++ if (cpu != 0) + enable_x2apic(); + #endif + +@@ -1096,24 +1192,17 @@ void __cpuinit cpu_init(void) + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { +- static const unsigned int order[N_EXCEPTION_STACKS] = { +- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, +- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER +- }; ++ char *estacks = per_cpu(exception_stacks, cpu); ++ + for (v = 0; v < N_EXCEPTION_STACKS; v++) { +- if (cpu) { +- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); +- if (!estacks) +- panic("Cannot allocate exception " +- "stack %ld %d\n", v, cpu); +- } +- estacks += PAGE_SIZE << order[v]; ++ estacks += exception_stack_sizes[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++ + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. +@@ -1124,8 +1213,7 @@ void __cpuinit cpu_init(void) + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; +- if (me->mm) +- BUG(); ++ BUG_ON(me->mm); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); +@@ -1144,22 +1232,9 @@ void __cpuinit cpu_init(void) + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); +- else { +-#endif +- /* +- * Clear all 6 debug registers: +- */ +- +- set_debugreg(0UL, 0); +- set_debugreg(0UL, 1); +- set_debugreg(0UL, 2); +- set_debugreg(0UL, 3); +- set_debugreg(0UL, 6); +- set_debugreg(0UL, 7); +-#ifdef CONFIG_KGDB +- /* If the kgdb is connected no debug regs should be altered. 
*/ +- } ++ else + #endif ++ clear_all_debug_regs(); + + fpu_init(); + +@@ -1171,8 +1246,10 @@ void __cpuinit cpu_init(void) + kernel_eflags &= ~X86_EFLAGS_IF; + #endif + ++#ifdef CONFIG_X86_LOCAL_APIC + if (is_uv_system()) + uv_cpu_init(); ++#endif + } + + #else +@@ -1188,7 +1265,8 @@ void __cpuinit cpu_init(void) + + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); +- for (;;) local_irq_enable(); ++ for (;;) ++ local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); +@@ -1196,36 +1274,30 @@ void __cpuinit cpu_init(void) + if (cpu_has_vme || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + +- switch_to_new_gdt(); ++ switch_to_new_gdt(cpu); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; +- if (curr->mm) +- BUG(); ++ BUG_ON(curr->mm); + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); + + load_LDT(&init_mm.context); + ++#ifndef CONFIG_X86_NO_TSS ++ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++#endif ++ + #ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + #endif + +- /* Clear %gs. */ +- asm volatile ("mov %0, %%gs" : : "r" (0)); +- +- /* Clear all 6 debug registers: */ +- set_debugreg(0, 0); +- set_debugreg(0, 1); +- set_debugreg(0, 2); +- set_debugreg(0, 3); +- set_debugreg(0, 6); +- set_debugreg(0, 7); ++ clear_all_debug_regs(); + + /* + * Force FPU initialization: +@@ -1245,6 +1317,4 @@ void __cpuinit cpu_init(void) + + xsave_init(); + } +- +- + #endif +--- head-2010-01-18.orig/arch/x86/kernel/cpu/cpu_debug.c 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/cpu_debug.c 2009-11-06 10:52:02.000000000 +0100 +@@ -59,17 +59,23 @@ static struct cpu_debug_base cpu_base[] + { "misc", CPU_MISC, 0 }, + { "debug", CPU_DEBUG, 0 }, + { "pat", CPU_PAT, 0 }, ++#ifndef CONFIG_XEN + { "vmx", CPU_VMX, 0 }, ++#endif + { "call", CPU_CALL, 0 }, + { "base", CPU_BASE, 0 }, + { "ver", CPU_VER, 0 }, + { "conf", CPU_CONF, 0 }, + { "smm", CPU_SMM, 0 }, ++#ifndef CONFIG_XEN + { "svm", CPU_SVM, 0 }, + { "osvm", CPU_OSVM, 0 }, ++#endif + { "tss", CPU_TSS, 0 }, + { "cr", CPU_CR, 0 }, ++#ifndef CONFIG_XEN + { "dt", CPU_DT, 0 }, ++#endif + { "registers", CPU_REG_ALL, 0 }, + }; + +@@ -298,6 +304,7 @@ static void print_cr(void *arg) + #endif + } + ++#ifndef CONFIG_XEN + static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) + { + seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); +@@ -324,6 +331,7 @@ static void print_dt(void *seq) + store_tr(ldt); + seq_printf(seq, " TR\t: %016lx\n", ldt); + } ++#endif /* CONFIG_XEN */ + + static void print_dr(void *arg) + { +@@ -346,7 +354,7 @@ static void print_apic(void *arg) + { + struct seq_file *seq = arg; + +-#ifdef CONFIG_X86_LOCAL_APIC ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) + seq_printf(seq, " LAPIC\t:\n"); + seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); + seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); +@@ -400,9 +408,11 @@ static int cpu_seq_show(struct seq_file + case CPU_CR: + smp_call_function_single(priv->cpu, print_cr, seq, 1); + break; ++#ifndef CONFIG_XEN + case CPU_DT: + smp_call_function_single(priv->cpu, print_dt, seq, 1); + break; ++#endif + case CPU_DEBUG: + if (priv->file == CPU_INDEX_BIT) + smp_call_function_single(priv->cpu, 
print_dr, seq, 1); +@@ -487,7 +497,11 @@ static int write_cpu_register(struct cpu + return ret; + + /* Supporting only MSRs */ ++#ifndef CONFIG_XEN + if (priv->type < CPU_TSS_BIT) ++#else ++ if (cpu_base[priv->type].flag < CPU_TSS) ++#endif + return write_msr(priv, val); + + return ret; +@@ -617,7 +631,11 @@ static int cpu_init_allreg(unsigned cpu, + cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); + per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; + ++#ifndef CONFIG_XEN + if (type < CPU_TSS_BIT) ++#else ++ if (cpu_base[type].flag < CPU_TSS) ++#endif + err = cpu_init_msr(cpu, type, cpu_dentry); + else + err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, +--- head-2010-01-18.orig/arch/x86/kernel/e820-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/e820-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -129,19 +129,50 @@ int __init e820_all_mapped(u64 start, u6 + /* + * Add a memory region to the kernel e820 map. + */ +-void __init e820_add_region(u64 start, u64 size, int type) ++static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, ++ int type) + { +- int x = e820.nr_map; ++ int x = e820x->nr_map; + +- if (x == ARRAY_SIZE(e820.map)) { ++ if (x == ARRAY_SIZE(e820x->map)) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + +- e820.map[x].addr = start; +- e820.map[x].size = size; +- e820.map[x].type = type; +- e820.nr_map++; ++ e820x->map[x].addr = start; ++ e820x->map[x].size = size; ++ e820x->map[x].type = type; ++ e820x->nr_map++; ++} ++ ++void __init e820_add_region(u64 start, u64 size, int type) ++{ ++ __e820_add_region(&e820, start, size, type); ++} ++ ++static void __init e820_print_type(u32 type) ++{ ++ switch (type) { ++ case E820_RAM: ++ case E820_RESERVED_KERN: ++ printk(KERN_CONT "(usable)"); ++ break; ++ case E820_RESERVED: ++ printk(KERN_CONT "(reserved)"); ++ break; ++ case E820_ACPI: ++ printk(KERN_CONT "(ACPI data)"); ++ break; ++ case E820_NVS: ++ printk(KERN_CONT "(ACPI NVS)"); ++ break; ++ case E820_UNUSABLE: ++ printk(KERN_CONT "(unusable)"); ++ break; ++ default: ++ printk(KERN_CONT "type %u", type); ++ break; ++ } + } + + static void __init _e820_print_map(const struct e820map *e820, const char *who) +@@ -153,27 +184,8 @@ static void __init _e820_print_map(const + (unsigned long long) e820->map[i].addr, + (unsigned long long) + (e820->map[i].addr + e820->map[i].size)); +- switch (e820->map[i].type) { +- case E820_RAM: +- case E820_RESERVED_KERN: +- printk(KERN_CONT "(usable)\n"); +- break; +- case E820_RESERVED: +- printk(KERN_CONT "(reserved)\n"); +- break; +- case E820_ACPI: +- printk(KERN_CONT "(ACPI data)\n"); +- break; +- case E820_NVS: +- printk(KERN_CONT "(ACPI NVS)\n"); +- break; +- case E820_UNUSABLE: +- printk("(unusable)\n"); +- break; +- default: +- printk(KERN_CONT "type %u\n", e820->map[i].type); +- break; +- } ++ e820_print_type(e820->map[i].type); ++ printk(KERN_CONT "\n"); + } + } + +@@ -240,7 +252,7 @@ static void __init _e820_print_map(const + */ + + int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, +- int *pnr_map) ++ u32 *pnr_map) + { + struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ +@@ -444,11 +456,12 @@ static int __init append_e820_map(struct + return __append_e820_map(biosmap, nr_map); + } + +-static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, ++static u64 __init __e820_update_range(struct e820map *e820x, u64 start, + u64 size, unsigned old_type, + unsigned 
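The e820 refactor above funnels every caller through one __e820_add_region() helper and factors the entry-type printing out of _e820_print_map(). A stripped-down model of the resulting structure (entry count, type constants and map size are stand-ins):

#include <stdio.h>
#include <stdint.h>

#define E820_MAX	128
#define E820_RAM	1
#define E820_RESERVED	2

struct e820entry { uint64_t addr, size; uint32_t type; };
struct e820map	 { uint32_t nr_map; struct e820entry map[E820_MAX]; };

static void __e820_add_region(struct e820map *m, uint64_t start,
			      uint64_t size, uint32_t type)
{
	if (m->nr_map == E820_MAX) {
		fprintf(stderr, "too many e820 entries\n");
		return;
	}
	m->map[m->nr_map++] = (struct e820entry){ start, size, type };
}

static void e820_print_type(uint32_t type)
{
	switch (type) {
	case E820_RAM:		printf("(usable)"); break;
	case E820_RESERVED:	printf("(reserved)"); break;
	default:		printf("type %u", type); break;
	}
}

int main(void)
{
	struct e820map m = { 0 };
	uint32_t i;

	__e820_add_region(&m, 0x0, 0x9f000, E820_RAM);
	__e820_add_region(&m, 0xf0000, 0x10000, E820_RESERVED);

	for (i = 0; i < m.nr_map; i++) {
		printf(" %016llx - %016llx ",
		       (unsigned long long)m.map[i].addr,
		       (unsigned long long)(m.map[i].addr + m.map[i].size));
		e820_print_type(m.map[i].type);
		printf("\n");
	}
	return 0;
}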
new_type) + { +- unsigned int i, x; ++ u64 end; ++ unsigned int i; + u64 real_updated_size = 0; + + BUG_ON(old_type == new_type); +@@ -456,40 +469,59 @@ static u64 __init e820_update_range_map( + if (size > (ULLONG_MAX - start)) + size = ULLONG_MAX - start; + ++ end = start + size; ++ printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", ++ (unsigned long long) start, ++ (unsigned long long) end); ++ e820_print_type(old_type); ++ printk(KERN_CONT " ==> "); ++ e820_print_type(new_type); ++ printk(KERN_CONT "\n"); ++ + for (i = 0; i < e820x->nr_map; i++) { + struct e820entry *ei = &e820x->map[i]; + u64 final_start, final_end; ++ u64 ei_end; ++ + if (ei->type != old_type) + continue; +- /* totally covered? */ +- if (ei->addr >= start && +- (ei->addr + ei->size) <= (start + size)) { ++ ++ ei_end = ei->addr + ei->size; ++ /* totally covered by new range? */ ++ if (ei->addr >= start && ei_end <= end) { + ei->type = new_type; + real_updated_size += ei->size; + continue; + } ++ ++ /* new range is totally covered? */ ++ if (ei->addr < start && ei_end > end) { ++ __e820_add_region(e820x, start, size, new_type); ++ __e820_add_region(e820x, end, ei_end - end, ei->type); ++ ei->size = start - ei->addr; ++ real_updated_size += size; ++ continue; ++ } ++ + /* partially covered */ + final_start = max(start, ei->addr); +- final_end = min(start + size, ei->addr + ei->size); ++ final_end = min(end, ei_end); + if (final_start >= final_end) + continue; + +- x = e820x->nr_map; +- if (x == ARRAY_SIZE(e820x->map)) { +- printk(KERN_ERR "Too many memory map entries!\n"); +- break; +- } +- e820x->map[x].addr = final_start; +- e820x->map[x].size = final_end - final_start; +- e820x->map[x].type = new_type; +- e820x->nr_map++; ++ __e820_add_region(e820x, final_start, final_end - final_start, ++ new_type); + + real_updated_size += final_end - final_start; + ++ /* ++ * left range could be head or tail, so need to update ++ * size at first. 
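__e820_update_range() now distinguishes three overlap cases: the entry is fully covered by the new range (retype it in place), the new range punches a hole in the entry (split into three pieces via two added regions), or a head/tail overlap (trim the entry). A tiny classifier over [start, end) intervals makes the case analysis explicit:

#include <stdio.h>
#include <stdint.h>

enum overlap { NONE, COVERED, SPLITS, PARTIAL };

static enum overlap classify(uint64_t ei_addr, uint64_t ei_end,
			     uint64_t start, uint64_t end)
{
	if (ei_addr >= start && ei_end <= end)
		return COVERED;	/* entry fully inside new range */
	if (ei_addr < start && ei_end > end)
		return SPLITS;	/* new range punches a hole */
	if (ei_addr < end && ei_end > start)
		return PARTIAL;	/* head or tail overlap: trim */
	return NONE;
}

int main(void)
{
	printf("%d\n", classify(0x1000, 0x9000, 0x2000, 0x3000)); /* SPLITS  */
	printf("%d\n", classify(0x2000, 0x3000, 0x1000, 0x9000)); /* COVERED */
	printf("%d\n", classify(0x1000, 0x3000, 0x2000, 0x9000)); /* PARTIAL */
	return 0;
}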
++ */ ++ ei->size -= final_end - final_start; + if (ei->addr < final_start) + continue; + ei->addr = final_end; +- ei->size -= final_end - final_start; + } + return real_updated_size; + } +@@ -497,7 +529,7 @@ static u64 __init e820_update_range_map( + u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) + { +- return e820_update_range_map(&e820, start, size, old_type, new_type); ++ return __e820_update_range(&e820, start, size, old_type, new_type); + } + + static u64 __init e820_update_range_saved(u64 start, u64 size, +@@ -505,11 +537,11 @@ static u64 __init e820_update_range_save + { + #ifdef CONFIG_XEN + if (is_initial_xendomain()) +- return e820_update_range_map(&machine_e820, +- phys_to_machine(start), size, +- old_type, new_type); ++ return __e820_update_range(&machine_e820, ++ phys_to_machine(start), size, ++ old_type, new_type); + #endif +- return e820_update_range_map(&e820_saved, start, size, old_type, ++ return __e820_update_range(&e820_saved, start, size, old_type, + new_type); + } + +@@ -553,7 +585,7 @@ u64 __init e820_remove_range(u64 start, + + void __init update_e820(void) + { +- int nr_map; ++ u32 nr_map; + + nr_map = e820.nr_map; + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) +@@ -564,7 +596,7 @@ void __init update_e820(void) + } + static void __init update_e820_saved(void) + { +- int nr_map; ++ u32 nr_map; + + nr_map = e820_saved.nr_map; + if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) +@@ -916,6 +948,9 @@ void __init reserve_early_overlap_ok(u64 + */ + void __init reserve_early(u64 start, u64 end, char *name) + { ++ if (start >= end) ++ return; ++ + drop_overlaps_that_are_ok(start, end); + __reserve_early(start, end, name, 0); + } +@@ -1389,7 +1424,7 @@ early_param("memmap", parse_memmap_opt); + void __init finish_e820_parsing(void) + { + if (userdef) { +- int nr = e820.nr_map; ++ u32 nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) + early_panic("Invalid user supplied memory map"); +@@ -1479,7 +1514,7 @@ void __init e820_reserve_resources_late( + char *__init default_machine_specific_memory_setup(void) + { + char *who = "BIOS-e820"; +- int new_nr; ++ u32 new_nr; + /* + * Try to copy the BIOS-supplied E820-map. + * +--- head-2010-01-18.orig/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -12,8 +12,8 @@ + #include + #include + #include +-#include + #include ++#include + #include + + #ifndef CONFIG_XEN +@@ -279,7 +279,7 @@ static int dbgp_wait_until_complete(void + return (ctrl & DBGP_ERROR) ? 
-DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); + } + +-static void dbgp_mdelay(int ms) ++static void __init dbgp_mdelay(int ms) + { + int i; + +@@ -340,7 +340,7 @@ static void dbgp_set_data(const void *bu + writel(hi, &ehci_debug->data47); + } + +-static void dbgp_get_data(void *buf, int size) ++static void __init dbgp_get_data(void *buf, int size) + { + unsigned char *bytes = buf; + u32 lo, hi; +@@ -384,7 +384,7 @@ static int dbgp_bulk_write(unsigned devn + return ret; + } + +-static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, ++static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, + int size) + { + u32 pids, addr, ctrl; +@@ -415,8 +415,8 @@ static int dbgp_bulk_read(unsigned devnu + return ret; + } + +-static int dbgp_control_msg(unsigned devnum, int requesttype, int request, +- int value, int index, void *data, int size) ++static int __init dbgp_control_msg(unsigned devnum, int requesttype, ++ int request, int value, int index, void *data, int size) + { + u32 pids, addr, ctrl; + struct usb_ctrlrequest req; +@@ -518,7 +518,7 @@ static u32 __init find_dbgp(int ehci_num + return 0; + } + +-static int ehci_reset_port(int port) ++static int __init ehci_reset_port(int port) + { + u32 portsc; + u32 delay_time, delay; +@@ -561,7 +561,7 @@ static int ehci_reset_port(int port) + return -EBUSY; + } + +-static int ehci_wait_for_port(int port) ++static int __init ehci_wait_for_port(int port) + { + u32 status; + int ret, reps; +@@ -586,13 +586,13 @@ static inline void dbgp_printk(const cha + + typedef void (*set_debug_port_t)(int port); + +-static void default_set_debug_port(int port) ++static void __init default_set_debug_port(int port) + { + } + +-static set_debug_port_t set_debug_port = default_set_debug_port; ++static set_debug_port_t __initdata set_debug_port = default_set_debug_port; + +-static void nvidia_set_debug_port(int port) ++static void __init nvidia_set_debug_port(int port) + { + u32 dword; + dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, +--- head-2010-01-18.orig/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:52:02.000000000 +0100 +@@ -30,12 +30,13 @@ + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - %fs +- * 28(%esp) - orig_eax +- * 2C(%esp) - %eip +- * 30(%esp) - %cs +- * 34(%esp) - %eflags +- * 38(%esp) - %oldesp +- * 3C(%esp) - %oldss ++ * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS ++ * 2C(%esp) - orig_eax ++ * 30(%esp) - %eip ++ * 34(%esp) - %cs ++ * 38(%esp) - %eflags ++ * 3C(%esp) - %oldesp ++ * 40(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. 
+ */ +@@ -46,7 +47,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -105,121 +106,221 @@ NMI_MASK = 0x80000000 + #define resume_userspace_sig resume_userspace + #endif + +-#define SAVE_ALL \ +- cld; \ +- pushl %fs; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET fs, 0;*/\ +- pushl %es; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET es, 0;*/\ +- pushl %ds; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET ds, 0;*/\ +- pushl %eax; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET eax, 0;\ +- pushl %ebp; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET ebp, 0;\ +- pushl %edi; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET edi, 0;\ +- pushl %esi; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET esi, 0;\ +- pushl %edx; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET edx, 0;\ +- pushl %ecx; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET ecx, 0;\ +- pushl %ebx; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET ebx, 0;\ +- movl $(__USER_DS), %edx; \ +- movl %edx, %ds; \ +- movl %edx, %es; \ +- movl $(__KERNEL_PERCPU), %edx; \ ++/* ++ * User gs save/restore ++ * ++ * %gs is used for userland TLS and kernel only uses it for stack ++ * canary which is required to be at %gs:20 by gcc. Read the comment ++ * at the top of stackprotector.h for more info. ++ * ++ * Local labels 98 and 99 are used. ++ */ ++#ifdef CONFIG_X86_32_LAZY_GS ++ ++ /* unfortunately push/pop can't be no-op */ ++.macro PUSH_GS ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++.endm ++.macro POP_GS pop=0 ++ addl $(4 + \pop), %esp ++ CFI_ADJUST_CFA_OFFSET -(4 + \pop) ++.endm ++.macro POP_GS_EX ++.endm ++ ++ /* all the rest are no-op */ ++.macro PTGS_TO_GS ++.endm ++.macro PTGS_TO_GS_EX ++.endm ++.macro GS_TO_REG reg ++.endm ++.macro REG_TO_PTGS reg ++.endm ++.macro SET_KERNEL_GS reg ++.endm ++ ++#else /* CONFIG_X86_32_LAZY_GS */ ++ ++.macro PUSH_GS ++ pushl %gs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET gs, 0*/ ++.endm ++ ++.macro POP_GS pop=0 ++98: popl %gs ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE gs*/ ++ .if \pop <> 0 ++ add $\pop, %esp ++ CFI_ADJUST_CFA_OFFSET -\pop ++ .endif ++.endm ++.macro POP_GS_EX ++.pushsection .fixup, "ax" ++99: movl $0, (%esp) ++ jmp 98b ++.section __ex_table, "a" ++ .align 4 ++ .long 98b, 99b ++.popsection ++.endm ++ ++.macro PTGS_TO_GS ++98: mov PT_GS(%esp), %gs ++.endm ++.macro PTGS_TO_GS_EX ++.pushsection .fixup, "ax" ++99: movl $0, PT_GS(%esp) ++ jmp 98b ++.section __ex_table, "a" ++ .align 4 ++ .long 98b, 99b ++.popsection ++.endm ++ ++.macro GS_TO_REG reg ++ movl %gs, \reg ++ /*CFI_REGISTER gs, \reg*/ ++.endm ++.macro REG_TO_PTGS reg ++ movl \reg, PT_GS(%esp) ++ /*CFI_REL_OFFSET gs, PT_GS*/ ++.endm ++.macro SET_KERNEL_GS reg ++ movl $(__KERNEL_STACK_CANARY), \reg ++ movl \reg, %gs ++.endm ++ ++#endif /* CONFIG_X86_32_LAZY_GS */ ++ ++.macro SAVE_ALL ++ cld ++ PUSH_GS ++ pushl %fs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET fs, 0;*/ ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0;*/ ++ pushl %ds ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ds, 0;*/ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eax, 0 ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebp, 0 ++ pushl %edi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edi, 0 ++ pushl %esi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esi, 0 ++ pushl %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edx, 0 ++ pushl %ecx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ecx, 0 ++ pushl %ebx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebx, 0 ++ movl $(__USER_DS), 
%edx ++ movl %edx, %ds ++ movl %edx, %es ++ movl $(__KERNEL_PERCPU), %edx + movl %edx, %fs ++ SET_KERNEL_GS %edx ++.endm + +-#define RESTORE_INT_REGS \ +- popl %ebx; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE ebx;\ +- popl %ecx; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE ecx;\ +- popl %edx; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE edx;\ +- popl %esi; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE esi;\ +- popl %edi; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE edi;\ +- popl %ebp; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE ebp;\ +- popl %eax; \ +- CFI_ADJUST_CFA_OFFSET -4;\ ++.macro RESTORE_INT_REGS ++ popl %ebx ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE ebx ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE ecx ++ popl %edx ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE edx ++ popl %esi ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE esi ++ popl %edi ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE edi ++ popl %ebp ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE ebp ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE eax ++.endm + +-#define RESTORE_REGS \ +- RESTORE_INT_REGS; \ +-1: popl %ds; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE ds;*/\ +-2: popl %es; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE es;*/\ +-3: popl %fs; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE fs;*/\ +-.pushsection .fixup,"ax"; \ +-4: movl $0,(%esp); \ +- jmp 1b; \ +-5: movl $0,(%esp); \ +- jmp 2b; \ +-6: movl $0,(%esp); \ +- jmp 3b; \ +-.section __ex_table,"a";\ +- .align 4; \ +- .long 1b,4b; \ +- .long 2b,5b; \ +- .long 3b,6b; \ ++.macro RESTORE_REGS pop=0 ++ RESTORE_INT_REGS ++1: popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE ds;*/ ++2: popl %es ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE es;*/ ++3: popl %fs ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE fs;*/ ++ POP_GS \pop ++.pushsection .fixup, "ax" ++4: movl $0, (%esp) ++ jmp 1b ++5: movl $0, (%esp) ++ jmp 2b ++6: movl $0, (%esp) ++ jmp 3b ++.section __ex_table, "a" ++ .align 4 ++ .long 1b, 4b ++ .long 2b, 5b ++ .long 3b, 6b + .popsection ++ POP_GS_EX ++.endm + +-#define RING0_INT_FRAME \ +- CFI_STARTPROC simple;\ +- CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, 3*4;\ +- /*CFI_OFFSET cs, -2*4;*/\ ++.macro RING0_INT_FRAME ++ CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME ++ CFI_DEF_CFA esp, 3*4 ++ /*CFI_OFFSET cs, -2*4;*/ + CFI_OFFSET eip, -3*4 ++.endm + +-#define RING0_EC_FRAME \ +- CFI_STARTPROC simple;\ +- CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, 4*4;\ +- /*CFI_OFFSET cs, -2*4;*/\ ++.macro RING0_EC_FRAME ++ CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME ++ CFI_DEF_CFA esp, 4*4 ++ /*CFI_OFFSET cs, -2*4;*/ + CFI_OFFSET eip, -3*4 ++.endm + +-#define RING0_PTREGS_FRAME \ +- CFI_STARTPROC simple;\ +- CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ +- /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ +- CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ +- /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ +- /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ +- CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ +- CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ +- CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ +- CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ +- CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ +- CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ ++.macro RING0_PTREGS_FRAME ++ CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME ++ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX ++ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ ++ CFI_OFFSET eip, PT_EIP-PT_OLDESP ++ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ ++ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ ++ CFI_OFFSET eax, PT_EAX-PT_OLDESP ++ CFI_OFFSET ebp, PT_EBP-PT_OLDESP ++ CFI_OFFSET edi, PT_EDI-PT_OLDESP ++ CFI_OFFSET esi, PT_ESI-PT_OLDESP ++ CFI_OFFSET edx, 
PT_EDX-PT_OLDESP ++ CFI_OFFSET ecx, PT_ECX-PT_OLDESP + CFI_OFFSET ebx, PT_EBX-PT_OLDESP ++.endm + + ENTRY(ret_from_fork) + CFI_STARTPROC +@@ -344,7 +445,8 @@ sysenter_past_esp: + .previous + + GET_THREAD_INFO(%ebp) +- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) ++ ++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz sysenter_audit + sysenter_do_call: + cmpl $(nr_syscalls), %eax +@@ -355,7 +457,7 @@ sysenter_do_call: + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx +- testw $_TIF_ALLWORK_MASK, %cx ++ testl $_TIF_ALLWORK_MASK, %ecx + jne sysexit_audit + sysenter_exit: + /* if something modifies registers it must also disable sysexit */ +@@ -364,11 +466,12 @@ sysenter_exit: + xorl %ebp,%ebp + TRACE_IRQS_ON + 1: mov PT_FS(%esp), %fs ++ PTGS_TO_GS + ENABLE_INTERRUPTS_SYSEXIT + + #ifdef CONFIG_AUDITSYSCALL + sysenter_audit: +- testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + jnz syscall_trace_entry + addl $4,%esp + CFI_ADJUST_CFA_OFFSET -4 +@@ -385,7 +488,7 @@ sysenter_audit: + jmp sysenter_do_call + + sysexit_audit: +- testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx ++ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx + jne syscall_exit_work + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_ANY) +@@ -398,7 +501,7 @@ sysexit_audit: + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx +- testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx ++ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx + jne syscall_exit_work + movl PT_EAX(%esp),%eax /* reload syscall return value */ + jmp sysenter_exit +@@ -412,6 +515,7 @@ sysexit_audit: + .align 4 + .long 1b,2b + .popsection ++ PTGS_TO_GS_EX + ENDPROC(ia32_sysenter_target) + + # pv sysenter call handler stub +@@ -447,7 +551,7 @@ ENTRY(system_call) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + GET_THREAD_INFO(%ebp) +- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) ++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz syscall_trace_entry + cmpl $(nr_syscalls), %eax + jae syscall_badsys +@@ -461,7 +565,7 @@ syscall_exit: + # between sampling and the iret + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx +- testw $_TIF_ALLWORK_MASK, %cx # current->work ++ testl $_TIF_ALLWORK_MASK, %ecx # current->work + jne syscall_exit_work + + restore_all: +@@ -492,8 +596,7 @@ restore_nocheck: + #endif + TRACE_IRQS_IRET + restore_nocheck_notrace: +- RESTORE_REGS +- addl $4, %esp # skip orig_eax/error_code ++ RESTORE_REGS 4 # skip orig_eax/error_code + CFI_ADJUST_CFA_OFFSET -4 + irq_return: + INTERRUPT_RETURN +@@ -555,9 +658,7 @@ restore_all_enable_events: + scrit: /**** START OF CRITICAL REGION ****/ + __TEST_PENDING + jnz 14f # process more events if necessary... 
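The testw-to-testl conversions above avoid partial-register operations, and PTGS_TO_GS reloads %gs from its new pt_regs slot. The reshuffled 32-bit frame from the header comment (with %gs at 0x28 when it is saved) can be checked against a struct model; this is a sketch assuming 4-byte fields as on i386, not the kernel's actual pt_regs definition:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct pt_regs32 {
	uint32_t ebx, ecx, edx, esi, edi, ebp, eax;
	uint32_t ds, es, fs, gs;	/* gs saved iff !LAZY_GS */
	uint32_t orig_eax;
	uint32_t eip, cs, eflags, oldesp, oldss;
};

int main(void)
{
	printf("PT_GS       = %#zx\n", offsetof(struct pt_regs32, gs));
	printf("PT_ORIG_EAX = %#zx\n", offsetof(struct pt_regs32, orig_eax));
	printf("PT_EIP      = %#zx\n", offsetof(struct pt_regs32, eip));
	return 0;
}

Run on any host this prints 0x28, 0x2c and 0x30, matching the updated layout comment at the top of entry_32-xen.S.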
+- RESTORE_REGS +- addl $4, %esp +- CFI_ADJUST_CFA_OFFSET -4 ++ RESTORE_REGS 4 + 1: INTERRUPT_RETURN + .section __ex_table,"a" + .align 4 +@@ -571,9 +672,7 @@ ecrit: /**** END OF CRITICAL REGION *** + CFI_RESTORE_STATE + hypervisor_iret: + andl $~NMI_MASK, PT_EFLAGS(%esp) +- RESTORE_REGS +- addl $4, %esp +- CFI_ADJUST_CFA_OFFSET -4 ++ RESTORE_REGS 4 + jmp hypercall_page + (__HYPERVISOR_iret * 32) + #endif + CFI_ENDPROC +@@ -641,7 +740,7 @@ END(syscall_trace_entry) + # perform syscall exit tracing + ALIGN + syscall_exit_work: +- testb $_TIF_WORK_SYSCALL_EXIT, %cl ++ testl $_TIF_WORK_SYSCALL_EXIT, %ecx + jz work_pending + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call +@@ -665,29 +764,51 @@ syscall_badsys: + END(syscall_badsys) + CFI_ENDPROC + ++/* ++ * System calls that need a pt_regs pointer. ++ */ ++#define PTREGSCALL(name) \ ++ ALIGN; \ ++ptregs_##name: \ ++ leal 4(%esp),%eax; \ ++ jmp sys_##name; ++ ++PTREGSCALL(iopl) ++PTREGSCALL(fork) ++PTREGSCALL(clone) ++PTREGSCALL(vfork) ++PTREGSCALL(execve) ++PTREGSCALL(sigaltstack) ++PTREGSCALL(sigreturn) ++PTREGSCALL(rt_sigreturn) ++PTREGSCALL(vm86) ++PTREGSCALL(vm86old) ++ + #ifndef CONFIG_XEN +-#define FIXUP_ESPFIX_STACK \ +- /* since we are on a wrong stack, we cant make it a C code :( */ \ +- PER_CPU(gdt_page, %ebx); \ +- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ +- addl %esp, %eax; \ +- pushl $__KERNEL_DS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl %eax; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- lss (%esp), %esp; \ +- CFI_ADJUST_CFA_OFFSET -8; +-#define UNWIND_ESPFIX_STACK \ +- movl %ss, %eax; \ +- /* see if on espfix stack */ \ +- cmpw $__ESPFIX_SS, %ax; \ +- jne 27f; \ +- movl $__KERNEL_DS, %eax; \ +- movl %eax, %ds; \ +- movl %eax, %es; \ +- /* switch to normal stack */ \ +- FIXUP_ESPFIX_STACK; \ +-27:; ++.macro FIXUP_ESPFIX_STACK ++ /* since we are on a wrong stack, we cant make it a C code :( */ ++ PER_CPU(gdt_page, %ebx) ++ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) ++ addl %esp, %eax ++ pushl $__KERNEL_DS ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ lss (%esp), %esp ++ CFI_ADJUST_CFA_OFFSET -8 ++.endm ++.macro UNWIND_ESPFIX_STACK ++ movl %ss, %eax ++ /* see if on espfix stack */ ++ cmpw $__ESPFIX_SS, %ax ++ jne 27f ++ movl $__KERNEL_DS, %eax ++ movl %eax, %ds ++ movl %eax, %es ++ /* switch to normal stack */ ++ FIXUP_ESPFIX_STACK ++27: ++.endm + + /* + * Build the entry stubs and pointer table with some assembler magic. +@@ -743,7 +864,7 @@ common_interrupt: + ENDPROC(common_interrupt) + CFI_ENDPROC + +-#define BUILD_INTERRUPT(name, nr) \ ++#define BUILD_INTERRUPT3(name, nr, fn) \ + ENTRY(name) \ + RING0_INT_FRAME; \ + pushl $~(nr); \ +@@ -751,13 +872,15 @@ ENTRY(name) \ + SAVE_ALL; \ + TRACE_IRQS_OFF \ + movl %esp,%eax; \ +- call smp_##name; \ ++ call fn; \ + jmp ret_from_intr; \ + CFI_ENDPROC; \ + ENDPROC(name) + ++#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) ++ + /* The include is where all of the SMP etc. 
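PTREGSCALL() above stamps out one-line assembly stubs that hand each syscall a pt_regs pointer instead of plain register arguments. The stamping pattern itself translates to C directly; in this sketch the pt_regs layout and the sys_fork/sys_vfork bodies are placeholders:

#include <stdio.h>

struct pt_regs { long bx, cx, dx; };

static long sys_fork(struct pt_regs *regs)  { (void)regs; return 1; }
static long sys_vfork(struct pt_regs *regs) { (void)regs; return 2; }

#define PTREGSCALL(name)					\
	static long ptregs_##name(struct pt_regs *regs)		\
	{							\
		return sys_##name(regs);			\
	}

PTREGSCALL(fork)
PTREGSCALL(vfork)

int main(void)
{
	struct pt_regs regs = { 0 };

	printf("%ld %ld\n", ptregs_fork(&regs), ptregs_vfork(&regs));
	return 0;
}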
interrupts come from */ +-#include "entry_arch.h" ++#include + + #else + #define UNWIND_ESPFIX_STACK +@@ -844,8 +967,13 @@ critical_fixup_table: + .byte 7 # pop %ds + .byte 8 # pop %es + .byte 9,9 # pop %fs +- .byte 10,10,10 # add $4,%esp +- .byte 11 # iret ++#ifndef CONFIG_X86_32_LAZY_GS ++ .byte 10,10 # pop %gs ++ .byte 11,11,11 # add $4,%esp ++#else ++ .byte 10,10,10 # add $8,%esp ++#endif ++ .byte 12 # iret + .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS + .previous + +@@ -1203,7 +1331,7 @@ ENTRY(ia32pv_cstar_target) + .previous + SAVE_ALL + GET_THREAD_INFO(%ebp) +- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) ++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz cstar_trace_entry + cmpl $nr_syscalls,%eax + jae cstar_badsys +@@ -1323,7 +1451,10 @@ ENTRY(page_fault) + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: +- /* the function address is in %fs's slot on the stack */ ++ /* the function address is in %gs's slot on the stack */ ++ pushl %fs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET fs, 0*/ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ +@@ -1352,20 +1483,15 @@ error_code: + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld +- pushl %fs +- CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PERCPU), %ecx + movl %ecx, %fs + UNWIND_ESPFIX_STACK +- popl %ecx +- CFI_ADJUST_CFA_OFFSET -4 +- /*CFI_REGISTER es, ecx*/ +- movl PT_FS(%esp), %edi # get the function address ++ GS_TO_REG %ecx ++ movl PT_GS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart +- mov %ecx, PT_FS(%esp) +- /*CFI_REL_OFFSET fs, ES*/ ++ REG_TO_PTGS %ecx ++ SET_KERNEL_GS %ecx + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es +@@ -1390,20 +1516,21 @@ END(page_fault) + * by hand onto the new stack - while updating the return eip past + * the instruction that would have done it for sysenter. 
+ */ +-#define FIX_STACK(offset, ok, label) \ +- cmpw $__KERNEL_CS,4(%esp); \ +- jne ok; \ +-label: \ +- movl TSS_sysenter_sp0+offset(%esp),%esp; \ +- CFI_DEF_CFA esp, 0; \ +- CFI_UNDEFINED eip; \ +- pushfl; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl $__KERNEL_CS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl $sysenter_past_esp; \ +- CFI_ADJUST_CFA_OFFSET 4; \ ++.macro FIX_STACK offset ok label ++ cmpw $__KERNEL_CS, 4(%esp) ++ jne \ok ++\label: ++ movl TSS_sysenter_sp0 + \offset(%esp), %esp ++ CFI_DEF_CFA esp, 0 ++ CFI_UNDEFINED eip ++ pushfl ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $__KERNEL_CS ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $sysenter_past_esp ++ CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 ++.endm + #endif /* CONFIG_XEN */ + + ENTRY(debug) +@@ -1411,7 +1538,7 @@ ENTRY(debug) + #ifndef CONFIG_XEN + cmpl $ia32_sysenter_target,(%esp) + jne debug_stack_correct +- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) ++ FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn + debug_stack_correct: + #endif /* !CONFIG_XEN */ + pushl $-1 # mark this as an int +@@ -1471,7 +1598,7 @@ nmi_stack_correct: + + nmi_stack_fixup: + RING0_INT_FRAME +- FIX_STACK(12,nmi_stack_correct, 1) ++ FIX_STACK 12, nmi_stack_correct, 1 + jmp nmi_stack_correct + + nmi_debug_stack_check: +@@ -1482,7 +1609,7 @@ nmi_debug_stack_check: + jb nmi_stack_correct + cmpl $debug_esp_fix_insn,(%esp) + ja nmi_stack_correct +- FIX_STACK(24,nmi_stack_correct, 1) ++ FIX_STACK 24, nmi_stack_correct, 1 + jmp nmi_stack_correct + + nmi_espfix_stack: +@@ -1494,7 +1621,7 @@ nmi_espfix_stack: + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 +- addw $4, (%esp) ++ addl $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) +--- head-2010-01-18.orig/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:52:02.000000000 +0100 +@@ -51,10 +51,10 @@ + #include + #include + #include +-#include ++#include + #include + #include +-#include ++#include + #include + #include + +@@ -81,20 +81,17 @@ ENTRY(ftrace_caller) + movq 8(%rbp), %rsi + subq $MCOUNT_INSN_SIZE, %rdi + +-.globl ftrace_call +-ftrace_call: ++GLOBAL(ftrace_call) + call ftrace_stub + + MCOUNT_RESTORE_FRAME + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +-.globl ftrace_graph_call +-ftrace_graph_call: ++GLOBAL(ftrace_graph_call) + jmp ftrace_stub + #endif + +-.globl ftrace_stub +-ftrace_stub: ++GLOBAL(ftrace_stub) + retq + END(ftrace_caller) + +@@ -114,8 +111,7 @@ ENTRY(mcount) + jnz ftrace_graph_caller + #endif + +-.globl ftrace_stub +-ftrace_stub: ++GLOBAL(ftrace_stub) + retq + + trace: +@@ -152,9 +148,7 @@ ENTRY(ftrace_graph_caller) + retq + END(ftrace_graph_caller) + +- +-.globl return_to_handler +-return_to_handler: ++GLOBAL(return_to_handler) + subq $80, %rsp + + movq %rax, (%rsp) +@@ -192,6 +186,7 @@ return_to_handler: + ENTRY(native_usergs_sysret64) + swapgs + sysretq ++ENDPROC(native_usergs_sysret64) + #endif /* CONFIG_PARAVIRT */ + + +@@ -375,15 +370,15 @@ ENTRY(save_args) + je 1f + SWAPGS + /* +- * irqcount is used to check if a CPU is already on an interrupt stack ++ * irq_count is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count it is + * a little cheaper to use a separate counter in the PDA (short of + * moving irq_enter into assembly, which would be too much work) + */ +-1: incl %gs:pda_irqcount ++1: incl PER_CPU_VAR(irq_count) + jne 2f + popq_cfi %rax /* move return address... 
*/ +- mov %gs:pda_irqstackptr,%rsp ++ mov PER_CPU_VAR(irq_stack_ptr),%rsp + EMPTY_FRAME 0 + pushq_cfi %rbp /* backlink for unwinder */ + pushq_cfi %rax /* ... to the new stack */ +@@ -413,6 +408,7 @@ END(save_rest) + + #ifndef CONFIG_XEN + /* save complete stack frame */ ++ .pushsection .kprobes.text, "ax" + ENTRY(save_paranoid) + XCPT_FRAME 1 RDI+8 + cld +@@ -441,6 +437,7 @@ ENTRY(save_paranoid) + 1: ret + CFI_ENDPROC + END(save_paranoid) ++ .popsection + #endif + + /* +@@ -451,6 +448,8 @@ END(save_paranoid) + ENTRY(ret_from_fork) + DEFAULT_FRAME + ++ LOCK ; btr $TIF_FORK,TI_flags(%r8) ++ + push kernel_eflags(%rip) + CFI_ADJUST_CFA_OFFSET 8 + popf # reset kernel eflags +@@ -460,7 +459,6 @@ ENTRY(ret_from_fork) + + GET_THREAD_INFO(%rcx) + +- CFI_REMEMBER_STATE + RESTORE_REST + + testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? +@@ -472,7 +470,6 @@ ENTRY(ret_from_fork) + RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET + jmp ret_from_sys_call # go to the SYSRET fastpath + +- CFI_RESTORE_STATE + CFI_ENDPROC + END(ret_from_fork) + +@@ -648,9 +645,7 @@ tracesys: + * Syscall return path ending with IRET. + * Has correct top of stack, but partial stack frame. + */ +- .globl int_ret_from_sys_call +- .globl int_with_check +-int_ret_from_sys_call: ++GLOBAL(int_ret_from_sys_call) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + testb $3,CS-ARGOFFSET(%rsp) +@@ -661,7 +656,7 @@ int_ret_from_sys_call: + 1: + movl $_TIF_ALLWORK_MASK,%edi + /* edi: mask to check */ +-int_with_check: ++GLOBAL(int_with_check) + LOCKDEP_SYS_EXIT_IRQ + GET_THREAD_INFO(%rcx) + movl TI_flags(%rcx),%edx +@@ -883,10 +878,14 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ + irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt + #endif + ++#ifdef CONFIG_X86_UV + apicinterrupt UV_BAU_MESSAGE \ + uv_bau_message_intr1 uv_bau_message_interrupt ++#endif + apicinterrupt LOCAL_TIMER_VECTOR \ + apic_timer_interrupt smp_apic_timer_interrupt ++apicinterrupt GENERIC_INTERRUPT_VECTOR \ ++ generic_interrupt smp_generic_interrupt + + #ifdef CONFIG_SMP + apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ +@@ -1004,15 +1003,15 @@ ENTRY(do_hypervisor_callback) # do_hyp + movq %rdi, %rsp # we don't return, adjust the stack frame + CFI_ENDPROC + DEFAULT_FRAME +-11: incl %gs:pda_irqcount ++11: incl PER_CPU_VAR(irq_count) + movq %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp +- cmovzq %gs:pda_irqstackptr,%rsp ++ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp + pushq %rbp # backlink for old unwinder + call evtchn_do_upcall + popq %rsp + CFI_DEF_CFA_REGISTER rsp +- decl %gs:pda_irqcount ++ decl PER_CPU_VAR(irq_count) + jmp error_exit + CFI_ENDPROC + END(do_hypervisor_callback) +@@ -1203,14 +1202,14 @@ ENTRY(call_softirq) + CFI_REL_OFFSET rbp,0 + mov %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp +- incl %gs:pda_irqcount +- cmove %gs:pda_irqstackptr,%rsp ++ incl PER_CPU_VAR(irq_count) ++ cmove PER_CPU_VAR(irq_stack_ptr),%rsp + push %rbp # backlink for old unwinder + call __do_softirq + leaveq + CFI_DEF_CFA_REGISTER rsp + CFI_ADJUST_CFA_OFFSET -8 +- decl %gs:pda_irqcount ++ decl PER_CPU_VAR(irq_count) + ret + CFI_ENDPROC + END(call_softirq) +@@ -1256,7 +1255,10 @@ ENTRY(paranoid_exit) + paranoid_swapgs: + TRACE_IRQS_IRETQ 0 + SWAPGS_UNSAFE_STACK ++ RESTORE_ALL 8 ++ jmp irq_return + paranoid_restore: ++ TRACE_IRQS_IRETQ 0 + RESTORE_ALL 8 + jmp irq_return + paranoid_userspace: +--- head-2010-01-18.orig/arch/x86/kernel/head-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -2,6 +2,7 @@ + #include + + #include ++#ifndef 
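The %gs:pda_irqcount to PER_CPU_VAR(irq_count) conversions above keep the original protocol: the counter idles at -1 and only the increment that lands on zero switches to the IRQ stack, so nested interrupts stay on the stack they already occupy. A C model of that nesting rule:

#include <stdio.h>

static int irq_count = -1;		/* per-CPU in the real kernel */

static int irq_enter_stack(void)
{
	/* "incl ; jne" -- only the transition to 0 takes the new stack */
	return ++irq_count == 0;
}

static void irq_exit_stack(void)
{
	irq_count--;
}

int main(void)
{
	printf("outer switches stacks:  %d\n", irq_enter_stack());	/* 1 */
	printf("nested switches stacks: %d\n", irq_enter_stack());	/* 0 */
	irq_exit_stack();
	irq_exit_stack();
	return 0;
}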
CONFIG_XEN + #include + + #define BIOS_LOWMEM_KILOBYTES 0x413 +@@ -18,7 +19,6 @@ + */ + void __init reserve_ebda_region(void) + { +-#ifndef CONFIG_XEN + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ +@@ -53,5 +53,173 @@ void __init reserve_ebda_region(void) + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); ++} ++#else /* CONFIG_XEN */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern void hypervisor_callback(void); ++extern void failsafe_callback(void); ++extern void nmi(void); ++ ++#ifdef CONFIG_X86_64 ++#include ++#define CALLBACK_ADDR(fn) ((unsigned long)(fn)) ++#else ++#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) } ++#endif ++ ++unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; ++EXPORT_SYMBOL(machine_to_phys_mapping); ++unsigned int machine_to_phys_order; ++EXPORT_SYMBOL(machine_to_phys_order); ++ ++void __init xen_start_kernel(void) ++{ ++ unsigned int i; ++ struct xen_machphys_mapping mapping; ++ unsigned long machine_to_phys_nr_ents; ++#ifdef CONFIG_X86_32 ++ struct xen_platform_parameters pp; ++ extern pte_t swapper_pg_fixmap[PTRS_PER_PTE]; ++ unsigned long addr; ++#endif ++ ++ xen_setup_features(); ++ ++ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { ++ machine_to_phys_mapping = (unsigned long *)mapping.v_start; ++ machine_to_phys_nr_ents = mapping.max_mfn + 1; ++ } else ++ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; ++ while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) ++ machine_to_phys_order++; ++ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) ++ phys_to_machine_mapping = ++ (unsigned long *)xen_start_info->mfn_list; ++ ++ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, ++ VMASST_TYPE_writable_pagetables)); ++ ++ reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE), ++ __pa(xen_start_info->pt_base) ++ + (xen_start_info->nr_pt_frames << PAGE_SHIFT), ++ "Xen provided"); ++ ++#ifdef CONFIG_X86_32 ++ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, ++ VMASST_TYPE_4gb_segments)); ++ ++ init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base; ++ ++ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) { ++ hypervisor_virt_start = pp.virt_start; ++ reserve_top_address(0UL - pp.virt_start); ++ } ++ ++ BUG_ON(pte_index(hypervisor_virt_start)); ++ ++ /* Do an early initialization of the fixmap area */ ++ make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables); ++ addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); ++ set_pmd(pmd_offset(pud_offset(swapper_pg_dir + pgd_index(addr), ++ addr), ++ addr), ++ __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE)); ++#else ++ check_efer(); ++ xen_init_pt(); ++#endif ++ ++#define __FIXADDR_TOP (-PAGE_SIZE) ++#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) ++#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \ ++ != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE))) ++ FIX_BUG_ON(SHARED_INFO); ++ FIX_BUG_ON(ISAMAP_BEGIN); ++ FIX_BUG_ON(ISAMAP_END); ++#undef pmd_index ++#undef __FIXADDR_TOP ++ ++ /* Switch to the real shared_info page, and clear the dummy page. */ ++ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); ++ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); ++ memset(empty_zero_page, 0, sizeof(empty_zero_page)); ++ ++ /* Set up mapping of lowest 1MB of physical memory. 
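++ * (For reference, a sketch of the generic fixmap arithmetic that the
++ * set_fixmap() calls below resolve through; the real helper lives in
++ * asm/fixmap.h:
++ *
++ *	#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
++ *
++ * so successive FIX_ISAMAP_BEGIN - i slots map successive pages, with
++ * dom0 getting the real ISA range and other domains the zero page.)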
*/ ++ for (i = 0; i < NR_FIX_ISAMAPS; i++) ++ if (is_initial_xendomain()) ++ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); ++ else ++ __set_fixmap(FIX_ISAMAP_BEGIN - i, ++ virt_to_machine(empty_zero_page), ++ PAGE_KERNEL_RO); ++ ++} ++ ++void __init machine_specific_arch_setup(void) ++{ ++ int ret; ++ static const struct callback_register __initconst event = { ++ .type = CALLBACKTYPE_event, ++ .address = CALLBACK_ADDR(hypervisor_callback) ++ }; ++ static const struct callback_register __initconst failsafe = { ++ .type = CALLBACKTYPE_failsafe, ++ .address = CALLBACK_ADDR(failsafe_callback) ++ }; ++#ifdef CONFIG_X86_64 ++ static const struct callback_register __initconst syscall = { ++ .type = CALLBACKTYPE_syscall, ++ .address = CALLBACK_ADDR(system_call) ++ }; ++#endif ++#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32) ++ static const struct callback_register __initconst nmi_cb = { ++ .type = CALLBACKTYPE_nmi, ++ .address = CALLBACK_ADDR(nmi) ++ }; ++#endif ++ ++ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); ++ if (ret == 0) ++ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); ++#ifdef CONFIG_X86_64 ++ if (ret == 0) ++ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); ++#endif ++#if CONFIG_XEN_COMPAT <= 0x030002 ++#ifdef CONFIG_X86_32 ++ if (ret == -ENOSYS) ++ ret = HYPERVISOR_set_callbacks( ++ event.address.cs, event.address.eip, ++ failsafe.address.cs, failsafe.address.eip); ++#else ++ ret = HYPERVISOR_set_callbacks( ++ event.address, ++ failsafe.address, ++ syscall.address); ++#endif ++#endif ++ BUG_ON(ret); ++ ++#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32) ++ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb); ++#if CONFIG_XEN_COMPAT <= 0x030002 ++ if (ret == -ENOSYS) { ++ static struct xennmi_callback __initdata cb = { ++ .handler_address = (unsigned long)nmi ++ }; ++ ++ HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); ++ } ++#endif + #endif + } ++#endif /* CONFIG_XEN */ +--- head-2010-01-18.orig/arch/x86/kernel/head32-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -9,6 +9,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -18,7 +19,7 @@ void __init i386_start_kernel(void) + { + reserve_trampoline_memory(); + +- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); ++ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + + #ifndef CONFIG_XEN + #ifdef CONFIG_BLK_DEV_INITRD +@@ -30,14 +31,8 @@ void __init i386_start_kernel(void) + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } + #endif +- reserve_early(init_pg_tables_start, init_pg_tables_end, +- "INIT_PG_TABLE"); ++ reserve_ebda_region(); + #else +- reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE), +- __pa(xen_start_info->pt_base) +- + (xen_start_info->nr_pt_frames << PAGE_SHIFT), +- "Xen provided"); +- + { + int max_cmdline; + +@@ -46,9 +41,9 @@ void __init i386_start_kernel(void) + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline); + boot_command_line[max_cmdline-1] = '\0'; + } +-#endif + +- reserve_ebda_region(); ++ xen_start_kernel(); ++#endif + + /* + * At this point everything still needed from the boot loader +--- head-2010-01-18.orig/arch/x86/kernel/head64-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head64-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -7,9 +7,6 @@ + * Modified for Xen. 
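+ *
+ * (With this series the Xen-specific early setup is consolidated into
+ * xen_start_kernel() in head-xen.c above; the 64-bit entry path then
+ * becomes, roughly:
+ *
+ *	x86_64_start_kernel(real_mode_data)
+ *		xen_start_kernel()	machphys map, features, reservations
+ *		xen_switch_pt()
+ *		x86_64_start_reservations(real_mode_data)
+ *
+ * with the latter handing off to start_kernel().)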
+ */ + +-/* PDA is not ready to be used until the end of x86_64_start_kernel(). */ +-#define arch_use_lazy_mmu_mode() false +- + #include + #include + #include +@@ -18,12 +15,12 @@ + #include + #include + #include +-#include + + #include + #include + #include + #include ++#include + #include + #include + #include +@@ -33,27 +30,6 @@ + #include + #include + +-/* boot cpu pda */ +-static struct x8664_pda _boot_cpu_pda; +- +-#ifdef CONFIG_SMP +-/* +- * We install an empty cpu_pda pointer table to indicate to early users +- * (numa_set_node) that the cpu_pda pointer table for cpus other than +- * the boot cpu is not yet setup. +- */ +-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; +-#else +-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; +-#endif +- +-void __init x86_64_init_pda(void) +-{ +- _cpu_pda = __cpu_pda; +- cpu_pda(0) = &_boot_cpu_pda; +- pda_init(0); +-} +- + #ifndef CONFIG_XEN + static void __init zap_identity_mappings(void) + { +@@ -92,16 +68,9 @@ static void __init copy_bootdata(char *r + } + + #include +-unsigned long *machine_to_phys_mapping; +-EXPORT_SYMBOL(machine_to_phys_mapping); +-unsigned int machine_to_phys_order; +-EXPORT_SYMBOL(machine_to_phys_order); + + void __init x86_64_start_kernel(char * real_mode_data) + { +- struct xen_machphys_mapping mapping; +- unsigned long machine_to_phys_nr_ents; +- + /* + * Build-time sanity checks on the kernel image and module + * area mappings. (these are purely build-time and produce no code) +@@ -116,21 +85,8 @@ void __init x86_64_start_kernel(char * r + (__START_KERNEL & PGDIR_MASK))); + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); + +- xen_setup_features(); +- + xen_start_info = (struct start_info *)real_mode_data; +- if (!xen_feature(XENFEAT_auto_translated_physmap)) +- phys_to_machine_mapping = +- (unsigned long *)xen_start_info->mfn_list; +- +- machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START; +- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; +- if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { +- machine_to_phys_mapping = (unsigned long *)mapping.v_start; +- machine_to_phys_nr_ents = mapping.max_mfn + 1; +- } +- while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) +- machine_to_phys_order++; ++ xen_start_kernel(); + + #ifndef CONFIG_XEN + /* clear bss before set_intr_gate with early_idt_handler */ +@@ -155,7 +111,7 @@ void __init x86_64_start_kernel(char * r + if (console_loglevel == 10) + early_printk("Kernel alive\n"); + +- x86_64_init_pda(); ++ xen_switch_pt(); + + x86_64_start_reservations(real_mode_data); + } +@@ -166,12 +122,7 @@ void __init x86_64_start_reservations(ch + + reserve_trampoline_memory(); + +- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); +- +- reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE), +- __pa(xen_start_info->pt_base) +- + (xen_start_info->nr_pt_frames << PAGE_SHIFT), +- "Xen provided"); ++ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + + /* + * At this point everything still needed from the boot loader +--- head-2010-01-18.orig/arch/x86/kernel/head_32-xen.S 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head_32-xen.S 2009-11-06 10:52:02.000000000 +0100 +@@ -6,12 +6,14 @@ + #include + #include + #include +-#include ++#include ++#include + #include + #include + #include + #include + #include ++#include + #include + #include + +@@ -38,9 +40,6 @@ ENTRY(startup_32) + /* Set up the stack pointer */ + movl 
$(init_thread_union+THREAD_SIZE),%esp + +- movl %ss,%eax +- movl %eax,%fs # gets reset once there's real percpu +- + /* get vendor info */ + xorl %eax,%eax # call CPUID with 0 -> return vendor ID + XEN_CPUID +@@ -63,7 +62,49 @@ ENTRY(startup_32) + + movb $1,X86_HARD_MATH + +- xorl %eax,%eax # Clear GS ++#ifdef CONFIG_CC_STACKPROTECTOR ++ /* ++ * The linker can't handle this by relocation. Manually set ++ * base address in stack canary segment descriptor. ++ */ ++ movl $per_cpu__gdt_page,%eax ++ movl $per_cpu__stack_canary,%ecx ++ subl $20, %ecx ++ movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) ++ shrl $16, %ecx ++ movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) ++ movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) ++#endif ++ ++ # %esi still points to start_info, and no registers ++ # need to be preserved. ++ ++ movl XEN_START_mfn_list(%esi), %ebx ++ movl $(per_cpu__gdt_page - __PAGE_OFFSET), %eax ++ shrl $PAGE_SHIFT, %eax ++ movl (%ebx,%eax,4), %ecx ++ pushl %ecx # frame number for set_gdt below ++ ++ xorl %esi, %esi ++ xorl %edx, %edx ++ shldl $PAGE_SHIFT, %ecx, %edx ++ shll $PAGE_SHIFT, %ecx ++ orl $_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY, %ecx ++ movl $per_cpu__gdt_page, %ebx ++ movl $__HYPERVISOR_update_va_mapping, %eax ++ int $0x82 ++ ++ movl $(PAGE_SIZE_asm / 8), %ecx ++ movl %esp, %ebx ++ movl $__HYPERVISOR_set_gdt, %eax ++ int $0x82 ++ ++ popl %ecx ++ ++ movl $(__KERNEL_PERCPU), %eax ++ movl %eax,%fs # set this cpu's percpu ++ ++ movl $(__KERNEL_STACK_CANARY),%eax + movl %eax,%gs + + cld # gcc2 wants the direction flag cleared at all times +--- head-2010-01-18.orig/arch/x86/kernel/head_64-xen.S 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head_64-xen.S 2009-11-06 10:52:02.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + + .section .text.head, "ax", @progbits +@@ -32,11 +33,23 @@ startup_64: + /* rsi is pointer to startup info structure. + pass it to C */ + movq %rsi,%rdi ++ ++ /* Set up %gs. ++ * ++ * The base of %gs always points to the bottom of the irqstack ++ * union. If the stack protector canary is enabled, it is ++ * located at %gs:40. Note that, on SMP, the boot cpu uses ++ * init data section till per cpu areas are set up. ++ */ ++ movl $MSR_GS_BASE,%ecx ++ movq $INIT_PER_CPU_VAR(irq_stack_union),%rax ++ movq %rax,%rdx ++ shrq $32,%rdx ++ wrmsr ++ + pushq $0 # fake return address + jmp x86_64_start_kernel + +-.balign PAGE_SIZE +- + #define NEXT_PAGE(name) \ + .balign PAGE_SIZE; \ + phys_##name = . 
- .text.head; \ +--- head-2010-01-18.orig/arch/x86/kernel/ioport-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/ioport-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -91,9 +91,8 @@ static int do_iopl(unsigned int level, s + } + + #ifdef CONFIG_X86_32 +-asmlinkage long sys_iopl(unsigned long regsp) ++long sys_iopl(struct pt_regs *regs) + { +- struct pt_regs *regs = (struct pt_regs *)®sp; + unsigned int level = regs->bx; + #else + asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) +--- head-2010-01-18.orig/arch/x86/kernel/irq-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq-xen.c 2009-12-18 09:57:23.000000000 +0100 +@@ -6,13 +6,20 @@ + #include + #include + #include ++#include + + #include + #include + #include ++#include + + atomic_t irq_err_count; + ++#ifndef CONFIG_XEN ++/* Function pointer for generic interrupt vector handling */ ++void (*generic_interrupt_extension)(void) = NULL; ++#endif ++ + /* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. +@@ -36,11 +43,7 @@ void ack_bad_irq(unsigned int irq) + #endif + } + +-#ifdef CONFIG_X86_32 +-# define irq_stats(x) (&per_cpu(irq_stat, x)) +-#else +-# define irq_stats(x) cpu_pda(x) +-#endif ++#define irq_stats(x) (&per_cpu(irq_stat, x)) + /* + * /proc/interrupts printing: + */ +@@ -57,6 +60,19 @@ static int show_other_interrupts(struct + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); ++ ++ seq_printf(p, "%*s: ", prec, "SPU"); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); ++ seq_printf(p, " Spurious interrupts\n"); ++#endif ++#ifndef CONFIG_XEN ++ if (generic_interrupt_extension) { ++ seq_printf(p, "%*s: ", prec, "PLT"); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); ++ seq_printf(p, " Platform interrupts\n"); ++ } + #endif + #ifdef CONFIG_SMP + seq_printf(p, "%*s: ", prec, "RES"); +@@ -86,12 +102,6 @@ static int show_other_interrupts(struct + seq_printf(p, " Threshold APIC interrupts\n"); + # endif + #endif +-#ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "%*s: ", prec, "SPU"); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); +- seq_printf(p, " Spurious interrupts\n"); +-#endif + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + #if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); +@@ -128,23 +138,15 @@ int show_interrupts(struct seq_file *p, + return 0; + + spin_lock_irqsave(&desc->lock, flags); +-#ifndef CONFIG_SMP +- any_count = kstat_irqs(i); +-#else + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); +-#endif + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%*d: ", prec, i); +-#ifndef CONFIG_SMP +- seq_printf(p, "%10u ", kstat_irqs(i)); +-#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); +-#endif + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); + +@@ -169,6 +171,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) + + #ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->apic_timer_irqs; ++ sum += irq_stats(cpu)->irq_spurious_count; ++#endif ++#ifndef CONFIG_XEN ++ if (generic_interrupt_extension) ++ sum += irq_stats(cpu)->generic_irqs; + #endif + #ifdef CONFIG_SMP + sum += irq_stats(cpu)->irq_resched_count; +@@ -183,9 
+190,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) + sum += irq_stats(cpu)->irq_threshold_count; + #endif + #endif +-#ifdef CONFIG_X86_LOCAL_APIC +- sum += irq_stats(cpu)->irq_spurious_count; +-#endif + return sum; + } + +@@ -198,3 +202,64 @@ u64 arch_irq_stat(void) + #endif + return sum; + } ++ ++ ++#ifndef CONFIG_XEN ++/* ++ * do_IRQ handles all normal device IRQ's (the special ++ * SMP cross-CPU interrupts have their own specific ++ * handlers). ++ */ ++unsigned int __irq_entry do_IRQ(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ /* high bit used in ret_from_ code */ ++ unsigned vector = ~regs->orig_ax; ++ unsigned irq; ++ ++ exit_idle(); ++ irq_enter(); ++ ++ irq = __get_cpu_var(vector_irq)[vector]; ++ ++ if (!handle_irq(irq, regs)) { ++#ifdef CONFIG_X86_64 ++ if (!disable_apic) ++ ack_APIC_irq(); ++#endif ++ ++ if (printk_ratelimit()) ++ printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", ++ __func__, smp_processor_id(), vector, irq); ++ } ++ ++ irq_exit(); ++ ++ set_irq_regs(old_regs); ++ return 1; ++} ++ ++/* ++ * Handler for GENERIC_INTERRUPT_VECTOR. ++ */ ++void smp_generic_interrupt(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ ack_APIC_irq(); ++ ++ exit_idle(); ++ ++ irq_enter(); ++ ++ inc_irq_stat(generic_irqs); ++ ++ if (generic_interrupt_extension) ++ generic_interrupt_extension(); ++ ++ irq_exit(); ++ ++ set_irq_regs(old_regs); ++} ++#endif +--- head-2010-01-18.orig/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + +@@ -55,13 +56,13 @@ static inline void print_stack_overflow( + union irq_ctx { + struct thread_info tinfo; + u32 stack[THREAD_SIZE/sizeof(u32)]; +-}; ++} __attribute__((aligned(PAGE_SIZE))); + +-static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; +-static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; ++static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); ++static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); + +-static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; +-static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; ++static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); ++static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); + + static void call_on_stack(void *func, void *stack) + { +@@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struc + u32 *isp, arg1, arg2; + + curctx = (union irq_ctx *) current_thread_info(); +- irqctx = hardirq_ctx[smp_processor_id()]; ++ irqctx = __get_cpu_var(hardirq_ctx); + + /* + * this is where we switch to the IRQ stack. 
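+ * (A note on the conversion pattern in this file, and across the
+ * series: NR_CPUS-sized arrays become per-CPU variables, e.g.
+ *
+ *	static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
+ *
+ *	irqctx = __get_cpu_var(hardirq_ctx);	current CPU
+ *	irqctx = per_cpu(hardirq_ctx, cpu);	explicit CPU
+ *
+ * which is also what lets the entry code address the same data as
+ * PER_CPU_VAR(...) through the %fs/%gs segment base.)
+ *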
However, if we are +@@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu) + { + union irq_ctx *irqctx; + +- if (hardirq_ctx[cpu]) ++ if (per_cpu(hardirq_ctx, cpu)) + return; + +- irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; ++ irqctx = &per_cpu(hardirq_stack, cpu); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + +- hardirq_ctx[cpu] = irqctx; ++ per_cpu(hardirq_ctx, cpu) = irqctx; + +- irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; ++ irqctx = &per_cpu(softirq_stack, cpu); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = 0; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + +- softirq_ctx[cpu] = irqctx; ++ per_cpu(softirq_ctx, cpu) = irqctx; + + printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", +- cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); ++ cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); + } + + void irq_ctx_exit(int cpu) + { +- hardirq_ctx[cpu] = NULL; ++ per_cpu(hardirq_ctx, cpu) = NULL; + } + + asmlinkage void do_softirq(void) +@@ -169,7 +170,7 @@ asmlinkage void do_softirq(void) + + if (local_softirq_pending()) { + curctx = current_thread_info(); +- irqctx = softirq_ctx[smp_processor_id()]; ++ irqctx = __get_cpu_var(softirq_ctx); + irqctx->tinfo.task = curctx->task; + irqctx->tinfo.previous_esp = current_stack_pointer; + +@@ -191,30 +192,16 @@ static inline int + execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } + #endif + +-/* +- * do_IRQ handles all normal device IRQ's (the special +- * SMP cross-CPU interrupts have their own specific +- * handlers). +- */ +-unsigned int do_IRQ(struct pt_regs *regs) ++bool handle_irq(unsigned irq, struct pt_regs *regs) + { +- struct pt_regs *old_regs; +- /* high bit used in ret_from_ code */ +- int overflow; +- unsigned irq = ~regs->orig_ax; + struct irq_desc *desc; +- +- old_regs = set_irq_regs(regs); +- /*irq_enter();*/ ++ int overflow; + + overflow = check_stack_overflow(); + + desc = irq_to_desc(irq); +- if (unlikely(!desc)) { +- printk(KERN_EMERG "%s: cannot handle IRQ %d cpu %d\n", +- __func__, irq, smp_processor_id()); +- BUG(); +- } ++ if (unlikely(!desc)) ++ return false; + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) +@@ -222,9 +209,7 @@ unsigned int do_IRQ(struct pt_regs *regs + desc->handle_irq(irq, desc); + } + +- /*irq_exit();*/ +- set_irq_regs(old_regs); +- return 1; ++ return true; + } + + #ifdef CONFIG_HOTPLUG_CPU +@@ -244,7 +229,7 @@ void fixup_irqs(void) + if (irq == 2) + continue; + +- affinity = &desc->affinity; ++ affinity = desc->affinity; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + /*printk("Breaking affinity for irq %i\n", irq);*/ + affinity = cpu_all_mask; +--- head-2010-01-18.orig/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -18,6 +18,13 @@ + #include + #include + #include ++#include ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); ++EXPORT_PER_CPU_SYMBOL(irq_stat); ++ ++DEFINE_PER_CPU(struct pt_regs *, irq_regs); ++EXPORT_PER_CPU_SYMBOL(irq_regs); + + /* + * Probabilistic stack overflow check: +@@ -41,41 +48,18 @@ static inline void stack_overflow_check( + #endif + } + +-/* +- * do_IRQ handles all normal device IRQ's (the special +- * SMP cross-CPU interrupts 
have their own specific +- * handlers). +- */ +-asmlinkage unsigned int /*__irq_entry*/ do_IRQ(struct pt_regs *regs) ++bool handle_irq(unsigned irq, struct pt_regs *regs) + { +- struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + +- /* high bit used in ret_from_ code */ +- unsigned irq = ~regs->orig_ax; +- +- /*exit_idle();*/ +- /*irq_enter();*/ +- + stack_overflow_check(regs); + + desc = irq_to_desc(irq); +- if (likely(desc)) +- generic_handle_irq_desc(irq, desc); +- else { +-#ifndef CONFIG_XEN +- if (!disable_apic) +- ack_APIC_irq(); +-#endif +- if (printk_ratelimit()) +- printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", +- __func__, smp_processor_id(), irq); +- } +- +- /*irq_exit();*/ ++ if (unlikely(!desc)) ++ return false; + +- set_irq_regs(old_regs); +- return 1; ++ generic_handle_irq_desc(irq, desc); ++ return true; + } + + #ifdef CONFIG_HOTPLUG_CPU +@@ -99,7 +83,7 @@ void fixup_irqs(void) + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + +- affinity = &desc->affinity; ++ affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); +--- head-2010-01-18.orig/arch/x86/kernel/machine_kexec_64.c 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/machine_kexec_64.c 2009-11-06 10:52:02.000000000 +0100 +@@ -90,13 +90,8 @@ void machine_kexec_setup_load_arg(xen_ke + xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); + xki->page_list[PA_TABLE_PAGE] = __ma(table_page); + +- xki->page_list[PA_PGD] = __ma(kexec_pgd); +- xki->page_list[PA_PUD_0] = __ma(kexec_pud0); +- xki->page_list[PA_PUD_1] = __ma(kexec_pud1); +- xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); +- xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); +- xki->page_list[PA_PTE_0] = __ma(kexec_pte0); +- xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++ if (image->type == KEXEC_TYPE_DEFAULT) ++ xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page); + } + + int __init machine_kexec_setup_resources(struct resource *hypervisor, +@@ -159,7 +154,7 @@ static int init_one_level2_page(struct k + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) +- set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); ++ x_set_pmd(pmd, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC)); + result = 0; + out: + return result; +--- head-2010-01-18.orig/arch/x86/kernel/microcode_core-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/microcode_core-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -21,28 +21,28 @@ + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ ++#include + #include +-#include +-#include +-#include ++#include ++#include + #include ++#include + #include +-#include +-#include ++#include + #include +-#include +-#include +-#include +-#include ++#include ++#include + #include ++#include ++#include ++#include + #include +-#include +-#include ++#include ++#include + +-#include +-#include +-#include + #include ++#include ++#include + + MODULE_DESCRIPTION("Microcode Update Driver"); + MODULE_AUTHOR("Tigran Aivazian "); +@@ -51,7 +51,7 @@ MODULE_LICENSE("GPL"); + static int verbose; + module_param(verbose, int, 0644); + +-#define MICROCODE_VERSION "2.00-xen" ++#define MICROCODE_VERSION "2.00-xen" + + /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ + static DEFINE_MUTEX(microcode_mutex); +@@ -143,12 +143,12 @@ static void microcode_dev_exit(void) + + MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); + #else +-#define microcode_dev_init() 0 +-#define microcode_dev_exit() do { } while (0) ++#define microcode_dev_init() 0 ++#define microcode_dev_exit() do { } while (0) + #endif + + /* fake device for request_firmware */ +-static struct platform_device *microcode_pdev; ++static struct platform_device *microcode_pdev; + + static int request_microcode(const char *name) + { +--- head-2010-01-18.orig/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -3,7 +3,7 @@ + * compliant MP-table parsing routines. + * + * (c) 1995 Alan Cox, Building #3 +- * (c) 1998, 1999, 2000 Ingo Molnar ++ * (c) 1998, 1999, 2000, 2009 Ingo Molnar + * (c) 2008 Alexey Starikovskiy + */ + +@@ -29,11 +29,7 @@ + #include + #include + +-#include +-#ifdef CONFIG_X86_32 +-#include +-#include +-#endif ++#include + + static void *_bus_to_virt(unsigned long ma) + { +@@ -123,9 +119,6 @@ static void __init MP_bus_info(struct mp + } else + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); + } +-#endif +- +-#ifdef CONFIG_X86_IO_APIC + + static int bad_ioapic(unsigned long address) + { +@@ -153,11 +146,11 @@ static void __init MP_ioapic_info(struct + if (bad_ioapic(m->apicaddr)) + return; + +- mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; +- mp_ioapics[nr_ioapics].mp_apicid = m->apicid; +- mp_ioapics[nr_ioapics].mp_type = m->type; +- mp_ioapics[nr_ioapics].mp_apicver = m->apicver; +- mp_ioapics[nr_ioapics].mp_flags = m->flags; ++ mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; ++ mp_ioapics[nr_ioapics].apicid = m->apicid; ++ mp_ioapics[nr_ioapics].type = m->type; ++ mp_ioapics[nr_ioapics].apicver = m->apicver; ++ mp_ioapics[nr_ioapics].flags = m->flags; + nr_ioapics++; + } + +@@ -169,55 +162,55 @@ static void print_MP_intsrc_info(struct + m->srcbusirq, m->dstapic, m->dstirq); + } + +-static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) ++static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) + { + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", +- mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, +- (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, +- mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); ++ mp_irq->irqtype, mp_irq->irqflag & 3, ++ (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, ++ mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); + } + + static void __init assign_to_mp_irq(struct mpc_intsrc *m, +- struct mp_config_intsrc *mp_irq) ++ struct mpc_intsrc *mp_irq) + { +- mp_irq->mp_dstapic = m->dstapic; +- mp_irq->mp_type = m->type; +- mp_irq->mp_irqtype = 
m->irqtype; +- mp_irq->mp_irqflag = m->irqflag; +- mp_irq->mp_srcbus = m->srcbus; +- mp_irq->mp_srcbusirq = m->srcbusirq; +- mp_irq->mp_dstirq = m->dstirq; ++ mp_irq->dstapic = m->dstapic; ++ mp_irq->type = m->type; ++ mp_irq->irqtype = m->irqtype; ++ mp_irq->irqflag = m->irqflag; ++ mp_irq->srcbus = m->srcbus; ++ mp_irq->srcbusirq = m->srcbusirq; ++ mp_irq->dstirq = m->dstirq; + } + +-static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, ++static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) + { +- m->dstapic = mp_irq->mp_dstapic; +- m->type = mp_irq->mp_type; +- m->irqtype = mp_irq->mp_irqtype; +- m->irqflag = mp_irq->mp_irqflag; +- m->srcbus = mp_irq->mp_srcbus; +- m->srcbusirq = mp_irq->mp_srcbusirq; +- m->dstirq = mp_irq->mp_dstirq; ++ m->dstapic = mp_irq->dstapic; ++ m->type = mp_irq->type; ++ m->irqtype = mp_irq->irqtype; ++ m->irqflag = mp_irq->irqflag; ++ m->srcbus = mp_irq->srcbus; ++ m->srcbusirq = mp_irq->srcbusirq; ++ m->dstirq = mp_irq->dstirq; + } + +-static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, ++static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) + { +- if (mp_irq->mp_dstapic != m->dstapic) ++ if (mp_irq->dstapic != m->dstapic) + return 1; +- if (mp_irq->mp_type != m->type) ++ if (mp_irq->type != m->type) + return 2; +- if (mp_irq->mp_irqtype != m->irqtype) ++ if (mp_irq->irqtype != m->irqtype) + return 3; +- if (mp_irq->mp_irqflag != m->irqflag) ++ if (mp_irq->irqflag != m->irqflag) + return 4; +- if (mp_irq->mp_srcbus != m->srcbus) ++ if (mp_irq->srcbus != m->srcbus) + return 5; +- if (mp_irq->mp_srcbusirq != m->srcbusirq) ++ if (mp_irq->srcbusirq != m->srcbusirq) + return 6; +- if (mp_irq->mp_dstirq != m->dstirq) ++ if (mp_irq->dstirq != m->dstirq) + return 7; + + return 0; +@@ -238,8 +231,12 @@ static void __init MP_intsrc_info(struct + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); + } ++#else /* CONFIG_X86_IO_APIC */ ++static inline void __init MP_bus_info(struct mpc_bus *m) {} ++static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} ++static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {} ++#endif /* CONFIG_X86_IO_APIC */ + +-#endif + + static void __init MP_lintsrc_info(struct mpc_lintsrc *m) + { +@@ -289,6 +286,20 @@ static int __init smp_check_mpc(struct m + return 1; + } + ++static void skip_entry(unsigned char **ptr, int *count, int size) ++{ ++ *ptr += size; ++ *count += size; ++} ++ ++static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) ++{ ++ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" ++ "type %x\n", *mpt); ++ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, ++ 1, mpc, mpc->length, 1); ++} ++ + static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) + { + char str[16]; +@@ -300,17 +311,8 @@ static int __init smp_read_mpc(struct mp + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +-#ifdef CONFIG_X86_32 +- /* +- * need to make sure summit and es7000's mps_oem_check is safe to be +- * called early via genericarch 's mps_oem_check +- */ +- if (early) { +-#ifdef CONFIG_X86_NUMAQ +- numaq_mps_oem_check(mpc, oem, str); +-#endif +- } else +- mps_oem_check(mpc, oem, str); ++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) ++ generic_mps_oem_check(mpc, oem, str); + #endif + /* save the local APIC address, it might be non-default */ + if (!acpi_lapic) +@@ -333,61 +335,30 @@ static int __init smp_read_mpc(struct mp 
+ while (count < mpc->length) { + switch (*mpt) { + case MP_PROCESSOR: +- { +- struct mpc_cpu *m = (struct mpc_cpu *)mpt; +- /* ACPI may have already provided this data */ +- if (!acpi_lapic) +- MP_processor_info(m); +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ /* ACPI may have already provided this data */ ++ if (!acpi_lapic) ++ MP_processor_info((struct mpc_cpu *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); ++ break; + case MP_BUS: +- { +- struct mpc_bus *m = (struct mpc_bus *)mpt; +-#ifdef CONFIG_X86_IO_APIC +- MP_bus_info(m); +-#endif +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ MP_bus_info((struct mpc_bus *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_bus)); ++ break; + case MP_IOAPIC: +- { +-#ifdef CONFIG_X86_IO_APIC +- struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; +- MP_ioapic_info(m); +-#endif +- mpt += sizeof(struct mpc_ioapic); +- count += sizeof(struct mpc_ioapic); +- break; +- } ++ MP_ioapic_info((struct mpc_ioapic *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); ++ break; + case MP_INTSRC: +- { +-#ifdef CONFIG_X86_IO_APIC +- struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; +- +- MP_intsrc_info(m); +-#endif +- mpt += sizeof(struct mpc_intsrc); +- count += sizeof(struct mpc_intsrc); +- break; +- } ++ MP_intsrc_info((struct mpc_intsrc *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); ++ break; + case MP_LINTSRC: +- { +- struct mpc_lintsrc *m = +- (struct mpc_lintsrc *)mpt; +- MP_lintsrc_info(m); +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ MP_lintsrc_info((struct mpc_lintsrc *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); ++ break; + default: + /* wrong mptable */ +- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); +- printk(KERN_ERR "type %x\n", *mpt); +- print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, +- 1, mpc, mpc->length, 1); ++ smp_dump_mptable(mpc, mpt); + count = mpc->length; + break; + } +@@ -395,13 +366,13 @@ static int __init smp_read_mpc(struct mp + (*x86_quirks->mpc_record)++; + } + +-#ifdef CONFIG_X86_GENERICARCH +- generic_bigsmp_probe(); ++#ifdef CONFIG_X86_BIGSMP ++ generic_bigsmp_probe(); + #endif + +-#ifdef CONFIG_X86_32 +- setup_apic_routing(); +-#endif ++ if (apic->setup_apic_routing) ++ apic->setup_apic_routing(); ++ + if (!num_processors) + printk(KERN_ERR "MPTABLE: no processors registered!\n"); + return num_processors; +@@ -426,7 +397,7 @@ static void __init construct_default_ioi + intsrc.type = MP_INTSRC; + intsrc.irqflag = 0; /* conforming */ + intsrc.srcbus = 0; +- intsrc.dstapic = mp_ioapics[0].mp_apicid; ++ intsrc.dstapic = mp_ioapics[0].apicid; + + intsrc.irqtype = mp_INT; + +@@ -579,14 +550,76 @@ static inline void __init construct_defa + } + } + +-static struct intel_mp_floating *mpf_found; ++static struct mpf_intel *mpf_found; ++ ++static unsigned long __init get_mpc_size(unsigned long physptr) ++{ ++ struct mpc_table *mpc; ++ unsigned long size; ++ ++ mpc = early_ioremap(physptr, PAGE_SIZE); ++ size = mpc->length; ++ early_iounmap(mpc, PAGE_SIZE); ++ apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); ++ ++ return size; ++} ++ ++static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) ++{ ++ struct mpc_table *mpc; ++ unsigned long size; ++ ++ size = get_mpc_size(mpf->physptr); ++ mpc = early_ioremap(mpf->physptr, size); ++ /* ++ * Read the physical hardware table. Anything here will ++ * override the defaults. 
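++ *
++ * (Why the two-step mapping: early_ioremap() wants an explicit length,
++ * but the table's length is stored inside the table itself, so
++ * get_mpc_size() above maps a single page just to read mpc->length:
++ *
++ *	mpc = early_ioremap(physptr, PAGE_SIZE);
++ *	size = mpc->length;
++ *	early_iounmap(mpc, PAGE_SIZE);
++ *
++ * after which the full table can be remapped at its real size.)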
++ */ ++ if (!smp_read_mpc(mpc, early)) { ++#ifdef CONFIG_X86_LOCAL_APIC ++ smp_found_config = 0; ++#endif ++ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" ++ "... disabling SMP support. (tell your hw vendor)\n"); ++ early_iounmap(mpc, size); ++ return -1; ++ } ++ early_iounmap(mpc, size); ++ ++ if (early) ++ return -1; ++ ++#ifdef CONFIG_X86_IO_APIC ++ /* ++ * If there are no explicit MP IRQ entries, then we are ++ * broken. We set up most of the low 16 IO-APIC pins to ++ * ISA defaults and hope it will work. ++ */ ++ if (!mp_irq_entries) { ++ struct mpc_bus bus; ++ ++ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " ++ "using default mptable. (tell your hw vendor)\n"); ++ ++ bus.type = MP_BUS; ++ bus.busid = 0; ++ memcpy(bus.bustype, "ISA ", 6); ++ MP_bus_info(&bus); ++ ++ construct_default_ioirq_mptable(0); ++ } ++#endif ++ ++ return 0; ++} + + /* + * Scan the memory blocks for an SMP configuration block. + */ + static void __init __get_smp_config(unsigned int early) + { +- struct intel_mp_floating *mpf = mpf_found; ++ struct mpf_intel *mpf = mpf_found; + + if (!mpf) + return; +@@ -607,9 +640,9 @@ static void __init __get_smp_config(unsi + } + + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", +- mpf->mpf_specification); ++ mpf->specification); + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN) +- if (mpf->mpf_feature2 & (1 << 7)) { ++ if (mpf->feature2 & (1 << 7)) { + printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { +@@ -620,7 +653,7 @@ static void __init __get_smp_config(unsi + /* + * Now see if we need to read further. + */ +- if (mpf->mpf_feature1 != 0) { ++ if (mpf->feature1 != 0) { + if (early) { + /* + * local APIC has default address +@@ -630,49 +663,12 @@ static void __init __get_smp_config(unsi + } + + printk(KERN_INFO "Default MP configuration #%d\n", +- mpf->mpf_feature1); +- construct_default_ISA_mptable(mpf->mpf_feature1); +- +- } else if (mpf->mpf_physptr) { +- +- /* +- * Read the physical hardware table. Anything here will +- * override the defaults. +- */ +- if (!smp_read_mpc(_bus_to_virt(mpf->mpf_physptr), early)) { +-#ifdef CONFIG_X86_LOCAL_APIC +- smp_found_config = 0; +-#endif +- printk(KERN_ERR +- "BIOS bug, MP table errors detected!...\n"); +- printk(KERN_ERR "... disabling SMP support. " +- "(tell your hw vendor)\n"); +- return; +- } ++ mpf->feature1); ++ construct_default_ISA_mptable(mpf->feature1); + +- if (early) ++ } else if (mpf->physptr) { ++ if (check_physptr(mpf, early)) + return; +-#ifdef CONFIG_X86_IO_APIC +- /* +- * If there are no explicit MP IRQ entries, then we are +- * broken. We set up most of the low 16 IO-APIC pins to +- * ISA defaults and hope it will work. +- */ +- if (!mp_irq_entries) { +- struct mpc_bus bus; +- +- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " +- "using default mptable. " +- "(tell your hw vendor)\n"); +- +- bus.type = MP_BUS; +- bus.busid = 0; +- memcpy(bus.bustype, "ISA ", 6); +- MP_bus_info(&bus); +- +- construct_default_ioirq_mptable(0); +- } +-#endif + } else + BUG(); + +@@ -693,58 +689,68 @@ void __init get_smp_config(void) + __get_smp_config(0); + } + ++#ifndef CONFIG_XEN ++static void __init smp_reserve_bootmem(struct mpf_intel *mpf) ++{ ++ unsigned long size = get_mpc_size(mpf->physptr); ++#ifdef CONFIG_X86_32 ++ /* ++ * We cannot access to MPC table to compute table size yet, ++ * as only few megabytes from the bottom is mapped now. 
++ * PC-9800's MPC table places on the very last of physical ++ * memory; so that simply reserving PAGE_SIZE from mpf->physptr ++ * yields BUG() in reserve_bootmem. ++ * also need to make sure physptr is below than max_low_pfn ++ * we don't need reserve the area above max_low_pfn ++ */ ++ unsigned long end = max_low_pfn * PAGE_SIZE; ++ ++ if (mpf->physptr < end) { ++ if (mpf->physptr + size > end) ++ size = end - mpf->physptr; ++ reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); ++ } ++#else ++ reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); ++#endif ++} ++#endif ++ + static int __init smp_scan_config(unsigned long base, unsigned long length, + unsigned reserve) + { + unsigned int *bp = _bus_to_virt(base); +- struct intel_mp_floating *mpf; ++ struct mpf_intel *mpf; + + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", + bp, length); + BUILD_BUG_ON(sizeof(*mpf) != 16); + + while (length > 0) { +- mpf = (struct intel_mp_floating *)bp; ++ mpf = (struct mpf_intel *)bp; + if ((*bp == SMP_MAGIC_IDENT) && +- (mpf->mpf_length == 1) && ++ (mpf->length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && +- ((mpf->mpf_specification == 1) +- || (mpf->mpf_specification == 4))) { ++ ((mpf->specification == 1) ++ || (mpf->specification == 4))) { + #ifdef CONFIG_X86_LOCAL_APIC + smp_found_config = 1; + #endif + mpf_found = mpf; + + #ifndef CONFIG_XEN +- printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", +- mpf, virt_to_phys(mpf)); ++ printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", ++ mpf, (u64)virt_to_phys(mpf)); + + if (!reserve) + return 1; +- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, +- BOOTMEM_DEFAULT); +- if (mpf->mpf_physptr) { +- unsigned long size = PAGE_SIZE; +-#ifdef CONFIG_X86_32 +- /* +- * We cannot access to MPC table to compute +- * table size yet, as only few megabytes from +- * the bottom is mapped now. +- * PC-9800's MPC table places on the very last +- * of physical memory; so that simply reserving +- * PAGE_SIZE from mpg->mpf_physptr yields BUG() +- * in reserve_bootmem. 
+- */ +- unsigned long end = max_low_pfn * PAGE_SIZE; +- if (mpf->mpf_physptr + size > end) +- size = end - mpf->mpf_physptr; +-#endif +- reserve_bootmem_generic(mpf->mpf_physptr, size, ++ reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), + BOOTMEM_DEFAULT); +- } ++ if (mpf->physptr) ++ smp_reserve_bootmem(mpf); + #else + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", +- mpf, ((void *)bp - _bus_to_virt(base)) + base); ++ mpf, ((void *)bp - _bus_to_virt(base)) + base); + #endif + return 1; + } +@@ -826,15 +832,15 @@ static int __init get_MP_intsrc_index(s + /* not legacy */ + + for (i = 0; i < mp_irq_entries; i++) { +- if (mp_irqs[i].mp_irqtype != mp_INT) ++ if (mp_irqs[i].irqtype != mp_INT) + continue; + +- if (mp_irqs[i].mp_irqflag != 0x0f) ++ if (mp_irqs[i].irqflag != 0x0f) + continue; + +- if (mp_irqs[i].mp_srcbus != m->srcbus) ++ if (mp_irqs[i].srcbus != m->srcbus) + continue; +- if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) ++ if (mp_irqs[i].srcbusirq != m->srcbusirq) + continue; + if (irq_used[i]) { + /* already claimed */ +@@ -851,7 +857,58 @@ static int __init get_MP_intsrc_index(s + #define SPARE_SLOT_NUM 20 + + static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; +-#endif ++ ++static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) ++{ ++ int i; ++ ++ apic_printk(APIC_VERBOSE, "OLD "); ++ print_MP_intsrc_info(m); ++ ++ i = get_MP_intsrc_index(m); ++ if (i > 0) { ++ assign_to_mpc_intsrc(&mp_irqs[i], m); ++ apic_printk(APIC_VERBOSE, "NEW "); ++ print_mp_irq_info(&mp_irqs[i]); ++ return; ++ } ++ if (!i) { ++ /* legacy, do nothing */ ++ return; ++ } ++ if (*nr_m_spare < SPARE_SLOT_NUM) { ++ /* ++ * not found (-1), or duplicated (-2) are invalid entries, ++ * we need to use the slot later ++ */ ++ m_spare[*nr_m_spare] = m; ++ *nr_m_spare += 1; ++ } ++} ++#else /* CONFIG_X86_IO_APIC */ ++static ++inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} ++#endif /* CONFIG_X86_IO_APIC */ ++ ++static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, ++ int count) ++{ ++ if (!mpc_new_phys) { ++ pr_info("No spare slots, try to append...take your risk, " ++ "new mpc_length %x\n", count); ++ } else { ++ if (count <= mpc_new_length) ++ pr_info("No spare slots, try to append..., " ++ "new mpc_length %x\n", count); ++ else { ++ pr_err("mpc_new_length %lx is too small\n", ++ mpc_new_length); ++ return -1; ++ } ++ } ++ ++ return 0; ++} + + static int __init replace_intsrc_all(struct mpc_table *mpc, + unsigned long mpc_new_phys, +@@ -859,77 +916,33 @@ static int __init replace_intsrc_all(st + { + #ifdef CONFIG_X86_IO_APIC + int i; +- int nr_m_spare = 0; + #endif +- + int count = sizeof(*mpc); ++ int nr_m_spare = 0; + unsigned char *mpt = ((unsigned char *)mpc) + count; + + printk(KERN_INFO "mpc_length %x\n", mpc->length); + while (count < mpc->length) { + switch (*mpt) { + case MP_PROCESSOR: +- { +- struct mpc_cpu *m = (struct mpc_cpu *)mpt; +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); ++ break; + case MP_BUS: +- { +- struct mpc_bus *m = (struct mpc_bus *)mpt; +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_bus)); ++ break; + case MP_IOAPIC: +- { +- mpt += sizeof(struct mpc_ioapic); +- count += sizeof(struct mpc_ioapic); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); ++ break; + case MP_INTSRC: +- { +-#ifdef CONFIG_X86_IO_APIC +- struct mpc_intsrc *m = (struct mpc_intsrc 
*)mpt; +- +- apic_printk(APIC_VERBOSE, "OLD "); +- print_MP_intsrc_info(m); +- i = get_MP_intsrc_index(m); +- if (i > 0) { +- assign_to_mpc_intsrc(&mp_irqs[i], m); +- apic_printk(APIC_VERBOSE, "NEW "); +- print_mp_irq_info(&mp_irqs[i]); +- } else if (!i) { +- /* legacy, do nothing */ +- } else if (nr_m_spare < SPARE_SLOT_NUM) { +- /* +- * not found (-1), or duplicated (-2) +- * are invalid entries, +- * we need to use the slot later +- */ +- m_spare[nr_m_spare] = m; +- nr_m_spare++; +- } +-#endif +- mpt += sizeof(struct mpc_intsrc); +- count += sizeof(struct mpc_intsrc); +- break; +- } ++ check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare); ++ skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); ++ break; + case MP_LINTSRC: +- { +- struct mpc_lintsrc *m = +- (struct mpc_lintsrc *)mpt; +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); ++ break; + default: + /* wrong mptable */ +- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); +- printk(KERN_ERR "type %x\n", *mpt); +- print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, +- 1, mpc, mpc->length, 1); ++ smp_dump_mptable(mpc, mpt); + goto out; + } + } +@@ -939,10 +952,10 @@ static int __init replace_intsrc_all(st + if (irq_used[i]) + continue; + +- if (mp_irqs[i].mp_irqtype != mp_INT) ++ if (mp_irqs[i].irqtype != mp_INT) + continue; + +- if (mp_irqs[i].mp_irqflag != 0x0f) ++ if (mp_irqs[i].irqflag != 0x0f) + continue; + + if (nr_m_spare > 0) { +@@ -953,16 +966,8 @@ static int __init replace_intsrc_all(st + } else { + struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; + count += sizeof(struct mpc_intsrc); +- if (!mpc_new_phys) { +- printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); +- } else { +- if (count <= mpc_new_length) +- printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); +- else { +- printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); +- goto out; +- } +- } ++ if (!check_slot(mpc_new_phys, mpc_new_length, count)) ++ goto out; + assign_to_mpc_intsrc(&mp_irqs[i], m); + mpc->length = count; + mpt += sizeof(struct mpc_intsrc); +@@ -1018,7 +1023,7 @@ static int __init update_mp_table(void) + { + char str[16]; + char oem[10]; +- struct intel_mp_floating *mpf; ++ struct mpf_intel *mpf; + struct mpc_table *mpc, *mpc_new; + + if (!enable_update_mptable) +@@ -1031,19 +1036,19 @@ static int __init update_mp_table(void) + /* + * Now see if we need to go further. 
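+ *
+ * (The field accesses below follow the intel_mp_floating -> mpf_intel
+ * rename, which drops the mpf_ prefix; for reference the structure is
+ * roughly:
+ *
+ *	struct mpf_intel {
+ *		char		signature[4];	"_MP_"
+ *		unsigned int	physptr;	MP config table address
+ *		unsigned char	length;		in 16-byte paragraphs
+ *		unsigned char	specification;	spec revision
+ *		unsigned char	checksum;
+ *		unsigned char	feature1, feature2, feature3, feature4, feature5;
+ *	};
+ * )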
+ */ +- if (mpf->mpf_feature1 != 0) ++ if (mpf->feature1 != 0) + return 0; + +- if (!mpf->mpf_physptr) ++ if (!mpf->physptr) + return 0; + +- mpc = _bus_to_virt(mpf->mpf_physptr); ++ mpc = _bus_to_virt(mpf->physptr); + + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +- printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf)); +- printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); ++ printk(KERN_INFO "mpf: %llx\n", (u64)arbitrary_virt_to_machine(mpf)); ++ printk(KERN_INFO "physptr: %x\n", mpf->physptr); + + if (mpc_new_phys && mpc->length > mpc_new_length) { + mpc_new_phys = 0; +@@ -1067,23 +1072,23 @@ static int __init update_mp_table(void) + maddr_t mpc_new_bus; + + mpc_new_bus = phys_to_machine(mpc_new_phys); +- mpf->mpf_physptr = mpc_new_bus; ++ mpf->physptr = mpc_new_bus; + mpc_new = phys_to_virt(mpc_new_phys); + memcpy(mpc_new, mpc, mpc->length); + mpc = mpc_new; + /* check if we can modify that */ +- if (mpc_new_bus - mpf->mpf_physptr) { +- struct intel_mp_floating *mpf_new; ++ if (mpc_new_bus - mpf->physptr) { ++ struct mpf_intel *mpf_new; + /* steal 16 bytes from [0, 1k) */ + printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + mpf_new = isa_bus_to_virt(0x400 - 16); + memcpy(mpf_new, mpf, 16); + mpf = mpf_new; +- mpf->mpf_physptr = mpc_new_bus; ++ mpf->physptr = mpc_new_bus; + } +- mpf->mpf_checksum = 0; +- mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); +- printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); ++ mpf->checksum = 0; ++ mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); ++ printk(KERN_INFO "physptr new: %x\n", mpf->physptr); + } + + /* +--- head-2010-01-18.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -1,4 +1,5 @@ + #include ++#include + #include + #include + #include +@@ -12,7 +13,7 @@ + + static int forbid_dac __read_mostly; + +-struct dma_mapping_ops *dma_ops; ++struct dma_map_ops *dma_ops; + EXPORT_SYMBOL(dma_ops); + + static int iommu_sac_force __read_mostly; +@@ -39,11 +40,14 @@ EXPORT_SYMBOL(bad_dma_address); + to older i386. 
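+
+   (DMA_BIT_MASK(n), which the hunks below use in place of the old
+   DMA_24/32/40BIT_MASK constants, is the linux/dma-mapping.h helper
+
+	#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))
+
+   i.e. a mask with the low n bits set, so DMA_BIT_MASK(32) == 0xffffffff.)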
*/ + struct device x86_dma_fallback_dev = { + .init_name = "fallback device", +- .coherent_dma_mask = DMA_32BIT_MASK, ++ .coherent_dma_mask = DMA_BIT_MASK(32), + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, + }; + EXPORT_SYMBOL(x86_dma_fallback_dev); + ++/* Number of entries preallocated for DMA-API debugging */ ++#define PREALLOC_DMA_DEBUG_ENTRIES 32768 ++ + int dma_set_mask(struct device *dev, u64 mask) + { + if (!dev->dma_mask || !dma_supported(dev, mask)) +@@ -103,20 +107,20 @@ static void __init dma32_free_bootmem(vo + } + #endif + +-static struct dma_mapping_ops swiotlb_dma_ops = { ++static struct dma_map_ops swiotlb_dma_ops = { + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = dma_generic_free_coherent, + .mapping_error = swiotlb_dma_mapping_error, +- .map_single = swiotlb_map_single_phys, +- .unmap_single = swiotlb_unmap_single, ++ .map_page = swiotlb_map_page, ++ .unmap_page = swiotlb_unmap_page, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, + .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, +- .map_sg = swiotlb_map_sg, +- .unmap_sg = swiotlb_unmap_sg, ++ .map_sg = swiotlb_map_sg_attrs, ++ .unmap_sg = swiotlb_unmap_sg_attrs, + .dma_supported = swiotlb_dma_supported + }; + +@@ -175,7 +179,7 @@ again: + if (!is_buffer_dma_capable(dma_mask, addr, size)) { + __free_pages(page, order); + +- if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { ++ if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { + flag = (flag & ~GFP_DMA32) | GFP_DMA; + goto again; + } +@@ -305,7 +309,7 @@ int range_straddles_page_boundary(paddr_ + + int dma_supported(struct device *dev, u64 mask) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); + + #ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { +@@ -320,7 +324,7 @@ int dma_supported(struct device *dev, u6 + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ +- if (mask < DMA_24BIT_MASK) ++ if (mask < DMA_BIT_MASK(24)) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This +@@ -335,7 +339,7 @@ int dma_supported(struct device *dev, u6 + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ +- if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { ++ if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { + dev_info(dev, "Force SAC with mask %Lx\n", mask); + return 0; + } +@@ -346,6 +350,12 @@ EXPORT_SYMBOL(dma_supported); + + static int __init pci_iommu_init(void) + { ++ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); ++ ++#ifdef CONFIG_PCI ++ dma_debug_add_bus(&pci_bus_type); ++#endif ++ + calgary_iommu_init(); + + intel_iommu_init(); +@@ -371,8 +381,7 @@ fs_initcall(pci_iommu_init); + static __devinit void via_no_dac(struct pci_dev *dev) + { + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { +- printk(KERN_INFO +- "PCI: VIA PCI bridge detected. 
Disabling DAC.\n"); ++ dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); + forbid_dac = 1; + } + } +--- head-2010-01-18.orig/arch/x86/kernel/pci-nommu-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-nommu-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -24,7 +24,7 @@ do { \ + + static int + gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents, +- int direction) ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + unsigned int i; + struct scatterlist *sg; +@@ -48,7 +48,7 @@ gnttab_map_sg(struct device *hwdev, stru + + static void + gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents, +- int direction) ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + unsigned int i; + struct scatterlist *sg; +@@ -58,36 +58,36 @@ gnttab_unmap_sg(struct device *hwdev, st + } + + static dma_addr_t +-gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size, +- int direction) ++gnttab_map_page(struct device *dev, struct page *page, unsigned long offset, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + dma_addr_t dma; + + WARN_ON(size == 0); + +- dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) + +- offset_in_page(paddr); +- IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size)); ++ dma = gnttab_dma_map_page(page) + offset; ++ IOMMU_BUG_ON(range_straddles_page_boundary(offset, size)); + IOMMU_BUG_ON(address_needs_mapping(dev, dma, size)); + + return dma; + } + + static void +-gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, +- int direction) ++gnttab_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + gnttab_dma_unmap_page(dma_addr); + } + +-struct dma_mapping_ops nommu_dma_ops = { +- .alloc_coherent = dma_generic_alloc_coherent, +- .free_coherent = dma_generic_free_coherent, +- .map_single = gnttab_map_single, +- .unmap_single = gnttab_unmap_single, +- .map_sg = gnttab_map_sg, +- .unmap_sg = gnttab_unmap_sg, +- .dma_supported = swiotlb_dma_supported, ++struct dma_map_ops nommu_dma_ops = { ++ .alloc_coherent = dma_generic_alloc_coherent, ++ .free_coherent = dma_generic_free_coherent, ++ .map_page = gnttab_map_page, ++ .unmap_page = gnttab_unmap_page, ++ .map_sg = gnttab_map_sg, ++ .unmap_sg = gnttab_unmap_sg, ++ .dma_supported = swiotlb_dma_supported, + }; + + void __init no_iommu_init(void) +--- head-2010-01-18.orig/arch/x86/kernel/process-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -1,16 +1,19 @@ + #include + #include + #include +-#include + #include ++#include + #include + #include + #include + #include + #include +-#include ++#include + #include + #include ++#include ++#include ++#include + #include + + unsigned long idle_halt; +@@ -20,6 +23,9 @@ EXPORT_SYMBOL(idle_nomwait); + + struct kmem_cache *task_xstate_cachep; + ++DEFINE_TRACE(power_start); ++DEFINE_TRACE(power_end); ++ + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) + { + *dst = *src; +@@ -57,6 +63,179 @@ void arch_task_cache_init(void) + } + + /* ++ * Free current thread data structures etc.. 
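++ *
++ * (Under Xen the TSS is maintained by the hypervisor, so instead of
++ * the native clearing of the I/O bitmap in the per-CPU TSS, which in
++ * the process.c of this era is roughly
++ *
++ *	tss = &per_cpu(init_tss, get_cpu());
++ *	memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
++ *	put_cpu();
++ *
++ * the bitmap is dropped via the PHYSDEVOP_set_iobitmap hypercall below.)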
++ */ ++void exit_thread(void) ++{ ++ struct task_struct *me = current; ++ struct thread_struct *t = &me->thread; ++ unsigned long *bp = t->io_bitmap_ptr; ++ ++ if (bp) { ++ struct physdev_set_iobitmap set_iobitmap; ++ ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ /* ++ * Careful, clear this in the TSS too: ++ */ ++ memset(&set_iobitmap, 0, sizeof(set_iobitmap)); ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, ++ &set_iobitmap)); ++ t->io_bitmap_max = 0; ++ kfree(bp); ++ } ++ ++ ds_exit_thread(current); ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++#ifdef CONFIG_X86_64 ++ if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { ++ clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); ++ if (test_tsk_thread_flag(tsk, TIF_IA32)) { ++ clear_tsk_thread_flag(tsk, TIF_IA32); ++ } else { ++ set_tsk_thread_flag(tsk, TIF_IA32); ++ current_thread_info()->status |= TS_COMPAT; ++ } ++ } ++#endif ++ ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); ++ ++ tsk->thread.debugreg0 = 0; ++ tsk->thread.debugreg1 = 0; ++ tsk->thread.debugreg2 = 0; ++ tsk->thread.debugreg3 = 0; ++ tsk->thread.debugreg6 = 0; ++ tsk->thread.debugreg7 = 0; ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ /* ++ * Forget coprocessor state.. ++ */ ++ tsk->fpu_counter = 0; ++ clear_fpu(tsk); ++ clear_used_math(); ++} ++ ++static void hard_disable_TSC(void) ++{ ++ write_cr4(read_cr4() | X86_CR4_TSD); ++} ++ ++void disable_TSC(void) ++{ ++ preempt_disable(); ++ if (!test_and_set_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. ++ */ ++ hard_disable_TSC(); ++ preempt_enable(); ++} ++ ++static void hard_enable_TSC(void) ++{ ++ write_cr4(read_cr4() & ~X86_CR4_TSD); ++} ++ ++static void enable_TSC(void) ++{ ++ preempt_disable(); ++ if (test_and_clear_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. 
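++ *
++ * (Background for the TSC mode interface that follows: X86_CR4_TSD
++ * makes RDTSC privileged, so a user-mode RDTSC raises #GP, delivered
++ * as SIGSEGV, hence PR_TSC_SIGSEGV. A task switches its own mode via
++ * prctl:
++ *
++ *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	rdtsc now faults
++ *	prctl(PR_SET_TSC, PR_TSC_ENABLE);	rdtsc works again
++ * )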
++ */ ++ hard_enable_TSC(); ++ preempt_enable(); ++} ++ ++int get_tsc_mode(unsigned long adr) ++{ ++ unsigned int val; ++ ++ if (test_thread_flag(TIF_NOTSC)) ++ val = PR_TSC_SIGSEGV; ++ else ++ val = PR_TSC_ENABLE; ++ ++ return put_user(val, (unsigned int __user *)adr); ++} ++ ++int set_tsc_mode(unsigned int val) ++{ ++ if (val == PR_TSC_SIGSEGV) ++ disable_TSC(); ++ else if (val == PR_TSC_ENABLE) ++ enable_TSC(); ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) ++{ ++ struct thread_struct *prev, *next; ++ ++ prev = &prev_p->thread; ++ next = &next_p->thread; ++ ++ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || ++ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) ++ ds_switch_to(prev_p, next_p); ++ else if (next->debugctlmsr != prev->debugctlmsr) ++ update_debugctlmsr(next->debugctlmsr); ++ ++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { ++ set_debugreg(next->debugreg0, 0); ++ set_debugreg(next->debugreg1, 1); ++ set_debugreg(next->debugreg2, 2); ++ set_debugreg(next->debugreg3, 3); ++ /* no 4 and 5 */ ++ set_debugreg(next->debugreg6, 6); ++ set_debugreg(next->debugreg7, 7); ++ } ++ ++ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ ++ test_tsk_thread_flag(next_p, TIF_NOTSC)) { ++ /* prev and next are different */ ++ if (test_tsk_thread_flag(next_p, TIF_NOTSC)) ++ hard_disable_TSC(); ++ else ++ hard_enable_TSC(); ++ } ++} ++ ++int sys_fork(struct pt_regs *regs) ++{ ++ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); ++} ++ ++/* ++ * This is trivial, and on the face of it looks like it ++ * could equally well be done in user mode. ++ * ++ * Not so, for quite unobvious reasons - register pressure. ++ * In user mode vfork() cannot have a stack frame, and if ++ * done by calling the "clone()" system call directly, you ++ * do not have enough call-clobbered registers to hold all ++ * the information you need. ++ */ ++int sys_vfork(struct pt_regs *regs) ++{ ++ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, ++ NULL, NULL); ++} ++ ++ ++/* + * Idle related variables and functions + */ + unsigned long boot_option_idle_override = 0; +@@ -130,7 +309,7 @@ void stop_this_cpu(void *dummy) + /* + * Remove this CPU: + */ +- cpu_clear(smp_processor_id(), cpu_online_map); ++ set_cpu_online(smp_processor_id(), false); + disable_all_local_evtchn(); + + for (;;) { +@@ -283,12 +462,13 @@ static int __cpuinit check_c1e_idle(cons + return 1; + } + +-static cpumask_t c1e_mask = CPU_MASK_NONE; ++static cpumask_var_t c1e_mask; + static int c1e_detected; + + void c1e_remove_cpu(int cpu) + { +- cpu_clear(cpu, c1e_mask); ++ if (c1e_mask != NULL) ++ cpumask_clear_cpu(cpu, c1e_mask); + } + + /* +@@ -317,8 +497,8 @@ static void c1e_idle(void) + if (c1e_detected) { + int cpu = smp_processor_id(); + +- if (!cpu_isset(cpu, c1e_mask)) { +- cpu_set(cpu, c1e_mask); ++ if (!cpumask_test_cpu(cpu, c1e_mask)) { ++ cpumask_set_cpu(cpu, c1e_mask); + /* + * Force broadcast so ACPI can not interfere. 
Needs + * to run with interrupts enabled as it uses +@@ -350,7 +530,7 @@ static void c1e_idle(void) + void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) + { + #ifndef CONFIG_XEN +-#ifdef CONFIG_X86_SMP ++#ifdef CONFIG_SMP + if (pm_idle == poll_idle && smp_num_siblings > 1) { + printk(KERN_WARNING "WARNING: polling idle and HT enabled," + " performance may degrade.\n"); +@@ -373,6 +553,17 @@ void __cpuinit select_idle_routine(const + #endif + } + ++void __init init_c1e_mask(void) ++{ ++#ifndef CONFIG_XEN ++ /* If we're using c1e_idle, we need to allocate c1e_mask. */ ++ if (pm_idle == c1e_idle) { ++ alloc_cpumask_var(&c1e_mask, GFP_KERNEL); ++ cpumask_clear(c1e_mask); ++ } ++#endif ++} ++ + static int __init idle_setup(char *str) + { + if (!str) +--- head-2010-01-18.orig/arch/x86/kernel/process_32-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -11,6 +11,7 @@ + + #include + ++#include + #include + #include + #include +@@ -71,9 +72,6 @@ asmlinkage void cstar_ret_from_fork(void + DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; + EXPORT_PER_CPU_SYMBOL(current_task); + +-DEFINE_PER_CPU(int, cpu_number); +-EXPORT_PER_CPU_SYMBOL(cpu_number); +- + /* + * Return saved PC of a blocked thread. + */ +@@ -99,6 +97,15 @@ void cpu_idle(void) + { + int cpu = smp_processor_id(); + ++ /* ++ * If we're the non-boot CPU, nothing set the stack canary up ++ * for us. CPU0 already has it initialized but no harm in ++ * doing it again. This is a good place for updating it, as ++ * we wont ever return from this function (so the invalid ++ * canaries already on the stack wont ever trigger). ++ */ ++ boot_init_stack_canary(); ++ + current_thread_info()->status |= TS_POLLING; + + /* endless idle loop with no priority at all */ +@@ -113,7 +120,6 @@ void cpu_idle(void) + play_dead(); + + local_irq_disable(); +- __get_cpu_var(irq_stat).idle_timestamp = jiffies; + /* Don't trace irqs off for idle */ + stop_critical_timings(); + xen_idle(); +@@ -137,7 +143,7 @@ void __show_regs(struct pt_regs *regs, i + if (user_mode_vm(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; +- savesegment(gs, gs); ++ gs = get_user_gs(regs); + } else { + sp = (unsigned long) (®s->sp); + savesegment(ss, ss); +@@ -218,6 +224,7 @@ int kernel_thread(int (*fn)(void *), voi + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; ++ regs.gs = __KERNEL_STACK_CANARY; + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS | get_kernel_rpl(); +@@ -228,47 +235,6 @@ int kernel_thread(int (*fn)(void *), voi + } + EXPORT_SYMBOL(kernel_thread); + +-/* +- * Free current thread data structures etc.. +- */ +-void exit_thread(void) +-{ +- /* The process may have allocated an io port bitmap... nuke it. 
*/ +- if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { +- struct task_struct *tsk = current; +- struct thread_struct *t = &tsk->thread; +- struct physdev_set_iobitmap set_iobitmap; +- memset(&set_iobitmap, 0, sizeof(set_iobitmap)); +- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, +- &set_iobitmap)); +- kfree(t->io_bitmap_ptr); +- t->io_bitmap_ptr = NULL; +- clear_thread_flag(TIF_IO_BITMAP); +- } +- +- ds_exit_thread(current); +-} +- +-void flush_thread(void) +-{ +- struct task_struct *tsk = current; +- +- tsk->thread.debugreg0 = 0; +- tsk->thread.debugreg1 = 0; +- tsk->thread.debugreg2 = 0; +- tsk->thread.debugreg3 = 0; +- tsk->thread.debugreg6 = 0; +- tsk->thread.debugreg7 = 0; +- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); +- clear_tsk_thread_flag(tsk, TIF_DEBUG); +- /* +- * Forget coprocessor state.. +- */ +- tsk->fpu_counter = 0; +- clear_fpu(tsk); +- clear_used_math(); +-} +- + void release_thread(struct task_struct *dead_task) + { + BUG_ON(dead_task->mm); +@@ -284,7 +250,7 @@ void prepare_to_copy(struct task_struct + unlazy_fpu(tsk); + } + +-int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, ++int copy_thread(unsigned long clone_flags, unsigned long sp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) + { +@@ -302,7 +268,7 @@ int copy_thread(int nr, unsigned long cl + + p->thread.ip = (unsigned long) ret_from_fork; + +- savesegment(gs, p->thread.gs); ++ task_user_gs(p) = get_user_gs(regs); + + tsk = current; + if (test_tsk_thread_flag(tsk, TIF_CSTAR)) +@@ -344,7 +310,7 @@ int copy_thread(int nr, unsigned long cl + void + start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) + { +- __asm__("movl %0, %%gs" : : "r"(0)); ++ set_user_gs(regs, 0); + regs->fs = 0; + set_fs(USER_DS); + regs->ds = __USER_DS; +@@ -360,98 +326,6 @@ start_thread(struct pt_regs *regs, unsig + } + EXPORT_SYMBOL_GPL(start_thread); + +-static void hard_disable_TSC(void) +-{ +- write_cr4(read_cr4() | X86_CR4_TSD); +-} +- +-void disable_TSC(void) +-{ +- preempt_disable(); +- if (!test_and_set_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. +- */ +- hard_disable_TSC(); +- preempt_enable(); +-} +- +-static void hard_enable_TSC(void) +-{ +- write_cr4(read_cr4() & ~X86_CR4_TSD); +-} +- +-static void enable_TSC(void) +-{ +- preempt_disable(); +- if (test_and_clear_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. 
+- */ +- hard_enable_TSC(); +- preempt_enable(); +-} +- +-int get_tsc_mode(unsigned long adr) +-{ +- unsigned int val; +- +- if (test_thread_flag(TIF_NOTSC)) +- val = PR_TSC_SIGSEGV; +- else +- val = PR_TSC_ENABLE; +- +- return put_user(val, (unsigned int __user *)adr); +-} +- +-int set_tsc_mode(unsigned int val) +-{ +- if (val == PR_TSC_SIGSEGV) +- disable_TSC(); +- else if (val == PR_TSC_ENABLE) +- enable_TSC(); +- else +- return -EINVAL; +- +- return 0; +-} +- +-static noinline void +-__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) +-{ +- struct thread_struct *prev, *next; +- +- prev = &prev_p->thread; +- next = &next_p->thread; +- +- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || +- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) +- ds_switch_to(prev_p, next_p); +- else if (next->debugctlmsr != prev->debugctlmsr) +- update_debugctlmsr(next->debugctlmsr); +- +- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { +- set_debugreg(next->debugreg0, 0); +- set_debugreg(next->debugreg1, 1); +- set_debugreg(next->debugreg2, 2); +- set_debugreg(next->debugreg3, 3); +- /* no 4 and 5 */ +- set_debugreg(next->debugreg6, 6); +- set_debugreg(next->debugreg7, 7); +- } +- +- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ +- test_tsk_thread_flag(next_p, TIF_NOTSC)) { +- /* prev and next are different */ +- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) +- hard_disable_TSC(); +- else +- hard_enable_TSC(); +- } +-} +- + /* + * switch_to(x,yn) should switch tasks from x to y. + * +@@ -532,7 +406,7 @@ __switch_to(struct task_struct *prev_p, + if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \ + next->tls_array[i].b != prev->tls_array[i].b)) { \ + mcl->op = __HYPERVISOR_update_descriptor; \ +- *(u64 *)&mcl->args[0] = virt_to_machine( \ ++ *(u64 *)&mcl->args[0] = arbitrary_virt_to_machine( \ + &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\ + *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \ + mcl++; \ +@@ -612,64 +486,44 @@ __switch_to(struct task_struct *prev_p, + * Restore %gs if needed (which is common) + */ + if (prev->gs | next->gs) +- loadsegment(gs, next->gs); ++ lazy_load_gs(next->gs); + +- x86_write_percpu(current_task, next_p); ++ percpu_write(current_task, next_p); + + return prev_p; + } + +-asmlinkage int sys_fork(struct pt_regs regs) +-{ +- return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); +-} +- +-asmlinkage int sys_clone(struct pt_regs regs) ++int sys_clone(struct pt_regs *regs) + { + unsigned long clone_flags; + unsigned long newsp; + int __user *parent_tidptr, *child_tidptr; + +- clone_flags = regs.bx; +- newsp = regs.cx; +- parent_tidptr = (int __user *)regs.dx; +- child_tidptr = (int __user *)regs.di; ++ clone_flags = regs->bx; ++ newsp = regs->cx; ++ parent_tidptr = (int __user *)regs->dx; ++ child_tidptr = (int __user *)regs->di; + if (!newsp) +- newsp = regs.sp; +- return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); +-} +- +-/* +- * This is trivial, and on the face of it looks like it +- * could equally well be done in user mode. +- * +- * Not so, for quite unobvious reasons - register pressure. +- * In user mode vfork() cannot have a stack frame, and if +- * done by calling the "clone()" system call directly, you +- * do not have enough call-clobbered registers to hold all +- * the information you need. 
+- */ +-asmlinkage int sys_vfork(struct pt_regs regs) +-{ +- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); ++ newsp = regs->sp; ++ return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); + } + + /* + * sys_execve() executes a new program. + */ +-asmlinkage int sys_execve(struct pt_regs regs) ++int sys_execve(struct pt_regs *regs) + { + int error; + char *filename; + +- filename = getname((char __user *) regs.bx); ++ filename = getname((char __user *) regs->bx); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, +- (char __user * __user *) regs.cx, +- (char __user * __user *) regs.dx, +- ®s); ++ (char __user * __user *) regs->cx, ++ (char __user * __user *) regs->dx, ++ regs); + if (error == 0) { + /* Make sure we don't return using sysenter.. */ + set_thread_flag(TIF_IRET); +--- head-2010-01-18.orig/arch/x86/kernel/process_64-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_64-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -19,6 +19,7 @@ + + #include + ++#include + #include + #include + #include +@@ -50,7 +51,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -67,6 +67,12 @@ + + asmlinkage extern void ret_from_fork(void); + ++DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++ ++DEFINE_PER_CPU(unsigned long, old_rsp); ++static DEFINE_PER_CPU(unsigned char, is_idle); ++ + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; + + static ATOMIC_NOTIFIER_HEAD(idle_notifier); +@@ -85,13 +91,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist + + void enter_idle(void) + { +- write_pda(isidle, 1); ++ percpu_write(is_idle, 1); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + } + + static void __exit_idle(void) + { +- if (test_and_clear_bit_pda(0, isidle) == 0) ++ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + } +@@ -121,6 +127,16 @@ static inline void play_dead(void) + void cpu_idle(void) + { + current_thread_info()->status |= TS_POLLING; ++ ++ /* ++ * If we're the non-boot CPU, nothing set the stack canary up ++ * for us. CPU0 already has it initialized but no harm in ++ * doing it again. This is a good place for updating it, as ++ * we wont ever return from this function (so the invalid ++ * canaries already on the stack wont ever trigger). ++ */ ++ boot_init_stack_canary(); ++ + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_stop_sched_tick(1); +@@ -230,78 +246,11 @@ void show_regs(struct pt_regs *regs) + show_trace(NULL, regs, (void *)(regs + 1), regs->bp); + } + +-/* +- * Free current thread data structures etc.. 
+- */ +-void exit_thread(void) +-{ +- struct task_struct *me = current; +- struct thread_struct *t = &me->thread; +- +- if (me->thread.io_bitmap_ptr) { +-#ifndef CONFIG_X86_NO_TSS +- struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); +-#endif +-#ifdef CONFIG_XEN +- struct physdev_set_iobitmap iobmp_op; +- memset(&iobmp_op, 0, sizeof(iobmp_op)); +-#endif +- +- kfree(t->io_bitmap_ptr); +- t->io_bitmap_ptr = NULL; +- clear_thread_flag(TIF_IO_BITMAP); +- /* +- * Careful, clear this in the TSS too: +- */ +-#ifndef CONFIG_X86_NO_TSS +- memset(tss->io_bitmap, 0xff, t->io_bitmap_max); +- put_cpu(); +-#endif +-#ifdef CONFIG_XEN +- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, +- &iobmp_op)); +-#endif +- t->io_bitmap_max = 0; +- } +- +- ds_exit_thread(current); +-} +- + void xen_load_gs_index(unsigned gs) + { + WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs)); + } + +-void flush_thread(void) +-{ +- struct task_struct *tsk = current; +- +- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { +- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); +- if (test_tsk_thread_flag(tsk, TIF_IA32)) { +- clear_tsk_thread_flag(tsk, TIF_IA32); +- } else { +- set_tsk_thread_flag(tsk, TIF_IA32); +- current_thread_info()->status |= TS_COMPAT; +- } +- } +- clear_tsk_thread_flag(tsk, TIF_DEBUG); +- +- tsk->thread.debugreg0 = 0; +- tsk->thread.debugreg1 = 0; +- tsk->thread.debugreg2 = 0; +- tsk->thread.debugreg3 = 0; +- tsk->thread.debugreg6 = 0; +- tsk->thread.debugreg7 = 0; +- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); +- /* +- * Forget coprocessor state.. +- */ +- tsk->fpu_counter = 0; +- clear_fpu(tsk); +- clear_used_math(); +-} +- + void release_thread(struct task_struct *dead_task) + { + if (dead_task->mm) { +@@ -343,7 +292,7 @@ void prepare_to_copy(struct task_struct + unlazy_fpu(tsk); + } + +-int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, ++int copy_thread(unsigned long clone_flags, unsigned long sp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) + { +@@ -424,7 +373,7 @@ start_thread(struct pt_regs *regs, unsig + load_gs_index(0); + regs->ip = new_ip; + regs->sp = new_sp; +- write_pda(oldrsp, new_sp); ++ percpu_write(old_rsp, new_sp); + regs->cs = __USER_CS; + regs->ss = __USER_DS; + regs->flags = 0x200; +@@ -436,103 +385,6 @@ start_thread(struct pt_regs *regs, unsig + } + EXPORT_SYMBOL_GPL(start_thread); + +-static void hard_disable_TSC(void) +-{ +- write_cr4(read_cr4() | X86_CR4_TSD); +-} +- +-void disable_TSC(void) +-{ +- preempt_disable(); +- if (!test_and_set_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. +- */ +- hard_disable_TSC(); +- preempt_enable(); +-} +- +-static void hard_enable_TSC(void) +-{ +- write_cr4(read_cr4() & ~X86_CR4_TSD); +-} +- +-static void enable_TSC(void) +-{ +- preempt_disable(); +- if (test_and_clear_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. 
+- */ +- hard_enable_TSC(); +- preempt_enable(); +-} +- +-int get_tsc_mode(unsigned long adr) +-{ +- unsigned int val; +- +- if (test_thread_flag(TIF_NOTSC)) +- val = PR_TSC_SIGSEGV; +- else +- val = PR_TSC_ENABLE; +- +- return put_user(val, (unsigned int __user *)adr); +-} +- +-int set_tsc_mode(unsigned int val) +-{ +- if (val == PR_TSC_SIGSEGV) +- disable_TSC(); +- else if (val == PR_TSC_ENABLE) +- enable_TSC(); +- else +- return -EINVAL; +- +- return 0; +-} +- +-/* +- * This special macro can be used to load a debugging register +- */ +-#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) +- +-static inline void __switch_to_xtra(struct task_struct *prev_p, +- struct task_struct *next_p) +-{ +- struct thread_struct *prev, *next; +- +- prev = &prev_p->thread, +- next = &next_p->thread; +- +- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || +- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) +- ds_switch_to(prev_p, next_p); +- else if (next->debugctlmsr != prev->debugctlmsr) +- update_debugctlmsr(next->debugctlmsr); +- +- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { +- loaddebug(next, 0); +- loaddebug(next, 1); +- loaddebug(next, 2); +- loaddebug(next, 3); +- /* no 4 and 5 */ +- loaddebug(next, 6); +- loaddebug(next, 7); +- } +- +- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ +- test_tsk_thread_flag(next_p, TIF_NOTSC)) { +- /* prev and next are different */ +- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) +- hard_disable_TSC(); +- else +- hard_enable_TSC(); +- } +-} +- + /* + * switch_to(x,y) should switch tasks from x to y. + * +@@ -598,7 +450,7 @@ __switch_to(struct task_struct *prev_p, + if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \ + next->tls_array[i].b != prev->tls_array[i].b)) { \ + mcl->op = __HYPERVISOR_update_descriptor; \ +- mcl->args[0] = virt_to_machine( \ ++ mcl->args[0] = arbitrary_virt_to_machine( \ + &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\ + mcl->args[1] = *(u64 *)&next->tls_array[i]; \ + mcl++; \ +@@ -685,21 +537,13 @@ __switch_to(struct task_struct *prev_p, + /* + * Switch the PDA context. + */ +- prev->usersp = read_pda(oldrsp); +- write_pda(oldrsp, next->usersp); +- write_pda(pcurrent, next_p); +- write_pda(kernelstack, +- (unsigned long)task_stack_page(next_p) + +- THREAD_SIZE - PDA_STACKOFFSET); +-#ifdef CONFIG_CC_STACKPROTECTOR +- write_pda(stack_canary, next_p->stack_canary); ++ prev->usersp = percpu_read(old_rsp); ++ percpu_write(old_rsp, next->usersp); ++ percpu_write(current_task, next_p); + +- /* +- * Build time only check to make sure the stack_canary is at +- * offset 40 in the pda; this is a gcc ABI requirement +- */ +- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); +-#endif ++ percpu_write(kernel_stack, ++ (unsigned long)task_stack_page(next_p) + ++ THREAD_SIZE - KERNEL_STACK_OFFSET); + + /* + * Now maybe reload the debug registers +@@ -753,11 +597,6 @@ void set_personality_64bit(void) + current->personality &= ~READ_IMPLIES_EXEC; + } + +-asmlinkage long sys_fork(struct pt_regs *regs) +-{ +- return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); +-} +- + asmlinkage long + sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) +@@ -767,22 +606,6 @@ sys_clone(unsigned long clone_flags, uns + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + } + +-/* +- * This is trivial, and on the face of it looks like it +- * could equally well be done in user mode. 
+- * +- * Not so, for quite unobvious reasons - register pressure. +- * In user mode vfork() cannot have a stack frame, and if +- * done by calling the "clone()" system call directly, you +- * do not have enough call-clobbered registers to hold all +- * the information you need. +- */ +-asmlinkage long sys_vfork(struct pt_regs *regs) +-{ +- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, +- NULL, NULL); +-} +- + unsigned long get_wchan(struct task_struct *p) + { + unsigned long stack; +--- head-2010-01-18.orig/arch/x86/kernel/quirks-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/quirks-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -75,8 +75,7 @@ static void ich_force_hpet_resume(void) + if (!force_hpet_address) + return; + +- if (rcba_base == NULL) +- BUG(); ++ BUG_ON(rcba_base == NULL); + + /* read the Function Disable register, dword mode only */ + val = readl(rcba_base + 0x3404); +@@ -173,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I + ich_force_enable_hpet); + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, + ich_force_enable_hpet); +- ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */ ++ ich_force_enable_hpet); + + static struct pci_dev *cached_dev; + +@@ -262,8 +262,6 @@ static void old_ich_force_enable_hpet_us + { + if (hpet_force_user) + old_ich_force_enable_hpet(dev); +- else +- hpet_print_force_info(); + } + + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, +--- head-2010-01-18.orig/arch/x86/kernel/setup-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -74,14 +74,15 @@ + #include + #include + #include +-#include + #include ++#include ++#include + #include + #include + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -89,7 +90,7 @@ + + #include + #include +-#include ++#include + #include + #include + #include +@@ -97,7 +98,6 @@ + #include + #include + +-#include + #include + #include + +@@ -118,9 +118,6 @@ + #include + #include + +-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; +-EXPORT_SYMBOL(HYPERVISOR_shared_info); +- + static int xen_panic_event(struct notifier_block *, unsigned long, void *); + static struct notifier_block xen_panic_block = { + xen_panic_event, NULL, 0 /* try to go last */ +@@ -145,7 +142,26 @@ EXPORT_SYMBOL(xen_start_info); + #define ARCH_SETUP + #endif + ++RESERVE_BRK(dmi_alloc, 65536); ++ ++unsigned int boot_cpu_id __read_mostly; ++ ++static __initdata unsigned long _brk_start = (unsigned long)__brk_base; ++unsigned long _brk_end = (unsigned long)__brk_base; ++ + #ifndef CONFIG_XEN ++#ifdef CONFIG_X86_64 ++int default_cpu_present_to_apicid(int mps_cpu) ++{ ++ return __default_cpu_present_to_apicid(mps_cpu); ++} ++ ++int default_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return __default_check_phys_apicid_present(boot_cpu_physical_apicid); ++} ++#endif ++ + #ifndef CONFIG_DEBUG_BOOT_PARAMS + struct boot_params __initdata boot_params; + #else +@@ -179,14 +195,6 @@ static struct resource bss_resource = { + + + #ifdef CONFIG_X86_32 +-#ifndef CONFIG_XEN +-/* This value is set up by the early boot code to point to the value +- immediately after the boot time page tables. It contains a *physical* +- address, and must not be in the .bss segment! 
*/ +-unsigned long init_pg_tables_start __initdata = ~0UL; +-unsigned long init_pg_tables_end __initdata = ~0UL; +-#endif +- + static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, +@@ -226,7 +234,9 @@ struct ist_info ist_info; + #endif + + #else +-struct cpuinfo_x86 boot_cpu_data __read_mostly; ++struct cpuinfo_x86 boot_cpu_data __read_mostly = { ++ .x86_phys_bits = MAX_PHYSMEM_BITS, ++}; + EXPORT_SYMBOL(boot_cpu_data); + #endif + +@@ -241,12 +251,6 @@ unsigned long mmu_cr4_features = X86_CR4 + int bootloader_type; + + /* +- * Early DMI memory +- */ +-int dmi_alloc_index; +-char dmi_alloc_data[DMI_MAX_DATA]; +- +-/* + * Setup options + */ + struct screen_info screen_info; +@@ -293,6 +297,35 @@ static inline void copy_edd(void) + } + #endif + ++void * __init extend_brk(size_t size, size_t align) ++{ ++ size_t mask = align - 1; ++ void *ret; ++ ++ BUG_ON(_brk_start == 0); ++ BUG_ON(align & mask); ++ ++ _brk_end = (_brk_end + mask) & ~mask; ++ BUG_ON((char *)(_brk_end + size) > __brk_limit); ++ ++ ret = (void *)_brk_end; ++ _brk_end += size; ++ ++ memset(ret, 0, size); ++ ++ return ret; ++} ++ ++static void __init reserve_brk(void) ++{ ++ if (_brk_end > _brk_start) ++ reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); ++ ++ /* Mark brk area as locked down and no longer taking any ++ new allocations */ ++ _brk_start = 0; ++} ++ + #ifdef CONFIG_BLK_DEV_INITRD + + #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) +@@ -653,24 +686,7 @@ static int __init setup_elfcorehdr(char + early_param("elfcorehdr", setup_elfcorehdr); + #endif + +-#ifndef CONFIG_XEN +-static int __init default_update_genapic(void) +-{ +-#ifdef CONFIG_X86_SMP +-# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) +- genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; +-# endif +-#endif +- +- return 0; +-} +-#else +-#define default_update_genapic NULL +-#endif +- +-static struct x86_quirks default_x86_quirks __initdata = { +- .update_genapic = default_update_genapic, +-}; ++static struct x86_quirks default_x86_quirks __initdata; + + struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + +@@ -738,19 +754,11 @@ void __init setup_arch(char **cmdline_p) + + /* Register a call for panic conditions. 
*/ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); +- +- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, +- VMASST_TYPE_writable_pagetables)); +-#ifdef CONFIG_X86_32 +- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, +- VMASST_TYPE_4gb_segments)); +-#endif + #endif /* CONFIG_XEN */ + + #ifdef CONFIG_X86_32 + memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); + visws_early_detect(); +- pre_setup_arch_hook(); + #else + printk(KERN_INFO "Command line: %s\n", boot_command_line); + #endif +@@ -834,16 +842,7 @@ void __init setup_arch(char **cmdline_p) + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; +-#ifdef CONFIG_X86_32 +-#ifndef CONFIG_XEN +- init_mm.brk = init_pg_tables_end + PAGE_OFFSET; +-#else +- init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) + +- xen_start_info->nr_pt_frames) << PAGE_SHIFT; +-#endif +-#else +- init_mm.brk = (unsigned long) &_end; +-#endif ++ init_mm.brk = _brk_end; + + code_resource.start = virt_to_phys(_text); + code_resource.end = virt_to_phys(_etext)-1; +@@ -956,9 +955,8 @@ void __init setup_arch(char **cmdline_p) + num_physpages = max_pfn; + max_mapnr = max_pfn; + +-#ifndef CONFIG_XEN +- if (cpu_has_x2apic) +- check_x2apic(); ++#ifdef CONFIG_X86_LOCAL_APIC ++ check_x2apic(); + #endif + + /* How many end-of-memory variables you have, grandma! */ +@@ -975,6 +973,8 @@ void __init setup_arch(char **cmdline_p) + setup_bios_corruption_check(); + #endif + ++ reserve_brk(); ++ + /* max_pfn_mapped is updated here */ + max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<arch_pre_intr_init) { ++ if (x86_quirks->arch_pre_intr_init()) ++ return; ++ } ++ init_ISA_irqs(); ++} ++ ++/** ++ * x86_quirk_intr_init - post gate setup interrupt initialisation ++ * ++ * Description: ++ * Fill in any interrupts that may have been left out by the general ++ * init_IRQ() routine. interrupts having to do with the machine rather ++ * than the devices on the I/O bus (like APIC interrupts in intel MP ++ * systems) are started here. ++ **/ ++void __init x86_quirk_intr_init(void) ++{ ++ if (x86_quirks->arch_intr_init) { ++ if (x86_quirks->arch_intr_init()) ++ return; ++ } ++} ++ ++/** ++ * x86_quirk_trap_init - initialise system specific traps ++ * ++ * Description: ++ * Called as the final act of trap_init(). Used in VISWS to initialise ++ * the various board specific APIC traps. ++ **/ ++void __init x86_quirk_trap_init(void) ++{ ++ if (x86_quirks->arch_trap_init) { ++ if (x86_quirks->arch_trap_init()) ++ return; ++ } ++} ++ ++static struct irqaction irq0 = { ++ .handler = timer_interrupt, ++ .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, ++ .name = "timer" ++}; ++ ++/** ++ * x86_quirk_pre_time_init - do any specific initialisations before. ++ * ++ **/ ++void __init x86_quirk_pre_time_init(void) ++{ ++ if (x86_quirks->arch_pre_time_init) ++ x86_quirks->arch_pre_time_init(); ++} ++ ++/** ++ * x86_quirk_time_init - do any specific initialisations for the system timer. 
++ * ++ * Description: ++ * Must plug the system timer interrupt source at HZ into the IRQ listed ++ * in irq_vectors.h:TIMER_IRQ ++ **/ ++void __init x86_quirk_time_init(void) ++{ ++ if (x86_quirks->arch_time_init) { ++ /* ++ * A nonzero return code does not mean failure, it means ++ * that the architecture quirk does not want any ++ * generic (timer) setup to be performed after this: ++ */ ++ if (x86_quirks->arch_time_init()) ++ return; ++ } ++ ++ irq0.mask = cpumask_of_cpu(0); ++ setup_irq(0, &irq0); ++} ++#endif /* CONFIG_X86_32 */ ++ + #ifdef CONFIG_XEN + static int + xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +--- head-2010-01-18.orig/arch/x86/kernel/setup_percpu.c 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup_percpu.c 2009-11-06 10:52:02.000000000 +0100 +@@ -224,7 +224,7 @@ void __init setup_per_cpu_areas(void) + * are zeroed indicating that the static arrays are + * gone. + */ +-#ifdef CONFIG_X86_LOCAL_APIC ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); + per_cpu(x86_bios_cpu_apicid, cpu) = +@@ -248,7 +248,7 @@ void __init setup_per_cpu_areas(void) + } + + /* indicate the early static arrays will soon be gone */ +-#ifdef CONFIG_X86_LOCAL_APIC ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; + #endif +--- head-2010-01-18.orig/arch/x86/kernel/smp-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/smp-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -2,7 +2,7 @@ + * Intel SMP support routines. + * + * (c) 1995 Alan Cox, Building #3 +- * (c) 1998-99, 2000 Ingo Molnar ++ * (c) 1998-99, 2000, 2009 Ingo Molnar + * (c) 2002,2003 Andi Kleen, SuSE Labs. 
+ * + * i386 and x86_64 integration by Glauber Costa +@@ -26,7 +26,7 @@ + #include + #include + #include +-#include ++#include + #include + /* + * Some notes on x86 processor bugs affecting SMP operation: +@@ -118,17 +118,17 @@ void xen_smp_send_reschedule(int cpu) + WARN_ON(1); + return; + } +- send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); ++ xen_send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + } + + void xen_send_call_func_single_ipi(int cpu) + { +- send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_VECTOR); ++ xen_send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_VECTOR); + } + + void xen_send_call_func_ipi(const struct cpumask *mask) + { +- send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR); ++ xen_send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR); + } + + /* +--- head-2010-01-18.orig/arch/x86/kernel/time-xen.c 2009-11-23 10:41:53.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/time-xen.c 2009-11-23 10:43:22.000000000 +0100 +@@ -577,7 +577,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); + + static cycle_t cs_last; + +-static cycle_t xen_clocksource_read(void) ++static cycle_t xen_clocksource_read(struct clocksource *cs) + { + #ifdef CONFIG_SMP + cycle_t last = get64(&cs_last); +--- head-2010-01-18.orig/arch/x86/kernel/traps-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/traps-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -54,15 +54,14 @@ + #include + #include + +-#include ++#include + + #ifdef CONFIG_X86_64 + #include + #include +-#include + #else + #include +-#include ++#include + #include + + #include "cpu/mcheck/mce.h" +@@ -123,49 +122,6 @@ die_if_kernel(const char *str, struct pt + if (!user_mode_vm(regs)) + die(str, regs, err); + } +- +-/* +- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an +- * invalid offset set (the LAZY one) and the faulting thread has +- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, +- * we set the offset field correctly and return 1. +- */ +-static int lazy_iobitmap_copy(void) +-{ +-#ifndef CONFIG_XEN +- struct thread_struct *thread; +- struct tss_struct *tss; +- int cpu; +- +- cpu = get_cpu(); +- tss = &per_cpu(init_tss, cpu); +- thread = ¤t->thread; +- +- if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && +- thread->io_bitmap_ptr) { +- memcpy(tss->io_bitmap, thread->io_bitmap_ptr, +- thread->io_bitmap_max); +- /* +- * If the previously set map was extending to higher ports +- * than the current one, pad extra space with 0xff (no access). 
+- */ +- if (thread->io_bitmap_max < tss->io_bitmap_max) { +- memset((char *) tss->io_bitmap + +- thread->io_bitmap_max, 0xff, +- tss->io_bitmap_max - thread->io_bitmap_max); +- } +- tss->io_bitmap_max = thread->io_bitmap_max; +- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; +- tss->io_bitmap_owner = thread; +- put_cpu(); +- +- return 1; +- } +- put_cpu(); +-#endif +- +- return 0; +-} + #endif + + static void __kprobes +@@ -316,11 +272,6 @@ do_general_protection(struct pt_regs *re + conditional_sti(regs); + + #ifdef CONFIG_X86_32 +- if (lazy_iobitmap_copy()) { +- /* restart the faulting instruction */ +- return; +- } +- + if (regs->flags & X86_VM_MASK) + goto gp_in_vm86; + #endif +@@ -911,19 +862,20 @@ void math_emulate(struct math_emu_info * + } + #endif /* CONFIG_MATH_EMULATION */ + +-dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) ++dotraplinkage void __kprobes ++do_device_not_available(struct pt_regs *regs, long error_code) + { + #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) + if (read_cr0() & X86_CR0_EM) { + struct math_emu_info info = { }; + +- conditional_sti(®s); ++ conditional_sti(regs); + +- info.regs = ®s; ++ info.regs = regs; + math_emulate(&info); + } else { + math_state_restore(); /* interrupts still off */ +- conditional_sti(®s); ++ conditional_sti(regs); + } + #else + math_state_restore(); +@@ -939,7 +891,7 @@ dotraplinkage void do_iret_error(struct + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_BADSTK; +- info.si_addr = 0; ++ info.si_addr = NULL; + if (notify_die(DIE_TRAP, "iret exception", + regs, error_code, 32, SIGILL) == NOTIFY_STOP) + return; +--- head-2010-01-18.orig/arch/x86/kernel/vmlinux.lds.S 2009-12-04 10:51:06.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/vmlinux.lds.S 2009-11-06 10:52:02.000000000 +0100 +@@ -16,8 +16,10 @@ + + #ifdef CONFIG_X86_32 + #define LOAD_OFFSET __PAGE_OFFSET +-#else ++#elif !defined(CONFIG_XEN) || CONFIG_XEN_COMPAT > 0x030002 + #define LOAD_OFFSET __START_KERNEL_map ++#else ++#define LOAD_OFFSET 0 + #endif + + #include +--- head-2010-01-18.orig/arch/x86/mach-xen/Makefile 2007-06-12 13:12:48.000000000 +0200 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,5 +0,0 @@ +-# +-# Makefile for the linux kernel. +-# +- +-obj-y := setup.o +--- head-2010-01-18.orig/arch/x86/mach-xen/setup.c 2009-12-04 11:23:33.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,190 +0,0 @@ +-/* +- * Machine specific setup for generic +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#ifdef CONFIG_X86_32 +- +-#ifdef CONFIG_HOTPLUG_CPU +-#define DEFAULT_SEND_IPI (1) +-#else +-#define DEFAULT_SEND_IPI (0) +-#endif +- +-int no_broadcast=DEFAULT_SEND_IPI; +- +-static __init int no_ipi_broadcast(char *str) +-{ +- get_option(&str, &no_broadcast); +- printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : +- "IPI Broadcast"); +- return 1; +-} +- +-__setup("no_ipi_broadcast", no_ipi_broadcast); +- +-static int __init print_ipi_mode(void) +-{ +- printk ("Using IPI %s mode\n", no_broadcast ? 
"No-Shortcut" : +- "Shortcut"); +- return 0; +-} +- +-late_initcall(print_ipi_mode); +- +-unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +-EXPORT_SYMBOL(machine_to_phys_mapping); +-unsigned int machine_to_phys_order; +-EXPORT_SYMBOL(machine_to_phys_order); +- +-void __init pre_setup_arch_hook(void) +-{ +- struct xen_machphys_mapping mapping; +- unsigned long machine_to_phys_nr_ents; +- struct xen_platform_parameters pp; +- +- init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base; +- +- xen_setup_features(); +- +- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) { +- hypervisor_virt_start = pp.virt_start; +- reserve_top_address(0UL - pp.virt_start); +- } +- +- if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { +- machine_to_phys_mapping = (unsigned long *)mapping.v_start; +- machine_to_phys_nr_ents = mapping.max_mfn + 1; +- } else +- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; +- machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +- +- if (!xen_feature(XENFEAT_auto_translated_physmap)) +- phys_to_machine_mapping = +- (unsigned long *)xen_start_info->mfn_list; +-} +- +-#endif /* CONFIG_X86_32 */ +- +-extern void hypervisor_callback(void); +-extern void failsafe_callback(void); +-extern void nmi(void); +- +-#ifdef CONFIG_X86_64 +-#include +-#define CALLBACK_ADDR(fn) ((unsigned long)(fn)) +-#else +-#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) } +-#endif +- +-void __init machine_specific_arch_setup(void) +-{ +- int ret; +- static struct callback_register __initdata event = { +- .type = CALLBACKTYPE_event, +- .address = CALLBACK_ADDR(hypervisor_callback) +- }; +- static struct callback_register __initdata failsafe = { +- .type = CALLBACKTYPE_failsafe, +- .address = CALLBACK_ADDR(failsafe_callback) +- }; +-#ifdef CONFIG_X86_64 +- static struct callback_register __initdata syscall = { +- .type = CALLBACKTYPE_syscall, +- .address = CALLBACK_ADDR(system_call) +- }; +-#endif +-#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32) +- static struct callback_register __initdata nmi_cb = { +- .type = CALLBACKTYPE_nmi, +- .address = CALLBACK_ADDR(nmi) +- }; +-#endif +- +- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); +- if (ret == 0) +- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); +-#ifdef CONFIG_X86_64 +- if (ret == 0) +- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); +-#endif +-#if CONFIG_XEN_COMPAT <= 0x030002 +-#ifdef CONFIG_X86_32 +- if (ret == -ENOSYS) +- ret = HYPERVISOR_set_callbacks( +- event.address.cs, event.address.eip, +- failsafe.address.cs, failsafe.address.eip); +-#else +- ret = HYPERVISOR_set_callbacks( +- event.address, +- failsafe.address, +- syscall.address); +-#endif +-#endif +- BUG_ON(ret); +- +-#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32) +- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb); +-#if CONFIG_XEN_COMPAT <= 0x030002 +- if (ret == -ENOSYS) { +- static struct xennmi_callback __initdata cb = { +- .handler_address = (unsigned long)nmi +- }; +- +- HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); +- } +-#endif +-#endif +- +-#ifdef CONFIG_X86_32 +- /* Do an early initialization of the fixmap area */ +- { +- extern pte_t swapper_pg_fixmap[PTRS_PER_PTE]; +- unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); +- pud_t *pud = pud_offset(swapper_pg_dir + pgd_index(addr), addr); +- pmd_t *pmd = pmd_offset(pud, addr); +- unsigned int i; +- +- make_lowmem_page_readonly(swapper_pg_fixmap, 
XENFEAT_writable_page_tables); +- set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE)); +- +-#define __FIXADDR_TOP (-PAGE_SIZE) +-#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \ +- != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE))) +- FIX_BUG_ON(SHARED_INFO); +- FIX_BUG_ON(ISAMAP_BEGIN); +- FIX_BUG_ON(ISAMAP_END); +-#undef __FIXADDR_TOP +- BUG_ON(pte_index(hypervisor_virt_start)); +- +- /* Switch to the real shared_info page, and clear the +- * dummy page. */ +- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); +- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); +- memset(empty_zero_page, 0, sizeof(empty_zero_page)); +- +- /* Setup mapping of lower 1st MB */ +- for (i = 0; i < NR_FIX_ISAMAPS; i++) +- if (is_initial_xendomain()) +- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); +- else +- __set_fixmap(FIX_ISAMAP_BEGIN - i, +- virt_to_machine(empty_zero_page), +- PAGE_KERNEL_RO); +- } +-#endif +-} +--- head-2010-01-18.orig/arch/x86/mm/Makefile 2009-12-04 10:44:45.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -26,5 +26,6 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64. + obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o + + obj-$(CONFIG_XEN) += hypervisor.o ++disabled-obj-$(CONFIG_XEN) := tlb.o + + obj-$(CONFIG_MEMTEST) += memtest.o +--- head-2010-01-18.orig/arch/x86/mm/fault-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/fault-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -1,73 +1,79 @@ + /* + * Copyright (C) 1995 Linus Torvalds +- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. ++ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. ++ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar + */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include +-#include +-#include +-#include /* For unblank_screen() */ ++#include ++#include + #include + #include +-#include /* for max_low_pfn */ +-#include +-#include + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include + +-#include +-#include +-#include +-#include +-#include + #include ++#include ++#include ++#include + #include +-#include + #include ++#include + + /* +- * Page fault error code bits +- * bit 0 == 0 means no page found, 1 means protection fault +- * bit 1 == 0 means read, 1 means write +- * bit 2 == 0 means kernel, 1 means user-mode +- * bit 3 == 1 means use of reserved bit detected +- * bit 4 == 1 means fault was an instruction fetch +- */ +-#define PF_PROT (1<<0) +-#define PF_WRITE (1<<1) +-#define PF_USER (1<<2) +-#define PF_RSVD (1<<3) +-#define PF_INSTR (1<<4) ++ * Page fault error code bits: ++ * ++ * bit 0 == 0: no page found 1: protection fault ++ * bit 1 == 0: read access 1: write access ++ * bit 2 == 0: kernel-mode access 1: user-mode access ++ * bit 3 == 1: use of reserved bit detected ++ * bit 4 == 1: fault was an instruction fetch ++ */ ++enum x86_pf_error_code { + ++ PF_PROT = 1 << 0, ++ PF_WRITE = 1 << 1, ++ PF_USER = 1 << 2, ++ PF_RSVD = 1 << 3, ++ PF_INSTR = 1 << 4, ++}; ++ ++/* ++ * Returns 0 if mmiotrace is disabled, or if the fault is not ++ * handled by mmiotrace: ++ */ + static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) + { +-#ifdef CONFIG_MMIOTRACE + if (unlikely(is_kmmio_active())) + if (kmmio_handler(regs, addr) 
== 1) + return -1; +-#endif + return 0; + } + + static inline int notify_page_fault(struct pt_regs *regs) + { +-#ifdef CONFIG_KPROBES + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ +- if (!user_mode_vm(regs)) { ++ if (kprobes_built_in() && !user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; +@@ -75,29 +81,76 @@ static inline int notify_page_fault(stru + } + + return ret; +-#else +- return 0; +-#endif + } + + /* +- * X86_32 +- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. +- * Check that here and ignore it. +- * +- * X86_64 +- * Sometimes the CPU reports invalid exceptions on prefetch. +- * Check that here and ignore it. ++ * Prefetch quirks: ++ * ++ * 32-bit mode: + * +- * Opcode checker based on code by Richard Brunner ++ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * 64-bit mode: ++ * ++ * Sometimes the CPU reports invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * Opcode checker based on code by Richard Brunner. + */ +-static int is_prefetch(struct pt_regs *regs, unsigned long addr, +- unsigned long error_code) ++static inline int ++check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, ++ unsigned char opcode, int *prefetch) + { ++ unsigned char instr_hi = opcode & 0xf0; ++ unsigned char instr_lo = opcode & 0x0f; ++ ++ switch (instr_hi) { ++ case 0x20: ++ case 0x30: ++ /* ++ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. ++ * In X86_64 long mode, the CPU will signal invalid ++ * opcode if some of these prefixes are present so ++ * X86_64 will never get here anyway ++ */ ++ return ((instr_lo & 7) == 0x6); ++#ifdef CONFIG_X86_64 ++ case 0x40: ++ /* ++ * In AMD64 long mode 0x40..0x4F are valid REX prefixes ++ * Need to figure out under what instruction mode the ++ * instruction was issued. Could check the LDT for lm, ++ * but for now it's good enough to assume that long ++ * mode only uses well known segments or kernel. ++ */ ++ return (!user_mode(regs)) || (regs->cs == __USER_CS); ++#endif ++ case 0x60: ++ /* 0x64 thru 0x67 are valid prefixes in all modes. */ ++ return (instr_lo & 0xC) == 0x4; ++ case 0xF0: ++ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. 
*/ ++ return !instr_lo || (instr_lo>>1) == 1; ++ case 0x00: ++ /* Prefetch instruction is 0x0F0D or 0x0F18 */ ++ if (probe_kernel_address(instr, opcode)) ++ return 0; ++ ++ *prefetch = (instr_lo == 0xF) && ++ (opcode == 0x0D || opcode == 0x18); ++ return 0; ++ default: ++ return 0; ++ } ++} ++ ++static int ++is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) ++{ ++ unsigned char *max_instr; + unsigned char *instr; +- int scan_more = 1; + int prefetch = 0; +- unsigned char *max_instr; + + /* + * If it was a exec (instruction fetch) fault on NX page, then +@@ -106,99 +159,174 @@ static int is_prefetch(struct pt_regs *r + if (error_code & PF_INSTR) + return 0; + +- instr = (unsigned char *)convert_ip_to_linear(current, regs); ++ instr = (void *)convert_ip_to_linear(current, regs); + max_instr = instr + 15; + + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) + return 0; + +- while (scan_more && instr < max_instr) { ++ while (instr < max_instr) { + unsigned char opcode; +- unsigned char instr_hi; +- unsigned char instr_lo; + + if (probe_kernel_address(instr, opcode)) + break; + +- instr_hi = opcode & 0xf0; +- instr_lo = opcode & 0x0f; + instr++; + +- switch (instr_hi) { +- case 0x20: +- case 0x30: +- /* +- * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. +- * In X86_64 long mode, the CPU will signal invalid +- * opcode if some of these prefixes are present so +- * X86_64 will never get here anyway +- */ +- scan_more = ((instr_lo & 7) == 0x6); +- break; +-#ifdef CONFIG_X86_64 +- case 0x40: +- /* +- * In AMD64 long mode 0x40..0x4F are valid REX prefixes +- * Need to figure out under what instruction mode the +- * instruction was issued. Could check the LDT for lm, +- * but for now it's good enough to assume that long +- * mode only uses well known segments or kernel. +- */ +- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); ++ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) + break; ++ } ++ return prefetch; ++} ++ ++static void ++force_sig_info_fault(int si_signo, int si_code, unsigned long address, ++ struct task_struct *tsk) ++{ ++ siginfo_t info; ++ ++ info.si_signo = si_signo; ++ info.si_errno = 0; ++ info.si_code = si_code; ++ info.si_addr = (void __user *)address; ++ ++ force_sig_info(si_signo, &info, tsk); ++} ++ ++DEFINE_SPINLOCK(pgd_lock); ++LIST_HEAD(pgd_list); ++ ++#ifdef CONFIG_X86_32 ++static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) ++{ ++ unsigned index = pgd_index(address); ++ pgd_t *pgd_k; ++ pud_t *pud, *pud_k; ++ pmd_t *pmd, *pmd_k; ++ ++ pgd += index; ++ pgd_k = init_mm.pgd + index; ++ ++ if (!pgd_present(*pgd_k)) ++ return NULL; ++ ++ /* ++ * set_pgd(pgd, *pgd_k); here would be useless on PAE ++ * and redundant with the set_pmd() on non-PAE. As would ++ * set_pud. ++ */ ++ pud = pud_offset(pgd, address); ++ pud_k = pud_offset(pgd_k, address); ++ if (!pud_present(*pud_k)) ++ return NULL; ++ ++ pmd = pmd_offset(pud, address); ++ pmd_k = pmd_offset(pud_k, address); ++ if (!pmd_present(*pmd_k)) ++ return NULL; ++ ++ if (!pmd_present(*pmd)) { ++ bool lazy = percpu_read(xen_lazy_mmu); ++ ++ percpu_write(xen_lazy_mmu, false); ++#if CONFIG_XEN_COMPAT > 0x030002 ++ set_pmd(pmd, *pmd_k); ++#else ++ /* ++ * When running on older Xen we must launder *pmd_k through ++ * pmd_val() to ensure that _PAGE_PRESENT is correctly set. ++ */ ++ set_pmd(pmd, __pmd(pmd_val(*pmd_k))); + #endif +- case 0x60: +- /* 0x64 thru 0x67 are valid prefixes in all modes. 
*/ +- scan_more = (instr_lo & 0xC) == 0x4; +- break; +- case 0xF0: +- /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ +- scan_more = !instr_lo || (instr_lo>>1) == 1; +- break; +- case 0x00: +- /* Prefetch instruction is 0x0F0D or 0x0F18 */ +- scan_more = 0; ++ percpu_write(xen_lazy_mmu, lazy); ++ } else { ++ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); ++ } ++ ++ return pmd_k; ++} ++ ++void vmalloc_sync_all(void) ++{ ++ unsigned long address; ++ ++ if (SHARED_KERNEL_PMD) ++ return; ++ ++ for (address = VMALLOC_START & PMD_MASK; ++ address >= TASK_SIZE && address < FIXADDR_TOP; ++ address += PMD_SIZE) { ++ ++ unsigned long flags; ++ struct page *page; + +- if (probe_kernel_address(instr, opcode)) ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) { ++ if (!vmalloc_sync_one(page_address(page), address)) + break; +- prefetch = (instr_lo == 0xF) && +- (opcode == 0x0D || opcode == 0x18); +- break; +- default: +- scan_more = 0; +- break; + } ++ spin_unlock_irqrestore(&pgd_lock, flags); + } +- return prefetch; + } + +-static void force_sig_info_fault(int si_signo, int si_code, +- unsigned long address, struct task_struct *tsk) ++/* ++ * 32-bit: ++ * ++ * Handle a fault on the vmalloc or module mapping area ++ */ ++static noinline int vmalloc_fault(unsigned long address) + { +- siginfo_t info; ++ unsigned long pgd_paddr; ++ pmd_t *pmd_k; ++ pte_t *pte_k; + +- info.si_signo = si_signo; +- info.si_errno = 0; +- info.si_code = si_code; +- info.si_addr = (void __user *)address; +- force_sig_info(si_signo, &info, tsk); ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Synchronize this task's top level page-table ++ * with the 'reference' page table. ++ * ++ * Do _not_ use "current" here. We might be inside ++ * an interrupt in the middle of a task switch.. ++ */ ++ pgd_paddr = read_cr3(); ++ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); ++ if (!pmd_k) ++ return -1; ++ ++ pte_k = pte_offset_kernel(pmd_k, address); ++ if (!pte_present(*pte_k)) ++ return -1; ++ ++ return 0; + } + +-#ifdef CONFIG_X86_64 +-static int bad_address(void *p) ++/* ++ * Did it hit the DOS screen memory VA from vm86 mode? ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) + { +- unsigned long dummy; +- return probe_kernel_address((unsigned long *)p, dummy); ++ unsigned long bit; ++ ++ if (!v8086_mode(regs)) ++ return; ++ ++ bit = (address - 0xA0000) >> PAGE_SHIFT; ++ if (bit < 32) ++ tsk->thread.screen_bitmap |= 1 << bit; + } +-#endif + + static void dump_pagetable(unsigned long address) + { +-#ifdef CONFIG_X86_32 + __typeof__(pte_val(__pte(0))) page; + + page = read_cr3(); + page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; ++ + #ifdef CONFIG_X86_PAE + printk("*pdpt = %016Lx ", page); + if ((page & _PAGE_PRESENT) +@@ -206,7 +334,7 @@ static void dump_pagetable(unsigned long + page = mfn_to_pfn(page >> PAGE_SHIFT); + page <<= PAGE_SHIFT; + page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) +- & (PTRS_PER_PMD - 1)]; ++ & (PTRS_PER_PMD - 1)]; + printk(KERN_CONT "*pde = %016Lx ", page); + page &= ~_PAGE_NX; + } +@@ -218,20 +346,146 @@ static void dump_pagetable(unsigned long + * We must not directly access the pte in the highpte + * case if the page table is located in highmem. + * And let's rather not kmap-atomic the pte, just in case +- * it's allocated already. 
++ * it's allocated already: + */ + if ((page & _PAGE_PRESENT) + && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn + && !(page & _PAGE_PSE)) { ++ + page = mfn_to_pfn(page >> PAGE_SHIFT); + page <<= PAGE_SHIFT; + page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) +- & (PTRS_PER_PTE - 1)]; ++ & (PTRS_PER_PTE - 1)]; + printk(KERN_CONT "*pte = %0*Lx ", sizeof(page)*2, (u64)page); + } + + printk(KERN_CONT "\n"); +-#else /* CONFIG_X86_64 */ ++} ++ ++#else /* CONFIG_X86_64: */ ++ ++void vmalloc_sync_all(void) ++{ ++ unsigned long address; ++ ++ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; ++ address += PGDIR_SIZE) { ++ ++ const pgd_t *pgd_ref = pgd_offset_k(address); ++ unsigned long flags; ++ struct page *page; ++ ++ if (pgd_none(*pgd_ref)) ++ continue; ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) { ++ pgd_t *pgd; ++ pgd = (pgd_t *)page_address(page) + pgd_index(address); ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ else ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++ } ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++} ++ ++/* ++ * 64-bit: ++ * ++ * Handle a fault on the vmalloc area ++ * ++ * This assumes no large pages in there. ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ pgd_t *pgd, *pgd_ref; ++ pud_t *pud, *pud_ref; ++ pmd_t *pmd, *pmd_ref; ++ pte_t *pte, *pte_ref; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Copy kernel mappings over when needed. This can also ++ * happen within a race in page table update. In the later ++ * case just flush: ++ */ ++ pgd = pgd_offset(current->active_mm, address); ++ pgd_ref = pgd_offset_k(address); ++ if (pgd_none(*pgd_ref)) ++ return -1; ++ ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ else ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++ ++ /* ++ * Below here mismatches are bugs because these lower tables ++ * are shared: ++ */ ++ ++ pud = pud_offset(pgd, address); ++ pud_ref = pud_offset(pgd_ref, address); ++ if (pud_none(*pud_ref)) ++ return -1; ++ ++ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) ++ BUG(); ++ ++ pmd = pmd_offset(pud, address); ++ pmd_ref = pmd_offset(pud_ref, address); ++ if (pmd_none(*pmd_ref)) ++ return -1; ++ ++ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) ++ BUG(); ++ ++ pte_ref = pte_offset_kernel(pmd_ref, address); ++ if (!pte_present(*pte_ref)) ++ return -1; ++ ++ pte = pte_offset_kernel(pmd, address); ++ ++ /* ++ * Don't use pte_page here, because the mappings can point ++ * outside mem_map, and the NUMA hash lookup cannot handle ++ * that: ++ */ ++ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) ++ BUG(); ++ ++ return 0; ++} ++ ++static const char errata93_warning[] = ++KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" ++KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" ++KERN_ERR "******* Please consider a BIOS update.\n" ++KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; ++ ++/* ++ * No vm86 mode in 64-bit mode: ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++} ++ ++static int bad_address(void *p) ++{ ++ unsigned long dummy; ++ ++ return probe_kernel_address((unsigned long *)p, dummy); ++} ++ ++static void dump_pagetable(unsigned long address) ++{ + pgd_t *pgd; + pud_t *pud; + 
pmd_t *pmd; +@@ -240,113 +494,77 @@ static void dump_pagetable(unsigned long + pgd = (pgd_t *)read_cr3(); + + pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); ++ + pgd += pgd_index(address); +- if (bad_address(pgd)) goto bad; ++ if (bad_address(pgd)) ++ goto bad; ++ + printk("PGD %lx ", pgd_val(*pgd)); +- if (!pgd_present(*pgd)) goto ret; ++ ++ if (!pgd_present(*pgd)) ++ goto out; + + pud = pud_offset(pgd, address); +- if (bad_address(pud)) goto bad; ++ if (bad_address(pud)) ++ goto bad; ++ + printk(KERN_CONT "PUD %lx ", pud_val(*pud)); + if (!pud_present(*pud) || pud_large(*pud)) +- goto ret; ++ goto out; + + pmd = pmd_offset(pud, address); +- if (bad_address(pmd)) goto bad; ++ if (bad_address(pmd)) ++ goto bad; ++ + printk(KERN_CONT "PMD %lx ", pmd_val(*pmd)); +- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; ++ if (!pmd_present(*pmd) || pmd_large(*pmd)) ++ goto out; + + pte = pte_offset_kernel(pmd, address); +- if (bad_address(pte)) goto bad; ++ if (bad_address(pte)) ++ goto bad; ++ + printk(KERN_CONT "PTE %lx", pte_val(*pte)); +-ret: ++out: + printk(KERN_CONT "\n"); + return; + bad: + printk("BAD\n"); +-#endif +-} +- +-#ifdef CONFIG_X86_32 +-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +-{ +- unsigned index = pgd_index(address); +- pgd_t *pgd_k; +- pud_t *pud, *pud_k; +- pmd_t *pmd, *pmd_k; +- +- pgd += index; +- pgd_k = init_mm.pgd + index; +- +- if (!pgd_present(*pgd_k)) +- return NULL; +- +- /* +- * set_pgd(pgd, *pgd_k); here would be useless on PAE +- * and redundant with the set_pmd() on non-PAE. As would +- * set_pud. +- */ +- +- pud = pud_offset(pgd, address); +- pud_k = pud_offset(pgd_k, address); +- if (!pud_present(*pud_k)) +- return NULL; +- +- pmd = pmd_offset(pud, address); +- pmd_k = pmd_offset(pud_k, address); +- if (!pmd_present(*pmd_k)) +- return NULL; +- if (!pmd_present(*pmd)) { +- bool lazy = x86_read_percpu(xen_lazy_mmu); +- +- x86_write_percpu(xen_lazy_mmu, false); +-#if CONFIG_XEN_COMPAT > 0x030002 +- set_pmd(pmd, *pmd_k); +-#else +- /* +- * When running on older Xen we must launder *pmd_k through +- * pmd_val() to ensure that _PAGE_PRESENT is correctly set. +- */ +- set_pmd(pmd, __pmd(pmd_val(*pmd_k))); +-#endif +- x86_write_percpu(xen_lazy_mmu, lazy); +- } else +- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); +- return pmd_k; + } +-#endif + +-#ifdef CONFIG_X86_64 +-static const char errata93_warning[] = +-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" +-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" +-KERN_ERR "******* Please consider a BIOS update.\n" +-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; +-#endif ++#endif /* CONFIG_X86_64 */ + +-/* Workaround for K8 erratum #93 & buggy BIOS. +- BIOS SMM functions are required to use a specific workaround +- to avoid corruption of the 64bit RIP register on C stepping K8. +- A lot of BIOS that didn't get tested properly miss this. +- The OS sees this as a page fault with the upper 32bits of RIP cleared. +- Try to work around it here. +- Note we only handle faults in kernel here. +- Does nothing for X86_32 ++/* ++ * Workaround for K8 erratum #93 & buggy BIOS. ++ * ++ * BIOS SMM functions are required to use a specific workaround ++ * to avoid corruption of the 64bit RIP register on C stepping K8. ++ * ++ * A lot of BIOS that didn't get tested properly miss this. ++ * ++ * The OS sees this as a page fault with the upper 32bits of RIP cleared. ++ * Try to work around it here. 
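
The symptom is easiest to see with concrete numbers. A minimal illustration with made-up addresses (not part of the patch; only the OR of the upper bits mirrors what is_errata93() below actually does):

	unsigned long rip = 0xffffffff80123456UL;	/* real 64-bit kernel RIP */
	unsigned long cr2 = rip & 0xffffffffUL;		/* buggy SMM handler clears bits 63..32 */

	cr2 |= 0xffffffffUL << 32;			/* is_errata93() re-extends the address */
	/* cr2 == rip again; if it lands in _stext.._etext or the module
	 * range, regs->ip is rewritten and execution simply resumes. */
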
++ * ++ * Note we only handle faults in kernel here. ++ * Does nothing on 32-bit. + */ + static int is_errata93(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_64 +- static int warned; ++ static int once; ++ + if (address != regs->ip) + return 0; ++ + if ((address >> 32) != 0) + return 0; ++ + address |= 0xffffffffUL << 32; + if ((address >= (u64)_stext && address <= (u64)_etext) || + (address >= MODULES_VADDR && address <= MODULES_END)) { +- if (!warned) { ++ if (!once) { + printk(errata93_warning); +- warned = 1; ++ once = 1; + } + regs->ip = address; + return 1; +@@ -356,16 +574,17 @@ static int is_errata93(struct pt_regs *r + } + + /* +- * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal +- * addresses >4GB. We catch this in the page fault handler because these +- * addresses are not reachable. Just detect this case and return. Any code ++ * Work around K8 erratum #100 K8 in compat mode occasionally jumps ++ * to illegal addresses >4GB. ++ * ++ * We catch this in the page fault handler because these addresses ++ * are not reachable. Just detect this case and return. Any code + * segment in LDT is compatibility mode. + */ + static int is_errata100(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_64 +- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && +- (address >> 32)) ++ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) + return 1; + #endif + return 0; +@@ -375,8 +594,9 @@ static int is_f00f_bug(struct pt_regs *r + { + #ifdef CONFIG_X86_F00F_BUG + unsigned long nr; ++ + /* +- * Pentium F0 0F C7 C8 bug workaround. ++ * Pentium F0 0F C7 C8 bug workaround: + */ + if (boot_cpu_data.f00f_bug) { + nr = (address - idt_descr.address) >> 3; +@@ -390,62 +610,277 @@ static int is_f00f_bug(struct pt_regs *r + return 0; + } + +-static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, +- unsigned long address) ++static const char nx_warning[] = KERN_CRIT ++"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; ++ ++static void ++show_fault_oops(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) + { +-#ifdef CONFIG_X86_32 + if (!oops_may_print()) + return; +-#endif + +-#ifdef CONFIG_X86_PAE + if (error_code & PF_INSTR) { + unsigned int level; ++ + pte_t *pte = lookup_address(address, &level); + + if (pte && pte_present(*pte) && !pte_exec(*pte)) +- printk(KERN_CRIT "kernel tried to execute " +- "NX-protected page - exploit attempt? 
" +- "(uid: %d)\n", current_uid()); ++ printk(nx_warning, current_uid()); + } +-#endif + + printk(KERN_ALERT "BUG: unable to handle kernel "); + if (address < PAGE_SIZE) + printk(KERN_CONT "NULL pointer dereference"); + else + printk(KERN_CONT "paging request"); ++ + printk(KERN_CONT " at %p\n", (void *) address); + printk(KERN_ALERT "IP:"); + printk_address(regs->ip, 1); ++ + dump_pagetable(address); + } + +-#ifdef CONFIG_X86_64 +-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, +- unsigned long error_code) ++static noinline void ++pgtable_bad(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) + { +- unsigned long flags = oops_begin(); +- int sig = SIGKILL; + struct task_struct *tsk; ++ unsigned long flags; ++ int sig; ++ ++ flags = oops_begin(); ++ tsk = current; ++ sig = SIGKILL; + + printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", +- current->comm, address); ++ tsk->comm, address); + dump_pagetable(address); +- tsk = current; +- tsk->thread.cr2 = address; +- tsk->thread.trap_no = 14; +- tsk->thread.error_code = error_code; ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_no = 14; ++ tsk->thread.error_code = error_code; ++ + if (__die("Bad pagetable", regs, error_code)) + sig = 0; ++ + oops_end(flags, regs, sig); + } +-#endif ++ ++static noinline void ++no_context(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ struct task_struct *tsk = current; ++ unsigned long *stackend; ++ unsigned long flags; ++ int sig; ++ ++ /* Are we prepared to handle this kernel fault? */ ++ if (fixup_exception(regs)) ++ return; ++ ++ /* ++ * 32-bit: ++ * ++ * Valid to do another page fault here, because if this fault ++ * had been triggered by is_prefetch fixup_exception would have ++ * handled it. ++ * ++ * 64-bit: ++ * ++ * Hall of shame of CPU/BIOS bugs. ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata93(regs, address)) ++ return; ++ ++ /* ++ * Oops. The kernel tried to access some bad page. We'll have to ++ * terminate things with extreme prejudice: ++ */ ++ flags = oops_begin(); ++ ++ show_fault_oops(regs, error_code, address); ++ ++ stackend = end_of_stack(tsk); ++ if (*stackend != STACK_END_MAGIC) ++ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_no = 14; ++ tsk->thread.error_code = error_code; ++ ++ sig = SIGKILL; ++ if (__die("Oops", regs, error_code)) ++ sig = 0; ++ ++ /* Executive summary in case the body of the oops scrolled away */ ++ printk(KERN_EMERG "CR2: %016lx\n", address); ++ ++ oops_end(flags, regs, sig); ++} ++ ++/* ++ * Print out info about fatal segfaults, if the show_unhandled_signals ++ * sysctl is set: ++ */ ++static inline void ++show_signal_msg(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct task_struct *tsk) ++{ ++ if (!unhandled_signal(tsk, SIGSEGV)) ++ return; ++ ++ if (!printk_ratelimit()) ++ return; ++ ++ printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", ++ task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, ++ tsk->comm, task_pid_nr(tsk), address, ++ (void *)regs->ip, (void *)regs->sp, error_code); ++ ++ print_vma_addr(KERN_CONT " in ", regs->ip); ++ ++ printk(KERN_CONT "\n"); ++} ++ ++static void ++__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, int si_code) ++{ ++ struct task_struct *tsk = current; ++ ++ /* User mode accesses just cause a SIGSEGV */ ++ if (error_code & PF_USER) { ++ /* ++ * It's possible to have interrupts off here: ++ */ ++ local_irq_enable(); ++ ++ /* ++ * Valid to do another page fault here because this one came ++ * from user space: ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata100(regs, address)) ++ return; ++ ++ if (unlikely(show_unhandled_signals)) ++ show_signal_msg(regs, error_code, address, tsk); ++ ++ /* Kernel addresses are always protection faults: */ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code | (address >= TASK_SIZE); ++ tsk->thread.trap_no = 14; ++ ++ force_sig_info_fault(SIGSEGV, si_code, address, tsk); ++ ++ return; ++ } ++ ++ if (is_f00f_bug(regs, address)) ++ return; ++ ++ no_context(regs, error_code, address); ++} ++ ++static noinline void ++bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); ++} ++ ++static void ++__bad_area(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, int si_code) ++{ ++ struct mm_struct *mm = current->mm; ++ ++ /* ++ * Something tried to access memory that isn't in our memory map.. ++ * Fix it, but check if it's kernel or user first.. ++ */ ++ up_read(&mm->mmap_sem); ++ ++ __bad_area_nosemaphore(regs, error_code, address, si_code); ++} ++ ++static noinline void ++bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) ++{ ++ __bad_area(regs, error_code, address, SEGV_MAPERR); ++} ++ ++static noinline void ++bad_area_access_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ __bad_area(regs, error_code, address, SEGV_ACCERR); ++} ++ ++/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ ++static void ++out_of_memory(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ /* ++ * We ran out of memory, call the OOM killer, and return the userspace ++ * (which will retry the fault, or kill us if we got oom-killed): ++ */ ++ up_read(¤t->mm->mmap_sem); ++ ++ pagefault_out_of_memory(); ++} ++ ++static void ++do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) ++{ ++ struct task_struct *tsk = current; ++ struct mm_struct *mm = tsk->mm; ++ ++ up_read(&mm->mmap_sem); ++ ++ /* Kernel mode? 
Handle exceptions or die: */ ++ if (!(error_code & PF_USER)) ++ no_context(regs, error_code, address); ++ ++ /* User-space => ok to do another page fault: */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 14; ++ ++ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); ++} ++ ++static noinline void ++mm_fault_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, unsigned int fault) ++{ ++ if (fault & VM_FAULT_OOM) { ++ out_of_memory(regs, error_code, address); ++ } else { ++ if (fault & VM_FAULT_SIGBUS) ++ do_sigbus(regs, error_code, address); ++ else ++ BUG(); ++ } ++} + + static int spurious_fault_check(unsigned long error_code, pte_t *pte) + { + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; ++ + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + +@@ -453,21 +888,25 @@ static int spurious_fault_check(unsigned + } + + /* +- * Handle a spurious fault caused by a stale TLB entry. This allows +- * us to lazily refresh the TLB when increasing the permissions of a +- * kernel page (RO -> RW or NX -> X). Doing it eagerly is very +- * expensive since that implies doing a full cross-processor TLB +- * flush, even if no stale TLB entries exist on other processors. ++ * Handle a spurious fault caused by a stale TLB entry. ++ * ++ * This allows us to lazily refresh the TLB when increasing the ++ * permissions of a kernel page (RO -> RW or NX -> X). Doing it ++ * eagerly is very expensive since that implies doing a full ++ * cross-processor TLB flush, even if no stale TLB entries exist ++ * on other processors. ++ * + * There are no security implications to leaving a stale TLB when + * increasing the permissions on a page. + */ +-static int spurious_fault(unsigned long address, +- unsigned long error_code) ++static noinline int ++spurious_fault(unsigned long error_code, unsigned long address) + { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; ++ int ret; + + /* Reserved-bit violation or user access to kernel space? */ + if (error_code & (PF_USER | PF_RSVD)) +@@ -495,117 +934,62 @@ static int spurious_fault(unsigned long + if (!pte_present(*pte)) + return 0; + +- return spurious_fault_check(error_code, pte); +-} +- +-/* +- * X86_32 +- * Handle a fault on the vmalloc or module mapping area +- * +- * X86_64 +- * Handle a fault on the vmalloc area +- * +- * This assumes no large pages in there. +- */ +-static int vmalloc_fault(unsigned long address) +-{ +-#ifdef CONFIG_X86_32 +- unsigned long pgd_paddr; +- pmd_t *pmd_k; +- pte_t *pte_k; +- +- /* Make sure we are in vmalloc area */ +- if (!(address >= VMALLOC_START && address < VMALLOC_END)) +- return -1; ++ ret = spurious_fault_check(error_code, pte); ++ if (!ret) ++ return 0; + + /* +- * Synchronize this task's top level page-table +- * with the 'reference' page table. +- * +- * Do _not_ use "current" here. We might be inside +- * an interrupt in the middle of a task switch.. ++ * Make sure we have permissions in PMD. 
++ * If not, then there's a bug in the page tables: + */ +- pgd_paddr = read_cr3(); +- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); +- if (!pmd_k) +- return -1; +- pte_k = pte_offset_kernel(pmd_k, address); +- if (!pte_present(*pte_k)) +- return -1; +- return 0; +-#else +- pgd_t *pgd, *pgd_ref; +- pud_t *pud, *pud_ref; +- pmd_t *pmd, *pmd_ref; +- pte_t *pte, *pte_ref; ++ ret = spurious_fault_check(error_code, (pte_t *) pmd); ++ WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); + +- /* Make sure we are in vmalloc area */ +- if (!(address >= VMALLOC_START && address < VMALLOC_END)) +- return -1; ++ return ret; ++} + +- /* Copy kernel mappings over when needed. This can also +- happen within a race in page table update. In the later +- case just flush. */ ++int show_unhandled_signals = 1; + +- pgd = pgd_offset(current->active_mm, address); +- pgd_ref = pgd_offset_k(address); +- if (pgd_none(*pgd_ref)) +- return -1; +- if (pgd_none(*pgd)) +- set_pgd(pgd, *pgd_ref); +- else +- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++static inline int ++access_error(unsigned long error_code, int write, struct vm_area_struct *vma) ++{ ++ if (write) { ++ /* write, present and write, not present: */ ++ if (unlikely(!(vma->vm_flags & VM_WRITE))) ++ return 1; ++ return 0; ++ } + +- /* Below here mismatches are bugs because these lower tables +- are shared */ ++ /* read, present: */ ++ if (unlikely(error_code & PF_PROT)) ++ return 1; ++ ++ /* read, not present: */ ++ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) ++ return 1; + +- pud = pud_offset(pgd, address); +- pud_ref = pud_offset(pgd_ref, address); +- if (pud_none(*pud_ref)) +- return -1; +- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) +- BUG(); +- pmd = pmd_offset(pud, address); +- pmd_ref = pmd_offset(pud_ref, address); +- if (pmd_none(*pmd_ref)) +- return -1; +- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) +- BUG(); +- pte_ref = pte_offset_kernel(pmd_ref, address); +- if (!pte_present(*pte_ref)) +- return -1; +- pte = pte_offset_kernel(pmd, address); +- /* Don't use pte_page here, because the mappings can point +- outside mem_map, and the NUMA hash lookup cannot handle +- that. */ +- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) +- BUG(); + return 0; +-#endif + } + +-int show_unhandled_signals = 1; ++static int fault_in_kernel_space(unsigned long address) ++{ ++ return address >= TASK_SIZE_MAX; ++} + + /* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +-#ifdef CONFIG_X86_64 +-asmlinkage +-#endif +-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ++dotraplinkage void __kprobes ++do_page_fault(struct pt_regs *regs, unsigned long error_code) + { +- struct task_struct *tsk; +- struct mm_struct *mm; + struct vm_area_struct *vma; ++ struct task_struct *tsk; + unsigned long address; +- int write, si_code; ++ struct mm_struct *mm; ++ int write; + int fault; +-#ifdef CONFIG_X86_64 +- unsigned long flags; +- int sig; +-#endif + + /* Set the "privileged fault" bit to something sane. 
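
For orientation, the PF_* flags tested throughout these hunks decode the hardware error code pushed by the CPU on a page fault; this mirrors the enum defined near the top of fault.c (the definition itself lies outside this excerpt):

	enum x86_pf_error_code {
		PF_PROT		= 1 << 0,	/* 0: no page found	1: protection fault */
		PF_WRITE	= 1 << 1,	/* 0: read access	1: write access */
		PF_USER		= 1 << 2,	/* 0: kernel-mode	1: user-mode access */
		PF_RSVD		= 1 << 3,	/* use of a reserved page-table bit */
		PF_INSTR	= 1 << 4,	/* fault was an instruction fetch (NX) */
	};
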
*/ + if (user_mode_vm(regs)) +@@ -615,13 +999,12 @@ void __kprobes do_page_fault(struct pt_r + + tsk = current; + mm = tsk->mm; ++ + prefetchw(&mm->mmap_sem); + +- /* get the address */ ++ /* Get the faulting address: */ + address = read_cr2(); + +- si_code = SEGV_MAPERR; +- + if (unlikely(kmmio_fault(regs, address))) + return; + +@@ -638,328 +1021,158 @@ void __kprobes do_page_fault(struct pt_r + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 9) == 0. + */ +-#ifdef CONFIG_X86_32 +- if (unlikely(address >= TASK_SIZE)) { +-#else +- if (unlikely(address >= TASK_SIZE64)) { +-#endif ++ if (unlikely(fault_in_kernel_space(address))) { + /* Faults in hypervisor area can never be patched up. */ + #if defined(CONFIG_X86_XEN) +- if (address >= hypervisor_virt_start) +- goto bad_area_nosemaphore; ++ if (address >= hypervisor_virt_start) { + #elif defined(CONFIG_X86_64_XEN) + if (address >= HYPERVISOR_VIRT_START +- && address < HYPERVISOR_VIRT_END) +- goto bad_area_nosemaphore; ++ && address < HYPERVISOR_VIRT_END) { + #endif ++ bad_area_nosemaphore(regs, error_code, address); ++ return; ++ } ++ + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && + vmalloc_fault(address) >= 0) + return; + +- /* Can handle a stale RO->RW TLB */ +- if (spurious_fault(address, error_code)) ++ /* Can handle a stale RO->RW TLB: */ ++ if (spurious_fault(error_code, address)) + return; + +- /* kprobes don't want to hook the spurious faults. */ ++ /* kprobes don't want to hook the spurious faults: */ + if (notify_page_fault(regs)) + return; + /* + * Don't take the mm semaphore here. If we fixup a prefetch +- * fault we could otherwise deadlock. ++ * fault we could otherwise deadlock: + */ +- goto bad_area_nosemaphore; +- } ++ bad_area_nosemaphore(regs, error_code, address); + +- /* kprobes don't want to hook the spurious faults. */ +- if (notify_page_fault(regs)) + return; ++ } + ++ /* kprobes don't want to hook the spurious faults: */ ++ if (unlikely(notify_page_fault(regs))) ++ return; + /* + * It's safe to allow irq's after cr2 has been saved and the + * vmalloc fault has been handled. + * + * User-mode registers count as a user access even for any +- * potential system fault or CPU buglet. ++ * potential system fault or CPU buglet: + */ + if (user_mode_vm(regs)) { + local_irq_enable(); + error_code |= PF_USER; +- } else if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); ++ } else { ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_enable(); ++ } + +-#ifdef CONFIG_X86_64 + if (unlikely(error_code & PF_RSVD)) +- pgtable_bad(address, regs, error_code); +-#endif ++ pgtable_bad(regs, error_code, address); + + /* +- * If we're in an interrupt, have no user context or are running in an +- * atomic region then we must not take the fault. ++ * If we're in an interrupt, have no user context or are running ++ * in an atomic region then we must not take the fault: + */ +- if (unlikely(in_atomic() || !mm)) +- goto bad_area_nosemaphore; ++ if (unlikely(in_atomic() || !mm)) { ++ bad_area_nosemaphore(regs, error_code, address); ++ return; ++ } + + /* + * When running in the kernel we expect faults to occur only to +- * addresses in user space. All other faults represent errors in the +- * kernel and should generate an OOPS. Unfortunately, in the case of an +- * erroneous fault occurring in a code path which already holds mmap_sem +- * we will deadlock attempting to validate the fault against the +- * address space. 
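
A hypothetical sequence showing the deadlock that the trylock below avoids (illustrative only; bogus_ptr stands for any wild kernel pointer):

	down_write(&mm->mmap_sem);
	*(int *)bogus_ptr = 0;		/* buggy kernel access faults here */
	/*
	 * A plain down_read() in the fault handler would now block forever
	 * on the mmap_sem this task already holds.  down_read_trylock()
	 * fails instead, and because the fault came from kernel mode with
	 * no fixup in the exception table, the handler goes straight to
	 * bad_area_nosemaphore() rather than deadlocking.
	 */
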
Luckily the kernel only validly references user +- * space from well defined areas of code, which are listed in the +- * exceptions table. ++ * addresses in user space. All other faults represent errors in ++ * the kernel and should generate an OOPS. Unfortunately, in the ++ * case of an erroneous fault occurring in a code path which already ++ * holds mmap_sem we will deadlock attempting to validate the fault ++ * against the address space. Luckily the kernel only validly ++ * references user space from well defined areas of code, which are ++ * listed in the exceptions table. + * + * As the vast majority of faults will be valid we will only perform +- * the source reference check when there is a possibility of a deadlock. +- * Attempt to lock the address space, if we cannot we then validate the +- * source. If this is invalid we can skip the address space check, +- * thus avoiding the deadlock. ++ * the source reference check when there is a possibility of a ++ * deadlock. Attempt to lock the address space, if we cannot we then ++ * validate the source. If this is invalid we can skip the address ++ * space check, thus avoiding the deadlock: + */ +- if (!down_read_trylock(&mm->mmap_sem)) { ++ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { + if ((error_code & PF_USER) == 0 && +- !search_exception_tables(regs->ip)) +- goto bad_area_nosemaphore; ++ !search_exception_tables(regs->ip)) { ++ bad_area_nosemaphore(regs, error_code, address); ++ return; ++ } + down_read(&mm->mmap_sem); ++ } else { ++ /* ++ * The above down_read_trylock() might have succeeded in ++ * which case we'll have missed the might_sleep() from ++ * down_read(): ++ */ ++ might_sleep(); + } + + vma = find_vma(mm, address); +- if (!vma) +- goto bad_area; +- if (vma->vm_start <= address) ++ if (unlikely(!vma)) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ if (likely(vma->vm_start <= address)) + goto good_area; +- if (!(vma->vm_flags & VM_GROWSDOWN)) +- goto bad_area; ++ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { ++ bad_area(regs, error_code, address); ++ return; ++ } + if (error_code & PF_USER) { + /* + * Accessing the stack below %sp is always a bug. + * The large cushion allows instructions like enter +- * and pusha to work. ("enter $65535,$31" pushes ++ * and pusha to work. ("enter $65535, $31" pushes + * 32 pointers and then decrements %sp by 65535.) + */ +- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) +- goto bad_area; ++ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { ++ bad_area(regs, error_code, address); ++ return; ++ } + } +- if (expand_stack(vma, address)) +- goto bad_area; +-/* +- * Ok, we have a good vm_area for this memory access, so +- * we can handle it.. +- */ ++ if (unlikely(expand_stack(vma, address))) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ ++ /* ++ * Ok, we have a good vm_area for this memory access, so ++ * we can handle it.. 
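
Working the cushion out for 64-bit (sizeof(unsigned long) == 8), as a sketch of the arithmetic behind the test above:

	/* Worst legitimate access below %sp, "enter $65535, $31":	*/
	/*   32 saved frame pointers:  32 * 8 =   256 bytes		*/
	/*   frame allocation:                  65535 bytes		*/
	/* The test rounds this up to 65536 + 32 * 8 = 65792 bytes, so	*/
	/* only accesses farther below %sp than that are rejected:	*/
	if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
		/* definitely a bug, not stack growth */ ;
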
++ */ + good_area: +- si_code = SEGV_ACCERR; +- write = 0; +- switch (error_code & (PF_PROT|PF_WRITE)) { +- default: /* 3: write, present */ +- /* fall through */ +- case PF_WRITE: /* write, not present */ +- if (!(vma->vm_flags & VM_WRITE)) +- goto bad_area; +- write++; +- break; +- case PF_PROT: /* read, present */ +- goto bad_area; +- case 0: /* read, not present */ +- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) +- goto bad_area; ++ write = error_code & PF_WRITE; ++ ++ if (unlikely(access_error(error_code, write, vma))) { ++ bad_area_access_error(regs, error_code, address); ++ return; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +- * the fault. ++ * the fault: + */ + fault = handle_mm_fault(mm, vma, address, write); ++ + if (unlikely(fault & VM_FAULT_ERROR)) { +- if (fault & VM_FAULT_OOM) +- goto out_of_memory; +- else if (fault & VM_FAULT_SIGBUS) +- goto do_sigbus; +- BUG(); ++ mm_fault_error(regs, error_code, address, fault); ++ return; + } ++ + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + +-#ifdef CONFIG_X86_32 +- /* +- * Did it hit the DOS screen memory VA from vm86 mode? +- */ +- if (v8086_mode(regs)) { +- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; +- if (bit < 32) +- tsk->thread.screen_bitmap |= 1 << bit; +- } +-#endif +- up_read(&mm->mmap_sem); +- return; +- +-/* +- * Something tried to access memory that isn't in our memory map.. +- * Fix it, but check if it's kernel or user first.. +- */ +-bad_area: +- up_read(&mm->mmap_sem); +- +-bad_area_nosemaphore: +- /* User mode accesses just cause a SIGSEGV */ +- if (error_code & PF_USER) { +- /* +- * It's possible to have interrupts off here. +- */ +- local_irq_enable(); +- +- /* +- * Valid to do another page fault here because this one came +- * from user space. +- */ +- if (is_prefetch(regs, address, error_code)) +- return; +- +- if (is_errata100(regs, address)) +- return; +- +- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && +- printk_ratelimit()) { +- printk( +- "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", +- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, +- tsk->comm, task_pid_nr(tsk), address, +- (void *) regs->ip, (void *) regs->sp, error_code); +- print_vma_addr(" in ", regs->ip); +- printk("\n"); +- } +- +- tsk->thread.cr2 = address; +- /* Kernel addresses are always protection faults */ +- tsk->thread.error_code = error_code | (address >= TASK_SIZE); +- tsk->thread.trap_no = 14; +- force_sig_info_fault(SIGSEGV, si_code, address, tsk); +- return; +- } +- +- if (is_f00f_bug(regs, address)) +- return; +- +-no_context: +- /* Are we prepared to handle this kernel fault? */ +- if (fixup_exception(regs)) +- return; +- +- /* +- * X86_32 +- * Valid to do another page fault here, because if this fault +- * had been triggered by is_prefetch fixup_exception would have +- * handled it. +- * +- * X86_64 +- * Hall of shame of CPU/BIOS bugs. +- */ +- if (is_prefetch(regs, address, error_code)) +- return; +- +- if (is_errata93(regs, address)) +- return; +- +-/* +- * Oops. The kernel tried to access some bad page. We'll have to +- * terminate things with extreme prejudice. 
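
The switch statement deleted in this hunk and the new access_error() helper earlier in the patch encode the same decision table; as a summary (comment form, not new code):

	/*
	 * error code			vma flags			outcome
	 * write (present or not)	VM_WRITE			handle as write fault
	 * write			!VM_WRITE			SIGSEGV, SEGV_ACCERR
	 * read, present (PF_PROT)	any				SIGSEGV, SEGV_ACCERR
	 * read, not present		VM_READ|VM_EXEC|VM_WRITE	handle as read fault
	 * read, not present		none of those			SIGSEGV, SEGV_ACCERR
	 */
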
+- */ +-#ifdef CONFIG_X86_32 +- bust_spinlocks(1); +-#else +- flags = oops_begin(); +-#endif +- +- show_fault_oops(regs, error_code, address); +- +- tsk->thread.cr2 = address; +- tsk->thread.trap_no = 14; +- tsk->thread.error_code = error_code; +- +-#ifdef CONFIG_X86_32 +- die("Oops", regs, error_code); +- bust_spinlocks(0); +- do_exit(SIGKILL); +-#else +- sig = SIGKILL; +- if (__die("Oops", regs, error_code)) +- sig = 0; +- /* Executive summary in case the body of the oops scrolled away */ +- printk(KERN_EMERG "CR2: %016lx\n", address); +- oops_end(flags, regs, sig); +-#endif +- +-out_of_memory: +- /* +- * We ran out of memory, call the OOM killer, and return the userspace +- * (which will retry the fault, or kill us if we got oom-killed). +- */ +- up_read(&mm->mmap_sem); +- pagefault_out_of_memory(); +- return; ++ check_v8086_mode(regs, address, tsk); + +-do_sigbus: + up_read(&mm->mmap_sem); +- +- /* Kernel mode? Handle exceptions or die */ +- if (!(error_code & PF_USER)) +- goto no_context; +-#ifdef CONFIG_X86_32 +- /* User space => ok to do another page fault */ +- if (is_prefetch(regs, address, error_code)) +- return; +-#endif +- tsk->thread.cr2 = address; +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = 14; +- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +-} +- +-DEFINE_SPINLOCK(pgd_lock); +-LIST_HEAD(pgd_list); +- +-void vmalloc_sync_all(void) +-{ +- unsigned long address; +- +-#ifdef CONFIG_X86_32 +- if (SHARED_KERNEL_PMD) +- return; +- +- for (address = VMALLOC_START & PMD_MASK; +- address >= TASK_SIZE && address < FIXADDR_TOP; +- address += PMD_SIZE) { +- unsigned long flags; +- struct page *page; +- +- spin_lock_irqsave(&pgd_lock, flags); +- list_for_each_entry(page, &pgd_list, lru) { +- if (!vmalloc_sync_one(page_address(page), +- address)) +- break; +- } +- spin_unlock_irqrestore(&pgd_lock, flags); +- } +-#else /* CONFIG_X86_64 */ +- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; +- address += PGDIR_SIZE) { +- const pgd_t *pgd_ref = pgd_offset_k(address); +- unsigned long flags; +- struct page *page; +- +- if (pgd_none(*pgd_ref)) +- continue; +- spin_lock_irqsave(&pgd_lock, flags); +- list_for_each_entry(page, &pgd_list, lru) { +- pgd_t *pgd; +- pgd = (pgd_t *)page_address(page) + pgd_index(address); +- if (pgd_none(*pgd)) +- set_pgd(pgd, *pgd_ref); +- else +- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); +- } +- spin_unlock_irqrestore(&pgd_lock, flags); +- } +-#endif + } +--- head-2010-01-18.orig/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -1,5 +1,6 @@ + #include + #include ++#include /* for totalram_pages */ + + void *kmap(struct page *page) + { +@@ -18,49 +19,6 @@ void kunmap(struct page *page) + kunmap_high(page); + } + +-static void debug_kmap_atomic_prot(enum km_type type) +-{ +-#ifdef CONFIG_DEBUG_HIGHMEM +- static unsigned warn_count = 10; +- +- if (unlikely(warn_count == 0)) +- return; +- +- if (unlikely(in_interrupt())) { +- if (in_irq()) { +- if (type != KM_IRQ0 && type != KM_IRQ1 && +- type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && +- type != KM_BOUNCE_READ) { +- WARN_ON(1); +- warn_count--; +- } +- } else if (!irqs_disabled()) { /* softirq */ +- if (type != KM_IRQ0 && type != KM_IRQ1 && +- type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && +- type != KM_SKB_SUNRPC_DATA && +- type != KM_SKB_DATA_SOFTIRQ && +- type != KM_BOUNCE_READ) { +- WARN_ON(1); +- warn_count--; +- } +- } +- } +- +- if (type == 
KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || +- type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { +- if (!irqs_disabled()) { +- WARN_ON(1); +- warn_count--; +- } +- } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { +- if (irq_count() == 0 && !irqs_disabled()) { +- WARN_ON(1); +- warn_count--; +- } +- } +-#endif +-} +- + /* + * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because + * no global lock is needed and because the kmap code must perform a global TLB +@@ -80,7 +38,7 @@ void *kmap_atomic_prot(struct page *page + if (!PageHighMem(page)) + return page_address(page); + +- debug_kmap_atomic_prot(type); ++ debug_kmap_atomic(type); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +@@ -120,22 +78,13 @@ void kunmap_atomic(void *kvaddr, enum km + pagefault_enable(); + } + +-/* This is the same as kmap_atomic() but can map memory that doesn't ++/* ++ * This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ + void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) + { +- enum fixed_addresses idx; +- unsigned long vaddr; +- +- pagefault_disable(); +- +- idx = type + KM_TYPE_NR*smp_processor_id(); +- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +- set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); +- /*arch_flush_lazy_mmu_mode();*/ +- +- return (void*) vaddr; ++ return kmap_atomic_prot_pfn(pfn, type, kmap_prot); + } + EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ + +@@ -206,3 +155,35 @@ EXPORT_SYMBOL(kmap_atomic_to_page); + #endif + EXPORT_SYMBOL(clear_highpage); + EXPORT_SYMBOL(copy_highpage); ++ ++void __init set_highmem_pages_init(void) ++{ ++ struct zone *zone; ++ int nid; ++ ++ for_each_zone(zone) { ++ unsigned long zone_start_pfn, zone_end_pfn; ++ ++ if (!is_highmem(zone)) ++ continue; ++ ++ zone_start_pfn = zone->zone_start_pfn; ++ zone_end_pfn = zone_start_pfn + zone->spanned_pages; ++ ++ nid = zone_to_nid(zone); ++ printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", ++ zone->name, nid, zone_start_pfn, zone_end_pfn); ++ ++ add_highpages_with_active_regions(nid, zone_start_pfn, ++ zone_end_pfn); ++ ++ /* XEN: init high-mem pages outside initial allocation. 
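
One hunk up, kmap_atomic_pfn() is reduced to a wrapper around kmap_atomic_prot_pfn() with the default kmap_prot; a hypothetical caller (buffer and pfn are placeholders, not names from the patch) looks like:

	void *va = kmap_atomic_pfn(pfn, KM_USER0);	/* pfn may lack a struct page */
	memcpy(buffer, va, PAGE_SIZE);			/* short, non-sleeping access */
	kunmap_atomic(va, KM_USER0);
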
*/ ++ if (zone_start_pfn < xen_start_info->nr_pages) ++ zone_start_pfn = xen_start_info->nr_pages; ++ for (; zone_start_pfn < zone_end_pfn; zone_start_pfn++) { ++ ClearPageReserved(pfn_to_page(zone_start_pfn)); ++ init_page_count(pfn_to_page(zone_start_pfn)); ++ } ++ } ++ totalram_pages += totalhigh_pages; ++} +--- head-2010-01-18.orig/arch/x86/mm/hypervisor.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/hypervisor.c 2009-11-06 10:52:02.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -47,6 +48,9 @@ + + EXPORT_SYMBOL(hypercall_page); + ++shared_info_t *__read_mostly HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; ++EXPORT_SYMBOL(HYPERVISOR_shared_info); ++ + #define NR_MC BITS_PER_LONG + #define NR_MMU BITS_PER_LONG + #define NR_MMUEXT (BITS_PER_LONG / 4) +@@ -538,7 +542,7 @@ int xen_create_contiguous_region( + unsigned int level; + + if (vstart < __START_KERNEL_map +- || vstart + (PAGE_SIZE << order) > (unsigned long)_end) ++ || vstart + (PAGE_SIZE << order) > _brk_end) + return -EINVAL; + ptep = lookup_address((unsigned long)__va(__pa(vstart)), + &level); +@@ -953,6 +957,6 @@ int write_ldt_entry(struct desc_struct * + int write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, + int type) + { +- maddr_t mach_gp = virt_to_machine(gdt + entry); ++ maddr_t mach_gp = arbitrary_virt_to_machine(gdt + entry); + return HYPERVISOR_update_descriptor(mach_gp, *(const u64*)desc); + } +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/mm/init-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -0,0 +1,459 @@ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned long __meminitdata e820_table_start; ++unsigned long __meminitdata e820_table_end; ++unsigned long __meminitdata e820_table_top; ++ ++int after_bootmem; ++ ++#if !defined(CONFIG_XEN) ++int direct_gbpages ++#ifdef CONFIG_DIRECT_GBPAGES ++ = 1 ++#endif ++; ++#elif defined(CONFIG_X86_32) ++#define direct_gbpages 0 ++extern unsigned long extend_init_mapping(unsigned long tables_space); ++#else ++extern void xen_finish_init_mapping(void); ++#endif ++ ++static void __init find_early_table_space(unsigned long end, int use_pse, ++ int use_gbpages) ++{ ++ unsigned long puds, pmds, ptes, tables; ++ ++ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; ++ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); ++ ++ if (use_gbpages) { ++ unsigned long extra; ++ ++ extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); ++ pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; ++ } else ++ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; ++ ++ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); ++ ++ if (use_pse) { ++ unsigned long extra; ++ ++ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); ++#ifdef CONFIG_X86_32 ++ extra += PMD_SIZE; ++#endif ++ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ } else ++ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ ++ tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); ++ ++#ifdef CONFIG_X86_32 ++ /* for fixmap */ ++ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); ++#endif ++ ++ /* ++ * RED-PEN putting page tables only on node 0 could ++ * cause a hotspot and fill up ZONE_DMA. The page tables ++ * need roughly 0.5KB per GB. 
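
To make the estimate concrete, a sketch with illustrative numbers -- mapping 4 GB with 2 MB pages (use_pse = 1, use_gbpages = 0):

	unsigned long end  = 4UL << 30;
	unsigned long puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; /* 4    -> 1 page of pud_t  */
	unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; /* 2048 -> 4 pages of pmd_t */
	/* With use_pse set, only the unaligned tail (plus one extra PMD's
	 * worth on 32-bit) needs pte pages, so "tables" stays at a handful
	 * of 4 KB pages for the whole range. */
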
++ */
++#ifdef CONFIG_X86_32
++	e820_table_start = extend_init_mapping(tables);
++	e820_table_end = e820_table_start;
++#else /* CONFIG_X86_64 */
++	if (!e820_table_top) {
++		e820_table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
++			xen_start_info->nr_pt_frames;
++		e820_table_end = e820_table_start;
++	} else {
++		/*
++		 * [table_start, table_top) gets passed to reserve_early(),
++		 * so we must not use table_end here, despite continuing
++		 * to allocate from there. table_end possibly being below
++		 * table_start is otoh not a problem.
++		 */
++		e820_table_start = e820_table_top;
++	}
++#endif
++	if (e820_table_start == -1UL)
++		panic("Cannot find space for the kernel page tables");
++
++	e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
++
++	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
++		end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
++}
++
++struct map_range {
++	unsigned long start;
++	unsigned long end;
++	unsigned page_size_mask;
++};
++
++#ifdef CONFIG_X86_32
++#define NR_RANGE_MR 3
++#else /* CONFIG_X86_64 */
++#define NR_RANGE_MR 5
++#endif
++
++static int __meminit save_mr(struct map_range *mr, int nr_range,
++			     unsigned long start_pfn, unsigned long end_pfn,
++			     unsigned long page_size_mask)
++{
++	if (start_pfn < end_pfn) {
++		if (nr_range >= NR_RANGE_MR)
++			panic("run out of range for init_memory_mapping\n");
++		mr[nr_range].start = start_pfn<<PAGE_SHIFT;
++		mr[nr_range].end   = end_pfn<<PAGE_SHIFT;
++		mr[nr_range].page_size_mask = page_size_mask;
++		nr_range++;
++	}
++
++	return nr_range;
++}
++
++/*
++ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
++ * This runs before bootmem is initialized and gets pages directly from
++ * the physical memory. To access them they are temporarily mapped.
++ */
++unsigned long __init_refok init_memory_mapping(unsigned long start,
++					       unsigned long end)
++{
++	unsigned long page_size_mask = 0;
++	unsigned long start_pfn, end_pfn;
++	unsigned long ret = 0;
++	unsigned long pos;
++
++	struct map_range mr[NR_RANGE_MR];
++	int nr_range, i;
++	int use_pse, use_gbpages;
++
++	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
++
++#ifdef CONFIG_DEBUG_PAGEALLOC
++	/*
++	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
++	 * This will simplify cpa(), which otherwise needs to support splitting
++	 * large pages into small in interrupt context, etc.
++	 */
++	use_pse = use_gbpages = 0;
++#else
++	use_pse = cpu_has_pse;
++	use_gbpages = direct_gbpages;
++#endif
++
++#ifdef CONFIG_X86_32
++#ifdef CONFIG_X86_PAE
++	set_nx();
++	if (nx_enabled)
++		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
++#endif
++
++	/* Enable PSE if available */
++	if (cpu_has_pse)
++		set_in_cr4(X86_CR4_PSE);
++
++	/* Enable PGE if available */
++	if (cpu_has_pge) {
++		set_in_cr4(X86_CR4_PGE);
++		__supported_pte_mask |= _PAGE_GLOBAL;
++	}
++#endif
++
++	if (use_gbpages)
++		page_size_mask |= 1 << PG_LEVEL_1G;
++	if (use_pse)
++		page_size_mask |= 1 << PG_LEVEL_2M;
++
++	memset(mr, 0, sizeof(mr));
++	nr_range = 0;
++
++	/* head if not big page alignment ? */
++	start_pfn = start >> PAGE_SHIFT;
++	pos = start_pfn << PAGE_SHIFT;
++#ifdef CONFIG_X86_32
++	/*
++	 * Don't use a large page for the first 2/4MB of memory
++	 * because there are often fixed size MTRRs in there
++	 * and overlapping MTRRs into large pages can cause
++	 * slowdowns.
++	 */
++	if (pos == 0)
++		end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
++	else
++		end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
++				 << (PMD_SHIFT - PAGE_SHIFT);
++#else /* CONFIG_X86_64 */
++	end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
++			<< (PMD_SHIFT - PAGE_SHIFT);
++#endif
++	if (end_pfn > (end >> PAGE_SHIFT))
++		end_pfn = end >> PAGE_SHIFT;
++	if (start_pfn < end_pfn) {
++		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
++		pos = end_pfn << PAGE_SHIFT;
++	}
++
++	/* big page (2M) range */
++	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
++			 << (PMD_SHIFT - PAGE_SHIFT);
++#ifdef CONFIG_X86_32
++	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
++#else /* CONFIG_X86_64 */
++	end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
++			 << (PUD_SHIFT - PAGE_SHIFT);
++	if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
++		end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
++#endif
++
++	if (start_pfn < end_pfn) {
++		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
++				page_size_mask & (1<<PG_LEVEL_2M));
++		pos = end_pfn << PAGE_SHIFT;
++	}
++
++#ifdef CONFIG_X86_64
++	/* big page (1G) range */
++	start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
++			 << (PUD_SHIFT - PAGE_SHIFT);
++	end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
++	if (start_pfn < end_pfn) {
++		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
++				page_size_mask &
++				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
++		pos = end_pfn << PAGE_SHIFT;
++	}
++
++	/* tail is not big page (1G) alignment */
++	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
++			 << (PMD_SHIFT - PAGE_SHIFT);
++	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
++	if (start_pfn < end_pfn) {
++		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
++				page_size_mask & (1<<PG_LEVEL_2M));
++		pos = end_pfn << PAGE_SHIFT;
++	}
++#endif
++
++	/* tail is not big page (2M) alignment */
++	start_pfn = pos>>PAGE_SHIFT;
++	end_pfn = end>>PAGE_SHIFT;
++	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
++
++	/* try to merge same page size and continuous */
++	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
++		unsigned long old_start;
++		if (mr[i].end != mr[i+1].start ||
++		    mr[i].page_size_mask != mr[i+1].page_size_mask)
++			continue;
++		/* move it */
++		old_start = mr[i].start;
++		memmove(&mr[i], &mr[i+1],
++			(nr_range - 1 - i) * sizeof(struct map_range));
++		mr[i--].start = old_start;
++		nr_range--;
++	}
++
++	for (i = 0; i < nr_range; i++)
++		printk(KERN_DEBUG " %010lx - %010lx page %s\n",
++				mr[i].start, mr[i].end,
++			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
++			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
++
++	/*
++	 * Find space for the kernel direct mapping tables.
++	 *
++	 * Later we should allocate these tables in the local node of the
++	 * memory mapped. Unfortunately this is done currently before the
++	 * nodes are discovered.
++	 */
++	if (!after_bootmem)
++		find_early_table_space(end, use_pse, use_gbpages);
++
++#ifdef CONFIG_X86_32
++	for (i = 0; i < nr_range; i++)
++		kernel_physical_mapping_init(mr[i].start, mr[i].end,
++					     mr[i].page_size_mask);
++	ret = end;
++#else /* CONFIG_X86_64 */
++#define addr_to_page(addr)					\
++	((unsigned long *)					\
++	 ((mfn_to_pfn(((addr) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) \
++	   << PAGE_SHIFT) + __START_KERNEL_map))
++
++	if (!start) {
++		unsigned long addr, va = __START_KERNEL_map;
++		unsigned long *page = (unsigned long *)init_level4_pgt;
++
++		/* Kill mapping of memory below _text. */
++		while (va < (unsigned long)&_text) {
++			if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
++				BUG();
++			va += PAGE_SIZE;
++		}
++
++		/* Blow away any spurious initial mappings. */
++		va = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
++
++		addr = page[pgd_index(va)];
++		page = addr_to_page(addr);
++		addr = page[pud_index(va)];
++		page = addr_to_page(addr);
++		while (pmd_index(va) | pte_index(va)) {
++			if (pmd_none(*(pmd_t *)&page[pmd_index(va)]))
++				break;
++			if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
++				BUG();
++			va += PAGE_SIZE;
++		}
++	}
++
++	for (i = 0; i < nr_range; i++)
++		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
++						   mr[i].page_size_mask);
++#undef addr_to_page
++#endif
++
++#ifdef CONFIG_X86_32
++	early_ioremap_page_table_range_init();
++#endif
++
++#ifdef CONFIG_X86_64
++	BUG_ON(e820_table_end > e820_table_top);
++	if (!start)
++		xen_finish_init_mapping();
++	else
++#endif
++	if (e820_table_end < e820_table_top)
++		/* Disable the 'table_end' allocator. */
++		e820_table_top = e820_table_end;
++
++	__flush_tlb_all();
++
++	if (!after_bootmem && e820_table_top > e820_table_start)
++		reserve_early(e820_table_start << PAGE_SHIFT,
++			      e820_table_top << PAGE_SHIFT, "PGTABLE");
++
++	if (!after_bootmem)
++		early_memtest(start, end);
++
++	return ret >> PAGE_SHIFT;
++}
++
++
++/*
++ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
++ * is valid. The argument is a physical page number.
++ *
++ *
++ * On x86, access has to be given to the first megabyte of ram because that area
++ * contains bios code and data regions used by X and dosemu and similar apps.
++ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
++ * mmio resources as well as potential bios/acpi data regions.
++ */
++int devmem_is_allowed(unsigned long pagenr)
++{
++	if (pagenr <= 256)
++		return 1;
++	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
++		return 0;
++	if (mfn_to_local_pfn(pagenr) >= max_pfn)
++		return 1;
++	return 0;
++}
++
++void free_init_pages(char *what, unsigned long begin, unsigned long end)
++{
++	unsigned long addr = begin;
++
++	if (addr >= end)
++		return;
++
++	/*
++	 * If debugging page accesses then do not free this memory but
++	 * mark them not present - any buggy init-section access will
++	 * create a kernel page fault:
++	 */
++#ifdef CONFIG_DEBUG_PAGEALLOC
++	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
++		begin, PAGE_ALIGN(end));
++	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
++#else
++	/*
++	 * We just marked the kernel text read only above, now that
++	 * we are going to free part of that, we need to make that
++	 * writeable first.
++ */ ++ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); ++ ++ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); ++ ++ for (; addr < end; addr += PAGE_SIZE) { ++ ClearPageReserved(virt_to_page(addr)); ++ init_page_count(virt_to_page(addr)); ++ memset((void *)(addr & ~(PAGE_SIZE-1)), ++ POISON_FREE_INITMEM, PAGE_SIZE); ++#ifdef CONFIG_X86_64 ++ if (addr >= __START_KERNEL_map) { ++ /* make_readonly() reports all kernel addresses. */ ++ if (HYPERVISOR_update_va_mapping((unsigned long)__va(__pa(addr)), ++ pfn_pte(__pa(addr) >> PAGE_SHIFT, ++ PAGE_KERNEL), ++ 0)) ++ BUG(); ++ if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) ++ BUG(); ++ } ++#endif ++ free_page(addr); ++ totalram_pages++; ++ } ++#endif ++} ++ ++void free_initmem(void) ++{ ++ free_init_pages("unused kernel memory", ++ (unsigned long)(&__init_begin), ++ (unsigned long)(&__init_end)); ++} ++ ++#ifdef CONFIG_BLK_DEV_INITRD ++void free_initrd_mem(unsigned long start, unsigned long end) ++{ ++ free_init_pages("initrd memory", start, end); ++} ++#endif +--- head-2010-01-18.orig/arch/x86/mm/init_32-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -52,9 +52,7 @@ + #include + #include + #include +-#include +- +-unsigned int __VMALLOC_RESERVE = 128 << 20; ++#include + + unsigned long max_low_pfn_mapped; + unsigned long max_pfn_mapped; +@@ -64,19 +62,14 @@ unsigned long highstart_pfn, highend_pfn + + static noinline int do_test_wp_bit(void); + +- +-static unsigned long __initdata table_start; +-static unsigned long __initdata table_end; +-static unsigned long __initdata table_top; +- +-static int __initdata after_init_bootmem; ++bool __read_mostly __vmalloc_start_set = false; + + static __init void *alloc_low_page(void) + { +- unsigned long pfn = table_end++; ++ unsigned long pfn = e820_table_end++; + void *adr; + +- if (pfn >= table_top) ++ if (pfn >= e820_table_top) + panic("alloc_low_page: ran out of memory"); + + adr = __va(pfn * PAGE_SIZE); +@@ -96,7 +89,7 @@ static pmd_t * __init one_md_table_init( + + #ifdef CONFIG_X86_PAE + if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) { +- if (after_init_bootmem) ++ if (after_bootmem) + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + else + pmd_table = (pmd_t *)alloc_low_page(); +@@ -128,7 +121,7 @@ static pte_t * __init one_page_table_ini + #endif + pte_t *page_table = NULL; + +- if (after_init_bootmem) { ++ if (after_bootmem) { + #ifdef CONFIG_DEBUG_PAGEALLOC + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + #endif +@@ -148,6 +141,23 @@ static pte_t * __init one_page_table_ini + return pte_offset_kernel(pmd, 0); + } + ++pmd_t * __init populate_extra_pmd(unsigned long vaddr) ++{ ++ int pgd_idx = pgd_index(vaddr); ++ int pmd_idx = pmd_index(vaddr); ++ ++ return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; ++} ++ ++pte_t * __init populate_extra_pte(unsigned long vaddr) ++{ ++ int pte_idx = pte_index(vaddr); ++ pmd_t *pmd; ++ ++ pmd = populate_extra_pmd(vaddr); ++ return one_page_table_init(pmd) + pte_idx; ++} ++ + static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, + unsigned long vaddr, pte_t *lastpte) + { +@@ -164,12 +174,12 @@ static pte_t *__init page_table_kmap_che + if (pmd_idx_kmap_begin != pmd_idx_kmap_end + && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin + && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end +- && ((__pa(pte) >> PAGE_SHIFT) < table_start +- || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { ++ && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start 
++	    || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
+ 		pte_t *newpte;
+ 		int i;
+ 
+-		BUG_ON(after_init_bootmem);
++		BUG_ON(after_bootmem);
+ 		newpte = alloc_low_page();
+ 		for (i = 0; i < PTRS_PER_PTE; i++)
+ 			set_pte(newpte + i, pte[i]);
+@@ -244,11 +254,14 @@ static inline int is_kernel_text(unsigne
+  * of max_low_pfn pages, by creating page tables starting from address
+  * PAGE_OFFSET:
+  */
+-static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
+-						unsigned long start_pfn,
+-						unsigned long end_pfn,
+-						int use_pse)
++unsigned long __init
++kernel_physical_mapping_init(unsigned long start,
++			     unsigned long end,
++			     unsigned long page_size_mask)
+ {
++	int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
++	unsigned long start_pfn, end_pfn;
++	pgd_t *pgd_base = swapper_pg_dir;
+ 	int pgd_idx, pmd_idx, pte_ofs;
+ 	unsigned long pfn;
+ 	pgd_t *pgd;
+ 	pmd_t *pmd;
+ 	pte_t *pte;
+ 	unsigned pages_2m, pages_4k;
+ 	int mapping_iter;
+ 
++	start_pfn = start >> PAGE_SHIFT;
++	end_pfn = end >> PAGE_SHIFT;
++
+ 	/*
+ 	 * First iteration will setup identity mapping using large/small pages
+ 	 * based on use_pse, with other attributes same as set by
+@@ -391,26 +407,6 @@ repeat:
+ 		mapping_iter = 2;
+ 		goto repeat;
+ 	}
+-}
+-
+-/*
+- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+- * is valid. The argument is a physical page number.
+- *
+- *
+- * On x86, access has to be given to the first megabyte of ram because that area
+- * contains bios code and data regions used by X and dosemu and similar apps.
+- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+- * mmio resources as well as potential bios/acpi data regions.
+- */
+-int devmem_is_allowed(unsigned long pagenr)
+-{
+-	if (pagenr <= 256)
+-		return 1;
+-	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+-		return 0;
+-	if (mfn_to_local_pfn(pagenr) >= max_pfn)
+-		return 1;
+ 	return 0;
+ }
+ 
+@@ -506,30 +502,10 @@ void __init add_highpages_with_active_re
+ 	work_with_active_regions(nid, add_highpages_work_fn, &data);
+ }
+ 
+-#ifndef CONFIG_NUMA
+-static void __init set_highmem_pages_init(void)
+-{
+-	int pfn;
+-
+-	add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
+-
+-	/* XEN: init high-mem pages outside initial allocation. */
+-	for (pfn = xen_start_info->nr_pages; pfn < highend_pfn; pfn++) {
+-		ClearPageReserved(pfn_to_page(pfn));
+-		init_page_count(pfn_to_page(pfn));
+-	}
+-
+-	totalram_pages += totalhigh_pages;
+-}
+-#endif /* !CONFIG_NUMA */
+-
+ #else
+ static inline void permanent_kmaps_init(pgd_t *pgd_base)
+ {
+ }
+-static inline void set_highmem_pages_init(void)
+-{
+-}
+ #endif /* CONFIG_HIGHMEM */
+ 
+ pgd_t *swapper_pg_dir;
+@@ -553,8 +529,9 @@ pgd_t *swapper_pg_dir;
+  * be partially populated, and so it avoids stomping on any existing
+  * mappings.
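
The populate_extra_pmd()/populate_extra_pte() helpers added in the init_32-xen.c hunk above give early boot code a way to materialize a kernel page-table slot on demand; a hypothetical use (the fixmap address and pfn are only examples):

	pte_t *pte = populate_extra_pte(__fix_to_virt(FIX_KMAP_BEGIN));
	set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
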
+ */ +-static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) ++void __init early_ioremap_page_table_range_init(void) + { ++ pgd_t *pgd_base = swapper_pg_dir; + unsigned long vaddr, end; + + /* +@@ -649,7 +626,7 @@ static int __init noexec_setup(char *str + } + early_param("noexec", noexec_setup); + +-static void __init set_nx(void) ++void __init set_nx(void) + { + unsigned int v[4], l, h; + +@@ -685,75 +662,97 @@ static int __init parse_highmem(char *ar + } + early_param("highmem", parse_highmem); + ++#define MSG_HIGHMEM_TOO_BIG \ ++ "highmem size (%luMB) is bigger than pages available (%luMB)!\n" ++ ++#define MSG_LOWMEM_TOO_SMALL \ ++ "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n" + /* +- * Determine low and high memory ranges: ++ * All of RAM fits into lowmem - but if user wants highmem ++ * artificially via the highmem=x boot parameter then create ++ * it: + */ +-void __init find_low_pfn_range(void) ++void __init lowmem_pfn_init(void) + { +- /* it could update max_pfn */ +- + /* max_low_pfn is 0, we already have early_res support */ +- + max_low_pfn = max_pfn; +- if (max_low_pfn > MAXMEM_PFN) { +- if (highmem_pages == -1) +- highmem_pages = max_pfn - MAXMEM_PFN; +- if (highmem_pages + MAXMEM_PFN < max_pfn) +- max_pfn = MAXMEM_PFN + highmem_pages; +- if (highmem_pages + MAXMEM_PFN > max_pfn) { +- printk(KERN_WARNING "only %luMB highmem pages " +- "available, ignoring highmem size of %uMB.\n", +- pages_to_mb(max_pfn - MAXMEM_PFN), ++ ++ if (highmem_pages == -1) ++ highmem_pages = 0; ++#ifdef CONFIG_HIGHMEM ++ if (highmem_pages >= max_pfn) { ++ printk(KERN_ERR MSG_HIGHMEM_TOO_BIG, ++ pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); ++ highmem_pages = 0; ++ } ++ if (highmem_pages) { ++ if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { ++ printk(KERN_ERR MSG_LOWMEM_TOO_SMALL, + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } +- max_low_pfn = MAXMEM_PFN; ++ max_low_pfn -= highmem_pages; ++ } ++#else ++ if (highmem_pages) ++ printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); ++#endif ++} ++ ++#define MSG_HIGHMEM_TOO_SMALL \ ++ "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" ++ ++#define MSG_HIGHMEM_TRIMMED \ ++ "Warning: only 4GB will be used. 
Use a HIGHMEM64G enabled kernel!\n" ++/* ++ * We have more RAM than fits into lowmem - we try to put it into ++ * highmem, also taking the highmem=x boot parameter into account: ++ */ ++void __init highmem_pfn_init(void) ++{ ++ max_low_pfn = MAXMEM_PFN; ++ ++ if (highmem_pages == -1) ++ highmem_pages = max_pfn - MAXMEM_PFN; ++ ++ if (highmem_pages + MAXMEM_PFN < max_pfn) ++ max_pfn = MAXMEM_PFN + highmem_pages; ++ ++ if (highmem_pages + MAXMEM_PFN > max_pfn) { ++ printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL, ++ pages_to_mb(max_pfn - MAXMEM_PFN), ++ pages_to_mb(highmem_pages)); ++ highmem_pages = 0; ++ } + #ifndef CONFIG_HIGHMEM +- /* Maximum memory usable is what is directly addressable */ +- printk(KERN_WARNING "Warning only %ldMB will be used.\n", +- MAXMEM>>20); +- if (max_pfn > MAX_NONPAE_PFN) +- printk(KERN_WARNING +- "Use a HIGHMEM64G enabled kernel.\n"); +- else +- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +- max_pfn = MAXMEM_PFN; ++ /* Maximum memory usable is what is directly addressable */ ++ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); ++ if (max_pfn > MAX_NONPAE_PFN) ++ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); ++ else ++ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); ++ max_pfn = MAXMEM_PFN; + #else /* !CONFIG_HIGHMEM */ + #ifndef CONFIG_HIGHMEM64G +- if (max_pfn > MAX_NONPAE_PFN) { +- max_pfn = MAX_NONPAE_PFN; +- printk(KERN_WARNING "Warning only 4GB will be used." +- "Use a HIGHMEM64G enabled kernel.\n"); +- } ++ if (max_pfn > MAX_NONPAE_PFN) { ++ max_pfn = MAX_NONPAE_PFN; ++ printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); ++ } + #endif /* !CONFIG_HIGHMEM64G */ + #endif /* !CONFIG_HIGHMEM */ +- } else { +- if (highmem_pages == -1) +- highmem_pages = 0; +-#ifdef CONFIG_HIGHMEM +- if (highmem_pages >= max_pfn) { +- printk(KERN_ERR "highmem size specified (%uMB) is " +- "bigger than pages available (%luMB)!.\n", +- pages_to_mb(highmem_pages), +- pages_to_mb(max_pfn)); +- highmem_pages = 0; +- } +- if (highmem_pages) { +- if (max_low_pfn - highmem_pages < +- 64*1024*1024/PAGE_SIZE){ +- printk(KERN_ERR "highmem size %uMB results in " +- "smaller than 64MB lowmem, ignoring it.\n" +- , pages_to_mb(highmem_pages)); +- highmem_pages = 0; +- } +- max_low_pfn -= highmem_pages; +- } +-#else +- if (highmem_pages) +- printk(KERN_ERR "ignoring highmem size on non-highmem" +- " kernel!\n"); +-#endif +- } ++} ++ ++/* ++ * Determine low and high memory ranges: ++ */ ++void __init find_low_pfn_range(void) ++{ ++ /* it could update max_pfn */ ++ ++ if (max_pfn <= MAXMEM_PFN) ++ lowmem_pfn_init(); ++ else ++ highmem_pfn_init(); + } + + #ifndef CONFIG_NEED_MULTIPLE_NODES +@@ -779,6 +778,8 @@ void __init initmem_init(unsigned long s + #ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; + #endif ++ __vmalloc_start_set = true; ++ + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); + +@@ -800,40 +801,70 @@ static void __init zone_sizes_init(void) + free_area_init_nodes(max_zone_pfns); + } + ++static unsigned long __init setup_node_bootmem(int nodeid, ++ unsigned long start_pfn, ++ unsigned long end_pfn, ++ unsigned long bootmap) ++{ ++ unsigned long bootmap_size; ++ ++ /* don't touch min_low_pfn */ ++ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), ++ bootmap >> PAGE_SHIFT, ++ start_pfn, end_pfn); ++ printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", ++ nodeid, start_pfn<nr_pages); ++ unsigned long end_xen_pfn = min(max_low_pfn, xen_start_info->nr_pages); + + /* + * Initialize the boot-time allocator (with low 
memory only): + */ +- bootmap_size = bootmem_bootmap_pages(end_pfn)<nr_pages)<nr_pages)<> PAGE_SHIFT, +- min_low_pfn, end_pfn); + printk(KERN_INFO " mapped low ram: 0 - %08lx\n", + max_pfn_mapped< end_xen_pfn) ++ continue; ++ if (end_pfn > end_xen_pfn) ++ end_pfn = end_xen_pfn; ++#else ++ start_pfn = 0; ++ end_pfn = end_xen_pfn; ++#endif ++ bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, ++ bootmap); ++ } ++ ++ after_bootmem = 1; + } + +-static unsigned long __init extend_init_mapping(unsigned long tables_space) ++unsigned long __init extend_init_mapping(unsigned long tables_space) + { + unsigned long start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + + xen_start_info->nr_pt_frames; +@@ -885,133 +916,6 @@ static unsigned long __init extend_init_ + return start_pfn; + } + +-static void __init find_early_table_space(unsigned long end, int use_pse) +-{ +- unsigned long puds, pmds, ptes, tables; +- +- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; +- tables = PAGE_ALIGN(puds * sizeof(pud_t)); +- +- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; +- tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); +- +- if (use_pse) { +- unsigned long extra; +- +- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +- extra += PMD_SIZE; +- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; +- } else +- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; +- +- tables += PAGE_ALIGN(ptes * sizeof(pte_t)); +- +- /* for fixmap */ +- tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); +- +- table_start = extend_init_mapping(tables); +- +- table_end = table_start; +- table_top = table_start + (tables>>PAGE_SHIFT); +- +- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", +- end, table_start << PAGE_SHIFT, +- (table_start << PAGE_SHIFT) + tables); +-} +- +-unsigned long __init_refok init_memory_mapping(unsigned long start, +- unsigned long end) +-{ +- pgd_t *pgd_base = swapper_pg_dir; +- unsigned long start_pfn, end_pfn; +- unsigned long big_page_start; +-#ifdef CONFIG_DEBUG_PAGEALLOC +- /* +- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. +- * This will simplify cpa(), which otherwise needs to support splitting +- * large pages into small in interrupt context, etc. +- */ +- int use_pse = 0; +-#else +- int use_pse = cpu_has_pse; +-#endif +- +- /* +- * Find space for the kernel direct mapping tables. +- */ +- if (!after_init_bootmem) +- find_early_table_space(end, use_pse); +- +-#ifdef CONFIG_X86_PAE +- set_nx(); +- if (nx_enabled) +- printk(KERN_INFO "NX (Execute Disable) protection: active\n"); +-#endif +- +- /* Enable PSE if available */ +- if (cpu_has_pse) +- set_in_cr4(X86_CR4_PSE); +- +- /* Enable PGE if available */ +- if (cpu_has_pge) { +- set_in_cr4(X86_CR4_PGE); +- __supported_pte_mask |= _PAGE_GLOBAL; +- } +- +- /* +- * Don't use a large page for the first 2/4MB of memory +- * because there are often fixed size MTRRs in there +- * and overlapping MTRRs into large pages can cause +- * slowdowns. +- */ +- big_page_start = PMD_SIZE; +- +- if (start < big_page_start) { +- start_pfn = start >> PAGE_SHIFT; +- end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); +- } else { +- /* head is not big page alignment ? 
*/ +- start_pfn = start >> PAGE_SHIFT; +- end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- } +- if (start_pfn < end_pfn) +- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); +- +- /* big page range */ +- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- if (start_pfn < (big_page_start >> PAGE_SHIFT)) +- start_pfn = big_page_start >> PAGE_SHIFT; +- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +- if (start_pfn < end_pfn) +- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, +- use_pse); +- +- /* tail is not big page alignment ? */ +- start_pfn = end_pfn; +- if (start_pfn > (big_page_start>>PAGE_SHIFT)) { +- end_pfn = end >> PAGE_SHIFT; +- if (start_pfn < end_pfn) +- kernel_physical_mapping_init(pgd_base, start_pfn, +- end_pfn, 0); +- } +- +- early_ioremap_page_table_range_init(pgd_base); +- +- __flush_tlb_all(); +- +- if (!after_init_bootmem) +- reserve_early(table_start << PAGE_SHIFT, +- table_end << PAGE_SHIFT, "PGTABLE"); +- +- if (!after_init_bootmem) +- early_memtest(start, end); +- +- return end >> PAGE_SHIFT; +-} +- +- + /* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. +@@ -1215,17 +1119,47 @@ static noinline int do_test_wp_bit(void) + const int rodata_test_data = 0xC3; + EXPORT_SYMBOL_GPL(rodata_test_data); + ++static int kernel_set_to_readonly; ++ ++void set_kernel_text_rw(void) ++{ ++ unsigned long start = PFN_ALIGN(_text); ++ unsigned long size = PFN_ALIGN(_etext) - start; ++ ++ if (!kernel_set_to_readonly) ++ return; ++ ++ pr_debug("Set kernel text: %lx - %lx for read write\n", ++ start, start+size); ++ ++ set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); ++} ++ ++void set_kernel_text_ro(void) ++{ ++ unsigned long start = PFN_ALIGN(_text); ++ unsigned long size = PFN_ALIGN(_etext) - start; ++ ++ if (!kernel_set_to_readonly) ++ return; ++ ++ pr_debug("Set kernel text: %lx - %lx for read only\n", ++ start, start+size); ++ ++ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); ++} ++ + void mark_rodata_ro(void) + { + unsigned long start = PFN_ALIGN(_text); + unsigned long size = PFN_ALIGN(_etext) - start; + +-#ifndef CONFIG_DYNAMIC_FTRACE +- /* Dynamic tracing modifies the kernel text section */ + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); + printk(KERN_INFO "Write protecting the kernel text: %luk\n", + size >> 10); + ++ kernel_set_to_readonly = 1; ++ + #ifdef CONFIG_CPA_DEBUG + printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", + start, start+size); +@@ -1234,7 +1168,6 @@ void mark_rodata_ro(void) + printk(KERN_INFO "Testing CPA: write protecting again\n"); + set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); + #endif +-#endif /* CONFIG_DYNAMIC_FTRACE */ + + start += size; + size = (unsigned long)__end_rodata - start; +@@ -1253,52 +1186,6 @@ void mark_rodata_ro(void) + } + #endif + +-void free_init_pages(char *what, unsigned long begin, unsigned long end) +-{ +-#ifdef CONFIG_DEBUG_PAGEALLOC +- /* +- * If debugging page accesses then do not free this memory but +- * mark them not present - any buggy init-section access will +- * create a kernel page fault: +- */ +- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", +- begin, PAGE_ALIGN(end)); +- set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +-#else +- unsigned long addr; +- +- /* +- * We just marked the kernel text read only above, now that +- * we are going to free part of that, we need to make that +- * writeable first. 
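+- *
+- * (Editorial note - not part of the original patch: the range is
+- * converted to whole pages, so set_memory_rw(begin,
+- * (end - begin) >> PAGE_SHIFT) below flips each 4K page of the
+- * init section back to read-write before it is poisoned and freed.)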
+- */ +- set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); +- +- for (addr = begin; addr < end; addr += PAGE_SIZE) { +- ClearPageReserved(virt_to_page(addr)); +- init_page_count(virt_to_page(addr)); +- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); +- free_page(addr); +- totalram_pages++; +- } +- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); +-#endif +-} +- +-void free_initmem(void) +-{ +- free_init_pages("unused kernel memory", +- (unsigned long)(&__init_begin), +- (unsigned long)(&__init_end)); +-} +- +-#ifdef CONFIG_BLK_DEV_INITRD +-void free_initrd_mem(unsigned long start, unsigned long end) +-{ +- free_init_pages("initrd memory", start, end); +-} +-#endif +- + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) + { +--- head-2010-01-18.orig/arch/x86/mm/init_64-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_64-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -51,6 +51,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -67,8 +69,6 @@ unsigned int __kernel_page_user; + EXPORT_SYMBOL(__kernel_page_user); + #endif + +-int after_bootmem; +- + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + + extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD]; +@@ -127,12 +127,6 @@ void __meminit early_make_page_readonly( + } + + #ifndef CONFIG_XEN +-int direct_gbpages +-#ifdef CONFIG_DIRECT_GBPAGES +- = 1 +-#endif +-; +- + static int __init parse_direct_gbpages_off(char *arg) + { + direct_gbpages = 0; +@@ -154,14 +148,10 @@ early_param("gbpages", parse_direct_gbpa + * around without checking the pgd every time. + */ + +-static unsigned long __meminitdata table_start; +-static unsigned long __meminitdata table_cur; +-static unsigned long __meminitdata table_top; +- + pteval_t __supported_pte_mask __read_mostly = ~0UL; + EXPORT_SYMBOL_GPL(__supported_pte_mask); + +-static int do_not_nx __cpuinitdata; ++static int disable_nx __cpuinitdata; + + /* + * noexec=on|off +@@ -176,9 +166,9 @@ static int __init nonx_setup(char *str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; +- do_not_nx = 0; ++ disable_nx = 0; + } else if (!strncmp(str, "off", 3)) { +- do_not_nx = 1; ++ disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +@@ -190,7 +180,7 @@ void __cpuinit check_efer(void) + unsigned long efer; + + rdmsrl(MSR_EFER, efer); +- if (!(efer & EFER_NX) || do_not_nx) ++ if (!(efer & EFER_NX) || disable_nx) + __supported_pte_mask &= ~_PAGE_NX; + } + +@@ -224,9 +214,9 @@ static __ref void *spp_getpage(void) + + if (after_bootmem) + ptr = (void *) get_zeroed_page(GFP_ATOMIC); +- else if (table_cur < table_top) { +- ptr = __va(table_cur << PAGE_SHIFT); +- table_cur++; ++ else if (e820_table_end < e820_table_top) { ++ ptr = __va(e820_table_end << PAGE_SHIFT); ++ e820_table_end++; + memset(ptr, 0, PAGE_SIZE); + } else + ptr = alloc_bootmem_pages(PAGE_SIZE); +@@ -241,36 +231,54 @@ static __ref void *spp_getpage(void) + return ptr; + } + +-void +-set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) ++static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr) + { +- pud_t *pud; +- pmd_t *pmd; +- pte_t *pte; ++ if (pgd_none(*pgd)) { ++ pud_t *pud = (pud_t *)spp_getpage(); ++ make_page_readonly(pud, XENFEAT_writable_page_tables); ++ pgd_populate(&init_mm, pgd, pud); ++ if (pud != pud_offset(pgd, 0)) ++ printk(KERN_ERR "PAGETABLE BUG #00! 
%p <-> %p\n", ++ pud, pud_offset(pgd, 0)); ++ } ++ return pud_offset(pgd, vaddr); ++} + +- pud = pud_page + pud_index(vaddr); ++static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) ++{ + if (pud_none(*pud)) { +- pmd = (pmd_t *) spp_getpage(); ++ pmd_t *pmd = (pmd_t *) spp_getpage(); + make_page_readonly(pmd, XENFEAT_writable_page_tables); + pud_populate(&init_mm, pud, pmd); +- if (pmd != pmd_offset(pud, 0)) { ++ if (pmd != pmd_offset(pud, 0)) + printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", +- pmd, pmd_offset(pud, 0)); +- return; +- } ++ pmd, pmd_offset(pud, 0)); + } +- pmd = pmd_offset(pud, vaddr); ++ return pmd_offset(pud, vaddr); ++} ++ ++static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) ++{ + if (pmd_none(*pmd)) { +- pte = (pte_t *) spp_getpage(); ++ pte_t *pte = (pte_t *) spp_getpage(); + make_page_readonly(pte, XENFEAT_writable_page_tables); + pmd_populate_kernel(&init_mm, pmd, pte); +- if (pte != pte_offset_kernel(pmd, 0)) { ++ if (pte != pte_offset_kernel(pmd, 0)) + printk(KERN_ERR "PAGETABLE BUG #02!\n"); +- return; +- } + } ++ return pte_offset_kernel(pmd, vaddr); ++} ++ ++void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) ++{ ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ pud = pud_page + pud_index(vaddr); ++ pmd = fill_pmd(pud, vaddr); ++ pte = fill_pte(pmd, vaddr); + +- pte = pte_offset_kernel(pmd, vaddr); + set_pte(pte, new_pte); + + /* +@@ -280,8 +288,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + __flush_tlb_one(vaddr); + } + +-void +-set_pte_vaddr(unsigned long vaddr, pte_t pteval) ++void set_pte_vaddr(unsigned long vaddr, pte_t pteval) + { + pgd_t *pgd; + pud_t *pud_page; +@@ -298,6 +305,24 @@ set_pte_vaddr(unsigned long vaddr, pte_t + set_pte_vaddr_pud(pud_page, vaddr, pteval); + } + ++pmd_t * __init populate_extra_pmd(unsigned long vaddr) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ ++ pgd = pgd_offset_k(vaddr); ++ pud = fill_pud(pgd, vaddr); ++ return fill_pmd(pud, vaddr); ++} ++ ++pte_t * __init populate_extra_pte(unsigned long vaddr) ++{ ++ pmd_t *pmd; ++ ++ pmd = populate_extra_pmd(vaddr); ++ return fill_pte(pmd, vaddr); ++} ++ + #ifndef CONFIG_XEN + /* + * Create large page table mappings for a range of physical addresses. +@@ -380,9 +405,9 @@ static __ref void *alloc_low_page(unsign + return adr; + } + +- BUG_ON(!table_cur); +- pfn = table_cur++; +- if (pfn >= table_top) ++ BUG_ON(!e820_table_end); ++ pfn = e820_table_end++; ++ if (pfn >= e820_table_top) + panic("alloc_low_page: ran out of memory"); + + adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); +@@ -407,13 +432,13 @@ static inline int __meminit make_readonl + /* Make new page tables read-only on the first pass. */ + if (!xen_feature(XENFEAT_writable_page_tables) + && !max_pfn_mapped +- && (paddr >= (table_start << PAGE_SHIFT)) +- && (paddr < (table_top << PAGE_SHIFT))) ++ && (paddr >= (e820_table_start << PAGE_SHIFT)) ++ && (paddr < (e820_table_top << PAGE_SHIFT))) + readonly = 1; + /* Make old page tables read-only. */ + if (!xen_feature(XENFEAT_writable_page_tables) + && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map)) +- && (paddr < (table_cur << PAGE_SHIFT))) ++ && (paddr < (e820_table_end << PAGE_SHIFT))) + readonly = 1; + + /* +@@ -422,7 +447,7 @@ static inline int __meminit make_readonl + * mappings. Exclude the vsyscall area here, allowing alternative + * instruction patching to work. 
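+ *
+ * (Editorial note - not part of the original patch: the exclusion
+ * window is exactly one page, [__pa_symbol(&__vsyscall_0),
+ * __pa_symbol(&__vsyscall_0) + PAGE_SIZE), so boot-time alternative
+ * patching can still write to the vsyscall page while the rest of
+ * the kernel image is mapped read-only.)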
+ */ +- if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)) ++ if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa(_brk_end)) + && !(paddr >= __pa_symbol(&__vsyscall_0) + && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE)) + readonly = 1; +@@ -747,43 +772,9 @@ void __init xen_init_pt(void) + } + } + +-static void __init find_early_table_space(unsigned long end, int use_pse, +- int use_gbpages) +-{ +- unsigned long puds, pmds, ptes, tables; +- +- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; +- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); +- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; +- tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); +- +- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; +- tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); +- +- if (!table_top) { +- table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + +- xen_start_info->nr_pt_frames; +- table_cur = table_start; +- } else { +- /* +- * [table_start, table_top) gets passed to reserve_early(), +- * so we must not use table_cur here, despite continuing +- * to allocate from there. table_cur possibly being below +- * table_start is otoh not a problem. +- */ +- table_start = table_top; +- } +- __flush_tlb_all(); +- +- table_top = table_cur + (tables >> PAGE_SHIFT); +- +- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", +- end, table_cur << PAGE_SHIFT, table_top << PAGE_SHIFT); +-} +- +-static void __init xen_finish_init_mapping(void) ++void __init xen_finish_init_mapping(void) + { +- unsigned long i, start, end; ++ unsigned long start, end; + + /* Re-vector virtual addresses pointing into the initial + mapping to the just-established permanent ones. */ +@@ -801,49 +792,22 @@ static void __init xen_finish_init_mappi + __va(__pa(xen_start_info->mod_start)); + + /* Destroy the Xen-created mappings beyond the kernel image. */ +- start = PAGE_ALIGN((unsigned long)_end); +- end = __START_KERNEL_map + (table_start << PAGE_SHIFT); ++ start = PAGE_ALIGN(_brk_end); ++ end = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT); + for (; start < end; start += PAGE_SIZE) + if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0)) + BUG(); + +- /* Allocate pte's for initial fixmaps from 'table_cur' allocator. */ +- start = table_top; +- WARN(table_cur != start, "start=%lx cur=%lx top=%lx\n", +- table_start, table_cur, start); +- table_top = ~0UL; +- +- /* Switch to the real shared_info page, and clear the dummy page. */ +- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); +- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); +- memset(empty_zero_page, 0, sizeof(empty_zero_page)); +- +- /* Set up mapping of lowest 1MB of physical memory. 
*/
+- for (i = 0; i < NR_FIX_ISAMAPS; i++)
+- if (is_initial_xendomain())
+- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+- else
+- __set_fixmap(FIX_ISAMAP_BEGIN - i,
+- virt_to_mfn(empty_zero_page)
+- << PAGE_SHIFT,
+- PAGE_KERNEL_RO);
+-
+- table_top = max(table_cur, start);
+-}
+-
+-static void __init init_gbpages(void)
+-{
+-#ifndef CONFIG_XEN
+- if (direct_gbpages && cpu_has_gbpages)
+- printk(KERN_INFO "Using GB pages for direct mapping\n");
+- else
+- direct_gbpages = 0;
+-#endif
++ WARN(e820_table_end != e820_table_top, "start=%lx cur=%lx top=%lx\n",
++ e820_table_start, e820_table_end, e820_table_top);
++ if (e820_table_end > e820_table_top)
++ e820_table_top = e820_table_end;
+ }
+
+-static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
+- unsigned long end,
+- unsigned long page_size_mask)
++unsigned long __init
++kernel_physical_mapping_init(unsigned long start,
++ unsigned long end,
++ unsigned long page_size_mask)
+ {
+
+ unsigned long next, last_map_addr = end;
+@@ -887,207 +851,6 @@ static unsigned long __meminit kernel_ph
+ return last_map_addr;
+ }
+
+-struct map_range {
+- unsigned long start;
+- unsigned long end;
+- unsigned page_size_mask;
+-};
+-
+-#define NR_RANGE_MR 5
+-
+-static int save_mr(struct map_range *mr, int nr_range,
+- unsigned long start_pfn, unsigned long end_pfn,
+- unsigned long page_size_mask)
+-{
+-
+- if (start_pfn < end_pfn) {
+- if (nr_range >= NR_RANGE_MR)
+- panic("run out of range for init_memory_mapping\n");
+- mr[nr_range].start = start_pfn<<PAGE_SHIFT;
+- mr[nr_range].end = end_pfn<<PAGE_SHIFT;
+- mr[nr_range].page_size_mask = page_size_mask;
+- nr_range++;
+- }
+-
+- return nr_range;
+-}
+-
+-unsigned long __init_refok init_memory_mapping(unsigned long start,
+- unsigned long end)
+-{
+- unsigned long page_size_mask = 0;
+- unsigned long start_pfn, end_pfn;
+- unsigned long last_map_addr = 0;
+- unsigned long pos;
+-
+- struct map_range mr[NR_RANGE_MR];
+- int nr_range, i;
+- int use_pse, use_gbpages;
+-
+- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
+-
+- if (!after_bootmem)
+- init_gbpages();
+-
+-#ifdef CONFIG_DEBUG_PAGEALLOC
+- /*
+- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+- * This will simplify cpa(), which otherwise needs to support splitting
+- * large pages into small in interrupt context, etc.
+- */
+- use_pse = use_gbpages = 0;
+-#else
+- use_pse = cpu_has_pse;
+- use_gbpages = direct_gbpages;
+-#endif
+-
+- if (use_gbpages)
+- page_size_mask |= 1 << PG_LEVEL_1G;
+- if (use_pse)
+- page_size_mask |= 1 << PG_LEVEL_2M;
+-
+- memset(mr, 0, sizeof(mr));
+- nr_range = 0;
+-
+- /* head if not big page alignment ? */
+- start_pfn = start >> PAGE_SHIFT;
+- pos = start_pfn << PAGE_SHIFT;
+- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
+- << (PMD_SHIFT - PAGE_SHIFT);
+- if (end_pfn > (end >> PAGE_SHIFT))
+- end_pfn = end >> PAGE_SHIFT;
+- if (start_pfn < end_pfn) {
+- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+- pos = end_pfn << PAGE_SHIFT;
+- }
+-
+- /* big page (2M) range*/
+- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+- << (PMD_SHIFT - PAGE_SHIFT);
+- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+- << (PUD_SHIFT - PAGE_SHIFT);
+- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
+- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+- if (start_pfn < end_pfn) {
+- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+- page_size_mask & (1<<PG_LEVEL_2M));
+- pos = end_pfn << PAGE_SHIFT;
+- }
+-
+- /* big page (1G) range */
+- start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+- << (PUD_SHIFT - PAGE_SHIFT);
+- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+- if (start_pfn < end_pfn) {
+- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+- page_size_mask &
+- ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+- pos = end_pfn << PAGE_SHIFT;
+- }
+-
+- /* tail is not big page (1G) alignment */
+- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+- << (PMD_SHIFT - PAGE_SHIFT);
+- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+- if (start_pfn < end_pfn) {
+- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+- page_size_mask & (1<<PG_LEVEL_2M));
+- pos = end_pfn << PAGE_SHIFT;
+- }
+-
+- /* tail is not big page (2M) alignment */
+- start_pfn = pos>>PAGE_SHIFT;
+- end_pfn = end>>PAGE_SHIFT;
+- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+-
+- /* try to merge same page size and continuous */
+- for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+- unsigned long old_start;
+- if (mr[i].end != mr[i+1].start ||
+- mr[i].page_size_mask != mr[i+1].page_size_mask)
+- continue;
+- /* move it */
+- old_start = mr[i].start;
+- memmove(&mr[i], &mr[i+1],
+- (nr_range - 1 - i) * sizeof (struct map_range));
+- mr[i--].start = old_start;
+- nr_range--;
+- }
+-
+- for (i = 0; i < nr_range; i++)
+- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
+- mr[i].start, mr[i].end,
+- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
+- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+-
+- if (!after_bootmem)
+- find_early_table_space(end, use_pse, use_gbpages);
+-
+- for (i = 0; i < nr_range; i++)
+- last_map_addr = kernel_physical_mapping_init(
+- mr[i].start, mr[i].end,
+- mr[i].page_size_mask);
+-
+- BUG_ON(table_cur > table_top);
+- if (!start)
+- xen_finish_init_mapping();
+- else if (table_cur < table_top)
+- /* Disable the 'table_cur' allocator.
*/ +- table_top = table_cur; +- +- __flush_tlb_all(); +- +- if (!after_bootmem && table_top > table_start) +- reserve_early(table_start << PAGE_SHIFT, +- table_top << PAGE_SHIFT, "PGTABLE"); +- +- printk(KERN_INFO "last_map_addr: %lx end: %lx\n", +- last_map_addr, end); +- +- if (!after_bootmem) +- early_memtest(start, end); +- +- return last_map_addr >> PAGE_SHIFT; +-} +- + #ifndef CONFIG_NUMA + void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) + { +@@ -1165,28 +928,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to + + #endif /* CONFIG_MEMORY_HOTPLUG */ + +-/* +- * devmem_is_allowed() checks to see if /dev/mem access to a certain address +- * is valid. The argument is a physical page number. +- * +- * +- * On x86, access has to be given to the first megabyte of ram because that area +- * contains bios code and data regions used by X and dosemu and similar apps. +- * Access has to be given to non-kernel-ram areas as well, these contain the PCI +- * mmio resources as well as potential bios/acpi data regions. +- */ +-int devmem_is_allowed(unsigned long pagenr) +-{ +- if (pagenr <= 256) +- return 1; +- if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) +- return 0; +- if (mfn_to_local_pfn(pagenr) >= max_pfn) +- return 1; +- return 0; +-} +- +- + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, + kcore_modules, kcore_vsyscall; + +@@ -1243,56 +984,39 @@ void __init mem_init(void) + initsize >> 10); + } + +-void free_init_pages(char *what, unsigned long begin, unsigned long end) ++#ifdef CONFIG_DEBUG_RODATA ++const int rodata_test_data = 0xC3; ++EXPORT_SYMBOL_GPL(rodata_test_data); ++ ++static int kernel_set_to_readonly; ++ ++void set_kernel_text_rw(void) + { +- unsigned long addr = begin; ++ unsigned long start = PFN_ALIGN(_stext); ++ unsigned long end = PFN_ALIGN(__start_rodata); + +- if (addr >= end) ++ if (!kernel_set_to_readonly) + return; + +- /* +- * If debugging page accesses then do not free this memory but +- * mark them not present - any buggy init-section access will +- * create a kernel page fault: +- */ +-#ifdef CONFIG_DEBUG_PAGEALLOC +- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", +- begin, PAGE_ALIGN(end)); +- set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +-#else +- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); ++ pr_debug("Set kernel text: %lx - %lx for read write\n", ++ start, end); + +- for (; addr < end; addr += PAGE_SIZE) { +- ClearPageReserved(virt_to_page(addr)); +- init_page_count(virt_to_page(addr)); +- memset((void *)(addr & ~(PAGE_SIZE-1)), +- POISON_FREE_INITMEM, PAGE_SIZE); +- if (addr >= __START_KERNEL_map) { +- /* make_readonly() reports all kernel addresses. 
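+- *
+- * (Editorial note - not part of the original patch: freeing one
+- * init page here costs two hypercalls - the __va() alias is
+- * remapped read-write first, then the __START_KERNEL_map alias is
+- * cleared with __pte(0) so the frame can be reused as plain RAM.)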
*/ +- if (HYPERVISOR_update_va_mapping((unsigned long)__va(__pa(addr)), +- pfn_pte(__pa(addr) >> PAGE_SHIFT, +- PAGE_KERNEL), +- 0)) +- BUG(); +- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) +- BUG(); +- } +- free_page(addr); +- totalram_pages++; +- } +-#endif ++ set_memory_rw(start, (end - start) >> PAGE_SHIFT); + } + +-void free_initmem(void) ++void set_kernel_text_ro(void) + { +- free_init_pages("unused kernel memory", +- (unsigned long)(&__init_begin), +- (unsigned long)(&__init_end)); +-} ++ unsigned long start = PFN_ALIGN(_stext); ++ unsigned long end = PFN_ALIGN(__start_rodata); + +-#ifdef CONFIG_DEBUG_RODATA +-const int rodata_test_data = 0xC3; +-EXPORT_SYMBOL_GPL(rodata_test_data); ++ if (!kernel_set_to_readonly) ++ return; ++ ++ pr_debug("Set kernel text: %lx - %lx for read only\n", ++ start, end); ++ ++ set_memory_ro(start, (end - start) >> PAGE_SHIFT); ++} + + void mark_rodata_ro(void) + { +@@ -1300,15 +1024,12 @@ void mark_rodata_ro(void) + unsigned long rodata_start = + ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + +-#ifdef CONFIG_DYNAMIC_FTRACE +- /* Dynamic tracing modifies the kernel text section */ +- start = rodata_start; +-#endif +- + printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", + (end - start) >> 10); + set_memory_ro(start, (end - start) >> PAGE_SHIFT); + ++ kernel_set_to_readonly = 1; ++ + /* + * The rodata section (but not the kernel text!) should also be + * not-executable. +@@ -1328,13 +1049,6 @@ void mark_rodata_ro(void) + + #endif + +-#ifdef CONFIG_BLK_DEV_INITRD +-void free_initrd_mem(unsigned long start, unsigned long end) +-{ +- free_init_pages("initrd memory", start, end); +-} +-#endif +- + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) + { +--- head-2010-01-18.orig/arch/x86/mm/iomap_32-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/iomap_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -20,10 +20,11 @@ + #include + #include + #include ++#include + + int is_io_mapping_possible(resource_size_t base, unsigned long size) + { +-#ifndef CONFIG_X86_PAE ++#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) + /* There is no way to map greater than 1 << 32 address without PAE */ + if (base + size > 0x100000000ULL) + return 0; +@@ -32,16 +33,28 @@ int is_io_mapping_possible(resource_size + } + EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +-/* Map 'mfn' using fixed map 'type' and protections 'prot' +- */ +-void * +-iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot) ++void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) + { + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + ++ debug_kmap_atomic(type); ++ idx = type + KM_TYPE_NR * smp_processor_id(); ++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++ set_pte_at(&init_mm, vaddr, kmap_pte - idx, pfn_pte(pfn, prot)); ++ /*arch_flush_lazy_mmu_mode();*/ ++ ++ return (void *)vaddr; ++} ++ ++/* ++ * Map 'mfn' using fixed map 'type' and protections 'prot' ++ */ ++void * ++iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot) ++{ + /* + * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 
+ * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the +@@ -51,13 +64,8 @@ iomap_atomic_prot_pfn(unsigned long mfn, + if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) + prot = PAGE_KERNEL_UC_MINUS; + +- idx = type + KM_TYPE_NR*smp_processor_id(); +- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + pgprot_val(prot) |= _PAGE_IOMAP; +- set_pte_at(&init_mm, vaddr, kmap_pte-idx, pfn_pte_ma(mfn, prot)); +- /*arch_flush_lazy_mmu_mode()*/; +- +- return (void*) vaddr; ++ return kmap_atomic_prot_pfn(mfn, type, prot); + } + EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); + +--- head-2010-01-18.orig/arch/x86/mm/ioremap-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/ioremap-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -23,13 +23,17 @@ + #include + #include + +-#ifdef CONFIG_X86_64 +- +-static inline int phys_addr_valid(unsigned long addr) ++static inline int phys_addr_valid(resource_size_t addr) + { +- return addr < (1UL << boot_cpu_data.x86_phys_bits); ++#ifdef CONFIG_PHYS_ADDR_T_64BIT ++ return !(addr >> boot_cpu_data.x86_phys_bits); ++#else ++ return 1; ++#endif + } + ++#ifdef CONFIG_X86_64 ++ + #define phys_base 0 + + unsigned long __phys_addr(unsigned long x) +@@ -41,8 +45,7 @@ unsigned long __phys_addr(unsigned long + } else { + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; +- VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : +- !phys_addr_valid(x)); ++ VIRTUAL_BUG_ON(!phys_addr_valid(x)); + } + return x; + } +@@ -59,10 +62,8 @@ bool __virt_addr_valid(unsigned long x) + if (x < PAGE_OFFSET) + return false; + x -= PAGE_OFFSET; +- if (system_state == SYSTEM_BOOTING ? +- x > MAXMEM : !phys_addr_valid(x)) { ++ if (!phys_addr_valid(x)) + return false; +- } + } + + return pfn_valid(x >> PAGE_SHIFT); +@@ -73,18 +74,12 @@ EXPORT_SYMBOL(__virt_addr_valid); + + #else + +-static inline int phys_addr_valid(unsigned long addr) +-{ +- return 1; +-} +- + #ifdef CONFIG_DEBUG_VIRTUAL + unsigned long __phys_addr(unsigned long x) + { +- /* VMALLOC_* aren't constants; not available at the boot time */ ++ /* VMALLOC_* aren't constants */ + VIRTUAL_BUG_ON(x < PAGE_OFFSET); +- VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && +- is_vmalloc_addr((void *) x)); ++ VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + return x - PAGE_OFFSET; + } + EXPORT_SYMBOL(__phys_addr); +@@ -94,7 +89,9 @@ bool __virt_addr_valid(unsigned long x) + { + if (x < PAGE_OFFSET) + return false; +- if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) ++ if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) ++ return false; ++ if (x >= FIXADDR_START) + return false; + return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); + } +@@ -462,16 +459,17 @@ static void __iomem *__ioremap_caller(re + return NULL; + area->phys_addr = phys_addr; + vaddr = (unsigned long) area->addr; +- if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr), +- size, prot, domid)) { ++ ++ if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { + free_memtype(phys_addr, phys_addr + size); + free_vm_area(area); + return NULL; + } + +- if (ioremap_change_attr(vaddr, size, prot_val) < 0) { ++ if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr), ++ size, prot, domid)) { + free_memtype(phys_addr, phys_addr + size); +- vunmap(area->addr); ++ free_vm_area(area); + return NULL; + } + +@@ -528,7 +526,7 @@ EXPORT_SYMBOL(ioremap_nocache); + * + * Must be freed with iounmap. 
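+ *
+ * (Editorial sketch - not part of the original patch; pdev, the BAR
+ * index and the register offset are hypothetical:
+ *
+ *     void __iomem *regs = ioremap_wc(pci_resource_start(pdev, 0),
+ *                                     pci_resource_len(pdev, 0));
+ *     if (regs) {
+ *             iowrite32(0x1, regs + 0x10);    // WC-buffered MMIO store
+ *             iounmap(regs);
+ *     }
+ *
+ * On kernels without PAT the call below falls back to an uncached
+ * mapping instead of write-combining.)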
+ */ +-void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) ++void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) + { + if (pat_enabled) + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, +@@ -558,7 +556,8 @@ static void __iomem *ioremap_default(res + * - UC_MINUS for non-WB-able memory with no other conflicting mappings + * - Inherit from confliting mappings otherwise + */ +- err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags); ++ err = reserve_memtype(phys_addr, phys_addr + size, ++ _PAGE_CACHE_WB, &flags); + if (err < 0) + return NULL; + +@@ -697,13 +696,19 @@ static inline pte_t * __init early_iorem + return &bm_pte[pte_index(addr)]; + } + ++static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; ++ + void __init early_ioremap_init(void) + { + pmd_t *pmd; ++ int i; + + if (early_ioremap_debug) + printk(KERN_INFO "early_ioremap_init()\n"); + ++ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) ++ slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); ++ + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); + memset(bm_pte, 0, sizeof(bm_pte)); + make_lowmem_page_readonly(bm_pte, XENFEAT_writable_page_tables); +@@ -734,7 +739,7 @@ void __init early_ioremap_reset(void) + } + + static void __init __early_set_fixmap(enum fixed_addresses idx, +- unsigned long phys, pgprot_t flags) ++ phys_addr_t phys, pgprot_t flags) + { + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; +@@ -753,7 +758,7 @@ static void __init __early_set_fixmap(en + } + + static inline void __init early_set_fixmap(enum fixed_addresses idx, +- unsigned long phys, pgprot_t prot) ++ phys_addr_t phys, pgprot_t prot) + { + if (after_paging_init) + __set_fixmap(idx, phys, prot); +@@ -771,6 +776,7 @@ static inline void __init early_clear_fi + + static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; + static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; ++ + static int __init check_early_ioremap_leak(void) + { + int count = 0; +@@ -792,9 +798,11 @@ static int __init check_early_ioremap_le + } + late_initcall(check_early_ioremap_leak); + +-static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) ++static void __init __iomem * ++__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) + { +- unsigned long offset, last_addr; ++ unsigned long offset; ++ resource_size_t last_addr; + unsigned int nrpages; + enum fixed_addresses idx0, idx; + int i, slot; +@@ -810,15 +818,15 @@ static void __init __iomem *__early_iore + } + + if (slot < 0) { +- printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", +- phys_addr, size); ++ printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n", ++ (u64)phys_addr, size); + WARN_ON(1); + return NULL; + } + + if (early_ioremap_debug) { +- printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", +- phys_addr, size, slot); ++ printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ", ++ (u64)phys_addr, size, slot); + dump_stack(); + } + +@@ -858,20 +866,28 @@ static void __init __iomem *__early_iore + --nrpages; + } + if (early_ioremap_debug) +- printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); ++ printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); + +- prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); ++ prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + return prev_map[slot]; + } + + /* Remap an IO device */ +-void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) ++void 
__init __iomem * ++early_ioremap(resource_size_t phys_addr, unsigned long size) + { ++ /* ++ * Don't remap the low PCI/ISA area, it's always mapped. ++ */ ++ if (is_initial_xendomain() && is_ISA_range(phys_addr, phys_addr + size - 1)) ++ return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr); ++ + return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); + } + + /* Remap memory */ +-void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) ++void __init __iomem * ++early_memremap(resource_size_t phys_addr, unsigned long size) + { + return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL); + } +@@ -884,6 +900,15 @@ void __init early_iounmap(void __iomem * + enum fixed_addresses idx; + int i, slot; + ++ /* ++ * early_ioremap special-cases the PCI/ISA range by not instantiating a ++ * vm_area and by simply returning an address into the kernel mapping ++ * of ISA space. So handle that here. ++ */ ++ if ((unsigned long)addr >= fix_to_virt(FIX_ISAMAP_BEGIN) ++ && (unsigned long)addr < fix_to_virt(FIX_ISAMAP_END - 1)) ++ return; ++ + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { +@@ -928,8 +953,3 @@ void __init early_iounmap(void __iomem * + } + prev_map[slot] = NULL; + } +- +-void __this_fixmap_does_not_exist(void) +-{ +- WARN_ON(1); +-} +--- head-2010-01-18.orig/arch/x86/mm/pageattr-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pageattr-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -33,6 +34,7 @@ struct cpa_data { + unsigned long pfn; + unsigned force_split : 1; + int curpage; ++ struct page **pages; + }; + + /* +@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock); + + #define CPA_FLUSHTLB 1 + #define CPA_ARRAY 2 ++#define CPA_PAGES_ARRAY 4 + + #ifdef CONFIG_PROC_FS + static unsigned long direct_pages_count[PG_LEVEL_NUM]; +@@ -95,7 +98,7 @@ static inline unsigned long highmap_star + + static inline unsigned long highmap_end_pfn(void) + { +- return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; ++ return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + } + + #endif +@@ -150,7 +153,7 @@ static void __cpa_flush_all(void *arg) + */ + __flush_tlb_all(); + +- if (cache && boot_cpu_data.x86_model >= 4) ++ if (cache && boot_cpu_data.x86 >= 4) + wbinvd(); + } + +@@ -201,38 +204,41 @@ static void cpa_flush_range(unsigned lon + } + } + +-static void cpa_flush_array(unsigned long *start, int numpages, int cache) ++static void cpa_flush_array(unsigned long *start, int numpages, int cache, ++ int in_flags, struct page **pages) + { + unsigned int i, level; +- unsigned long *addr; ++ unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ + + BUG_ON(irqs_disabled()); + +- on_each_cpu(__cpa_flush_range, NULL, 1); ++ on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); + +- if (!cache) ++ if (!cache || do_wbinvd) + return; + +- /* 4M threshold */ +- if (numpages >= 1024) { +- if (boot_cpu_data.x86_model >= 4) +- wbinvd(); +- return; +- } + /* + * We only need to flush on one CPU, + * clflush is a MESI-coherent instruction that + * will cause all other CPUs to flush the same + * cachelines: + */ +- for (i = 0, addr = start; i < numpages; i++, addr++) { +- pte_t *pte = lookup_address(*addr, &level); ++ for (i = 0; i < numpages; i++) { ++ unsigned long addr; ++ pte_t *pte; ++ ++ if (in_flags & CPA_PAGES_ARRAY) ++ addr = (unsigned long)page_address(pages[i]); ++ else ++ addr 
= start[i]; ++ ++ pte = lookup_address(addr, &level); + + /* + * Only flush present addresses: + */ + if (pte && (__pte_val(*pte) & _PAGE_PRESENT)) +- clflush_cache_range((void *) *addr, PAGE_SIZE); ++ clflush_cache_range((void *)addr, PAGE_SIZE); + } + } + +@@ -498,6 +504,13 @@ static int split_large_page(pte_t *kpte, + pbase = (pte_t *)page_address(base); + paravirt_alloc_pte(&init_mm, page_to_pfn(base)); + ref_prot = pte_pgprot(pte_clrhuge(*kpte)); ++ /* ++ * If we ever want to utilize the PAT bit, we need to ++ * update this function to make sure it's converted from ++ * bit 12 to bit 7 when we cross from the 2MB level to ++ * the 4K level: ++ */ ++ WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE); + + #ifdef CONFIG_X86_64 + if (level == PG_LEVEL_1G) { +@@ -597,7 +610,9 @@ static int __change_page_attr(struct cpa + unsigned int level; + pte_t *kpte, old_pte; + +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & CPA_PAGES_ARRAY) ++ address = (unsigned long)page_address(cpa->pages[cpa->curpage]); ++ else if (cpa->flags & CPA_ARRAY) + address = cpa->vaddr[cpa->curpage]; + else + address = *cpa->vaddr; +@@ -701,7 +716,9 @@ static int cpa_process_alias(struct cpa_ + * No need to redo, when the primary call touched the direct + * mapping already: + */ +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & CPA_PAGES_ARRAY) ++ vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); ++ else if (cpa->flags & CPA_ARRAY) + vaddr = cpa->vaddr[cpa->curpage]; + else + vaddr = *cpa->vaddr; +@@ -712,7 +729,7 @@ static int cpa_process_alias(struct cpa_ + alias_cpa = *cpa; + temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); + alias_cpa.vaddr = &temp_cpa_vaddr; +- alias_cpa.flags &= ~CPA_ARRAY; ++ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + + + ret = __change_page_attr_set_clr(&alias_cpa, 0); +@@ -725,7 +742,7 @@ static int cpa_process_alias(struct cpa_ + * No need to redo, when the primary call touched the high + * mapping already: + */ +- if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) ++ if (within(vaddr, (unsigned long) _text, _brk_end)) + return 0; + + /* +@@ -738,7 +755,7 @@ static int cpa_process_alias(struct cpa_ + alias_cpa = *cpa; + temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map; + alias_cpa.vaddr = &temp_cpa_vaddr; +- alias_cpa.flags &= ~CPA_ARRAY; ++ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + + /* + * The high mapping range is imprecise, so ignore the return value. 
+@@ -759,7 +776,7 @@ static int __change_page_attr_set_clr(st + */ + cpa->numpages = numpages; + /* for array changes, we can't use large page */ +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) + cpa->numpages = 1; + + if (!debug_pagealloc) +@@ -783,7 +800,7 @@ static int __change_page_attr_set_clr(st + */ + BUG_ON(cpa->numpages > numpages); + numpages -= cpa->numpages; +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) + cpa->curpage++; + else + *cpa->vaddr += cpa->numpages * PAGE_SIZE; +@@ -800,7 +817,8 @@ static inline int cache_attr(pgprot_t at + + static int change_page_attr_set_clr(unsigned long *addr, int numpages, + pgprot_t mask_set, pgprot_t mask_clr, +- int force_split, int array) ++ int force_split, int in_flag, ++ struct page **pages) + { + struct cpa_data cpa; + int ret, cache, checkalias; +@@ -815,15 +833,7 @@ static int change_page_attr_set_clr(unsi + return 0; + + /* Ensure we are PAGE_SIZE aligned */ +- if (!array) { +- if (*addr & ~PAGE_MASK) { +- *addr &= PAGE_MASK; +- /* +- * People should not be passing in unaligned addresses: +- */ +- WARN_ON_ONCE(1); +- } +- } else { ++ if (in_flag & CPA_ARRAY) { + int i; + for (i = 0; i < numpages; i++) { + if (addr[i] & ~PAGE_MASK) { +@@ -831,6 +841,18 @@ static int change_page_attr_set_clr(unsi + WARN_ON_ONCE(1); + } + } ++ } else if (!(in_flag & CPA_PAGES_ARRAY)) { ++ /* ++ * in_flag of CPA_PAGES_ARRAY implies it is aligned. ++ * No need to cehck in that case ++ */ ++ if (*addr & ~PAGE_MASK) { ++ *addr &= PAGE_MASK; ++ /* ++ * People should not be passing in unaligned addresses: ++ */ ++ WARN_ON_ONCE(1); ++ } + } + + /* Must avoid aliasing mappings in the highmem code */ +@@ -848,6 +870,7 @@ static int change_page_attr_set_clr(unsi + xen_multicall_flush(true); + + cpa.vaddr = addr; ++ cpa.pages = pages; + cpa.numpages = numpages; + cpa.mask_set = mask_set; + cpa.mask_clr = mask_clr; +@@ -855,8 +878,8 @@ static int change_page_attr_set_clr(unsi + cpa.curpage = 0; + cpa.force_split = force_split; + +- if (array) +- cpa.flags |= CPA_ARRAY; ++ if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) ++ cpa.flags |= in_flag; + + /* No alias checking for _NX bit modifications */ + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; +@@ -882,9 +905,10 @@ static int change_page_attr_set_clr(unsi + * wbindv): + */ + if (!ret && cpu_has_clflush) { +- if (cpa.flags & CPA_ARRAY) +- cpa_flush_array(addr, numpages, cache); +- else ++ if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { ++ cpa_flush_array(addr, numpages, cache, ++ cpa.flags, pages); ++ } else + cpa_flush_range(*addr, numpages, cache); + } else + cpa_flush_all(cache); +@@ -905,14 +929,28 @@ static inline int change_page_attr_set(u + pgprot_t mask, int array) + { + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, +- array); ++ (array ? CPA_ARRAY : 0), NULL); + } + + static inline int change_page_attr_clear(unsigned long *addr, int numpages, + pgprot_t mask, int array) + { + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, +- array); ++ (array ? 
CPA_ARRAY : 0), NULL); ++} ++ ++static inline int cpa_set_pages_array(struct page **pages, int numpages, ++ pgprot_t mask) ++{ ++ return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, ++ CPA_PAGES_ARRAY, pages); ++} ++ ++static inline int cpa_clear_pages_array(struct page **pages, int numpages, ++ pgprot_t mask) ++{ ++ return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, ++ CPA_PAGES_ARRAY, pages); + } + + #ifdef CONFIG_XEN +@@ -971,71 +1009,94 @@ int _set_memory_uc(unsigned long addr, i + + int set_memory_uc(unsigned long addr, int numpages) + { ++ int ret; ++ + /* + * for now UC MINUS. see comments in ioremap_nocache() + */ +- if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, +- _PAGE_CACHE_UC_MINUS, NULL)) +- return -EINVAL; ++ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, ++ _PAGE_CACHE_UC_MINUS, NULL); ++ if (ret) ++ goto out_err; ++ ++ ret = _set_memory_uc(addr, numpages); ++ if (ret) ++ goto out_free; + +- return _set_memory_uc(addr, numpages); ++ return 0; ++ ++out_free: ++ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); ++out_err: ++ return ret; + } + EXPORT_SYMBOL(set_memory_uc); + + int set_memory_array_uc(unsigned long *addr, int addrinarray) + { +- unsigned long start; +- unsigned long end; +- int i; ++ int i, j; ++ int ret; ++ + /* + * for now UC MINUS. see comments in ioremap_nocache() + */ + for (i = 0; i < addrinarray; i++) { +- start = __pa(addr[i]); +- for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { +- if (end != __pa(addr[i + 1])) +- break; +- i++; +- } +- if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) +- goto out; ++ ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, ++ _PAGE_CACHE_UC_MINUS, NULL); ++ if (ret) ++ goto out_free; + } + +- return change_page_attr_set(addr, addrinarray, ++ ret = change_page_attr_set(addr, addrinarray, + __pgprot(_PAGE_CACHE_UC_MINUS), 1); +-out: +- for (i = 0; i < addrinarray; i++) { +- unsigned long tmp = __pa(addr[i]); ++ if (ret) ++ goto out_free; + +- if (tmp == start) +- break; +- for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { +- if (end != __pa(addr[i + 1])) +- break; +- i++; +- } +- free_memtype(tmp, end); +- } +- return -EINVAL; ++ return 0; ++ ++out_free: ++ for (j = 0; j < i; j++) ++ free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE); ++ ++ return ret; + } + EXPORT_SYMBOL(set_memory_array_uc); + + int _set_memory_wc(unsigned long addr, int numpages) + { +- return change_page_attr_set(&addr, numpages, ++ int ret; ++ ret = change_page_attr_set(&addr, numpages, ++ __pgprot(_PAGE_CACHE_UC_MINUS), 0); ++ ++ if (!ret) { ++ ret = change_page_attr_set(&addr, numpages, + __pgprot(_PAGE_CACHE_WC), 0); ++ } ++ return ret; + } + + int set_memory_wc(unsigned long addr, int numpages) + { ++ int ret; ++ + if (!pat_enabled) + return set_memory_uc(addr, numpages); + +- if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, +- _PAGE_CACHE_WC, NULL)) +- return -EINVAL; ++ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, ++ _PAGE_CACHE_WC, NULL); ++ if (ret) ++ goto out_err; + +- return _set_memory_wc(addr, numpages); ++ ret = _set_memory_wc(addr, numpages); ++ if (ret) ++ goto out_free; ++ ++ return 0; ++ ++out_free: ++ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); ++out_err: ++ return ret; + } + EXPORT_SYMBOL(set_memory_wc); + +@@ -1047,29 +1108,31 @@ int _set_memory_wb(unsigned long addr, i + + int set_memory_wb(unsigned long addr, int 
numpages) + { +- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); ++ int ret; + +- return _set_memory_wb(addr, numpages); ++ ret = _set_memory_wb(addr, numpages); ++ if (ret) ++ return ret; ++ ++ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); ++ return 0; + } + EXPORT_SYMBOL(set_memory_wb); + + int set_memory_array_wb(unsigned long *addr, int addrinarray) + { + int i; ++ int ret; + +- for (i = 0; i < addrinarray; i++) { +- unsigned long start = __pa(addr[i]); +- unsigned long end; +- +- for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { +- if (end != __pa(addr[i + 1])) +- break; +- i++; +- } +- free_memtype(start, end); +- } +- return change_page_attr_clear(addr, addrinarray, ++ ret = change_page_attr_clear(addr, addrinarray, + __pgprot(_PAGE_CACHE_MASK), 1); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < addrinarray; i++) ++ free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE); ++ ++ return 0; + } + EXPORT_SYMBOL(set_memory_array_wb); + +@@ -1105,7 +1168,7 @@ int set_memory_np(unsigned long addr, in + int set_memory_4k(unsigned long addr, int numpages) + { + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), +- __pgprot(0), 1, 0); ++ __pgprot(0), 1, 0, NULL); + } + + int set_pages_uc(struct page *page, int numpages) +@@ -1116,6 +1179,35 @@ int set_pages_uc(struct page *page, int + } + EXPORT_SYMBOL(set_pages_uc); + ++int set_pages_array_uc(struct page **pages, int addrinarray) ++{ ++ unsigned long start; ++ unsigned long end; ++ int i; ++ int free_idx; ++ ++ for (i = 0; i < addrinarray; i++) { ++ start = (unsigned long)page_address(pages[i]); ++ end = start + PAGE_SIZE; ++ if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) ++ goto err_out; ++ } ++ ++ if (cpa_set_pages_array(pages, addrinarray, ++ __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { ++ return 0; /* Success */ ++ } ++err_out: ++ free_idx = i; ++ for (i = 0; i < free_idx; i++) { ++ start = (unsigned long)page_address(pages[i]); ++ end = start + PAGE_SIZE; ++ free_memtype(start, end); ++ } ++ return -EINVAL; ++} ++EXPORT_SYMBOL(set_pages_array_uc); ++ + int set_pages_wb(struct page *page, int numpages) + { + unsigned long addr = (unsigned long)page_address(page); +@@ -1124,6 +1216,28 @@ int set_pages_wb(struct page *page, int + } + EXPORT_SYMBOL(set_pages_wb); + ++int set_pages_array_wb(struct page **pages, int addrinarray) ++{ ++ int retval; ++ unsigned long start; ++ unsigned long end; ++ int i; ++ ++ retval = cpa_clear_pages_array(pages, addrinarray, ++ __pgprot(_PAGE_CACHE_MASK)); ++ if (retval) ++ return retval; ++ ++ for (i = 0; i < addrinarray; i++) { ++ start = (unsigned long)page_address(pages[i]); ++ end = start + PAGE_SIZE; ++ free_memtype(start, end); ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL(set_pages_array_wb); ++ + int set_pages_x(struct page *page, int numpages) + { + unsigned long addr = (unsigned long)page_address(page); +--- head-2010-01-18.orig/arch/x86/mm/pat-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pat-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -31,7 +31,7 @@ + #ifdef CONFIG_X86_PAT + int __read_mostly pat_enabled = 1; + +-void __cpuinit pat_disable(char *reason) ++static inline void pat_disable(const char *reason) + { + pat_enabled = 0; + printk(KERN_INFO "%s\n", reason); +@@ -43,6 +43,11 @@ static int __init nopat(char *str) + return 0; + } + early_param("nopat", nopat); ++#else ++static inline void pat_disable(const char *reason) ++{ ++ (void)reason; ++} + #endif + + +@@ -79,16 +84,20 @@ void pat_init(void) + 
if (!pat_enabled) + return; + +- /* Paranoia check. */ +- if (!cpu_has_pat && boot_pat_state) { +- /* +- * If this happens we are on a secondary CPU, but +- * switched to PAT on the boot CPU. We have no way to +- * undo PAT. +- */ +- printk(KERN_ERR "PAT enabled, " +- "but not supported by secondary CPU\n"); +- BUG(); ++ if (!cpu_has_pat) { ++ if (!boot_pat_state) { ++ pat_disable("PAT not supported by CPU."); ++ return; ++ } else { ++ /* ++ * If this happens we are on a secondary CPU, but ++ * switched to PAT on the boot CPU. We have no way to ++ * undo PAT. ++ */ ++ printk(KERN_ERR "PAT enabled, " ++ "but not supported by secondary CPU\n"); ++ BUG(); ++ } + } + + #ifndef CONFIG_XEN +@@ -185,10 +194,10 @@ static unsigned long pat_x_mtrr_type(u64 + u8 mtrr_type; + + mtrr_type = mtrr_type_lookup(start, end); +- if (mtrr_type == MTRR_TYPE_UNCACHABLE) +- return _PAGE_CACHE_UC; +- if (mtrr_type == MTRR_TYPE_WRCOMB) +- return _PAGE_CACHE_WC; ++ if (mtrr_type != MTRR_TYPE_WRBACK) ++ return _PAGE_CACHE_UC_MINUS; ++ ++ return _PAGE_CACHE_WB; + } + + return req_type; +@@ -355,23 +364,13 @@ int reserve_memtype(u64 start, u64 end, + return 0; + } + +- if (req_type == -1) { +- /* +- * Call mtrr_lookup to get the type hint. This is an +- * optimization for /dev/mem mmap'ers into WB memory (BIOS +- * tools and ACPI tools). Use WB request for WB memory and use +- * UC_MINUS otherwise. +- */ +- u8 mtrr_type = mtrr_type_lookup(start, end); +- +- if (mtrr_type == MTRR_TYPE_WRBACK) +- actual_type = _PAGE_CACHE_WB; +- else +- actual_type = _PAGE_CACHE_UC_MINUS; +- } else { +- actual_type = pat_x_mtrr_type(start, end, +- req_type & _PAGE_CACHE_MASK); +- } ++ /* ++ * Call mtrr_lookup to get the type hint. This is an ++ * optimization for /dev/mem mmap'ers into WB memory (BIOS ++ * tools and ACPI tools). Use WB request for WB memory and use ++ * UC_MINUS otherwise. ++ */ ++ actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK); + + if (new_type) + *new_type = actual_type; +@@ -549,9 +548,7 @@ static inline int range_is_allowed(unsig + int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn, + unsigned long size, pgprot_t *vma_prot) + { +- u64 addr = (u64)mfn << PAGE_SHIFT; +- unsigned long flags = -1; +- int retval; ++ unsigned long flags = _PAGE_CACHE_WB; + + if (!range_is_allowed(mfn, size)) + return 0; +@@ -581,60 +578,21 @@ int phys_mem_access_prot_allowed(struct + #endif + #endif + +- /* +- * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. 
+- * +- * Without O_SYNC, we want to get +- * - WB for WB-able memory and no other conflicting mappings +- * - UC_MINUS for non-WB-able memory with no other conflicting mappings +- * - Inherit from confliting mappings otherwise +- */ +- if (flags != -1) { +- retval = reserve_memtype(addr, addr + size, flags, NULL); +- } else { +- retval = reserve_memtype(addr, addr + size, -1, &flags); +- } +- +- if (retval < 0) +- return 0; +- +- if (ioremap_check_change_attr(mfn, size, flags) < 0) { +- free_memtype(addr, addr + size); +- printk(KERN_INFO +- "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", +- current->comm, current->pid, +- cattr_name(flags), +- addr, addr + size); +- return 0; +- } +- + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | + flags); + return 1; + } + +-void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot) +-{ +- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); +- u64 addr = (u64)mfn << PAGE_SHIFT; +- unsigned long flags; +- +- reserve_memtype(addr, addr + size, want_flags, &flags); +- if (flags != want_flags) { +- printk(KERN_INFO +- "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", +- current->comm, current->pid, +- cattr_name(want_flags), +- addr, (unsigned long long)(addr + size), +- cattr_name(flags)); +- } +-} +- +-void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot) ++/* ++ * Change the memory type for the physial address range in kernel identity ++ * mapping space if that range is a part of identity map. ++ */ ++int kernel_map_sync_memtype(u64 ma, unsigned long size, unsigned long flags) + { +- u64 addr = (u64)mfn << PAGE_SHIFT; ++ if (!pat_enabled) ++ return 0; + +- free_memtype(addr, addr + size); ++ return ioremap_check_change_attr(ma >> PAGE_SHIFT, size, flags); + } + + #ifndef CONFIG_XEN +@@ -647,17 +605,18 @@ static int reserve_pfn_range(u64 paddr, + int strict_prot) + { + int is_ram = 0; +- int id_sz, ret; +- unsigned long flags; ++ int ret; + unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); ++ unsigned long flags = want_flags; + + is_ram = pat_pagerange_is_ram(paddr, paddr + size); + + /* +- * reserve_pfn_range() doesn't support RAM pages. ++ * reserve_pfn_range() doesn't support RAM pages. Maintain the current ++ * behavior with RAM pages by returning success. + */ + if (is_ram != 0) +- return -EINVAL; ++ return 0; + + ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); + if (ret) +@@ -684,23 +643,8 @@ static int reserve_pfn_range(u64 paddr, + flags); + } + +- /* Need to keep identity mapping in sync */ +- if (paddr >= __pa(high_memory)) +- return 0; +- +- id_sz = (__pa(high_memory) < paddr + size) ? +- __pa(high_memory) - paddr : +- size; +- +- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { ++ if (kernel_map_sync_memtype(paddr, size, flags) < 0) { + free_memtype(paddr, paddr + size); +- printk(KERN_ERR +- "%s:%d reserve_pfn_range ioremap_change_attr failed %s " +- "for %Lx-%Lx\n", +- current->comm, current->pid, +- cattr_name(flags), +- (unsigned long long)paddr, +- (unsigned long long)(paddr + size)); + return -EINVAL; + } + return 0; +@@ -725,29 +669,28 @@ static void free_pfn_range(u64 paddr, un + * + * If the vma has a linear pfn mapping for the entire range, we get the prot + * from pte and reserve the entire vma range with single reserve_pfn_range call. +- * Otherwise, we reserve the entire vma range, my ging through the PTEs page +- * by page to get physical address and protection. 
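++ * (Editorial note - not part of the original patch: the page-by-page
++ * walk described above is being dropped; the rewritten function only
++ * handles vmas where is_linear_pfn_mapping() is true, i.e. whole-vma
++ * remap_pfn_range() mappings, for which a single follow_phys() at
++ * vm_start yields the base physical address and protection of the
++ * entire range.)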
+ */ + int track_pfn_vma_copy(struct vm_area_struct *vma) + { +- int retval = 0; +- unsigned long i, j; + resource_size_t paddr; + unsigned long prot; +- unsigned long vma_start = vma->vm_start; +- unsigned long vma_end = vma->vm_end; +- unsigned long vma_size = vma_end - vma_start; ++ unsigned long vma_size = vma->vm_end - vma->vm_start; + pgprot_t pgprot; + + if (!pat_enabled) + return 0; + ++ /* ++ * For now, only handle remap_pfn_range() vmas where ++ * is_linear_pfn_mapping() == TRUE. Handling of ++ * vm_insert_pfn() is TBD. ++ */ + if (is_linear_pfn_mapping(vma)) { + /* + * reserve the whole chunk covered by vma. We need the + * starting address and protection from pte. + */ +- if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { ++ if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { + WARN_ON_ONCE(1); + return -EINVAL; + } +@@ -755,28 +698,7 @@ int track_pfn_vma_copy(struct vm_area_st + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); + } + +- /* reserve entire vma page by page, using pfn and prot from pte */ +- for (i = 0; i < vma_size; i += PAGE_SIZE) { +- if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) +- continue; +- +- pgprot = __pgprot(prot); +- retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1); +- if (retval) +- goto cleanup_ret; +- } + return 0; +- +-cleanup_ret: +- /* Reserve error: Cleanup partial reservation and return error */ +- for (j = 0; j < i; j += PAGE_SIZE) { +- if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) +- continue; +- +- free_pfn_range(paddr, PAGE_SIZE); +- } +- +- return retval; + } + + /* +@@ -786,50 +708,28 @@ cleanup_ret: + * prot is passed in as a parameter for the new mapping. If the vma has a + * linear pfn mapping for the entire range reserve the entire vma range with + * single reserve_pfn_range call. +- * Otherwise, we look t the pfn and size and reserve only the specified range +- * page by page. +- * +- * Note that this function can be called with caller trying to map only a +- * subrange/page inside the vma. + */ + int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, + unsigned long pfn, unsigned long size) + { +- int retval = 0; +- unsigned long i, j; +- resource_size_t base_paddr; + resource_size_t paddr; +- unsigned long vma_start = vma->vm_start; +- unsigned long vma_end = vma->vm_end; +- unsigned long vma_size = vma_end - vma_start; ++ unsigned long vma_size = vma->vm_end - vma->vm_start; + + if (!pat_enabled) + return 0; + ++ /* ++ * For now, only handle remap_pfn_range() vmas where ++ * is_linear_pfn_mapping() == TRUE. Handling of ++ * vm_insert_pfn() is TBD. 
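++ * (Editorial worked example - not part of the original patch: for an
++ * mmap of /dev/mem at physical offset 0xf0000000, remap_pfn_range()
++ * stores vm_pgoff = 0xf0000, so paddr = vm_pgoff << PAGE_SHIFT
++ * recovers 0xf0000000 and one reserve_pfn_range() call covers
++ * [paddr, paddr + vma_size).)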
++ */ + if (is_linear_pfn_mapping(vma)) { + /* reserve the whole chunk starting from vm_pgoff */ + paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return reserve_pfn_range(paddr, vma_size, prot, 0); + } + +- /* reserve page by page using pfn and size */ +- base_paddr = (resource_size_t)pfn << PAGE_SHIFT; +- for (i = 0; i < size; i += PAGE_SIZE) { +- paddr = base_paddr + i; +- retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0); +- if (retval) +- goto cleanup_ret; +- } + return 0; +- +-cleanup_ret: +- /* Reserve error: Cleanup partial reservation and return error */ +- for (j = 0; j < i; j += PAGE_SIZE) { +- paddr = base_paddr + j; +- free_pfn_range(paddr, PAGE_SIZE); +- } +- +- return retval; + } + + /* +@@ -840,39 +740,23 @@ cleanup_ret: + void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size) + { +- unsigned long i; + resource_size_t paddr; +- unsigned long prot; +- unsigned long vma_start = vma->vm_start; +- unsigned long vma_end = vma->vm_end; +- unsigned long vma_size = vma_end - vma_start; ++ unsigned long vma_size = vma->vm_end - vma->vm_start; + + if (!pat_enabled) + return; + ++ /* ++ * For now, only handle remap_pfn_range() vmas where ++ * is_linear_pfn_mapping() == TRUE. Handling of ++ * vm_insert_pfn() is TBD. ++ */ + if (is_linear_pfn_mapping(vma)) { + /* free the whole chunk starting from vm_pgoff */ + paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + free_pfn_range(paddr, vma_size); + return; + } +- +- if (size != 0 && size != vma_size) { +- /* free page by page, using pfn and size */ +- paddr = (resource_size_t)pfn << PAGE_SHIFT; +- for (i = 0; i < size; i += PAGE_SIZE) { +- paddr = paddr + i; +- free_pfn_range(paddr, PAGE_SIZE); +- } +- } else { +- /* free entire vma, page by page, using the pfn from pte */ +- for (i = 0; i < vma_size; i += PAGE_SIZE) { +- if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) +- continue; +- +- free_pfn_range(paddr, PAGE_SIZE); +- } +- } + } + #endif /* CONFIG_XEN */ + +--- head-2010-01-18.orig/arch/x86/mm/pgtable-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pgtable-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -122,10 +122,6 @@ void __pud_free_tlb(struct mmu_gather *t + #endif /* PAGETABLE_LEVELS > 3 */ + #endif /* PAGETABLE_LEVELS > 2 */ + +-#ifndef CONFIG_X86_64 +-#define TASK_SIZE64 TASK_SIZE +-#endif +- + static void _pin_lock(struct mm_struct *mm, int lock) { + if (lock) + spin_lock(&mm->page_table_lock); +@@ -149,7 +145,7 @@ static void _pin_lock(struct mm_struct * + pgd_t *pgd = mm->pgd; + unsigned g; + +- for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { ++ for (g = 0; g <= ((TASK_SIZE_MAX-1) / PGDIR_SIZE); g++, pgd++) { + pud_t *pud; + unsigned u; + +@@ -230,10 +226,10 @@ static void pgd_walk(pgd_t *pgd_base, pg + * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables + * may not be the 'current' task's pagetables (e.g., current may be + * 32-bit, but the pagetables may be for a 64-bit task). +- * Subtracting 1 from TASK_SIZE64 means the loop limit is correct +- * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE. ++ * Subtracting 1 from TASK_SIZE_MAX means the loop limit is correct ++ * regardless of whether TASK_SIZE_MAX is a multiple of PGDIR_SIZE. 
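++ * (Editorial worked example - not part of the original patch: with
++ * 4-level paging PGDIR_SIZE is 512GB (1UL << 39) and TASK_SIZE_MAX
++ * is 0x00007ffffffff000, so (TASK_SIZE_MAX-1) / PGDIR_SIZE == 255
++ * and the loop below visits pgd entries 0..255 - exactly the user
++ * half of the 512-entry page-global directory.)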
+ */ +- for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { ++ for (g = 0, seq = 0; g <= ((TASK_SIZE_MAX-1) / PGDIR_SIZE); g++, pgd++) { + if (pgd_none(*pgd)) + continue; + pud = pud_offset(pgd, 0); +@@ -736,9 +732,26 @@ int ptep_clear_flush_young(struct vm_are + return young; + } + ++/** ++ * reserve_top_address - reserves a hole in the top of kernel address space ++ * @reserve - size of hole to reserve ++ * ++ * Can be used to relocate the fixmap area and poke a hole in the top ++ * of kernel address space to make room for a hypervisor. ++ */ ++void __init reserve_top_address(unsigned long reserve) ++{ ++#ifdef CONFIG_X86_32 ++ BUG_ON(fixmaps_set > 0); ++ printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", ++ (int)-reserve); ++ __FIXADDR_TOP = -reserve - PAGE_SIZE; ++#endif ++} ++ + int fixmaps_set; + +-void xen_set_fixmap(enum fixed_addresses idx, maddr_t phys, pgprot_t flags) ++void xen_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) + { + unsigned long address = __fix_to_virt(idx); + pte_t pte; +@@ -757,6 +770,8 @@ void xen_set_fixmap(enum fixed_addresses + set_pte_vaddr_pud(level3_user_pgt, address, pte); + break; + case FIX_EARLYCON_MEM_BASE: ++ case FIX_SHARED_INFO: ++ case FIX_ISAMAP_END ... FIX_ISAMAP_BEGIN: + xen_l1_entry_update(level1_fixmap_pgt + pte_index(address), + pfn_pte_ma(phys >> PAGE_SHIFT, flags)); + fixmaps_set++; +--- head-2010-01-18.orig/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:52:02.000000000 +0100 +@@ -25,6 +25,8 @@ + #include + #include + ++unsigned int __VMALLOC_RESERVE = 128 << 20; ++ + /* + * Associate a virtual page frame with a given physical page frame + * and protection flags for that frame. +@@ -54,7 +56,7 @@ void set_pte_vaddr(unsigned long vaddr, + } + pte = pte_offset_kernel(pmd, vaddr); + if (pte_val(pteval)) +- set_pte_present(&init_mm, vaddr, pte, pteval); ++ set_pte_at(&init_mm, vaddr, pte, pteval); + else + pte_clear(&init_mm, vaddr, pte); + +@@ -109,21 +111,6 @@ unsigned long hypervisor_virt_start = HY + unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE); + EXPORT_SYMBOL(__FIXADDR_TOP); + +-/** +- * reserve_top_address - reserves a hole in the top of kernel address space +- * @reserve - size of hole to reserve +- * +- * Can be used to relocate the fixmap area and poke a hole in the top +- * of kernel address space to make room for a hypervisor. +- */ +-void __init reserve_top_address(unsigned long reserve) +-{ +- BUG_ON(fixmaps_set > 0); +- printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", +- (int)-reserve); +- __FIXADDR_TOP = -reserve - PAGE_SIZE; +-} +- + /* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. 
This can be used to increase (or decrease) the +--- head-2010-01-18.orig/drivers/acpi/Makefile 2009-12-04 10:44:41.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -62,8 +62,6 @@ obj-$(CONFIG_ACPI_POWER_METER) += power_ + processor-y := processor_core.o processor_throttling.o + processor-y += processor_idle.o processor_thermal.o + processor-$(CONFIG_CPU_FREQ) += processor_perflib.o +-ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL +-processor-objs += processor_perflib.o processor_extcntl.o +-endif ++processor-$(CONFIG_PROCESSOR_EXTERNAL_CONTROL) += processor_perflib.o processor_extcntl.o + + obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o +--- head-2010-01-18.orig/drivers/acpi/acpica/hwsleep.c 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/acpica/hwsleep.c 2009-11-06 10:52:02.000000000 +0100 +@@ -394,7 +394,7 @@ acpi_status asmlinkage acpi_enter_sleep_ + #else + /* PV ACPI just need check hypercall return value */ + err = acpi_notify_hypervisor_state(sleep_state, +- PM1Acontrol, PM1Bcontrol); ++ pm1a_control, pm1b_control); + if (err) { + printk(KERN_ERR "ACPI: Hypervisor failure [%d]\n", err); + return_ACPI_STATUS(AE_ERROR); +--- head-2010-01-18.orig/drivers/acpi/processor_idle.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/processor_idle.c 2009-11-06 10:52:02.000000000 +0100 +@@ -523,7 +523,7 @@ static void acpi_processor_power_verify_ + #ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL + cx->latency_ticks = cx->latency; + #else +- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); ++ cx->latency_ticks = us_to_pm_timer_ticks(cx->latency); + #endif + + return; +@@ -610,7 +610,7 @@ static void acpi_processor_power_verify_ + #ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL + cx->latency_ticks = cx->latency; + #else +- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); ++ cx->latency_ticks = us_to_pm_timer_ticks(cx->latency); + #endif + /* + * On older chipsets, BM_RLD needs to be set +--- head-2010-01-18.orig/drivers/oprofile/oprofile_files.c 2009-11-06 10:45:37.000000000 +0100 ++++ head-2010-01-18/drivers/oprofile/oprofile_files.c 2009-11-06 10:52:02.000000000 +0100 +@@ -172,6 +172,7 @@ static const struct file_operations dump + }; + + #ifdef CONFIG_XEN ++#include + + #define TMPBUFSIZE 512 + +--- head-2010-01-18.orig/drivers/pci/msi-xen.c 2009-12-04 11:28:44.000000000 +0100 ++++ head-2010-01-18/drivers/pci/msi-xen.c 2009-12-04 11:29:54.000000000 +0100 +@@ -47,47 +47,50 @@ struct msi_pirq_entry { + + /* Arch hooks */ + +-int __attribute__ ((weak)) +-arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +-{ +- return 0; +-} +- +-#ifndef CONFIG_XEN +-int __attribute__ ((weak)) +-arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) ++#ifndef arch_msi_check_device ++int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) + { + return 0; + } ++#endif + +-int __attribute__ ((weak)) +-arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ++#ifndef arch_setup_msi_irqs ++int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) + { + struct msi_desc *entry; + int ret; + ++ /* ++ * If an architecture wants to support multiple MSI, it needs to ++ * override arch_setup_msi_irqs() ++ */ ++ if (type == PCI_CAP_ID_MSI && nvec > 1) ++ return 1; ++ + list_for_each_entry(entry, &dev->msi_list, list) { + ret = arch_setup_msi_irq(dev, entry); +- if (ret) ++ if (ret < 0) + return ret; ++ if (ret > 0) ++ return -ENOSPC; + } + + return 0; + } ++#endif + +-void __attribute__ 
((weak)) arch_teardown_msi_irq(unsigned int irq) +-{ +- return; +-} +- +-void __attribute__ ((weak)) +-arch_teardown_msi_irqs(struct pci_dev *dev) ++#ifndef arch_teardown_msi_irqs ++void arch_teardown_msi_irqs(struct pci_dev *dev) + { + struct msi_desc *entry; + + list_for_each_entry(entry, &dev->msi_list, list) { +- if (entry->irq != 0) +- arch_teardown_msi_irq(entry->irq); ++ int i, nvec; ++ if (entry->irq == 0) ++ continue; ++ nvec = 1 << entry->msi_attrib.multiple; ++ for (i = 0; i < nvec; i++) ++ arch_teardown_msi_irq(entry->irq + i); + } + } + #endif +@@ -347,13 +350,15 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state) + /** + * msi_capability_init - configure device's MSI capability structure + * @dev: pointer to the pci_dev data structure of MSI device function ++ * @nvec: number of interrupts to allocate + * +- * Setup the MSI capability structure of device function with a single +- * MSI irq, regardless of device function is capable of handling +- * multiple messages. A return of zero indicates the successful setup +- * of an entry zero with the new MSI irq or non-zero for otherwise. +- **/ +-static int msi_capability_init(struct pci_dev *dev) ++ * Setup the MSI capability structure of the device with the requested ++ * number of interrupts. A return value of zero indicates the successful ++ * setup of an entry with the new MSI irq. A negative return value indicates ++ * an error, and a positive return value indicates the number of interrupts ++ * which could have been allocated. ++ */ ++static int msi_capability_init(struct pci_dev *dev, int nvec) + { + int pos, pirq; + u16 control; +@@ -363,6 +368,7 @@ static int msi_capability_init(struct pc + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + pci_read_config_word(dev, msi_control_reg(pos), &control); + ++ WARN_ON(nvec > 1); /* XXX */ + pirq = msi_map_vector(dev, 0, 0); + if (pirq < 0) + return -EBUSY; +@@ -496,22 +502,34 @@ static int pci_msi_check_device(struct p + } + + /** +- * pci_enable_msi - configure device's MSI capability structure +- * @dev: pointer to the pci_dev data structure of MSI device function ++ * pci_enable_msi_block - configure device's MSI capability structure ++ * @dev: device to configure ++ * @nvec: number of interrupts to configure + * +- * Setup the MSI capability structure of device function with +- * a single MSI irq upon its software driver call to request for +- * MSI mode enabled on its hardware device function. A return of zero +- * indicates the successful setup of an entry zero with the new MSI +- * vector or non-zero for otherwise. +- **/ ++ * Allocate IRQs for a device with the MSI capability. ++ * This function returns a negative errno if an error occurs. If it ++ * is unable to allocate the number of interrupts requested, it returns ++ * the number of interrupts it might be able to allocate. If it successfully ++ * allocates at least the number of interrupts requested, it returns 0 and ++ * updates the @dev's irq member to the lowest new interrupt number; the ++ * other interrupt numbers allocated to this device are consecutive. 
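[Editorial note] Given the return convention documented above for pci_enable_msi_block() (negative = hard error, zero = success, positive = a smaller count worth retrying), a driver typically negotiates downward in a loop. A hedged usage sketch; my_enable_msi() and the want parameter are illustrative, only pci_enable_msi_block() itself comes from the patch:

    #include <linux/pci.h>
    #include <linux/errno.h>

    static int my_enable_msi(struct pci_dev *pdev, int want)
    {
            int rc;

            /* Real callers also keep 'want' a power of two, as MSI
             * multiple-message mode requires. */
            while (want > 0) {
                    rc = pci_enable_msi_block(pdev, want);
                    if (rc == 0)
                            return want;    /* got pdev->irq .. pdev->irq + want - 1 */
                    if (rc < 0)
                            return rc;      /* hard failure */
                    want = rc;              /* positive: retry with fewer vectors */
            }
            return -ENOSPC;
    }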
++ */ + extern int pci_frontend_enable_msi(struct pci_dev *dev); +-int pci_enable_msi(struct pci_dev* dev) ++int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) + { +- int temp, status; ++ int temp, status, pos, maxvec; ++ u16 msgctl; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + +- status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); ++ pos = pci_find_capability(dev, PCI_CAP_ID_MSI); ++ if (!pos) ++ return -EINVAL; ++ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); ++ maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); ++ if (nvec > maxvec) ++ return maxvec; ++ ++ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); + if (status) + return status; + +@@ -521,6 +539,7 @@ int pci_enable_msi(struct pci_dev* dev) + int ret; + + temp = dev->irq; ++ WARN_ON(nvec > 1); /* XXX */ + ret = pci_frontend_enable_msi(dev); + if (ret) + return ret; +@@ -535,23 +554,23 @@ int pci_enable_msi(struct pci_dev* dev) + + temp = dev->irq; + +- /* Check whether driver already requested for MSI-X irqs */ ++ /* Check whether driver already requested MSI-X irqs */ + if (dev->msix_enabled) { + dev_info(&dev->dev, "can't enable MSI " + "(MSI-X already enabled)\n"); + return -EINVAL; + } + +- status = msi_capability_init(dev); ++ status = msi_capability_init(dev, nvec); + if ( !status ) + msi_dev_entry->default_irq = temp; + + return status; + } +-EXPORT_SYMBOL(pci_enable_msi); ++EXPORT_SYMBOL(pci_enable_msi_block); + + extern void pci_frontend_disable_msi(struct pci_dev* dev); +-void pci_msi_shutdown(struct pci_dev* dev) ++void pci_msi_shutdown(struct pci_dev *dev) + { + int pirq; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); +@@ -579,6 +598,7 @@ void pci_msi_shutdown(struct pci_dev* de + pci_intx_for_msi(dev, 1); + dev->msi_enabled = 0; + } ++ + void pci_disable_msi(struct pci_dev* dev) + { + pci_msi_shutdown(dev); +@@ -586,6 +606,23 @@ void pci_disable_msi(struct pci_dev* dev + EXPORT_SYMBOL(pci_disable_msi); + + /** ++ * pci_msix_table_size - return the number of device's MSI-X table entries ++ * @dev: pointer to the pci_dev data structure of MSI-X device function ++ */ ++int pci_msix_table_size(struct pci_dev *dev) ++{ ++ int pos; ++ u16 control; ++ ++ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); ++ if (!pos) ++ return 0; ++ ++ pci_read_config_word(dev, msi_control_reg(pos), &control); ++ return multi_msix_capable(control); ++} ++ ++/** + * pci_enable_msix - configure device's MSI-X capability structure + * @dev: pointer to the pci_dev data structure of MSI-X device function + * @entries: pointer to an array of MSI-X entries +@@ -604,9 +641,8 @@ extern int pci_frontend_enable_msix(stru + struct msix_entry *entries, int nvec); + int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) + { +- int status, pos, nr_entries; ++ int status, nr_entries; + int i, j, temp; +- u16 control; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + + if (!entries) +@@ -653,9 +689,7 @@ int pci_enable_msix(struct pci_dev* dev, + if (status) + return status; + +- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); +- pci_read_config_word(dev, msi_control_reg(pos), &control); +- nr_entries = multi_msix_capable(control); ++ nr_entries = pci_msix_table_size(dev); + if (nvec > nr_entries) + return -EINVAL; + +--- head-2010-01-18.orig/drivers/xen/Kconfig 2009-12-18 12:27:38.000000000 +0100 ++++ head-2010-01-18/drivers/xen/Kconfig 2009-12-18 12:27:45.000000000 +0100 +@@ -14,7 +14,6 @@ menu "XEN" + + config XEN_PRIVILEGED_GUEST + 
bool "Privileged Guest (domain 0)" +- select PCI_REASSIGN if PCI + help + Support for privileged operation (domain 0) + +@@ -333,10 +332,6 @@ endmenu + config HAVE_IRQ_IGNORE_UNHANDLED + def_bool y + +-config GENERIC_HARDIRQS_NO__DO_IRQ +- def_bool y +- depends on X86 +- + config NO_IDLE_HZ + def_bool y + +--- head-2010-01-18.orig/drivers/xen/char/mem.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/char/mem.c 2009-11-06 10:52:02.000000000 +0100 +@@ -158,21 +158,7 @@ static ssize_t write_mem(struct file * f + } + + #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM +-static void mmap_mem_open(struct vm_area_struct *vma) +-{ +- map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start, +- vma->vm_page_prot); +-} +- +-static void mmap_mem_close(struct vm_area_struct *vma) +-{ +- unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start, +- vma->vm_page_prot); +-} +- + static struct vm_operations_struct mmap_mem_ops = { +- .open = mmap_mem_open, +- .close = mmap_mem_close, + #ifdef CONFIG_HAVE_IOREMAP_PROT + .access = generic_access_phys + #endif +--- head-2010-01-18.orig/drivers/xen/core/Makefile 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/Makefile 2009-11-06 10:52:02.000000000 +0100 +@@ -10,6 +10,6 @@ obj-$(CONFIG_SYS_HYPERVISOR) += hypervis + obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o + obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o + obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o +-obj-$(CONFIG_X86_SMP) += spinlock.o ++obj-$(CONFIG_SMP) += spinlock.o + obj-$(CONFIG_KEXEC) += machine_kexec.o + obj-$(CONFIG_XEN_XENCOMM) += xencomm.o +--- head-2010-01-18.orig/drivers/xen/core/evtchn.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/evtchn.c 2009-11-06 10:52:02.000000000 +0100 +@@ -150,13 +150,15 @@ DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) + #ifdef CONFIG_SMP + + static u8 cpu_evtchn[NR_EVENT_CHANNELS]; +-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; ++static DEFINE_PER_CPU(unsigned long[BITS_TO_LONGS(NR_EVENT_CHANNELS)], ++ cpu_evtchn_mask); + +-static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, +- unsigned int idx) ++static inline unsigned long active_evtchns(unsigned int idx) + { ++ shared_info_t *sh = HYPERVISOR_shared_info; ++ + return (sh->evtchn_pending[idx] & +- cpu_evtchn_mask[cpu][idx] & ++ percpu_read(cpu_evtchn_mask[idx]) & + ~sh->evtchn_mask[idx]); + } + +@@ -168,10 +170,10 @@ static void bind_evtchn_to_cpu(unsigned + BUG_ON(!test_bit(chn, s->evtchn_mask)); + + if (irq != -1) +- irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu); ++ cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); + +- clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); +- set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); ++ clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn])); ++ set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); + cpu_evtchn[chn] = cpu; + } + +@@ -184,11 +186,11 @@ static void init_evtchn_cpu_bindings(voi + struct irq_desc *desc = irq_to_desc(i); + + if (desc) +- desc->affinity = cpumask_of_cpu(0); ++ cpumask_copy(desc->affinity, cpumask_of(0)); + } + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); +- memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); ++ memset(per_cpu(cpu_evtchn_mask, 0), ~0, sizeof(per_cpu(cpu_evtchn_mask, 0))); + } + + static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +@@ -198,9 +200,10 @@ static inline unsigned int cpu_from_evtc + + #else + +-static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, +- 
unsigned int idx) ++static inline unsigned long active_evtchns(unsigned int idx) + { ++ shared_info_t *sh = HYPERVISOR_shared_info; ++ + return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); + } + +@@ -219,25 +222,15 @@ static inline unsigned int cpu_from_evtc + + #endif + +-/* Upcall to generic IRQ layer. */ + #ifdef CONFIG_X86 +-extern unsigned int do_IRQ(struct pt_regs *regs); + void __init xen_init_IRQ(void); + void __init init_IRQ(void) + { + irq_ctx_init(0); + xen_init_IRQ(); + } +-#if defined (__i386__) +-static inline void exit_idle(void) {} +-#elif defined (__x86_64__) + #include + #endif +-#define do_IRQ(irq, regs) do { \ +- (regs)->orig_ax = ~(irq); \ +- do_IRQ((regs)); \ +-} while (0) +-#endif + + /* Xen will never allocate port zero for any purpose. */ + #define VALID_EVTCHN(chn) ((chn) != 0) +@@ -261,13 +254,12 @@ static DEFINE_PER_CPU(unsigned int, last + /* NB. Interrupts are disabled on entry. */ + asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs) + { ++ struct pt_regs *old_regs = set_irq_regs(regs); + unsigned long l1, l2; + unsigned long masked_l1, masked_l2; + unsigned int l1i, l2i, port, count; + int irq; +- unsigned int cpu = smp_processor_id(); +- shared_info_t *s = HYPERVISOR_shared_info; +- vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; ++ vcpu_info_t *vcpu_info = current_vcpu_info(); + + exit_idle(); + irq_enter(); +@@ -277,7 +269,8 @@ asmlinkage void __irq_entry evtchn_do_up + vcpu_info->evtchn_upcall_pending = 0; + + /* Nested invocations bail immediately. */ +- if (unlikely(per_cpu(upcall_count, cpu)++)) ++ percpu_add(upcall_count, 1); ++ if (unlikely(percpu_read(upcall_count) != 1)) + break; + + #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ +@@ -286,8 +279,8 @@ asmlinkage void __irq_entry evtchn_do_up + #endif + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + +- l1i = per_cpu(last_processed_l1i, cpu); +- l2i = per_cpu(last_processed_l2i, cpu); ++ l1i = percpu_read(last_processed_l1i); ++ l2i = percpu_read(last_processed_l2i); + + while (l1 != 0) { + +@@ -302,7 +295,7 @@ asmlinkage void __irq_entry evtchn_do_up + l1i = __ffs(masked_l1); + + do { +- l2 = active_evtchns(cpu, s, l1i); ++ l2 = active_evtchns(l1i); + + l2i = (l2i + 1) % BITS_PER_LONG; + masked_l2 = l2 & ((~0UL) << l2i); +@@ -316,29 +309,31 @@ asmlinkage void __irq_entry evtchn_do_up + + /* process port */ + port = (l1i * BITS_PER_LONG) + l2i; +- if ((irq = evtchn_to_irq[port]) != -1) +- do_IRQ(irq, regs); +- else ++ if (unlikely((irq = evtchn_to_irq[port]) == -1)) + evtchn_device_upcall(port); ++ else if (!handle_irq(irq, regs) && printk_ratelimit()) ++ printk(KERN_EMERG "%s(%d): No handler for irq %d\n", ++ __func__, smp_processor_id(), irq); + + /* if this is the final port processed, we'll pick up here+1 next time */ +- per_cpu(last_processed_l1i, cpu) = l1i; +- per_cpu(last_processed_l2i, cpu) = l2i; ++ percpu_write(last_processed_l1i, l1i); ++ percpu_write(last_processed_l2i, l2i); + + } while (l2i != BITS_PER_LONG - 1); + +- l2 = active_evtchns(cpu, s, l1i); ++ l2 = active_evtchns(l1i); + if (l2 == 0) /* we handled all ports, so we can clear the selector bit */ + l1 &= ~(1UL << l1i); + + } + + /* If there were nested callbacks then we have more to do. 
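[Editorial note] The upcall loop above walks a two-level bitmap: the per-VCPU selector word l1 marks which words of the shared pending array are non-empty, and active_evtchns() masks each word against the cpu binding and the global event mask. A standalone model of that scan, using __builtin_ctzl() as a stand-in for __ffs(); the kernel version additionally resumes from last_processed_l1i/l2i for fairness, which this sketch omits:

    #include <stdio.h>

    #define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

    /* Model only: in the kernel, active(l1i) is active_evtchns(l1i). */
    static unsigned long pending[2] = { 0x5UL, 0x0UL };

    static unsigned long active(unsigned int idx)
    {
            return pending[idx];
    }

    static void process_port(unsigned int port)
    {
            printf("event channel %u fired\n", port);
    }

    static void scan_events(unsigned long l1)
    {
            while (l1 != 0) {
                    unsigned int l1i = __builtin_ctzl(l1);  /* ~ __ffs(l1) */
                    unsigned long l2 = active(l1i);

                    while (l2 != 0) {
                            unsigned int l2i = __builtin_ctzl(l2);

                            process_port(l1i * BITS_PER_LONG + l2i);
                            l2 &= ~(1UL << l2i);
                    }
                    l1 &= ~(1UL << l1i);    /* this word is drained */
            }
    }

    int main(void)
    {
            scan_events(0x1UL);     /* selector: only word 0 has pending bits */
            return 0;
    }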
*/ +- count = per_cpu(upcall_count, cpu); +- per_cpu(upcall_count, cpu) = 0; ++ count = percpu_read(upcall_count); ++ percpu_write(upcall_count, 0); + } while (unlikely(count != 1)); + + irq_exit(); ++ set_irq_regs(old_regs); + } + + static struct irq_chip dynirq_chip; +@@ -545,7 +540,7 @@ static void unbind_from_irq(unsigned int + + /* Zap stats across IRQ changes of use. */ + for_each_possible_cpu(cpu) +-#ifdef CONFIG_SPARSE_IRQ ++#ifdef CONFIG_GENERIC_HARDIRQS + irq_to_desc(irq)->kstat_irqs[cpu] = 0; + #else + kstat_cpu(cpu).irqs[irq] = 0; +@@ -663,7 +658,8 @@ int bind_ipi_to_irqhandler( + if (irq < 0) + return irq; + +- retval = request_irq(irq, handler, irqflags, devname, dev_id); ++ retval = request_irq(irq, handler, irqflags | IRQF_NO_SUSPEND, ++ devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; +@@ -1132,7 +1128,7 @@ void irq_resume(void) + mask_evtchn(evtchn); + + /* Check that no PIRQs are still bound. */ +- for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++) { ++ for (irq = PIRQ_BASE; irq < (PIRQ_BASE + nr_pirqs); irq++) { + cfg = irq_cfg(irq); + BUG_ON(cfg && cfg->info != IRQ_UNBOUND); + } +@@ -1169,7 +1165,7 @@ int arch_init_chip_data(struct irq_desc + { + if (!desc->chip_data) { + /* By default all event channels notify CPU#0. */ +- desc->affinity = cpumask_of_cpu(0); ++ cpumask_copy(desc->affinity, cpumask_of(0)); + + desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC); + } +@@ -1183,11 +1179,44 @@ int arch_init_chip_data(struct irq_desc + #endif + + #if defined(CONFIG_X86_IO_APIC) ++#ifdef CONFIG_SPARSE_IRQ ++int nr_pirqs = NR_PIRQS; ++EXPORT_SYMBOL_GPL(nr_pirqs); ++ ++int __init arch_probe_nr_irqs(void) ++{ ++ int nr_irqs_gsi, nr = acpi_probe_gsi(); ++ ++ if (nr <= NR_IRQS_LEGACY) { ++ /* for acpi=off or acpi not compiled in */ ++ int idx; ++ ++ for (nr = idx = 0; idx < nr_ioapics; idx++) ++ nr += io_apic_get_redir_entries(idx) + 1; ++ } ++ nr_irqs_gsi = max(nr, NR_IRQS_LEGACY); ++ ++ nr = nr_irqs_gsi + 8 * nr_cpu_ids; ++#ifdef CONFIG_PCI_MSI ++ nr += nr_irqs_gsi * 16; ++#endif ++ if (nr_pirqs > nr) { ++ nr_pirqs = nr; ++ nr_irqs = nr + NR_DYNIRQS; ++ } ++ ++ printk(KERN_DEBUG "nr_irqs_gsi=%d nr_pirqs=%d\n", ++ nr_irqs_gsi, nr_pirqs); ++ ++ return 0; ++} ++#endif ++ + int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) + { + struct physdev_irq irq_op; + +- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) ++ if (irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs) + return -EINVAL; + + if (cfg->vector) +@@ -1210,7 +1239,7 @@ int assign_irq_vector(int irq, struct ir + + void evtchn_register_pirq(int irq) + { +- BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS); ++ BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs); + if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND) + return; + irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0); +@@ -1223,7 +1252,7 @@ int evtchn_map_pirq(int irq, int xen_pir + if (irq < 0) { + static DEFINE_SPINLOCK(irq_alloc_lock); + +- irq = PIRQ_BASE + NR_PIRQS - 1; ++ irq = PIRQ_BASE + nr_pirqs - 1; + spin_lock(&irq_alloc_lock); + do { + struct irq_desc *desc; +@@ -1283,7 +1312,7 @@ void __init xen_init_IRQ(void) + init_evtchn_cpu_bindings(); + + pirq_needs_eoi = alloc_bootmem_pages(sizeof(unsigned long) +- * BITS_TO_LONGS(ALIGN(NR_PIRQS, PAGE_SIZE * 8))); ++ * BITS_TO_LONGS(ALIGN(nr_pirqs, PAGE_SIZE * 8))); + eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT; + if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0) + 
pirq_eoi_does_unmask = true; +@@ -1299,7 +1328,7 @@ void __init xen_init_IRQ(void) + handle_level_irq, "level"); + } + +- for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) { ++ for (i = PIRQ_BASE; i < (PIRQ_BASE + nr_pirqs); i++) { + #else + for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_IRQS_LEGACY); i++) { + #endif +--- head-2010-01-18.orig/drivers/xen/core/smpboot.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/smpboot.c 2009-11-06 10:52:02.000000000 +0100 +@@ -18,7 +18,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -54,8 +53,8 @@ static char call1func_name[NR_CPUS][15]; + #define set_cpu_to_apicid(cpu, apicid) + #endif + +-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +-DEFINE_PER_CPU(cpumask_t, cpu_core_map); ++DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); ++DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); + + void __init prefill_possible_map(void) + { +@@ -80,8 +79,8 @@ set_cpu_sibling_map(unsigned int cpu) + cpu_data(cpu).phys_proc_id = cpu; + cpu_data(cpu).cpu_core_id = 0; + +- per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu); +- per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu); ++ cpumask_copy(cpu_sibling_mask(cpu), cpumask_of(cpu)); ++ cpumask_copy(cpu_core_mask(cpu), cpumask_of(cpu)); + + cpu_data(cpu).booted_cores = 1; + } +@@ -92,8 +91,8 @@ remove_siblinginfo(unsigned int cpu) + cpu_data(cpu).phys_proc_id = BAD_APICID; + cpu_data(cpu).cpu_core_id = BAD_APICID; + +- cpus_clear(per_cpu(cpu_sibling_map, cpu)); +- cpus_clear(per_cpu(cpu_core_map, cpu)); ++ cpumask_clear(cpu_sibling_mask(cpu)); ++ cpumask_clear(cpu_core_mask(cpu)); + + cpu_data(cpu).booted_cores = 0; + } +@@ -216,7 +215,7 @@ static void __cpuinit cpu_initialize_con + smp_trap_init(ctxt.trap_ctxt); + + ctxt.ldt_ents = 0; +- ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu)); ++ ctxt.gdt_frames[0] = arbitrary_virt_to_mfn(get_cpu_gdt_table(cpu)); + ctxt.gdt_ents = GDT_SIZE / 8; + + ctxt.user_regs.cs = __KERNEL_CS; +@@ -234,12 +233,13 @@ static void __cpuinit cpu_initialize_con + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); + + ctxt.user_regs.fs = __KERNEL_PERCPU; ++ ctxt.user_regs.gs = __KERNEL_STACK_CANARY; + #else /* __x86_64__ */ + ctxt.syscall_callback_eip = (unsigned long)system_call; + + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); + +- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); ++ ctxt.gs_base_kernel = per_cpu_offset(cpu); + #endif + + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)) +@@ -267,8 +267,10 @@ void __init smp_prepare_cpus(unsigned in + current_thread_info()->cpu = 0; + + for_each_possible_cpu (cpu) { +- cpus_clear(per_cpu(cpu_sibling_map, cpu)); +- cpus_clear(per_cpu(cpu_core_map, cpu)); ++ alloc_cpumask_var(&per_cpu(cpu_sibling_map, cpu), GFP_KERNEL); ++ alloc_cpumask_var(&per_cpu(cpu_core_map, cpu), GFP_KERNEL); ++ cpumask_clear(cpu_sibling_mask(cpu)); ++ cpumask_clear(cpu_core_mask(cpu)); + } + + set_cpu_sibling_map(0); +@@ -295,9 +297,6 @@ void __init smp_prepare_cpus(unsigned in + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + +-#ifdef __i386__ +- init_gdt(cpu); +-#endif + gdt_addr = get_cpu_gdt_table(cpu); + make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables); + +@@ -311,12 +310,12 @@ void __init smp_prepare_cpus(unsigned in + set_cpu_to_apicid(cpu, apicid); + + #ifdef __x86_64__ +- cpu_pda(cpu)->pcurrent = idle; +- cpu_pda(cpu)->cpunumber = cpu; + clear_tsk_thread_flag(idle, TIF_FORK); +-#else +- per_cpu(current_task, cpu) = idle; ++ 
per_cpu(kernel_stack, cpu) = ++ (unsigned long)task_stack_page(idle) - ++ KERNEL_STACK_OFFSET + THREAD_SIZE; + #endif ++ per_cpu(current_task, cpu) = idle; + + irq_ctx_init(cpu); + +@@ -340,10 +339,7 @@ void __init smp_prepare_cpus(unsigned in + + void __init smp_prepare_boot_cpu(void) + { +-#ifdef __i386__ +- init_gdt(smp_processor_id()); +-#endif +- switch_to_new_gdt(); ++ switch_to_new_gdt(smp_processor_id()); + prefill_possible_map(); + } + +--- head-2010-01-18.orig/drivers/xen/core/spinlock.c 2009-11-17 14:57:38.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/spinlock.c 2010-01-18 16:53:32.000000000 +0100 +@@ -59,7 +59,7 @@ void __cpuinit xen_spinlock_cleanup(unsi + + int xen_spin_wait(raw_spinlock_t *lock, unsigned int token) + { +- int rc = 0, irq = __get_cpu_var(spinlock_irq); ++ int rc = 0, irq = percpu_read(spinlock_irq); + raw_rwlock_t *rm_lock; + unsigned long flags; + struct spinning spinning; +@@ -73,9 +73,9 @@ int xen_spin_wait(raw_spinlock_t *lock, + /* announce we're spinning */ + spinning.ticket = token; + spinning.lock = lock; +- spinning.prev = x86_read_percpu(spinning); ++ spinning.prev = percpu_read(spinning); + smp_wmb(); +- x86_write_percpu(spinning, &spinning); ++ percpu_write(spinning, &spinning); + + /* clear pending */ + xen_clear_irq_pending(irq); +@@ -103,7 +103,7 @@ int xen_spin_wait(raw_spinlock_t *lock, + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + + /* announce we're done */ +- x86_write_percpu(spinning, spinning.prev); ++ percpu_write(spinning, spinning.prev); + rm_lock = &__get_cpu_var(spinning_rm_lock); + raw_local_irq_save(flags); + __raw_write_lock(rm_lock); +@@ -117,7 +117,7 @@ unsigned int xen_spin_adjust(raw_spinloc + { + struct spinning *spinning; + +- for (spinning = x86_read_percpu(spinning); spinning; spinning = spinning->prev) ++ for (spinning = percpu_read(spinning); spinning; spinning = spinning->prev) + if (spinning->lock == lock) { + unsigned int ticket = spinning->ticket; + +@@ -144,9 +144,9 @@ int xen_spin_wait_flags(raw_spinlock_t * + /* announce we're spinning */ + spinning.ticket = *ptok >> TICKET_SHIFT; + spinning.lock = lock; +- spinning.prev = x86_read_percpu(spinning); ++ spinning.prev = percpu_read(spinning); + smp_wmb(); +- x86_write_percpu(spinning, &spinning); ++ percpu_write(spinning, &spinning); + + for (nested = spinning.prev; nested; nested = nested->prev) + if (nested->lock == lock) +@@ -185,7 +185,7 @@ int xen_spin_wait_flags(raw_spinlock_t * + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + + /* announce we're done */ +- x86_write_percpu(spinning, spinning.prev); ++ percpu_write(spinning, spinning.prev); + rm_lock = &__get_cpu_var(spinning_rm_lock); + __raw_write_lock(rm_lock); + __raw_write_unlock(rm_lock); +--- head-2010-01-18.orig/drivers/xen/netback/interface.c 2010-01-04 12:40:36.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netback/interface.c 2010-01-04 12:40:53.000000000 +0100 +@@ -121,7 +121,7 @@ static void netbk_get_drvinfo(struct net + struct ethtool_drvinfo *info) + { + strcpy(info->driver, "netbk"); +- strcpy(info->bus_info, dev->dev.parent->bus_id); ++ strcpy(info->bus_info, dev_name(dev->dev.parent)); + } + + static const struct netif_stat { +--- head-2010-01-18.orig/drivers/xen/netback/netback.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netback/netback.c 2010-01-04 12:40:57.000000000 +0100 +@@ -333,7 +333,7 @@ int netif_be_start_xmit(struct sk_buff * + */ + netif->tx_queue_timeout.data = (unsigned long)netif; + netif->tx_queue_timeout.function = 
tx_queue_callback; +- __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); ++ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); + } + } + +@@ -354,7 +354,7 @@ static void xen_network_done_notify(void + static struct net_device *eth0_dev = NULL; + if (unlikely(eth0_dev == NULL)) + eth0_dev = __dev_get_by_name(&init_net, "eth0"); +- netif_rx_schedule(???); ++ napi_schedule(???); + } + /* + * Add following to poll() function in NAPI driver (Tigon3 is example): +@@ -1308,8 +1308,7 @@ static void net_tx_action(unsigned long + (unsigned long)netif; + netif->credit_timeout.function = + tx_credit_callback; +- __mod_timer(&netif->credit_timeout, +- next_credit); ++ mod_timer(&netif->credit_timeout, next_credit); + netif_put(netif); + continue; + } +--- head-2010-01-18.orig/drivers/xen/netfront/netfront.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netfront/netfront.c 2009-11-06 10:52:02.000000000 +0100 +@@ -102,7 +102,7 @@ static const int MODPARM_rx_flip = 0; + static inline void dev_disable_gso_features(struct net_device *dev) + { + /* Turn off all GSO bits except ROBUST. */ +- dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; ++ dev->features &= ~NETIF_F_GSO_MASK; + dev->features |= NETIF_F_GSO_ROBUST; + } + #elif defined(NETIF_F_TSO) +@@ -635,7 +635,7 @@ static int network_open(struct net_devic + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ + netfront_accelerator_call_stop_napi_irq(np, dev); + +- netif_rx_schedule(&np->napi); ++ napi_schedule(&np->napi); + } + } + spin_unlock_bh(&np->rx_lock); +@@ -707,7 +707,7 @@ static void rx_refill_timeout(unsigned l + + netfront_accelerator_call_stop_napi_irq(np, dev); + +- netif_rx_schedule(&np->napi); ++ napi_schedule(&np->napi); + } + + static void network_alloc_rx_buffers(struct net_device *dev) +@@ -1064,7 +1064,7 @@ static irqreturn_t netif_int(int irq, vo + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { + netfront_accelerator_call_stop_napi_irq(np, dev); + +- netif_rx_schedule(&np->napi); ++ napi_schedule(&np->napi); + } + } + +@@ -1521,7 +1521,7 @@ err: + } + + if (!more_to_do && !accel_more_to_do) +- __netif_rx_complete(napi); ++ __napi_complete(napi); + + local_irq_restore(flags); + } +--- head-2010-01-18.orig/drivers/xen/sfc_netfront/accel_msg.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/xen/sfc_netfront/accel_msg.c 2009-11-06 10:52:02.000000000 +0100 +@@ -47,7 +47,7 @@ static void vnic_start_interrupts(netfro + netfront_accel_disable_net_interrupts(vnic); + vnic->irq_enabled = 0; + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); +- netif_rx_schedule(&np->napi); ++ napi_schedule(&np->napi); + } else { + /* + * Nothing yet, make sure we get interrupts through +@@ -532,7 +532,7 @@ irqreturn_t netfront_accel_net_channel_i + vnic->stats.event_count_since_irq; + vnic->stats.event_count_since_irq = 0; + #endif +- netif_rx_schedule(&np->napi); ++ napi_schedule(&np->napi); + } + else { + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); +--- head-2010-01-18.orig/drivers/xen/usbback/usbstub.c 2009-11-06 10:45:48.000000000 +0100 ++++ head-2010-01-18/drivers/xen/usbback/usbstub.c 2009-11-06 10:52:02.000000000 +0100 +@@ -188,7 +188,7 @@ static int usbstub_probe(struct usb_inte + const struct usb_device_id *id) + { + struct usb_device *udev = interface_to_usbdev(intf); +- char *busid = intf->dev.parent->bus_id; ++ const char *busid = dev_name(intf->dev.parent); + struct vusb_port_id *portid = NULL; + struct usbstub *stub = NULL; + usbif_t *usbif = NULL; +--- 
head-2010-01-18.orig/drivers/xen/usbfront/usbfront-dbg.c 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/drivers/xen/usbfront/usbfront-dbg.c 2009-11-06 10:52:02.000000000 +0100 +@@ -64,7 +64,7 @@ static ssize_t show_statistics(struct de + "%s\n" + "xenhcd, hcd state %d\n", + hcd->self.controller->bus->name, +- hcd->self.controller->bus_id, ++ dev_name(hcd->self.controller), + hcd->product_desc, + hcd->state); + size -= temp; +--- head-2010-01-18.orig/drivers/xen/usbfront/xenbus.c 2009-11-06 10:51:07.000000000 +0100 ++++ head-2010-01-18/drivers/xen/usbfront/xenbus.c 2009-11-06 10:52:02.000000000 +0100 +@@ -240,10 +240,10 @@ static struct usb_hcd *create_hcd(struct + } + switch (usb_ver) { + case USB_VER_USB11: +- hcd = usb_create_hcd(&xen_usb11_hc_driver, &dev->dev, dev->dev.bus_id); ++ hcd = usb_create_hcd(&xen_usb11_hc_driver, &dev->dev, dev_name(&dev->dev)); + break; + case USB_VER_USB20: +- hcd = usb_create_hcd(&xen_usb20_hc_driver, &dev->dev, dev->dev.bus_id); ++ hcd = usb_create_hcd(&xen_usb20_hc_driver, &dev->dev, dev_name(&dev->dev)); + break; + default: + xenbus_dev_fatal(dev, err, "invalid usb-ver"); +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe.c 2009-12-04 11:28:50.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe.c 2009-11-06 10:52:02.000000000 +0100 +@@ -230,7 +230,7 @@ static struct xen_bus_type xenbus_fronte + }, + #if defined(CONFIG_XEN) || defined(MODULE) + .dev = { +- .bus_id = "xen", ++ .init_name = "xen", + }, + #endif + }; +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe_backend.c 2009-11-06 10:52:02.000000000 +0100 +@@ -129,7 +129,7 @@ static struct xen_bus_type xenbus_backen + .dev_attrs = xenbus_backend_attrs, + }, + .dev = { +- .bus_id = "xen-backend", ++ .init_name = "xen-backend", + }, + }; + +--- head-2010-01-18.orig/include/linux/interrupt.h 2009-12-04 10:44:41.000000000 +0100 ++++ head-2010-01-18/include/linux/interrupt.h 2009-11-06 10:52:02.000000000 +0100 +@@ -52,6 +52,7 @@ + * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. + * Used by threaded interrupts which need to keep the + * irq line disabled until the threaded handler has been run. ++ * IRQF_NO_SUSPEND - Prevent this interrupt from being disabled during suspend. 
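[Editorial note] The new IRQF_NO_SUSPEND flag above is consumed by the __disable_irq() change that follows: timer and NO_SUSPEND actions skip the IRQ_SUSPENDED marking. A minimal usage sketch mirroring how bind_ipi_to_irqhandler() ORs the flag in; the handler, name and dev_id here are placeholders:

    #include <linux/interrupt.h>

    /* Keep this line armed across suspend, like the Xen IPI bindings. */
    static int bind_no_suspend_irq(unsigned int irq, irq_handler_t handler,
                                   void *dev_id)
    {
            return request_irq(irq, handler, IRQF_NO_SUSPEND,
                               "no-suspend-example", dev_id);
    }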
+ */ + #define IRQF_DISABLED 0x00000020 + #define IRQF_SAMPLE_RANDOM 0x00000040 +@@ -62,6 +63,7 @@ + #define IRQF_NOBALANCING 0x00000800 + #define IRQF_IRQPOLL 0x00001000 + #define IRQF_ONESHOT 0x00002000 ++#define IRQF_NO_SUSPEND 0x00008000 + + /* + * Bits used by threaded handlers: +--- head-2010-01-18.orig/kernel/irq/manage.c 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/kernel/irq/manage.c 2009-11-06 10:52:02.000000000 +0100 +@@ -200,7 +200,8 @@ static inline int setup_affinity(unsigne + void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) + { + if (suspend) { +- if (!desc->action || (desc->action->flags & IRQF_TIMER)) ++ if (!desc->action || ++ (desc->action->flags & (IRQF_TIMER | IRQF_NO_SUSPEND))) + return; + desc->status |= IRQ_SUSPENDED; + } +--- head-2010-01-18.orig/lib/swiotlb-xen.c 2009-12-14 17:27:59.000000000 +0100 ++++ head-2010-01-18/lib/swiotlb-xen.c 2009-12-14 17:28:34.000000000 +0100 +@@ -175,7 +175,7 @@ static void *swiotlb_bus_to_virt(dma_add + return phys_to_virt(swiotlb_bus_to_phys(address)); + } + +-int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) ++int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) + { + return 0; + } +@@ -523,13 +523,13 @@ swiotlb_full(struct device *dev, size_t + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. + */ +-static dma_addr_t +-_swiotlb_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, +- int dir, struct dma_attrs *attrs) +-{ +- struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); +- dma_addr_t dev_addr = gnttab_dma_map_page(page) + +- offset_in_page(paddr); ++dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) ++{ ++ phys_addr_t phys = page_to_pseudophys(page) + offset; ++ dma_addr_t dev_addr = gnttab_dma_map_page(page) + offset; + void *map; + + BUG_ON(dir == DMA_NONE); +@@ -539,44 +539,24 @@ _swiotlb_map_single(struct device *hwdev + * we can safely return the device addr and not worry about bounce + * buffering it. + */ +- if (!address_needs_mapping(hwdev, dev_addr, size) && +- !range_needs_mapping(paddr, size)) ++ if (!address_needs_mapping(dev, dev_addr, size) && ++ !range_needs_mapping(phys, size)) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. 
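[Editorial note] The bounce decision in swiotlb_map_page() above reduces to: hand the device address straight through when the device's DMA mask covers the whole buffer (and, in this Xen variant, when the machine range is contiguous), otherwise bounce. A simplified model of the mask check only; the range_needs_mapping() contiguity test is deliberately omitted, and the 32-bit default mask matches the hunk's assumption:

    #include <linux/device.h>
    #include <linux/dma-mapping.h>

    /* Model of address_needs_mapping(): true if any byte of the buffer
     * lies above the device's DMA mask. */
    static bool needs_bounce(struct device *dev, dma_addr_t dev_addr,
                             size_t size)
    {
            u64 mask = dev->dma_mask ? *dev->dma_mask : DMA_BIT_MASK(32);

            return dev_addr + size - 1 > mask;
    }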
+ */ + gnttab_dma_unmap_page(dev_addr); +- map = map_single(hwdev, paddr, size, dir); ++ map = map_single(dev, phys, size, dir); + if (!map) { +- swiotlb_full(hwdev, size, dir, 1); ++ swiotlb_full(dev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + +- dev_addr = swiotlb_virt_to_bus(hwdev, map); ++ dev_addr = swiotlb_virt_to_bus(dev, map); + return dev_addr; + } +- +-dma_addr_t +-swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, +- int dir, struct dma_attrs *attrs) +-{ +- return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, attrs); +-} +-EXPORT_SYMBOL(swiotlb_map_single_attrs); +- +-dma_addr_t +-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +-{ +- return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL); +-} +-EXPORT_SYMBOL(swiotlb_map_single); +- +-dma_addr_t +-swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) +-{ +- return _swiotlb_map_single(hwdev, paddr, size, dir, NULL); +-} ++EXPORT_SYMBOL_GPL(swiotlb_map_page); + + /* + * Unmap a single streaming mode DMA translation. The dma_addr and size must +@@ -586,9 +566,9 @@ swiotlb_map_single_phys(struct device *h + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +-void +-swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir, struct dma_attrs *attrs) ++void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + char *dma_addr = swiotlb_bus_to_virt(dev_addr); + +@@ -598,15 +578,7 @@ swiotlb_unmap_single_attrs(struct device + else + gnttab_dma_unmap_page(dev_addr); + } +-EXPORT_SYMBOL(swiotlb_unmap_single_attrs); +- +-void +-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, +- int dir) +-{ +- return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL); +-} +-EXPORT_SYMBOL(swiotlb_unmap_single); ++EXPORT_SYMBOL_GPL(swiotlb_unmap_page); + + /* + * Make physical memory consistent for a single streaming mode DMA translation +@@ -620,7 +592,7 @@ EXPORT_SYMBOL(swiotlb_unmap_single); + */ + void + swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir) ++ size_t size, enum dma_data_direction dir) + { + char *dma_addr = swiotlb_bus_to_virt(dev_addr); + +@@ -632,7 +604,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_cp + + void + swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir) ++ size_t size, enum dma_data_direction dir) + { + char *dma_addr = swiotlb_bus_to_virt(dev_addr); + +@@ -644,7 +616,8 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_de + + void + swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, +- unsigned long offset, size_t size, int dir) ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir) + { + char *dma_addr = swiotlb_bus_to_virt(dev_addr); + +@@ -656,7 +629,8 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra + + void + swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, +- unsigned long offset, size_t size, int dir) ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir) + { + char *dma_addr = swiotlb_bus_to_virt(dev_addr); + +@@ -684,7 +658,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra + */ + int + swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, +- int dir, struct dma_attrs *attrs) ++ enum dma_data_direction dir, 
struct dma_attrs *attrs) + { + struct scatterlist *sg; + int i; +@@ -736,7 +710,7 @@ EXPORT_SYMBOL(swiotlb_map_sg); + */ + void + swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, +- int nelems, int dir, struct dma_attrs *attrs) ++ int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) + { + struct scatterlist *sg; + int i; +@@ -770,7 +744,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); + */ + void + swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl, +- int nelems, int dir) ++ int nelems, enum dma_data_direction dir) + { + struct scatterlist *sg; + int i; +@@ -787,7 +761,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); + + void + swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl, +- int nelems, int dir) ++ int nelems, enum dma_data_direction dir) + { + struct scatterlist *sg; + int i; +--- head-2010-01-18.orig/mm/page_alloc.c 2009-12-04 10:49:40.000000000 +0100 ++++ head-2010-01-18/mm/page_alloc.c 2009-12-04 11:30:00.000000000 +0100 +@@ -4662,11 +4662,9 @@ static void __setup_per_zone_wmarks(void + } + + #ifdef CONFIG_XEN +- for_each_zone(zone) { ++ for_each_populated_zone(zone) { + unsigned int cpu; + +- if (!populated_zone(zone)) +- continue; + for_each_online_cpu(cpu) { + unsigned long high; + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.28 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.28 @@ -0,0 +1,24180 @@ +From: Linux Kernel Mailing List +Subject: [PATCH] Linux: Update to 2.6.28 +Patch-mainline: 2.6.28 + + This patch contains the differences between Linux 2.6.27 and 2.6.28. + +Acked-by: Jeff Mahoney +Automatically created from "patches.kernel.org/patch-2.6.28" by xen-port-patches.py + +--- head-2010-01-18.orig/arch/Kconfig 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/Kconfig 2009-11-06 10:51:47.000000000 +0100 +@@ -20,7 +20,7 @@ config OPROFILE + config OPROFILE_IBS + bool "OProfile AMD IBS support (EXPERIMENTAL)" + default n +- depends on OPROFILE && SMP && X86 ++ depends on OPROFILE && SMP && X86 && !XEN + help + Instruction-Based Sampling (IBS) is a new profiling + technique that provides rich, precise program performance +--- head-2010-01-18.orig/arch/ia64/Kconfig 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/Kconfig 2009-11-06 10:51:47.000000000 +0100 +@@ -231,7 +231,7 @@ config IA64_HP_SIM + config IA64_XEN_GUEST + bool "Xen guest" + select SWIOTLB +- depends on XEN ++ depends on PARAVIRT_XEN + help + Build a kernel that runs on Xen guest domain. At this moment only + 16KB page size in supported. 
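[Editorial note] One small idiom from the mm/page_alloc.c hunk a few hunks back is worth spelling out: for_each_populated_zone() simply folds the populated_zone() skip into the iterator. Fragment for illustration inside some per-zone routine (both macros are the real kernel ones; not standalone code):

    struct zone *zone;

    /* Before the hunk: iterate everything, skip empty zones by hand. */
    for_each_zone(zone) {
            if (!populated_zone(zone))
                    continue;
            /* ... per-zone work ... */
    }

    /* After the hunk: the helper performs the same skip internally. */
    for_each_populated_zone(zone) {
            /* ... per-zone work ... */
    }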
+--- head-2010-01-18.orig/arch/ia64/Makefile 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/Makefile 2009-11-20 11:02:40.000000000 +0100 +@@ -57,7 +57,7 @@ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64 + core-$(CONFIG_PERFMON) += arch/ia64/perfmon/ + core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ + core-$(CONFIG_KVM) += arch/ia64/kvm/ +-core-$(CONFIG_XEN) += arch/ia64/xen/ ++core-$(CONFIG_PARAVIRT_XEN) += arch/ia64/xen/ + + drivers-$(CONFIG_KDB) += arch/$(ARCH)/kdb/ + drivers-$(CONFIG_PCI) += arch/ia64/pci/ +--- head-2010-01-18.orig/arch/ia64/include/asm/xen/hypervisor.h 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/include/asm/xen/hypervisor.h 2009-11-06 10:51:47.000000000 +0100 +@@ -46,7 +46,7 @@ enum xen_domain_type { + XEN_HVM_DOMAIN, /* running in a Xen hvm domain*/ + }; + +-#ifdef CONFIG_XEN ++#ifdef CONFIG_PARAVIRT_XEN + extern enum xen_domain_type xen_domain_type; + #else + #define xen_domain_type XEN_NATIVE +@@ -66,7 +66,7 @@ extern enum xen_domain_type xen_domain_t + #endif + + +-#ifdef CONFIG_XEN ++#ifdef CONFIG_PARAVIRT_XEN + extern struct shared_info *HYPERVISOR_shared_info; + extern struct start_info *xen_start_info; + +--- head-2010-01-18.orig/arch/ia64/include/asm/xen/interface.h 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/include/asm/xen/interface.h 2010-01-04 17:34:44.000000000 +0100 +@@ -56,29 +56,21 @@ + #ifndef _ASM_IA64_XEN_INTERFACE_H + #define _ASM_IA64_XEN_INTERFACE_H + +-#define __DEFINE_GUEST_HANDLE(name, type) \ ++#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name + + #define DEFINE_GUEST_HANDLE_STRUCT(name) \ +- __DEFINE_GUEST_HANDLE(name, struct name) +-#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +-#define GUEST_HANDLE(name) __guest_handle_ ## name +-#define GUEST_HANDLE_64(name) GUEST_HANDLE(name) ++ __DEFINE_XEN_GUEST_HANDLE(name, struct name) ++#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) ++#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name ++#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) + #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) + + #ifndef __ASSEMBLY__ +-/* Guest handles for primitive C types. */ +-__DEFINE_GUEST_HANDLE(uchar, unsigned char); +-__DEFINE_GUEST_HANDLE(uint, unsigned int); +-__DEFINE_GUEST_HANDLE(ulong, unsigned long); +-__DEFINE_GUEST_HANDLE(u64, unsigned long); +-DEFINE_GUEST_HANDLE(char); +-DEFINE_GUEST_HANDLE(int); +-DEFINE_GUEST_HANDLE(long); +-DEFINE_GUEST_HANDLE(void); ++__DEFINE_XEN_GUEST_HANDLE(u64, unsigned long); + ++typedef unsigned long xen_ulong_t; + typedef unsigned long xen_pfn_t; +-DEFINE_GUEST_HANDLE(xen_pfn_t); + #define PRI_xen_pfn "lx" + #endif + +@@ -90,7 +82,7 @@ DEFINE_GUEST_HANDLE(xen_pfn_t); + /* Maximum number of virtual CPUs in multi-processor guests. */ + /* keep sizeof(struct shared_page) <= PAGE_SIZE. + * this is checked in arch/ia64/xen/hypervisor.c. 
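[Editorial note] The guest-handle macros being renamed above rest on a small but central trick: wrapping the pointer in a one-member struct keeps hypercall arguments type-checked while staying ABI-identical to a bare pointer. A standalone illustration built from the macro definitions in the hunk; main() and frames are this note's own names:

    #define __DEFINE_XEN_GUEST_HANDLE(name, type) \
            typedef struct { type *p; } __guest_handle_ ## name
    #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
    #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
    #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)

    typedef unsigned long xen_pfn_t;
    DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);

    int main(void)
    {
            static xen_pfn_t frames[4];
            XEN_GUEST_HANDLE(xen_pfn_t) h;

            set_xen_guest_handle(h, frames);
            return h.p == frames ? 0 : 1;   /* same pointer, stronger type */
    }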
*/ +-#define MAX_VIRT_CPUS 64 ++#define XEN_LEGACY_MAX_VCPUS 64 + + #ifndef __ASSEMBLY__ + +--- head-2010-01-18.orig/arch/ia64/kernel/asm-offsets.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/kernel/asm-offsets.c 2009-11-06 10:51:47.000000000 +0100 +@@ -290,7 +290,7 @@ void foo(void) + DEFINE(IA64_ITC_LASTCYCLE_OFFSET, + offsetof (struct itc_jitter_data_t, itc_lastcycle)); + +-#ifdef CONFIG_XEN ++#ifdef CONFIG_PARAVIRT_XEN + BLANK(); + + DEFINE(XEN_NATIVE_ASM, XEN_NATIVE); +--- head-2010-01-18.orig/arch/ia64/xen/Kconfig 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/xen/Kconfig 2009-11-06 10:51:47.000000000 +0100 +@@ -2,7 +2,7 @@ + # This Kconfig describes xen/ia64 options + # + +-config XEN ++config PARAVIRT_XEN + bool "Xen hypervisor support" + default y + depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB && EXPERIMENTAL +@@ -17,9 +17,9 @@ config XEN + both as a guest OS on Xen and natively on hardware. + + config XEN_XENCOMM +- depends on XEN ++ depends on PARAVIRT_XEN + bool + + config NO_IDLE_HZ +- depends on XEN ++ depends on PARAVIRT_XEN + bool +--- head-2010-01-18.orig/arch/ia64/xen/xcom_hcall.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/ia64/xen/xcom_hcall.c 2009-11-06 10:51:47.000000000 +0100 +@@ -343,7 +343,7 @@ xencommize_memory_reservation(struct xen + int + xencomm_hypercall_memory_op(unsigned int cmd, void *arg) + { +- GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} }; ++ XEN_GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} }; + struct xen_memory_reservation *xmr = NULL; + int rc; + struct xencomm_handle *desc; +--- head-2010-01-18.orig/arch/x86/Kconfig 2009-11-20 11:02:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig 2009-11-20 11:02:23.000000000 +0100 +@@ -1045,7 +1045,7 @@ config MICROCODE + + config MICROCODE_INTEL + bool "Intel microcode patch loading support" +- depends on MICROCODE ++ depends on MICROCODE && !XEN + default MICROCODE + select FW_LOADER + ---help--- +@@ -1058,7 +1058,7 @@ config MICROCODE_INTEL + + config MICROCODE_AMD + bool "AMD microcode patch loading support" +- depends on MICROCODE ++ depends on MICROCODE && !XEN + select FW_LOADER + ---help--- + If you select this option, microcode patch loading support for AMD +@@ -1357,6 +1357,7 @@ config HIGHPTE + + config X86_CHECK_BIOS_CORRUPTION + bool "Check for low memory corruption" ++ depends on !XEN + ---help--- + Periodically check for memory corruption in low memory, which + is suspected to be caused by BIOS. Even when enabled in the +@@ -1387,6 +1388,7 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_C + + config X86_RESERVE_LOW_64K + bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" ++ depends on !XEN + default y + ---help--- + Reserve the first 64K of physical RAM on BIOSes that are known +@@ -2079,7 +2081,7 @@ config DMAR_FLOPPY_WA + + config INTR_REMAP + bool "Support for Interrupt Remapping (EXPERIMENTAL)" +- depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL ++ depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && !XEN && EXPERIMENTAL + ---help--- + Supports Interrupt remapping for IO-APIC and MSI devices. 
+ To use x2apic mode in the CPU's which support x2APIC enhancements or +--- head-2010-01-18.orig/arch/x86/Kconfig.cpu 2009-12-04 10:44:40.000000000 +0100 ++++ head-2010-01-18/arch/x86/Kconfig.cpu 2009-11-06 10:51:47.000000000 +0100 +@@ -497,7 +497,7 @@ config CPU_SUP_TRANSMETA_32 + config CPU_SUP_UMC_32 + default y + bool "Support UMC processors" if PROCESSOR_SELECT +- depends on !64BIT ++ depends on !64BIT && !XEN + ---help--- + This enables detection, tunings and quirks for UMC processors + +@@ -510,13 +510,13 @@ config CPU_SUP_UMC_32 + + config X86_DS + def_bool X86_PTRACE_BTS +- depends on X86_DEBUGCTLMSR ++ depends on X86_DEBUGCTLMSR && !XEN + select HAVE_HW_BRANCH_TRACER + + config X86_PTRACE_BTS + bool "Branch Trace Store" + default y +- depends on X86_DEBUGCTLMSR ++ depends on X86_DEBUGCTLMSR && !XEN + depends on BROKEN + ---help--- + This adds a ptrace interface to the hardware's branch trace store. +--- head-2010-01-18.orig/arch/x86/Makefile 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/Makefile 2009-11-06 10:51:47.000000000 +0100 +@@ -112,7 +112,7 @@ endif + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) + + # Xen subarch support +-mflags-$(CONFIG_XEN) := -Iinclude/asm-x86/mach-xen ++mflags-$(CONFIG_XEN) := -Iarch/x86/include/mach-xen + mcore-$(CONFIG_XEN) := arch/x86/mach-xen/ + + KBUILD_CFLAGS += $(mflags-y) +@@ -159,7 +159,7 @@ PHONY += bzImage vmlinuz $(BOOT_TARGETS) + + ifdef CONFIG_XEN + KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ +- -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(KBUILD_CPPFLAGS) ++ -I$(srctree)/arch/x86/include/mach-xen $(KBUILD_CPPFLAGS) + + ifdef CONFIG_X86_64 + LDFLAGS_vmlinux := -e startup_64 +--- head-2010-01-18.orig/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:51:47.000000000 +0100 +@@ -39,11 +39,11 @@ + .endm + + /* clobbers %eax */ +- .macro CLEAR_RREGS ++ .macro CLEAR_RREGS _r9=rax + xorl %eax,%eax + movq %rax,R11(%rsp) + movq %rax,R10(%rsp) +- movq %rax,R9(%rsp) ++ movq %\_r9,R9(%rsp) + movq %rax,R8(%rsp) + .endm + +@@ -52,11 +52,10 @@ + * We don't reload %eax because syscall_trace_enter() returned + * the value it wants us to use in the table lookup. 
+ */ +- .macro LOAD_ARGS32 offset +- movl \offset(%rsp),%r11d +- movl \offset+8(%rsp),%r10d ++ .macro LOAD_ARGS32 offset, _r9=0 ++ .if \_r9 + movl \offset+16(%rsp),%r9d +- movl \offset+24(%rsp),%r8d ++ .endif + movl \offset+40(%rsp),%ecx + movl \offset+48(%rsp),%edx + movl \offset+56(%rsp),%esi +@@ -135,7 +134,7 @@ ENTRY(ia32_sysenter_target) + SAVE_ARGS 0,0,1 + /* no need to do an access_ok check here because rbp has been + 32bit zero extended */ +-1: movl (%rbp),%r9d ++1: movl (%rbp),%ebp + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous +@@ -146,7 +145,7 @@ ENTRY(ia32_sysenter_target) + cmpl $(IA32_NR_syscalls-1),%eax + ja ia32_badsys + sysenter_do_call: +- IA32_ARG_FIXUP 1 ++ IA32_ARG_FIXUP + sysenter_dispatch: + call *ia32_sys_call_table(,%rax,8) + movq %rax,RAX-ARGOFFSET(%rsp) +@@ -204,20 +203,17 @@ sysexit_audit: + #endif + + sysenter_tracesys: +- xchgl %r9d,%ebp + #ifdef CONFIG_AUDITSYSCALL + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + jz sysenter_auditsys + #endif + SAVE_REST + CLEAR_RREGS +- movq %r9,R9(%rsp) + movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- xchgl %ebp,%r9d + cmpl $(IA32_NR_syscalls-1),%eax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + jmp sysenter_do_call +@@ -272,9 +268,9 @@ ENTRY(ia32_cstar_target) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz cstar_tracesys +-cstar_do_call: + cmpl $IA32_NR_syscalls-1,%eax + ja ia32_badsys ++cstar_do_call: + IA32_ARG_FIXUP 1 + cstar_dispatch: + call *ia32_sys_call_table(,%rax,8) +@@ -303,15 +299,13 @@ cstar_tracesys: + #endif + xchgl %r9d,%ebp + SAVE_REST +- CLEAR_RREGS +- movq %r9,R9(%rsp) ++ CLEAR_RREGS r9 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter +- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ ++ LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ + RESTORE_REST + xchgl %ebp,%r9d +- movl RSP-ARGOFFSET(%rsp), %r8d + cmpl $(IA32_NR_syscalls-1),%eax + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + jmp cstar_do_call +@@ -522,8 +516,8 @@ ia32_sys_call_table: + .quad compat_sys_setrlimit /* 75 */ + .quad compat_sys_old_getrlimit /* old_getrlimit */ + .quad compat_sys_getrusage +- .quad sys32_gettimeofday +- .quad sys32_settimeofday ++ .quad compat_sys_gettimeofday ++ .quad compat_sys_settimeofday + .quad sys_getgroups16 /* 80 */ + .quad sys_setgroups16 + .quad sys32_old_select +--- head-2010-01-18.orig/arch/x86/include/asm/cpufeature.h 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/asm/cpufeature.h 2009-11-18 14:49:48.000000000 +0100 +@@ -246,7 +246,11 @@ extern const char * const x86_power_flag + #define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) + #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) + #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) ++#ifndef CONFIG_XEN + #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) ++#else ++#define cpu_has_xsave boot_cpu_has(X86_FEATURE_OSXSAVE) ++#endif + #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) + #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) + +--- head-2010-01-18.orig/arch/x86/include/asm/hw_irq.h 2010-01-18 15:20:21.000000000 +0100 ++++ 
head-2010-01-18/arch/x86/include/asm/hw_irq.h 2009-11-06 10:51:47.000000000 +0100 +@@ -107,6 +107,7 @@ extern void smp_error_interrupt(struct p + extern asmlinkage void smp_irq_move_cleanup_interrupt(void); + #endif + #ifdef CONFIG_SMP ++#ifndef CONFIG_XEN + extern void smp_reschedule_interrupt(struct pt_regs *); + extern void smp_call_function_interrupt(struct pt_regs *); + extern void smp_call_function_single_interrupt(struct pt_regs *); +@@ -115,6 +116,12 @@ extern void smp_invalidate_interrupt(str + #else + extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); + #endif ++#else ++#include ++extern irqreturn_t smp_reschedule_interrupt(int, void *); ++extern irqreturn_t smp_call_function_interrupt(int, void *); ++extern irqreturn_t smp_call_function_single_interrupt(int, void *); ++#endif + #endif + + extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); +--- head-2010-01-18.orig/arch/x86/include/asm/segment.h 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/asm/segment.h 2009-11-06 10:51:47.000000000 +0100 +@@ -186,7 +186,9 @@ + #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) + #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3) + #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3) +-#ifndef CONFIG_PARAVIRT ++#if defined(CONFIG_X86_XEN) ++#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ++#elif !defined(CONFIG_PARAVIRT) + #define get_kernel_rpl() 0 + #endif + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/agp.h 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/agp.h 2009-11-06 10:51:47.000000000 +0100 +@@ -40,4 +40,4 @@ + #define free_gatt_pages(table, order) \ + dma_free_coherent(NULL,PAGE_SIZE<<(order),(table),virt_to_bus(table)) + +-#endif ++#endif /* _ASM_X86_AGP_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _ASM_DESC_H_ +-#define _ASM_DESC_H_ ++#ifndef _ASM_X86_DESC_H ++#define _ASM_X86_DESC_H + + #ifndef __ASSEMBLY__ + #include +@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_ + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + desc->base2 = (info->base_addr & 0xff000000) >> 24; ++ /* ++ * Don't allow setting of the lm bit. 
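The fill_ldt() change in this hunk packs the user-supplied base into the split descriptor bitfields and then forces the lm bit off. A self-contained sketch of that packing, using a simplified stand-in for the kernel's desc_struct:

#include <stdint.h>
#include <stdio.h>

struct seg_desc {                /* simplified stand-in, illustration only */
        uint16_t limit0, base0;
        uint8_t  base1;
        unsigned type:4, s:1, dpl:2, p:1;
        unsigned limit1:4, avl:1, l:1, d:1, g:1;
        uint8_t  base2;
};

int main(void)
{
        uint32_t base = 0x12345678;
        struct seg_desc d = { 0 };

        d.base0 = base & 0xffff;        /* base bits 15..0  */
        d.base1 = (base >> 16) & 0xff;  /* base bits 23..16 */
        d.base2 = (base >> 24) & 0xff;  /* base bits 31..24 */
        d.l = 0;  /* as in the hunk: never a 64-bit code segment via the LDT */

        printf("base2=%#x l=%u\n", d.base2, d.l);
        return 0;
}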
It is useless anyway ++ * because 64bit system calls require __USER_CS: ++ */ ++ desc->l = 0; + } + + #ifndef CONFIG_X86_NO_IDT +@@ -98,6 +103,14 @@ static inline int desc_empty(const void + #define write_idt_entry(dt, entry, g) \ + native_write_idt_entry(dt, entry, g) + ++static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) ++{ ++} ++ ++static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) ++{ ++} ++ + static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) + { +@@ -360,20 +373,16 @@ static inline void set_system_intr_gate( + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); + } + +-static inline void set_trap_gate(unsigned int n, void *addr) ++static inline void set_system_trap_gate(unsigned int n, void *addr) + { + BUG_ON((unsigned)n > 0xFF); +- _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); ++ _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); + } + +-static inline void set_system_gate(unsigned int n, void *addr) ++static inline void set_trap_gate(unsigned int n, void *addr) + { + BUG_ON((unsigned)n > 0xFF); +-#ifdef CONFIG_X86_32 +- _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); +-#else +- _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); +-#endif ++ _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); + } + + static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) +@@ -388,7 +397,7 @@ static inline void set_intr_gate_ist(int + _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); + } + +-static inline void set_system_gate_ist(int n, void *addr, unsigned ist) ++static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) + { + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); +@@ -420,4 +429,4 @@ static inline void set_system_gate_ist(i + + #endif /* __ASSEMBLY__ */ + +-#endif ++#endif /* _ASM_X86_DESC_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/dma-mapping.h 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/dma-mapping.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,17 +1,12 @@ +-#ifndef _ASM_DMA_MAPPING_H_ ++#ifndef _ASM_X86_DMA_MAPPING_H_ + + #include_next + +-static inline int +-address_needs_mapping(struct device *hwdev, dma_addr_t addr) +-{ +- dma_addr_t mask = 0xffffffff; +- /* If the device has a mask, use it, otherwise default to 32 bits */ +- if (hwdev && hwdev->dma_mask) +- mask = *hwdev->dma_mask; +- return (addr & ~mask) != 0; +-} ++void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t); ++ ++#define address_needs_mapping(hwdev, addr, size) \ ++ !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size) + + extern int range_straddles_page_boundary(paddr_t p, size_t size); + +-#endif /* _ASM_DMA_MAPPING_H_ */ ++#endif /* _ASM_X86_DMA_MAPPING_H_ */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/fixmap.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _ASM_FIXMAP_H +-#define _ASM_FIXMAP_H ++#ifndef _ASM_X86_FIXMAP_H ++#define _ASM_X86_FIXMAP_H + + #ifdef CONFIG_X86_32 + # include "fixmap_32.h" +@@ -9,6 +9,10 @@ + + extern int fixmaps_set; + ++extern pte_t *kmap_pte; ++extern pgprot_t kmap_prot; ++extern pte_t *pkmap_page_table; ++ + void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t); + + static inline void __set_fixmap(enum fixed_addresses idx, +@@ -61,4 +65,4 @@ static inline unsigned long 
virt_to_fix( + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); + } +-#endif ++#endif /* _ASM_X86_FIXMAP_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/fixmap_32.h 2009-11-06 10:51:47.000000000 +0100 +@@ -10,8 +10,8 @@ + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +-#ifndef _ASM_FIXMAP_32_H +-#define _ASM_FIXMAP_32_H ++#ifndef _ASM_X86_FIXMAP_32_H ++#define _ASM_X86_FIXMAP_32_H + + /* used by vmalloc.c, vsyscall.lds.S. + * +@@ -27,10 +27,8 @@ extern unsigned long __FIXADDR_TOP; + #include + #include + #include +-#ifdef CONFIG_HIGHMEM + #include + #include +-#endif + + /* + * Here we define all the compile-time 'special' virtual +@@ -81,10 +79,8 @@ enum fixed_addresses { + #ifdef CONFIG_X86_CYCLONE_TIMER + FIX_CYCLONE_TIMER, /*cyclone timer register*/ + #endif +-#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +-#endif + #ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, + #endif +@@ -100,10 +96,10 @@ enum fixed_addresses { + * can have a single pgd entry and a single pte table: + */ + #define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_NESTING 4 ++#define FIX_BTMAPS_SLOTS 4 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - + (__end_of_permanent_fixed_addresses & 255), +- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, ++ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, + FIX_WP_TEST, + #ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, +@@ -126,4 +122,4 @@ extern void reserve_top_address(unsigned + #define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) + + #endif /* !__ASSEMBLY__ */ +-#endif ++#endif /* _ASM_X86_FIXMAP_32_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/fixmap_64.h 2009-11-06 10:51:47.000000000 +0100 +@@ -8,8 +8,8 @@ + * Copyright (C) 1998 Ingo Molnar + */ + +-#ifndef _ASM_FIXMAP_64_H +-#define _ASM_FIXMAP_64_H ++#ifndef _ASM_X86_FIXMAP_64_H ++#define _ASM_X86_FIXMAP_64_H + + #include + #include +@@ -47,6 +47,10 @@ enum fixed_addresses { + #ifndef CONFIG_XEN + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, ++#else ++#define NR_FIX_ISAMAPS 256 ++ FIX_ISAMAP_END, ++ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, + #endif + #ifdef CONFIG_EFI + FIX_EFI_IO_MAP_LAST_PAGE, +@@ -58,29 +62,26 @@ enum fixed_addresses { + #else + FIX_SHARED_INFO, + #endif ++ __end_of_permanent_fixed_addresses, + #ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, + #endif +-#define NR_FIX_ISAMAPS 256 +- FIX_ISAMAP_END, +- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, + #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, + #endif +- __end_of_permanent_fixed_addresses, + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. 
+ * +- * We round it up to the next 512 pages boundary so that we ++ * We round it up to the next 256 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ + #define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_NESTING 4 +- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 - +- (__end_of_permanent_fixed_addresses & 511), +- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, ++#define FIX_BTMAPS_SLOTS 4 ++ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - ++ (__end_of_permanent_fixed_addresses & 255), ++ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, + __end_of_fixed_addresses + }; + +@@ -92,4 +93,4 @@ enum fixed_addresses { + #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) + #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) + +-#endif ++#endif /* _ASM_X86_FIXMAP_64_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:51:47.000000000 +0100 +@@ -15,8 +15,8 @@ + * Copyright (C) 1999 Ingo Molnar + */ + +-#ifndef _ASM_HIGHMEM_H +-#define _ASM_HIGHMEM_H ++#ifndef _ASM_X86_HIGHMEM_H ++#define _ASM_X86_HIGHMEM_H + + #ifdef __KERNEL__ + +@@ -24,14 +24,11 @@ + #include + #include + #include ++#include + + /* declarations for highmem.c */ + extern unsigned long highstart_pfn, highend_pfn; + +-extern pte_t *kmap_pte; +-extern pgprot_t kmap_prot; +-extern pte_t *pkmap_page_table; +- + /* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical +@@ -96,4 +93,4 @@ static inline void copy_user_highpage(st + + #endif /* __KERNEL__ */ + +-#endif /* _ASM_HIGHMEM_H */ ++#endif /* _ASM_X86_HIGHMEM_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/io.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/io.h 2009-12-11 15:20:59.000000000 +0100 +@@ -5,20 +5,6 @@ + + #include + +-/* +- * early_ioremap() and early_iounmap() are for temporary early boot-time +- * mappings, before the real ioremap() is functional. +- * A boot-time mapping is currently limited to at most 16 pages. 
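The FIX_BTMAP_END/FIX_BTMAP_BEGIN changes in the fixmap hunks above are plain index arithmetic and easy to check numerically; a small sketch with an arbitrary example count of permanent fixmap entries:

#include <stdio.h>

#define NR_FIX_BTMAPS           64
#define FIX_BTMAPS_SLOTS        4

int main(void)
{
        /* arbitrary example count of permanent fixmap entries */
        unsigned end_of_permanent = 397;

        /* round up to the next 256-entry boundary, as in the hunk */
        unsigned btmap_end = end_of_permanent + 256 -
                             (end_of_permanent & 255);
        unsigned btmap_begin = btmap_end +
                               NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS - 1;

        /* 64 * 4 = 256 slots, so the range spans exactly one aligned block */
        printf("FIX_BTMAP_END=%u FIX_BTMAP_BEGIN=%u\n",
               btmap_end, btmap_begin);
        return 0;
}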
+- */ +-#ifndef __ASSEMBLY__ +-extern void early_ioremap_init(void); +-extern void early_ioremap_clear(void); +-extern void early_ioremap_reset(void); +-extern void *early_ioremap(unsigned long offset, unsigned long size); +-extern void early_iounmap(void *addr, unsigned long size); +-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); +-#endif +- + #define build_mmio_read(name, size, type, reg, barrier) \ + static inline type name(const volatile void __iomem *addr) \ + { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ +@@ -73,12 +59,14 @@ build_mmio_write(__writeq, "q", unsigned + #define writeq writeq + #endif + ++extern int iommu_bio_merge; ++ + #define native_io_delay xen_io_delay + + #ifdef CONFIG_X86_32 +-# include "../../io_32.h" ++# include "../../asm/io_32.h" + #else +-# include "../../io_64.h" ++# include "../../asm/io_64.h" + #endif + + #if defined(__KERNEL__) && !defined(__ASSEMBLY__) +@@ -95,7 +83,7 @@ build_mmio_write(__writeq, "q", unsigned + (unsigned long)(bv)->bv_offset) + + #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ +- (bvec_to_phys(vec1) + (vec1)->bv_len == bvec_to_phys(vec2) \ ++ (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + && bvec_to_pseudophys(vec1) + (vec1)->bv_len \ + == bvec_to_pseudophys(vec2)) + +@@ -134,8 +122,9 @@ extern void __iomem *ioremap_wc(unsigned + extern void early_ioremap_init(void); + extern void early_ioremap_clear(void); + extern void early_ioremap_reset(void); +-extern void *early_ioremap(unsigned long offset, unsigned long size); +-extern void early_iounmap(void *addr, unsigned long size); ++extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); ++extern void __iomem *early_memremap(unsigned long offset, unsigned long size); ++extern void early_iounmap(void __iomem *addr, unsigned long size); + extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _ASM_IRQ_VECTORS_H +-#define _ASM_IRQ_VECTORS_H ++#ifndef _ASM_X86_IRQ_VECTORS_H ++#define _ASM_X86_IRQ_VECTORS_H + + #ifdef CONFIG_X86_32 + # define SYSCALL_VECTOR 0x80 +@@ -47,6 +47,5 @@ + #define NR_DYNIRQS 256 + + #define NR_IRQS (NR_PIRQS + NR_DYNIRQS) +-#define NR_IRQ_VECTORS NR_IRQS + +-#endif /* _ASM_IRQ_VECTORS_H */ ++#endif /* _ASM_X86_IRQ_VECTORS_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:51:47.000000000 +0100 +@@ -157,23 +157,6 @@ static inline int raw_irqs_disabled_flag + raw_irqs_disabled_flags(flags); \ + }) + +-/* +- * makes the traced hardirq state match with the machine state +- * +- * should be a rarely used function, only in places where its +- * otherwise impossible to know the irq state, like in traps. 
+- */ +-static inline void trace_hardirqs_fixup_flags(unsigned long flags) +-{ +- if (raw_irqs_disabled_flags(flags)) +- trace_hardirqs_off(); +- else +- trace_hardirqs_on(); +-} +- +-#define trace_hardirqs_fixup() \ +- trace_hardirqs_fixup_flags(__raw_local_save_flags()) +- + #else + + #ifdef CONFIG_X86_64 +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __ASM_X86_MMU_CONTEXT_H +-#define __ASM_X86_MMU_CONTEXT_H ++#ifndef _ASM_X86_MMU_CONTEXT_H ++#define _ASM_X86_MMU_CONTEXT_H + + #include + #include +@@ -39,4 +39,4 @@ do { \ + } while (0); + + +-#endif /* __ASM_X86_MMU_CONTEXT_H */ ++#endif /* _ASM_X86_MMU_CONTEXT_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __I386_SCHED_H +-#define __I386_SCHED_H ++#ifndef _ASM_X86_MMU_CONTEXT_32_H ++#define _ASM_X86_MMU_CONTEXT_32_H + + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) + { +@@ -81,4 +81,4 @@ static inline void switch_mm(struct mm_s + #define deactivate_mm(tsk, mm) \ + asm("movl %0,%%gs": :"r" (0)); + +-#endif ++#endif /* _ASM_X86_MMU_CONTEXT_32_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context_64.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __X86_64_MMU_CONTEXT_H +-#define __X86_64_MMU_CONTEXT_H ++#ifndef _ASM_X86_MMU_CONTEXT_64_H ++#define _ASM_X86_MMU_CONTEXT_64_H + + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) + { +@@ -103,4 +103,4 @@ do { \ + asm volatile("movl %0,%%fs"::"r"(0)); \ + } while (0) + +-#endif ++#endif /* _ASM_X86_MMU_CONTEXT_64_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __x86_PCI_H +-#define __x86_PCI_H ++#ifndef _ASM_X86_PCI_H ++#define _ASM_X86_PCI_H + + #include /* for struct page */ + #include +@@ -93,7 +93,7 @@ static inline void early_quirks(void) { + #ifdef CONFIG_X86_32 + # include "pci_32.h" + #else +-# include "pci_64.h" ++# include "../../asm/pci_64.h" + #endif + + /* implement the pci_ DMA API in terms of the generic device dma_ one */ +@@ -117,4 +117,4 @@ static inline cpumask_t __pcibus_to_cpum + } + #endif + +-#endif ++#endif /* _ASM_X86_PCI_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgalloc.h 2009-11-06 10:51:47.000000000 +0100 +@@ -149,4 +149,4 @@ extern void __pud_free_tlb(struct mmu_ga + #endif /* PAGETABLE_LEVELS > 3 */ + #endif /* PAGETABLE_LEVELS > 2 */ + +-#endif /* _ASM_X86_PGALLOC_H */ ++#endif /* _ASM_X86_PGALLOC_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-06 10:51:47.000000000 +0100 +@@ -14,11 +14,11 @@ + #define _PAGE_BIT_PAT 7 /* on 4KB pages */ + #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ + #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +-#define 
_PAGE_BIT_UNUSED2 10 +-#define _PAGE_BIT_IO 11 /* Mapped page is I/O or foreign and +- * has no associated page struct. */ ++#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ ++#define _PAGE_BIT_UNUSED3 11 + #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ + #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 + #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + + /* If _PAGE_BIT_PRESENT is clear, we use these: */ +@@ -39,11 +39,12 @@ + #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) + #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) + #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +-#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2) +-#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO) ++#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) ++#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) + #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) ++#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) + #define __HAVE_ARCH_PTE_SPECIAL + + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +@@ -69,7 +70,7 @@ extern unsigned int __kernel_page_user; + _PAGE_DIRTY | __kernel_page_user) + + /* Set of bits not changed in pte_modify */ +-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \ ++#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) + + /* +@@ -116,6 +117,11 @@ extern unsigned int __kernel_page_user; + #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) + ++#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) ++ + #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) + #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) + #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +@@ -130,6 +136,11 @@ extern unsigned int __kernel_page_user; + #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) + #define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) + ++#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) ++#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) ++#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) ++#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) ++ + /* xwr */ + #define __P000 PAGE_NONE + #define __P001 PAGE_READONLY +@@ -149,6 +160,22 @@ extern unsigned int __kernel_page_user; + #define __S110 PAGE_SHARED_EXEC + #define __S111 PAGE_SHARED_EXEC + ++/* ++ * early identity mapping pte attrib macros. ++ */ ++#ifdef CONFIG_X86_64 ++#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC ++#else ++/* ++ * For PDE_IDENT_ATTR include USER bit. 
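(A quick numeric check of the PTE_IDENT_ATTR/PDE_IDENT_ATTR/PGD_IDENT_ATTR values defined a few lines below, built from the _PAGE_BIT_* positions at the top of this header; illustration only:)

#include <stdio.h>

#define PRESENT         0x001   /* _PAGE_BIT_PRESENT  = 0 */
#define RW              0x002   /* _PAGE_BIT_RW       = 1 */
#define USER            0x004   /* _PAGE_BIT_USER     = 2 */
#define ACCESSED        0x020   /* _PAGE_BIT_ACCESSED = 5 */
#define DIRTY           0x040   /* _PAGE_BIT_DIRTY    = 6 */

int main(void)
{
        printf("PTE_IDENT_ATTR = %#05x\n", PRESENT | RW);       /* 0x003 */
        printf("PDE_IDENT_ATTR = %#05x\n",
               PRESENT | RW | USER | DIRTY | ACCESSED);         /* 0x067 */
        printf("PGD_IDENT_ATTR = %#05x\n", PRESENT);            /* 0x001 */
        return 0;
}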
As the PDE and PTE protection ++ * bits are combined, this will alow user to access the high address mapped ++ * VDSO in the presence of CONFIG_COMPAT_VDSO ++ */ ++#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ ++#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ ++#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ ++#endif ++ + #ifndef __ASSEMBLY__ + + /* +@@ -205,6 +232,15 @@ static inline int pte_special(pte_t pte) + return pte_flags(pte) & _PAGE_SPECIAL; + } + ++#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ ++ __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) ++#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_IOMAP ? max_mapnr : \ ++ (_pte).pte_low & _PAGE_PRESENT ? \ ++ mfn_to_local_pfn(__pte_mfn(_pte)) : \ ++ __pte_mfn(_pte)) ++ ++#define pte_page(pte) pfn_to_page(pte_pfn(pte)) ++ + static inline int pmd_large(pmd_t pte) + { + return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == +@@ -347,6 +383,9 @@ static inline void xen_pagetable_setup_s + static inline void xen_pagetable_setup_done(pgd_t *base) {} + #endif + ++struct seq_file; ++extern void arch_report_meminfo(struct seq_file *m); ++ + #define set_pte(ptep, pte) xen_set_pte(ptep, pte) + #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) + +@@ -641,4 +680,4 @@ int touch_pte_range(struct mm_struct *mm + + #endif /* __ASSEMBLY__ */ + +-#endif /* _ASM_X86_PGTABLE_H */ ++#endif /* _ASM_X86_PGTABLE_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _I386_PGTABLE_3LEVEL_H +-#define _I386_PGTABLE_3LEVEL_H ++#ifndef _ASM_X86_PGTABLE_3LEVEL_H ++#define _ASM_X86_PGTABLE_3LEVEL_H + + /* + * Intel Physical Address Extension (PAE) Mode - three-level page +@@ -102,13 +102,13 @@ static inline void pud_clear(pud_t *pudp + xen_tlb_flush(); + } + +-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK)) ++#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) + + #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) + + + /* Find an entry in the second-level page table.. */ +-#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \ ++#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ + pmd_index(address)) + + #ifdef CONFIG_SMP +@@ -133,8 +133,6 @@ static inline int pte_same(pte_t a, pte_ + return a.pte_low == b.pte_low && a.pte_high == b.pte_high; + } + +-#define pte_page(x) pfn_to_page(pte_pfn(x)) +- + static inline int pte_none(pte_t pte) + { + return !(pte.pte_low | pte.pte_high); +@@ -142,12 +140,6 @@ static inline int pte_none(pte_t pte) + + #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ + ((_pte).pte_high << (32-PAGE_SHIFT))) +-#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ +- __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +-#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_IO ? max_mapnr : \ +- (_pte).pte_low & _PAGE_PRESENT ? 
\ +- mfn_to_local_pfn(__pte_mfn(_pte)) : \ +- __pte_mfn(_pte)) + + /* + * Bits 0, 6 and 7 are taken in the low part of the pte, +@@ -165,4 +157,4 @@ static inline int pte_none(pte_t pte) + #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) + #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) + +-#endif /* _I386_PGTABLE_3LEVEL_H */ ++#endif /* _ASM_X86_PGTABLE_3LEVEL_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2009-11-06 10:49:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _I386_PGTABLE_3LEVEL_DEFS_H +-#define _I386_PGTABLE_3LEVEL_DEFS_H ++#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H ++#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H + + #define SHARED_KERNEL_PMD 0 + +@@ -21,4 +21,4 @@ + */ + #define PTRS_PER_PTE 512 + +-#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */ ++#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _I386_PGTABLE_H +-#define _I386_PGTABLE_H ++#ifndef _ASM_X86_PGTABLE_32_H ++#define _ASM_X86_PGTABLE_32_H + + /* + * The Linux memory management assumes a three-level page table setup. On +@@ -29,6 +29,7 @@ static inline void pgtable_cache_init(vo + static inline void check_pgt_cache(void) { } + void paging_init(void); + ++extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); + + /* + * The Linux x86 paging architecture is 'compile-time dual-mode', it +@@ -54,8 +55,7 @@ void paging_init(void); + * area for the same reason. ;) + */ + #define VMALLOC_OFFSET (8 * 1024 * 1024) +-#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \ +- & ~(VMALLOC_OFFSET - 1)) ++#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) + #ifdef CONFIG_X86_PAE + #define LAST_PKMAP 512 + #else +@@ -71,6 +71,8 @@ void paging_init(void); + # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) + #endif + ++#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) ++ + /* + * Define this if things work differently on an i386 and an i486: + * it will (on an i486) warn about kernel memory accesses that are +@@ -195,4 +197,4 @@ void make_lowmem_page_writable(void *va, + #define io_remap_pfn_range(vma, from, pfn, size, prot) \ + direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO) + +-#endif /* _I386_PGTABLE_H */ ++#endif /* _ASM_X86_PGTABLE_32_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _X86_64_PGTABLE_H +-#define _X86_64_PGTABLE_H ++#ifndef _ASM_X86_PGTABLE_64_H ++#define _ASM_X86_PGTABLE_64_H + + #include + #ifndef __ASSEMBLY__ +@@ -65,14 +65,14 @@ extern void paging_init(void); + printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \ + __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e)) + #define pmd_ERROR(e) \ +- printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \ ++ printk("%s:%d: bad pmd %p(%016lx pfn %010Lx).\n", \ + __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e)) + #define pud_ERROR(e) \ +- printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", \ ++ printk("%s:%d: bad pud %p(%016lx pfn %010Lx).\n", \ + __FILE__, __LINE__, &(e), __pud_val(e), \ + (pud_val(e) & __PHYSICAL_MASK) >> 
PAGE_SHIFT) + #define pgd_ERROR(e) \ +- printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", \ ++ printk("%s:%d: bad pgd %p(%016lx pfn %010Lx).\n", \ + __FILE__, __LINE__, &(e), __pgd_val(e), \ + (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT) + +@@ -181,14 +181,6 @@ static inline int pmd_bad(pmd_t pmd) + #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ + + #define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT) +-#define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ +- __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +-#define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? max_mapnr : \ +- (_pte).pte & _PAGE_PRESENT ? \ +- mfn_to_local_pfn(__pte_mfn(_pte)) : \ +- __pte_mfn(_pte)) +- +-#define pte_page(x) pfn_to_page(pte_pfn((x))) + + /* + * Macro to mark a page protection value as "uncacheable". +@@ -312,4 +304,4 @@ extern void cleanup_highmap(void); + #define __HAVE_ARCH_PTE_SAME + #endif /* !__ASSEMBLY__ */ + +-#endif /* _X86_64_PGTABLE_H */ ++#endif /* _ASM_X86_PGTABLE_64_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __ASM_X86_PROCESSOR_H +-#define __ASM_X86_PROCESSOR_H ++#ifndef _ASM_X86_PROCESSOR_H ++#define _ASM_X86_PROCESSOR_H + + #include + +@@ -20,6 +20,7 @@ struct mm_struct; + #include + #include + #include ++#include + + #include + #include +@@ -76,11 +77,11 @@ struct cpuinfo_x86 { + int x86_tlbsize; + __u8 x86_virt_bits; + __u8 x86_phys_bits; ++#endif + /* CPUID returned core id bits: */ + __u8 x86_coreid_bits; + /* Max extended CPUID function supported: */ + __u32 extended_cpuid_level; +-#endif + /* Maximum supported CPUID level, -1=no CPUID: */ + int cpuid_level; + __u32 x86_capability[NCAPINTS]; +@@ -140,6 +141,8 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_ + #define current_cpu_data boot_cpu_data + #endif + ++extern const struct seq_operations cpuinfo_op; ++ + static inline int hlt_works(int cpu) + { + #ifdef CONFIG_X86_32 +@@ -153,6 +156,8 @@ static inline int hlt_works(int cpu) + + extern void cpu_detect(struct cpuinfo_x86 *c); + ++extern struct pt_regs *idle_regs(struct pt_regs *); ++ + extern void early_cpu_init(void); + extern void identify_boot_cpu(void); + extern void identify_secondary_cpu(struct cpuinfo_x86 *); +@@ -161,11 +166,8 @@ extern void init_scattered_cpuid_feature + extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); + extern unsigned short num_cache_leaves; + +-#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) ++extern void detect_extended_topology(struct cpuinfo_x86 *c); + extern void detect_ht(struct cpuinfo_x86 *c); +-#else +-static inline void detect_ht(struct cpuinfo_x86 *c) {} +-#endif + + static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +@@ -327,7 +329,12 @@ struct i387_fxsave_struct { + /* 16*16 bytes for each XMM-reg = 256 bytes: */ + u32 xmm_space[64]; + +- u32 padding[24]; ++ u32 padding[12]; ++ ++ union { ++ u32 padding1[12]; ++ u32 sw_reserved[12]; ++ }; + + } __attribute__((aligned(16))); + +@@ -351,10 +358,23 @@ struct i387_soft_struct { + u32 entry_eip; + }; + ++struct xsave_hdr_struct { ++ u64 xstate_bv; ++ u64 reserved1[2]; ++ u64 reserved2[5]; ++} __attribute__((packed)); ++ ++struct xsave_struct { ++ struct i387_fxsave_struct i387; ++ struct xsave_hdr_struct xsave_hdr; ++ /* new processor state extensions will go here */ ++} __attribute__ ((packed, 
aligned (64))); ++ + union thread_xstate { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; ++ struct xsave_struct xsave; + }; + + #if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS) +@@ -413,9 +433,14 @@ struct thread_struct { + unsigned io_bitmap_max; + /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ + unsigned long debugctlmsr; +-/* Debug Store - if not 0 points to a DS Save Area configuration; +- * goes into MSR_IA32_DS_AREA */ +- unsigned long ds_area_msr; ++#ifdef CONFIG_X86_DS ++/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ ++ struct ds_context *ds_ctx; ++#endif /* CONFIG_X86_DS */ ++#ifdef CONFIG_X86_PTRACE_BTS ++/* the signal to send on a bts buffer overflow */ ++ unsigned int bts_ovfl_signal; ++#endif /* CONFIG_X86_PTRACE_BTS */ + }; + + static inline unsigned long xen_get_debugreg(int regno) +@@ -503,41 +528,6 @@ static inline void clear_in_cr4(unsigned + write_cr4(cr4); + } + +-struct microcode_header { +- unsigned int hdrver; +- unsigned int rev; +- unsigned int date; +- unsigned int sig; +- unsigned int cksum; +- unsigned int ldrver; +- unsigned int pf; +- unsigned int datasize; +- unsigned int totalsize; +- unsigned int reserved[3]; +-}; +- +-struct microcode { +- struct microcode_header hdr; +- unsigned int bits[0]; +-}; +- +-typedef struct microcode microcode_t; +-typedef struct microcode_header microcode_header_t; +- +-/* microcode format is extended from prescott processors */ +-struct extended_signature { +- unsigned int sig; +- unsigned int pf; +- unsigned int cksum; +-}; +- +-struct extended_sigtable { +- unsigned int count; +- unsigned int cksum; +- unsigned int reserved[3]; +- struct extended_signature sigs[0]; +-}; +- + typedef struct { + unsigned long seg; + } mm_segment_t; +@@ -887,4 +877,4 @@ extern void start_thread(struct pt_regs + extern int get_tsc_mode(unsigned long adr); + extern int set_tsc_mode(unsigned int val); + +-#endif ++#endif /* _ASM_X86_PROCESSOR_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:14:24.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:14:43.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _ASM_X86_SMP_H_ +-#define _ASM_X86_SMP_H_ ++#ifndef _ASM_X86_SMP_H ++#define _ASM_X86_SMP_H + #ifndef __ASSEMBLY__ + #include + #include +@@ -34,6 +34,9 @@ extern cpumask_t cpu_initialized; + DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); + DECLARE_PER_CPU(cpumask_t, cpu_core_map); + DECLARE_PER_CPU(u16, cpu_llc_id); ++#ifdef CONFIG_X86_32 ++DECLARE_PER_CPU(int, cpu_number); ++#endif + + DECLARE_PER_CPU(u16, x86_cpu_to_apicid); + DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); +@@ -51,12 +54,16 @@ extern struct { + struct smp_ops { + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned max_cpus); +- int (*cpu_up)(unsigned cpu); + void (*smp_cpus_done)(unsigned max_cpus); + + void (*smp_send_stop)(void); + void (*smp_send_reschedule)(int cpu); + ++ int (*cpu_up)(unsigned cpu); ++ int (*cpu_disable)(void); ++ void (*cpu_die)(unsigned int cpu); ++ void (*play_dead)(void); ++ + void (*send_call_func_ipi)(cpumask_t mask); + void (*send_call_func_single_ipi)(int cpu); + }; +@@ -91,6 +98,21 @@ static inline int __cpu_up(unsigned int + return smp_ops.cpu_up(cpu); + } + ++static inline int __cpu_disable(void) ++{ ++ return smp_ops.cpu_disable(); ++} ++ ++static inline void __cpu_die(unsigned int cpu) ++{ ++ smp_ops.cpu_die(cpu); ++} ++ ++static inline void 
play_dead(void) ++{ ++ smp_ops.play_dead(); ++} ++ + static inline void smp_send_reschedule(int cpu) + { + smp_ops.smp_send_reschedule(cpu); +@@ -106,13 +128,20 @@ static inline void arch_send_call_functi + smp_ops.send_call_func_ipi(mask); + } + ++void cpu_disable_common(void); + void native_smp_prepare_boot_cpu(void); + void native_smp_prepare_cpus(unsigned int max_cpus); + void native_smp_cpus_done(unsigned int max_cpus); + int native_cpu_up(unsigned int cpunum); ++int native_cpu_disable(void); ++void native_cpu_die(unsigned int cpu); ++void native_play_dead(void); ++void play_dead_common(void); + + #else /* CONFIG_XEN */ + ++extern int __cpu_disable(void); ++extern void __cpu_die(unsigned int cpu); + void xen_smp_send_stop(void); + void xen_smp_send_reschedule(int cpu); + void xen_send_call_func_ipi(cpumask_t mask); +@@ -123,10 +152,11 @@ void xen_send_call_func_single_ipi(int c + #define arch_send_call_function_single_ipi xen_send_call_func_single_ipi + #define arch_send_call_function_ipi xen_send_call_func_ipi + ++void play_dead(void); ++ + #endif /* CONFIG_XEN */ + +-extern int __cpu_disable(void); +-extern void __cpu_die(unsigned int cpu); ++extern void prefill_possible_map(void); + + void smp_store_cpu_info(int id); + #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +@@ -136,15 +166,11 @@ static inline int num_booting_cpus(void) + { + return cpus_weight(cpu_callout_map); + } +-#endif /* CONFIG_SMP */ +- +-#if defined(CONFIG_SMP) && (defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_XEN)) +-extern void prefill_possible_map(void); + #else + static inline void prefill_possible_map(void) + { + } +-#endif ++#endif /* CONFIG_SMP */ + + extern unsigned disabled_cpus __cpuinitdata; + +@@ -154,7 +180,6 @@ extern unsigned disabled_cpus __cpuinitd + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. 
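The smp_ops additions in the hunks above follow the kernel's usual ops-table pattern: generic __cpu_disable()/__cpu_die()/play_dead() wrappers dispatch through per-platform function pointers so the native and Xen implementations can differ. A minimal stand-alone model (all names local to this sketch):

#include <stdio.h>

struct smp_ops_demo {
        int  (*cpu_disable)(void);
        void (*cpu_die)(unsigned int cpu);
        void (*play_dead)(void);
};

static int  demo_cpu_disable(void) { return 0; }
static void demo_cpu_die(unsigned int cpu)
{
        printf("CPU %u torn down\n", cpu);
}
static void demo_play_dead(void) { printf("idling offline CPU\n"); }

static struct smp_ops_demo smp_ops = {
        .cpu_disable = demo_cpu_disable,
        .cpu_die     = demo_cpu_die,
        .play_dead   = demo_play_dead,
};

/* generic wrappers, mirroring __cpu_disable()/__cpu_die() in the hunk */
static int  cpu_disable(void)         { return smp_ops.cpu_disable(); }
static void cpu_die(unsigned int cpu) { smp_ops.cpu_die(cpu); }

int main(void)
{
        if (cpu_disable() == 0)
                cpu_die(1);
        return 0;
}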
+ */ +-DECLARE_PER_CPU(int, cpu_number); + #define raw_smp_processor_id() (x86_read_percpu(cpu_number)) + #define safe_smp_processor_id() smp_processor_id() + +@@ -177,30 +202,33 @@ DECLARE_PER_CPU(int, cpu_number); + + #ifdef CONFIG_X86_LOCAL_APIC + ++#ifndef CONFIG_X86_64 + static inline int logical_smp_processor_id(void) + { + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); + } + +-#ifndef CONFIG_X86_64 ++#include + static inline unsigned int read_apic_id(void) + { +- return *(u32 *)(APIC_BASE + APIC_ID); ++ unsigned int reg; ++ ++ reg = *(u32 *)(APIC_BASE + APIC_ID); ++ ++ return GET_APIC_ID(reg); + } +-#else +-extern unsigned int read_apic_id(void); + #endif + + +-# ifdef APIC_DEFINITION ++# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) + extern int hard_smp_processor_id(void); + # else +-# include ++#include + static inline int hard_smp_processor_id(void) + { + /* we don't want to mark this access volatile - bad code generation */ +- return GET_APIC_ID(read_apic_id()); ++ return read_apic_id(); + } + # endif /* APIC_DEFINITION */ + +@@ -212,9 +240,11 @@ static inline int hard_smp_processor_id( + + #endif /* CONFIG_X86_LOCAL_APIC */ + +-#ifdef CONFIG_HOTPLUG_CPU +-extern void cpu_uninit(void); ++#ifdef CONFIG_X86_HAS_BOOT_CPU_ID ++extern unsigned char boot_cpu_id; ++#else ++#define boot_cpu_id 0 + #endif + + #endif /* __ASSEMBLY__ */ +-#endif ++#endif /* _ASM_X86_SMP_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/spinlock.h 2009-11-17 15:30:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/spinlock.h 2009-11-17 15:30:35.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _X86_SPINLOCK_H_ +-#define _X86_SPINLOCK_H_ ++#ifndef _ASM_X86_SPINLOCK_H ++#define _ASM_X86_SPINLOCK_H + + #include + #include +@@ -448,4 +448,4 @@ static inline void __raw_write_unlock(ra + #define _raw_read_relax(lock) cpu_relax() + #define _raw_write_relax(lock) cpu_relax() + +-#endif ++#endif /* _ASM_X86_SPINLOCK_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/spinlock_types.h 2010-01-18 16:47:41.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/spinlock_types.h 2010-01-18 16:48:59.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __ASM_SPINLOCK_TYPES_H +-#define __ASM_SPINLOCK_TYPES_H ++#ifndef _ASM_X86_SPINLOCK_TYPES_H ++#define _ASM_X86_SPINLOCK_TYPES_H + + #ifndef __LINUX_SPINLOCK_TYPES_H + # error "please don't include this file directly" +@@ -36,4 +36,4 @@ typedef struct { + + #define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } + +-#endif ++#endif /* _ASM_X86_SPINLOCK_TYPES_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef _ASM_X86_SYSTEM_H_ +-#define _ASM_X86_SYSTEM_H_ ++#ifndef _ASM_X86_SYSTEM_H ++#define _ASM_X86_SYSTEM_H + + #include + #include +@@ -65,7 +65,10 @@ do { \ + \ + /* regparm parameters for __switch_to(): */ \ + [prev] "a" (prev), \ +- [next] "d" (next)); \ ++ [next] "d" (next) \ ++ \ ++ : /* reloaded segment registers */ \ ++ "memory"); \ + } while (0) + + /* +@@ -403,4 +406,4 @@ static inline void rdtsc_barrier(void) + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); + } + +-#endif ++#endif /* _ASM_X86_SYSTEM_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/system_64.h 2009-11-06 10:51:25.000000000 +0100 ++++ 
head-2010-01-18/arch/x86/include/mach-xen/asm/system_64.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef __ASM_SYSTEM_H +-#define __ASM_SYSTEM_H ++#ifndef _ASM_X86_SYSTEM_64_H ++#define _ASM_X86_SYSTEM_64_H + + #include + #include +@@ -17,4 +17,4 @@ static inline void write_cr8(unsigned lo + + #include + +-#endif ++#endif /* _ASM_X86_SYSTEM_64_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/tlbflush.h 2009-11-06 10:51:47.000000000 +0100 +@@ -63,6 +63,10 @@ static inline void flush_tlb_range(struc + __flush_tlb(); + } + ++static inline void reset_lazy_tlbstate(void) ++{ ++} ++ + #else /* SMP */ + + #include +@@ -92,6 +96,12 @@ struct tlb_state { + char __cacheline_padding[L1_CACHE_BYTES-8]; + }; + DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); ++ ++void reset_lazy_tlbstate(void); ++#else ++static inline void reset_lazy_tlbstate(void) ++{ ++} + #endif + + #endif /* SMP */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/vga.h 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/vga.h 2009-11-06 10:51:47.000000000 +0100 +@@ -4,8 +4,8 @@ + * (c) 1998 Martin Mares + */ + +-#ifndef _LINUX_ASM_VGA_H_ +-#define _LINUX_ASM_VGA_H_ ++#ifndef _ASM_X86_VGA_H ++#define _ASM_X86_VGA_H + + /* + * On the PC, we can just recalculate addresses and then +@@ -17,4 +17,4 @@ + #define vga_readb(x) (*(x)) + #define vga_writeb(x, y) (*(y) = (x)) + +-#endif ++#endif /* _ASM_X86_VGA_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/xor.h 2009-11-06 10:51:17.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/xor.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ + #ifdef CONFIG_X86_32 +-# include "../../xor_32.h" ++# include "../../asm/xor_32.h" + #else + # include "xor_64.h" + #endif +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/xor_64.h 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/xor_64.h 2009-11-06 10:51:47.000000000 +0100 +@@ -1,5 +1,5 @@ +-#ifndef ASM_X86__XOR_64_H +-#define ASM_X86__XOR_64_H ++#ifndef _ASM_X86_XOR_64_H ++#define _ASM_X86_XOR_64_H + + /* + * x86-64 changes / gcc fixes from Andi Kleen. +@@ -334,4 +334,4 @@ do { \ + deals with a load to a line that is being prefetched. 
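For context on the xor_64.h hunk above: every xor template that XOR_SELECT_TEMPLATE can pick computes the same result, a wide p1 ^= p2 over cache-line-sized chunks; a plain C equivalent without the SSE registers or the prefetching the comment discusses:

#include <stdio.h>

static void xor_blocks_demo(unsigned int bytes,
                            unsigned long *p1, const unsigned long *p2)
{
        unsigned int lines = bytes / (8 * sizeof(unsigned long));

        while (lines--) {
                int i;

                for (i = 0; i < 8; i++)   /* one "line" of 8 words */
                        p1[i] ^= p2[i];
                p1 += 8;
                p2 += 8;
        }
}

int main(void)
{
        unsigned long a[8] = { 0xffUL }, b[8] = { 0x0fUL };

        xor_blocks_demo(sizeof(a), a, b);
        printf("a[0] = %#lx\n", a[0]);    /* 0xf0 */
        return 0;
}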
*/ + #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse) + +-#endif /* ASM_X86__XOR_64_H */ ++#endif /* _ASM_X86_XOR_64_H */ +--- head-2010-01-18.orig/arch/x86/kernel/Makefile 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/Makefile 2009-11-06 10:51:47.000000000 +0100 +@@ -138,7 +138,7 @@ ifeq ($(CONFIG_X86_64),y) + time_64-$(CONFIG_XEN) += time_32.o + endif + +-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \ +- i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o smpboot.o \ +- tlb_$(BITS).o tsc.o tsc_sync.o vsmp_64.o ++disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o genx2apic_%.o \ ++ hpet.o i8253.o i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o \ ++ smpboot.o tlb_$(BITS).o tsc.o tsc_sync.o uv_%.o vsmp_64.o + disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o +--- head-2010-01-18.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/acpi/sleep-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include "realmode/wakeup.h" + #include "sleep.h" +@@ -22,7 +23,7 @@ unsigned long acpi_realmode_flags; + static unsigned long acpi_realmode; + + #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) +-static char temp_stack[10240]; ++static char temp_stack[4096]; + #endif + #endif + +@@ -100,7 +101,9 @@ int acpi_save_state_mem(void) + #else /* CONFIG_64BIT */ + header->trampoline_segment = setup_trampoline() >> 4; + #ifdef CONFIG_SMP +- stack_start.sp = temp_stack + 4096; ++ stack_start.sp = temp_stack + sizeof(temp_stack); ++ early_gdt_descr.address = ++ (unsigned long)get_cpu_gdt_table(smp_processor_id()); + #endif + initial_code = (unsigned long)wakeup_long64; + saved_magic = 0x123456789abcdef0; +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/kernel/apic/apic-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -0,0 +1,61 @@ ++/* ++ * Local APIC handling stubs ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* ++ * Debug level, exported for io_apic.c ++ */ ++unsigned int apic_verbosity; ++ ++/* Have we found an MP table */ ++int smp_found_config; ++ ++static int __init apic_set_verbosity(char *arg) ++{ ++ if (!arg) { ++#ifdef CONFIG_X86_64 ++ skip_ioapic_setup = 0; ++ return 0; ++#endif ++ return -EINVAL; ++ } ++ ++ if (strcmp("debug", arg) == 0) ++ apic_verbosity = APIC_DEBUG; ++ else if (strcmp("verbose", arg) == 0) ++ apic_verbosity = APIC_VERBOSE; ++ else { ++ printk(KERN_WARNING "APIC Verbosity level %s not recognised" ++ " use apic=verbose or apic=debug\n", arg); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++early_param("apic", apic_set_verbosity); ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++int __init APIC_init_uniprocessor(void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ if (smp_found_config && !skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); ++# ifdef CONFIG_X86_64 ++ else ++ nr_ioapics = 0; ++# endif ++#endif ++ ++ return 0; ++} +--- head-2010-01-18.orig/arch/x86/kernel/apic_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,101 +0,0 @@ +-/* +- * Local APIC handling, local APIC timers +- * +- * (c) 1999, 2000 Ingo Molnar +- * +- * Fixes +- * Maciej W. Rozycki : Bits for genuine 82489DX APICs; +- * thanks to Eric Gilmore +- * and Rolf G. Tews +- * for testing these extensively. +- * Maciej W. Rozycki : Various updates and fixes. 
+- * Mikael Pettersson : Power Management for UP-APIC. +- * Pavel Machek and +- * Mikael Pettersson : PM converted to driver model. +- */ +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-#include "io_ports.h" +- +-#ifndef CONFIG_XEN +-/* +- * cpu_mask that denotes the CPUs that needs timer interrupt coming in as +- * IPIs in place of local APIC timers +- */ +-static cpumask_t timer_bcast_ipi; +-#endif +- +-/* +- * Knob to control our willingness to enable the local APIC. +- */ +- +-/* +- * Debug level, exported for io_apic.c +- */ +-unsigned int apic_verbosity; +- +-/* Have we found an MP table */ +-int smp_found_config; +- +-#ifndef CONFIG_XEN +-static int modern_apic(void) +-{ +- /* AMD systems use old APIC versions, so check the CPU */ +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && +- boot_cpu_data.x86 >= 0xf) +- return 1; +- return lapic_get_version() >= 0x14; +-} +-#endif /* !CONFIG_XEN */ +- +-int get_physical_broadcast(void) +-{ +- return 0xff; +-} +- +-int setup_profiling_timer(unsigned int multiplier) +-{ +- return -EINVAL; +-} +- +-/* +- * This initializes the IO-APIC and APIC hardware if this is +- * a UP kernel. +- */ +-int __init APIC_init_uniprocessor(void) +-{ +-#ifdef CONFIG_X86_IO_APIC +- if (smp_found_config) +- if (!skip_ioapic_setup && nr_ioapics) +- setup_IO_APIC(); +-#endif +- +- return 0; +-} +--- head-2010-01-18.orig/arch/x86/kernel/apic_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,188 +0,0 @@ +-/* +- * Local APIC handling, local APIC timers +- * +- * (c) 1999, 2000 Ingo Molnar +- * +- * Fixes +- * Maciej W. Rozycki : Bits for genuine 82489DX APICs; +- * thanks to Eric Gilmore +- * and Rolf G. Tews +- * for testing these extensively. +- * Maciej W. Rozycki : Various updates and fixes. +- * Mikael Pettersson : Power Management for UP-APIC. +- * Pavel Machek and +- * Mikael Pettersson : PM converted to driver model. +- */ +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-int disable_apic; +- +-/* +- * Debug level, exported for io_apic.c +- */ +-unsigned int apic_verbosity; +- +-/* Have we found an MP table */ +-int smp_found_config; +- +-/* +- * The guts of the apic timer interrupt +- */ +-static void local_apic_timer_interrupt(void) +-{ +-#ifndef CONFIG_XEN +- int cpu = smp_processor_id(); +- struct clock_event_device *evt = &per_cpu(lapic_events, cpu); +- +- /* +- * Normally we should not be here till LAPIC has been initialized but +- * in some cases like kdump, its possible that there is a pending LAPIC +- * timer interrupt from previous kernel's context and is delivered in +- * new kernel the moment interrupts are enabled. +- * +- * Interrupts are enabled early and LAPIC is setup much later, hence +- * its possible that when we get here evt->event_handler is NULL. +- * Check for event_handler being NULL and discard the interrupt as +- * spurious. +- */ +- if (!evt->event_handler) { +- printk(KERN_WARNING +- "Spurious LAPIC timer interrupt on cpu %d\n", cpu); +- /* Switch it off */ +- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); +- return; +- } +-#endif +- +- /* +- * the NMI deadlock-detector uses this. 
+- */ +- add_pda(apic_timer_irqs, 1); +- +-#ifndef CONFIG_XEN +- evt->event_handler(evt); +-#endif +-} +- +-/* +- * Local APIC timer interrupt. This is the most natural way for doing +- * local interrupts, but local timer interrupts can be emulated by +- * broadcast interrupts too. [in case the hw doesn't support APIC timers] +- * +- * [ if a single-CPU system runs an SMP kernel then we call the local +- * interrupt as well. Thus we cannot inline the local irq ... ] +- */ +-void smp_apic_timer_interrupt(struct pt_regs *regs) +-{ +- struct pt_regs *old_regs = set_irq_regs(regs); +- +- /* +- * NOTE! We'd better ACK the irq immediately, +- * because timer handling can be slow. +- */ +- ack_APIC_irq(); +- /* +- * update_process_times() expects us to have done irq_enter(). +- * Besides, if we don't timer interrupts ignore the global +- * interrupt lock, which is the WrongThing (tm) to do. +- */ +- exit_idle(); +- irq_enter(); +- local_apic_timer_interrupt(); +- irq_exit(); +- set_irq_regs(old_regs); +-} +- +-int setup_profiling_timer(unsigned int multiplier) +-{ +- return -EINVAL; +-} +- +-/* +- * This initializes the IO-APIC and APIC hardware if this is +- * a UP kernel. +- */ +-int __init APIC_init_uniprocessor(void) +-{ +-#ifdef CONFIG_X86_IO_APIC +- if (smp_found_config && !skip_ioapic_setup && nr_ioapics) +- setup_IO_APIC(); +-#endif +- +- return 1; +-} +- +-/* +- * Local APIC interrupts +- */ +- +-/* +- * This interrupt should _never_ happen with our APIC/SMP architecture +- */ +-asmlinkage void smp_spurious_interrupt(void) +-{ +- unsigned int v; +- exit_idle(); +- irq_enter(); +- /* +- * Check if this really is a spurious interrupt and ACK it +- * if it is a vectored one. Just in case... +- * Spurious interrupts should not be ACKed. +- */ +- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); +- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) +- ack_APIC_irq(); +- +- add_pda(irq_spurious_count, 1); +- irq_exit(); +-} +- +-/* +- * This interrupt should never happen with our APIC/SMP architecture +- */ +-asmlinkage void smp_error_interrupt(void) +-{ +- unsigned int v, v1; +- +- exit_idle(); +- irq_enter(); +- /* First tickle the hardware, only then report what went on. 
-- REW */ +- v = apic_read(APIC_ESR); +- apic_write(APIC_ESR, 0); +- v1 = apic_read(APIC_ESR); +- ack_APIC_irq(); +- atomic_inc(&irq_err_count); +- +- /* Here is what the APIC error bits mean: +- 0: Send CS error +- 1: Receive CS error +- 2: Send accept error +- 3: Receive accept error +- 4: Reserved +- 5: Send illegal vector +- 6: Received illegal vector +- 7: Illegal register address +- */ +- printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", +- smp_processor_id(), v , v1); +- irq_exit(); +-} +--- head-2010-01-18.orig/arch/x86/kernel/cpu/addon_cpuid_features.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/addon_cpuid_features.c 2009-11-06 10:51:47.000000000 +0100 +@@ -70,7 +70,7 @@ void __cpuinit init_scattered_cpuid_feat + */ + void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) + { +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -1,33 +1,73 @@ + #include ++#include ++#include + #include ++#include ++#include ++#include ++#include ++#include + #include + #include +-#include + #include +-#include +-#include + #include + #include + #include ++#include + #include + #include + #include + #include + #include ++#include ++#include + #ifdef CONFIG_X86_LOCAL_APIC + #include + #include + #include +-#else ++#include ++#elif defined(CONFIG_X86_64_XEN) ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ + #ifdef CONFIG_XEN ++#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_LOCAL_APIC) + #define phys_pkg_id(a,b) a + #endif +-#endif + #include ++#include ++#endif + + #include "cpu.h" + ++static struct cpu_dev *this_cpu __cpuinitdata; ++ ++#ifdef CONFIG_X86_64 ++/* We need valid kernel segments for data and code in long mode too ++ * IRET will check the segment types kkeil 2000/10/28 ++ * Also sysret mandates a special GDT layout ++ */ ++/* The TLS descriptors are currently at a different place compared to i386. ++ Hopefully nobody expects them at a fixed place (Wine?) 
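The packed initialisers in the gdt_page tables just below (for example 0x00af9b00 for the 64-bit kernel code segment) are easier to audit once unpacked; a small decoder for the high dword of a segment descriptor, with the field layout as documented in the x86 architecture manuals:

#include <stdio.h>

int main(void)
{
        unsigned int hi = 0x00af9b00; /* high dword of GDT_ENTRY_KERNEL_CS */

        printf("type=%#x s=%u dpl=%u p=%u l=%u d=%u g=%u\n",
               (hi >> 8) & 0xf,   /* segment type           */
               (hi >> 12) & 1,    /* code/data (not system) */
               (hi >> 13) & 3,    /* privilege level        */
               (hi >> 15) & 1,    /* present                */
               (hi >> 21) & 1,    /* L: long-mode code      */
               (hi >> 22) & 1,    /* D/B default size       */
               (hi >> 23) & 1);   /* granularity            */
        /* expected: type=0xb s=1 dpl=0 p=1 l=1 d=0 g=1 */
        return 0;
}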
*/ + DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { ++ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, ++ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, ++ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, ++ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, ++ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, ++ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, ++} }; ++#else ++DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, +@@ -63,17 +103,168 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page + #endif + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, + } }; ++#endif + EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +- ++#ifdef CONFIG_X86_32 + static int cachesize_override __cpuinitdata = -1; + static int disable_x86_serial_nr __cpuinitdata = 1; + +-struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; ++static int __init cachesize_setup(char *str) ++{ ++ get_option(&str, &cachesize_override); ++ return 1; ++} ++__setup("cachesize=", cachesize_setup); ++ ++static int __init x86_fxsr_setup(char *s) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_FXSR); ++ setup_clear_cpu_cap(X86_FEATURE_XMM); ++ return 1; ++} ++__setup("nofxsr", x86_fxsr_setup); ++ ++static int __init x86_sep_setup(char *s) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SEP); ++ return 1; ++} ++__setup("nosep", x86_sep_setup); ++ ++/* Standard macro to see if a specific flag is changeable */ ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ u32 f1, f2; ++ ++ /* ++ * Cyrix and IDT cpus allow disabling of CPUID ++ * so the code below may return different results ++ * when it is executed before and after enabling ++ * the CPUID. Add "volatile" to not allow gcc to ++ * optimize the subsequent calls to this function. 
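The EFLAGS.ID probe that follows is self-contained enough to lift into 32-bit user space unchanged; a stand-alone copy for experimentation (GCC inline assembly, build with -m32):

#include <stdio.h>

#define X86_EFLAGS_ID 0x00200000        /* EFLAGS bit 21 */

static int flag_is_changeable(unsigned int flag)
{
        unsigned int f1, f2;

        /* toggle the flag in EFLAGS and read it back; if it sticks,
         * the CPU understands CPUID */
        asm volatile("pushfl\n\t"
                     "pushfl\n\t"
                     "popl %0\n\t"
                     "movl %0,%1\n\t"
                     "xorl %2,%0\n\t"
                     "pushl %0\n\t"
                     "popfl\n\t"
                     "pushfl\n\t"
                     "popl %0\n\t"
                     "popfl\n\t"
                     : "=&r" (f1), "=&r" (f2)
                     : "ir" (flag));

        return ((f1 ^ f2) & flag) != 0;
}

int main(void)
{
        printf("CPUID available: %d\n", flag_is_changeable(X86_EFLAGS_ID));
        return 0;
}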
++ */ ++ asm volatile ("pushfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "movl %0,%1\n\t" ++ "xorl %2,%0\n\t" ++ "pushl %0\n\t" ++ "popfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "popfl\n\t" ++ : "=&r" (f1), "=&r" (f2) ++ : "ir" (flag)); ++ ++ return ((f1^f2) & flag) != 0; ++} ++ ++/* Probe for the CPUID instruction */ ++static int __cpuinit have_cpuid_p(void) ++{ ++ return flag_is_changeable_p(X86_EFLAGS_ID); ++} ++ ++static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { ++ /* Disable processor serial number */ ++ unsigned long lo, hi; ++ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ printk(KERN_NOTICE "CPU serial number disabled.\n"); ++ clear_cpu_cap(c, X86_FEATURE_PN); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); ++ } ++} ++ ++static int __init x86_serial_nr_setup(char *s) ++{ ++ disable_x86_serial_nr = 0; ++ return 1; ++} ++__setup("serialnumber", x86_serial_nr_setup); ++#else ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ return 1; ++} ++/* Probe for the CPUID instruction */ ++static inline int have_cpuid_p(void) ++{ ++ return 1; ++} ++static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++} ++#endif ++ ++/* ++ * Naming convention should be: [()] ++ * This table only is used unless init_() below doesn't set it; ++ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used ++ * ++ */ ++ ++/* Look up CPU names by table lookup. */ ++static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++{ ++ struct cpu_model_info *info; ++ ++ if (c->x86_model >= 16) ++ return NULL; /* Range check */ ++ ++ if (!this_cpu) ++ return NULL; ++ ++ info = this_cpu->c_models; ++ ++ while (info && info->family) { ++ if (info->family == c->x86) ++ return info->model_names[c->x86_model]; ++ info++; ++ } ++ return NULL; /* Not found */ ++} ++ ++__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; ++ ++/* Current gdt points %fs at the "master" per-cpu area: after this, ++ * it's on the real one. */ ++void switch_to_new_gdt(void) ++{ ++ struct desc_ptr gdt_descr; ++ unsigned long va, frames[16]; ++ int f; ++ ++ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); ++ gdt_descr.size = GDT_SIZE - 1; ++ ++ for (va = gdt_descr.address, f = 0; ++ va < gdt_descr.address + gdt_descr.size; ++ va += PAGE_SIZE, f++) { ++ frames[f] = virt_to_mfn(va); ++ make_lowmem_page_readonly( ++ (void *)va, XENFEAT_writable_descriptor_tables); ++ } ++ if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8)) ++ BUG(); ++#ifdef CONFIG_X86_32 ++ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); ++#endif ++} ++ ++static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; + + static void __cpuinit default_init(struct cpuinfo_x86 *c) + { ++#ifdef CONFIG_X86_64 ++ display_cacheinfo(c); ++#else + /* Not much we can do here... 
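The Xen variant of switch_to_new_gdt() above follows a fixed recipe: carve the GDT into page frames, make each page read-only, and hand the machine frame numbers to the hypervisor. A sketch of that shape, with hypothetical stubs standing in for virt_to_mfn() and HYPERVISOR_set_gdt():

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define GDT_SIZE        (32 * 8)        /* example: 32 descriptors */

/* hypothetical stand-ins for virt_to_mfn() and HYPERVISOR_set_gdt() */
static unsigned long virt_to_mfn_stub(unsigned long va)
{
        return va / PAGE_SIZE;
}

static int set_gdt_stub(const unsigned long *frames, unsigned int entries)
{
        printf("registering %u descriptors, first frame %#lx\n",
               entries, frames[0]);
        return 0;
}

int main(void)
{
        static char gdt[GDT_SIZE] __attribute__((aligned(4096)));
        unsigned long frames[16], va;
        int f = 0;

        /* in the real function each covered page is also made read-only */
        for (va = (unsigned long)gdt;
             va < (unsigned long)gdt + GDT_SIZE; va += PAGE_SIZE, f++)
                frames[f] = virt_to_mfn_stub(va);

        return set_gdt_stub(frames, GDT_SIZE / 8);
}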
*/ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { +@@ -83,28 +274,22 @@ static void __cpuinit default_init(struc + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } ++#endif + } + + static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", ++ .c_x86_vendor = X86_VENDOR_UNKNOWN, + }; +-static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +- +-static int __init cachesize_setup(char *str) +-{ +- get_option(&str, &cachesize_override); +- return 1; +-} +-__setup("cachesize=", cachesize_setup); + +-int __cpuinit get_model_name(struct cpuinfo_x86 *c) ++static void __cpuinit get_model_name(struct cpuinfo_x86 *c) + { + unsigned int *v; + char *p, *q; + +- if (cpuid_eax(0x80000000) < 0x80000004) +- return 0; ++ if (c->extended_cpuid_level < 0x80000004) ++ return; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); +@@ -123,30 +308,34 @@ int __cpuinit get_model_name(struct cpui + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ + } +- +- return 1; + } + +- + void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) + { +- unsigned int n, dummy, ecx, edx, l2size; ++ unsigned int n, dummy, ebx, ecx, edx, l2size; + +- n = cpuid_eax(0x80000000); ++ n = c->extended_cpuid_level; + + if (n >= 0x80000005) { +- cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); ++ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", +- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); +- c->x86_cache_size = (ecx>>24)+(edx>>24); ++ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); ++ c->x86_cache_size = (ecx>>24) + (edx>>24); ++#ifdef CONFIG_X86_64 ++ /* On K8 L1 TLB is inclusive, so don't count it */ ++ c->x86_tlbsize = 0; ++#endif + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + +- ecx = cpuid_ecx(0x80000006); ++ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + l2size = ecx >> 16; + ++#ifdef CONFIG_X86_64 ++ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); ++#else + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c, l2size); +@@ -157,116 +346,106 @@ void __cpuinit display_cacheinfo(struct + + if (l2size == 0) + return; /* Again, no L2 cache is possible */ ++#endif + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", +- l2size, ecx & 0xFF); ++ l2size, ecx & 0xFF); + } + +-/* +- * Naming convention should be: <Name> [(<Codename>)] +- * This table only is used unless init_<vendor>() below doesn't set it; +- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used +- * +- */ +- +-/* Look up CPU names by table lookup.
*/ +-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++void __cpuinit detect_ht(struct cpuinfo_x86 *c) + { +- struct cpu_model_info *info; ++#ifdef CONFIG_X86_HT ++ u32 eax, ebx, ecx, edx; ++ int index_msb, core_bits; + +- if (c->x86_model >= 16) +- return NULL; /* Range check */ ++ if (!cpu_has(c, X86_FEATURE_HT)) ++ return; + +- if (!this_cpu) +- return NULL; ++ if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) ++ goto out; + +- info = this_cpu->c_models; ++ if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) ++ return; + +- while (info && info->family) { +- if (info->family == c->x86) +- return info->model_names[c->x86_model]; +- info++; ++ cpuid(1, &eax, &ebx, &ecx, &edx); ++ ++ smp_num_siblings = (ebx & 0xff0000) >> 16; ++ ++ if (smp_num_siblings == 1) { ++ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); ++ } else if (smp_num_siblings > 1) { ++ ++ if (smp_num_siblings > NR_CPUS) { ++ printk(KERN_WARNING "CPU: Unsupported number of siblings %d", ++ smp_num_siblings); ++ smp_num_siblings = 1; ++ return; ++ } ++ ++ index_msb = get_count_order(smp_num_siblings); ++#ifdef CONFIG_X86_64 ++ c->phys_proc_id = phys_pkg_id(index_msb); ++#else ++ c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); ++#endif ++ ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ ++ index_msb = get_count_order(smp_num_siblings); ++ ++ core_bits = get_count_order(c->x86_max_cores); ++ ++#ifdef CONFIG_X86_64 ++ c->cpu_core_id = phys_pkg_id(index_msb) & ++ ((1 << core_bits) - 1); ++#else ++ c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ++ ((1 << core_bits) - 1); ++#endif + } +- return NULL; /* Not found */ +-} + ++out: ++ if ((c->x86_max_cores * smp_num_siblings) > 1) { ++ printk(KERN_INFO "CPU: Physical Processor ID: %d\n", ++ c->phys_proc_id); ++ printk(KERN_INFO "CPU: Processor Core ID: %d\n", ++ c->cpu_core_id); ++ } ++#endif ++} + +-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) ++static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) + { + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { +- if (cpu_devs[i]) { +- if (!strcmp(v, cpu_devs[i]->c_ident[0]) || +- (cpu_devs[i]->c_ident[1] && +- !strcmp(v, cpu_devs[i]->c_ident[1]))) { +- c->x86_vendor = i; +- if (!early) +- this_cpu = cpu_devs[i]; +- return; +- } ++ if (!cpu_devs[i]) ++ break; ++ ++ if (!strcmp(v, cpu_devs[i]->c_ident[0]) || ++ (cpu_devs[i]->c_ident[1] && ++ !strcmp(v, cpu_devs[i]->c_ident[1]))) { ++ this_cpu = cpu_devs[i]; ++ c->x86_vendor = this_cpu->c_x86_vendor; ++ return; + } + } ++ + if (!printed) { + printed++; +- printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); ++ printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } ++ + c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; + } + +- +-static int __init x86_fxsr_setup(char *s) +-{ +- setup_clear_cpu_cap(X86_FEATURE_FXSR); +- setup_clear_cpu_cap(X86_FEATURE_XMM); +- return 1; +-} +-__setup("nofxsr", x86_fxsr_setup); +- +- +-static int __init x86_sep_setup(char *s) +-{ +- setup_clear_cpu_cap(X86_FEATURE_SEP); +- return 1; +-} +-__setup("nosep", x86_sep_setup); +- +- +-/* Standard macro to see if a specific flag is changeable */ +-static inline int flag_is_changeable_p(u32 flag) +-{ +- u32 f1, f2; +- +- asm("pushfl\n\t" +- "pushfl\n\t" +- "popl %0\n\t" +- "movl %0,%1\n\t" +- "xorl %2,%0\n\t" +- "pushl %0\n\t" +- "popfl\n\t" +- "pushfl\n\t" +- "popl %0\n\t" +- "popfl\n\t" +- : 
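
detect_ht() above derives both the package ID and the core ID by slicing the initial APIC ID into bit fields whose widths come from get_count_order(), i.e. ceil(log2(n)). A self-contained sketch of that arithmetic with made-up example values (the variable names mirror the kernel's, the numbers are ours):

#include <stdio.h>

/* Equivalent of the kernel's get_count_order(): ceil(log2(n)) */
static int count_order(unsigned int n)
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	/* Example: a package with 2 cores, 2 threads per core */
	unsigned int apicid = 0x5;	/* hypothetical initial APIC ID */
	unsigned int siblings = 4;	/* CPUID.1:EBX[23:16] */
	unsigned int max_cores = 2;

	int index_msb = count_order(siblings);
	unsigned int phys_proc_id = apicid >> index_msb;

	unsigned int threads = siblings / max_cores;
	int smt_bits = count_order(threads);
	int core_bits = count_order(max_cores);
	unsigned int cpu_core_id = (apicid >> smt_bits) &
				   ((1 << core_bits) - 1);

	/* Prints: APIC ID 0x5 -> package 1, core 0 */
	printf("APIC ID %#x -> package %u, core %u\n",
	       apicid, phys_proc_id, cpu_core_id);
	return 0;
}
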
"=&r" (f1), "=&r" (f2) +- : "ir" (flag)); +- +- return ((f1^f2) & flag) != 0; +-} +- +- +-/* Probe for the CPUID instruction */ +-static int __cpuinit have_cpuid_p(void) +-{ +- return flag_is_changeable_p(X86_EFLAGS_ID); +-} +- +-void __init cpu_detect(struct cpuinfo_x86 *c) ++void __cpuinit cpu_detect(struct cpuinfo_x86 *c) + { + /* Get vendor name */ + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, +@@ -275,50 +454,87 @@ void __init cpu_detect(struct cpuinfo_x8 + (unsigned int *)&c->x86_vendor_id[4]); + + c->x86 = 4; ++ /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); +- c->x86 = (tfms >> 8) & 15; +- c->x86_model = (tfms >> 4) & 15; ++ c->x86 = (tfms >> 8) & 0xf; ++ c->x86_model = (tfms >> 4) & 0xf; ++ c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) +- c->x86_model += ((tfms >> 16) & 0xF) << 4; +- c->x86_mask = tfms & 15; ++ c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { +- c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; ++ c->x86_cache_alignment = c->x86_clflush_size; + } + } + } +-static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) ++ ++static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) + { + u32 tfms, xlvl; +- unsigned int ebx; ++ u32 ebx; + +- memset(&c->x86_capability, 0, sizeof c->x86_capability); +- if (have_cpuid_p()) { +- /* Intel-defined flags: level 0x00000001 */ +- if (c->cpuid_level >= 0x00000001) { +- u32 capability, excap; +- cpuid(0x00000001, &tfms, &ebx, &excap, &capability); +- c->x86_capability[0] = capability; +- c->x86_capability[4] = excap; +- } ++ /* Intel-defined flags: level 0x00000001 */ ++ if (c->cpuid_level >= 0x00000001) { ++ u32 capability, excap; ++ cpuid(0x00000001, &tfms, &ebx, &excap, &capability); ++ c->x86_capability[0] = capability; ++ c->x86_capability[4] = excap; ++ } + +- /* AMD-defined flags: level 0x80000001 */ +- xlvl = cpuid_eax(0x80000000); +- if ((xlvl & 0xffff0000) == 0x80000000) { +- if (xlvl >= 0x80000001) { +- c->x86_capability[1] = cpuid_edx(0x80000001); +- c->x86_capability[6] = cpuid_ecx(0x80000001); +- } ++ /* AMD-defined flags: level 0x80000001 */ ++ xlvl = cpuid_eax(0x80000000); ++ c->extended_cpuid_level = xlvl; ++ if ((xlvl & 0xffff0000) == 0x80000000) { ++ if (xlvl >= 0x80000001) { ++ c->x86_capability[1] = cpuid_edx(0x80000001); ++ c->x86_capability[6] = cpuid_ecx(0x80000001); + } ++ } ++ ++#ifdef CONFIG_X86_64 ++ if (c->extended_cpuid_level >= 0x80000008) { ++ u32 eax = cpuid_eax(0x80000008); + ++ c->x86_virt_bits = (eax >> 8) & 0xff; ++ c->x86_phys_bits = eax & 0xff; + } ++#endif ++ ++ if (c->extended_cpuid_level >= 0x80000007) ++ c->x86_power = cpuid_edx(0x80000007); + + } + ++static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_32 ++ int i; ++ ++ /* ++ * First of all, decide if this is a 486 or higher ++ * It's a 486 if we can modify the AC flag ++ */ ++ if (flag_is_changeable_p(X86_EFLAGS_AC)) ++ c->x86 = 4; ++ else ++ c->x86 = 3; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) ++ if (cpu_devs[i] && cpu_devs[i]->c_identify) { ++ c->x86_vendor_id[0] = 0; ++ cpu_devs[i]->c_identify(c); ++ if (c->x86_vendor_id[0]) { ++ get_cpu_vendor(c); ++ break; ++ } ++ } ++#endif ++} ++ + /* + * Do minimum CPU detection early. 
+ * Fields really needed: vendor, cpuid_level, family, model, mask, +@@ -328,25 +544,65 @@ static void __cpuinit early_get_cap(stru + * WARNING: this function is only called on the BP. Don't add code here + * that is supposed to run on all CPUs. + */ +-static void __init early_cpu_detect(void) ++static void __init early_identify_cpu(struct cpuinfo_x86 *c) + { +- struct cpuinfo_x86 *c = &boot_cpu_data; +- +- c->x86_cache_alignment = 32; ++#ifdef CONFIG_X86_64 ++ c->x86_clflush_size = 64; ++#else + c->x86_clflush_size = 32; ++#endif ++ c->x86_cache_alignment = c->x86_clflush_size; ++ ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ c->extended_cpuid_level = 0; ++ ++ if (!have_cpuid_p()) ++ identify_cpu_without_cpuid(c); + ++ /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid_p()) + return; + + cpu_detect(c); + +- get_cpu_vendor(c, 1); ++ get_cpu_vendor(c); ++ ++ get_cpu_cap(c); ++ ++ if (this_cpu->c_early_init) ++ this_cpu->c_early_init(c); ++ ++ validate_pat_support(c); ++ ++#ifdef CONFIG_SMP ++ c->cpu_index = boot_cpu_id; ++#endif ++} ++ ++void __init early_cpu_init(void) ++{ ++ struct cpu_dev **cdev; ++ int count = 0; + +- early_get_cap(c); ++ printk("KERNEL supported cpus:\n"); ++ for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { ++ struct cpu_dev *cpudev = *cdev; ++ unsigned int j; ++ ++ if (count >= X86_VENDOR_NUM) ++ break; ++ cpu_devs[count] = cpudev; ++ count++; ++ ++ for (j = 0; j < 2; j++) { ++ if (!cpudev->c_ident[j]) ++ continue; ++ printk(" %s %s\n", cpudev->c_vendor, ++ cpudev->c_ident[j]); ++ } ++ } + +- if (c->x86_vendor != X86_VENDOR_UNKNOWN && +- cpu_devs[c->x86_vendor]->c_early_init) +- cpu_devs[c->x86_vendor]->c_early_init(c); ++ early_identify_cpu(&boot_cpu_data); + } + + /* +@@ -364,88 +620,41 @@ static void __cpuinit detect_nopl(struct + + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) + { +- u32 tfms, xlvl; +- unsigned int ebx; ++ c->extended_cpuid_level = 0; + +- if (have_cpuid_p()) { +- /* Get vendor name */ +- cpuid(0x00000000, (unsigned int *)&c->cpuid_level, +- (unsigned int *)&c->x86_vendor_id[0], +- (unsigned int *)&c->x86_vendor_id[8], +- (unsigned int *)&c->x86_vendor_id[4]); +- +- get_cpu_vendor(c, 0); +- /* Initialize the standard set of capabilities */ +- /* Note that the vendor-specific code below might override */ +- /* Intel-defined flags: level 0x00000001 */ +- if (c->cpuid_level >= 0x00000001) { +- u32 capability, excap; +- cpuid(0x00000001, &tfms, &ebx, &excap, &capability); +- c->x86_capability[0] = capability; +- c->x86_capability[4] = excap; +- c->x86 = (tfms >> 8) & 15; +- c->x86_model = (tfms >> 4) & 15; +- if (c->x86 == 0xf) +- c->x86 += (tfms >> 20) & 0xff; +- if (c->x86 >= 0x6) +- c->x86_model += ((tfms >> 16) & 0xF) << 4; +- c->x86_mask = tfms & 15; +- c->initial_apicid = (ebx >> 24) & 0xFF; +-#ifndef CONFIG_XEN +-#ifdef CONFIG_X86_HT +- c->apicid = phys_pkg_id(c->initial_apicid, 0); +- c->phys_proc_id = c->initial_apicid; +-#else +- c->apicid = c->initial_apicid; +-#endif +-#endif +- if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) +- c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; +- } else { +- /* Have CPUID level 0 only - unheard of */ +- c->x86 = 4; +- } ++ if (!have_cpuid_p()) ++ identify_cpu_without_cpuid(c); + +- /* AMD-defined flags: level 0x80000001 */ +- xlvl = cpuid_eax(0x80000000); +- if ((xlvl & 0xffff0000) == 0x80000000) { +- if (xlvl >= 0x80000001) { +- c->x86_capability[1] = cpuid_edx(0x80000001); +- c->x86_capability[6] = cpuid_ecx(0x80000001); +- } +- if (xlvl >= 
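
The seemingly shuffled &c->x86_vendor_id[0]/[8]/[4] destinations in cpu_detect() exist because CPUID leaf 0 returns the 12-byte vendor string in EBX, EDX, ECX order; get_cpu_vendor() then compares the reassembled string against each cpu_dev's c_ident entries. A user-space sketch of the same assembly, using GCC's cpuid.h:

#include <stdio.h>
#include <string.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return 1;

	memcpy(vendor + 0, &ebx, 4);	/* e.g. "Genu" */
	memcpy(vendor + 4, &edx, 4);	/* e.g. "ineI" */
	memcpy(vendor + 8, &ecx, 4);	/* e.g. "ntel" */
	vendor[12] = '\0';

	printf("max basic leaf %u, vendor \"%s\"\n", eax, vendor);
	return 0;
}
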
0x80000004) +- get_model_name(c); /* Default name */ +- } ++ /* cyrix could have cpuid enabled via c_identify()*/ ++ if (!have_cpuid_p()) ++ return; + +- init_scattered_cpuid_features(c); +- detect_nopl(c); +- } +-} ++ cpu_detect(c); + +-static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +-{ +- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { +- /* Disable processor serial number */ +- unsigned long lo, hi; +- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); +- lo |= 0x200000; +- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); +- printk(KERN_NOTICE "CPU serial number disabled.\n"); +- clear_cpu_cap(c, X86_FEATURE_PN); ++ get_cpu_vendor(c); + +- /* Disabling the serial number may affect the cpuid level */ +- c->cpuid_level = cpuid_eax(0); +- } +-} ++ get_cpu_cap(c); + +-static int __init x86_serial_nr_setup(char *s) +-{ +- disable_x86_serial_nr = 0; +- return 1; +-} +-__setup("serialnumber", x86_serial_nr_setup); ++ if (c->cpuid_level >= 0x00000001) { ++ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; ++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) ++# ifdef CONFIG_X86_HT ++ c->apicid = phys_pkg_id(c->initial_apicid, 0); ++# else ++ c->apicid = c->initial_apicid; ++# endif ++#endif ++ ++#ifdef CONFIG_X86_HT ++ c->phys_proc_id = c->initial_apicid; ++#endif ++ } + ++ get_model_name(c); /* Default name */ + ++ init_scattered_cpuid_features(c); ++ detect_nopl(c); ++} + + /* + * This does the hard work of actually picking apart the CPU stuff... +@@ -457,32 +666,31 @@ static void __cpuinit identify_cpu(struc + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; +- c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_max_cores = 1; ++ c->x86_coreid_bits = 0; ++#ifdef CONFIG_X86_64 ++ c->x86_clflush_size = 64; ++#else ++ c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; ++#endif ++ c->x86_cache_alignment = c->x86_clflush_size; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) + set_cpu_cap(c, X86_FEATURE_SYSCALL32); + +- if (!have_cpuid_p()) { +- /* +- * First of all, decide if this is a 486 or higher +- * It's a 486 if we can modify the AC flag +- */ +- if (flag_is_changeable_p(X86_EFLAGS_AC)) +- c->x86 = 4; +- else +- c->x86 = 3; +- } +- + generic_identify(c); + + if (this_cpu->c_identify) + this_cpu->c_identify(c); + ++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) ++ c->apicid = phys_pkg_id(0); ++#endif ++ + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are +@@ -516,6 +724,10 @@ static void __cpuinit identify_cpu(struc + c->x86, c->x86_model); + } + ++#ifdef CONFIG_X86_64 ++ detect_ht(c); ++#endif ++ + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are +@@ -524,7 +736,7 @@ static void __cpuinit identify_cpu(struc + */ + if (c != &boot_cpu_data) { + /* AND the already accumulated flags with these */ +- for (i = 0 ; i < NCAPINTS ; i++) ++ for (i = 0; i < NCAPINTS; i++) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + +@@ -532,72 +744,91 @@ static void __cpuinit identify_cpu(struc + for (i = 0; i < NCAPINTS; i++) + c->x86_capability[i] &= ~cleared_cpu_caps[i]; + ++#ifdef CONFIG_X86_MCE + /* Init Machine Check Exception if available. 
*/ + mcheck_init(c); ++#endif + + select_idle_routine(c); ++ ++#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) ++ numa_add_cpu(smp_processor_id()); ++#endif ++} ++ ++#ifdef CONFIG_X86_64 ++static void vgetcpu_set_mode(void) ++{ ++ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) ++ vgetcpu_mode = VGETCPU_RDTSCP; ++ else ++ vgetcpu_mode = VGETCPU_LSL; + } ++#endif + + void __init identify_boot_cpu(void) + { + identify_cpu(&boot_cpu_data); ++#ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); ++#else ++ vgetcpu_set_mode(); ++#endif + } + + void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) + { + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); ++#ifdef CONFIG_X86_32 + enable_sep_cpu(); ++#endif + mtrr_ap_init(); + } + +-#ifdef CONFIG_X86_HT +-void __cpuinit detect_ht(struct cpuinfo_x86 *c) +-{ +- u32 eax, ebx, ecx, edx; +- int index_msb, core_bits; +- +- cpuid(1, &eax, &ebx, &ecx, &edx); +- +- if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) +- return; ++struct msr_range { ++ unsigned min; ++ unsigned max; ++}; + +- smp_num_siblings = (ebx & 0xff0000) >> 16; ++static struct msr_range msr_range_array[] __cpuinitdata = { ++ { 0x00000000, 0x00000418}, ++ { 0xc0000000, 0xc000040b}, ++ { 0xc0010000, 0xc0010142}, ++ { 0xc0011000, 0xc001103b}, ++}; + +- if (smp_num_siblings == 1) { +- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); +- } else if (smp_num_siblings > 1) { ++static void __cpuinit print_cpu_msr(void) ++{ ++ unsigned index; ++ u64 val; ++ int i; ++ unsigned index_min, index_max; + +- if (smp_num_siblings > NR_CPUS) { +- printk(KERN_WARNING "CPU: Unsupported number of the " +- "siblings %d", smp_num_siblings); +- smp_num_siblings = 1; +- return; ++ for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { ++ index_min = msr_range_array[i].min; ++ index_max = msr_range_array[i].max; ++ for (index = index_min; index < index_max; index++) { ++ if (rdmsrl_amd_safe(index, &val)) ++ continue; ++ printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } ++ } ++} + +- index_msb = get_count_order(smp_num_siblings); +- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +- +- printk(KERN_INFO "CPU: Physical Processor ID: %d\n", +- c->phys_proc_id); +- +- smp_num_siblings = smp_num_siblings / c->x86_max_cores; +- +- index_msb = get_count_order(smp_num_siblings) ; +- +- core_bits = get_count_order(c->x86_max_cores); ++static int show_msr __cpuinitdata; ++static __init int setup_show_msr(char *arg) ++{ ++ int num; + +- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & +- ((1 << core_bits) - 1); ++ get_option(&arg, &num); + +- if (c->x86_max_cores > 1) +- printk(KERN_INFO "CPU: Processor Core ID: %d\n", +- c->cpu_core_id); +- } ++ if (num > 0) ++ show_msr = num; ++ return 1; + } +-#endif ++__setup("show_msr=", setup_show_msr); + + static __init int setup_noclflush(char *arg) + { +@@ -615,18 +846,26 @@ void __cpuinit print_cpu_info(struct cpu + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + +- if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) +- printk("%s ", vendor); ++ if (vendor && !strstr(c->x86_model_id, vendor)) ++ printk(KERN_CONT "%s ", vendor); + +- if (!c->x86_model_id[0]) +- printk("%d86", c->x86); ++ if (c->x86_model_id[0]) ++ printk(KERN_CONT "%s", c->x86_model_id); + else +- printk("%s", c->x86_model_id); ++ printk(KERN_CONT "%d86", c->x86); + + if (c->x86_mask || c->cpuid_level >= 0) +- printk(" stepping %02x\n", c->x86_mask); ++ printk(KERN_CONT " stepping %02x\n", c->x86_mask); + else +- 
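
print_cpu_msr() above walks fixed MSR ranges with a fault-safe read so that unimplemented registers are simply skipped. Outside the kernel, the same registers can be inspected through the msr driver (modprobe msr), where the file offset selects the MSR index; a minimal sketch, needs root, and the TSC index is just a widely readable example:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	uint32_t index = 0x10;	/* IA32_TIME_STAMP_COUNTER */
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* The pread() offset is the MSR index; a failed read means
	 * the MSR does not exist, mirroring rdmsrl_*_safe() above. */
	if (pread(fd, &val, sizeof(val), index) == sizeof(val))
		printf(" MSR%08x: %016llx\n", index,
		       (unsigned long long)val);
	close(fd);
	return 0;
}
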
printk("\n"); ++ printk(KERN_CONT "\n"); ++ ++#ifdef CONFIG_SMP ++ if (c->cpu_index < show_msr) ++ print_cpu_msr(); ++#else ++ if (show_msr) ++ print_cpu_msr(); ++#endif + } + + static __init int setup_disablecpuid(char *arg) +@@ -642,19 +881,124 @@ __setup("clearcpuid=", setup_disablecpui + + cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +-void __init early_cpu_init(void) ++#ifdef CONFIG_X86_64 ++struct x8664_pda **_cpu_pda __read_mostly; ++EXPORT_SYMBOL(_cpu_pda); ++ ++#ifndef CONFIG_X86_NO_IDT ++struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; ++#endif ++ ++char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; ++ ++static void __ref switch_pt(int cpu) ++{ ++#ifdef CONFIG_XEN ++ if (cpu == 0) ++ xen_init_pt(); ++ xen_pt_switch(__pa_symbol(init_level4_pgt)); ++ xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); ++#endif ++} ++ ++void __cpuinit pda_init(int cpu) ++{ ++ struct x8664_pda *pda = cpu_pda(cpu); ++ ++ /* Setup up data that may be needed in __get_free_pages early */ ++ loadsegment(fs, 0); ++ loadsegment(gs, 0); ++#ifndef CONFIG_XEN ++ /* Memory clobbers used to order PDA accessed */ ++ mb(); ++ wrmsrl(MSR_GS_BASE, pda); ++ mb(); ++#else ++ if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, ++ (unsigned long)pda)) ++ BUG(); ++#endif ++ ++ pda->cpunumber = cpu; ++ pda->irqcount = -1; ++ pda->kernelstack = (unsigned long)stack_thread_info() - ++ PDA_STACKOFFSET + THREAD_SIZE; ++ pda->active_mm = &init_mm; ++ pda->mmu_state = 0; ++ ++ if (cpu == 0) { ++ /* others are initialized in smpboot.c */ ++ pda->pcurrent = &init_task; ++ pda->irqstackptr = boot_cpu_stack; ++ pda->irqstackptr += IRQSTACKSIZE - 64; ++ } else { ++ if (!pda->irqstackptr) { ++ pda->irqstackptr = (char *) ++ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); ++ if (!pda->irqstackptr) ++ panic("cannot allocate irqstack for cpu %d", ++ cpu); ++ pda->irqstackptr += IRQSTACKSIZE - 64; ++ } ++ ++ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) ++ pda->nodenumber = cpu_to_node(cpu); ++ } ++ ++ switch_pt(cpu); ++} ++ ++#ifndef CONFIG_X86_NO_TSS ++char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + ++ DEBUG_STKSZ] __page_aligned_bss; ++#endif ++ ++extern asmlinkage void ignore_sysret(void); ++ ++void __cpuinit syscall_init(void) + { +- struct cpu_vendor_dev *cvdev; ++#ifndef CONFIG_XEN ++ /* ++ * LSTAR and STAR live in a bit strange symbiosis. ++ * They both write to the same internal register. STAR allows to ++ * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. 
++ */ ++ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); ++ wrmsrl(MSR_LSTAR, system_call); ++ wrmsrl(MSR_CSTAR, ignore_sysret); ++#endif + +- for (cvdev = __x86cpuvendor_start ; +- cvdev < __x86cpuvendor_end ; +- cvdev++) +- cpu_devs[cvdev->vendor] = cvdev->cpu_dev; ++#ifdef CONFIG_IA32_EMULATION ++ syscall32_cpu_init(); ++#elif defined(CONFIG_XEN) ++ static const struct callback_register __cpuinitconst cstar = { ++ .type = CALLBACKTYPE_syscall32, ++ .address = (unsigned long)ignore_sysret ++ }; + +- early_cpu_detect(); +- validate_pat_support(&boot_cpu_data); ++ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) ++ printk(KERN_WARNING "Unable to register CSTAR callback\n"); ++#endif ++ ++#ifndef CONFIG_XEN ++ /* Flags to clear on syscall */ ++ wrmsrl(MSR_SYSCALL_MASK, ++ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); ++#endif + } + ++unsigned long kernel_eflags; ++ ++#ifndef CONFIG_X86_NO_TSS ++/* ++ * Copies of the original ist values from the tss are only accessed during ++ * debugging, no special alignment required. ++ */ ++DEFINE_PER_CPU(struct orig_ist, orig_ist); ++#endif ++ ++#else ++ + /* Make sure %fs is initialized properly in idle threads */ + struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) + { +@@ -662,36 +1006,154 @@ struct pt_regs * __cpuinit idle_regs(str + regs->fs = __KERNEL_PERCPU; + return regs; + } ++#endif + +-/* Current gdt points %fs at the "master" per-cpu area: after this, +- * it's on the real one. */ +-void switch_to_new_gdt(void) ++/* ++ * cpu_init() initializes state that is per-CPU. Some data is already ++ * initialized (naturally) in the bootstrap process, such as the GDT ++ * and IDT. We reload them nevertheless, this function acts as a ++ * 'CPU state barrier', nothing should get across. 
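
The "strange symbiosis" comment above refers to how a single MSR carries two selector bases: on SYSCALL the CPU loads CS from STAR[47:32] (SS follows at that value plus 8), while SYSRET derives the user selectors from STAR[63:48]. A sketch of the packing; 0x10 and 0x23 are illustrative values matching the usual Linux x86-64 GDT layout, not taken from this patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t user32_cs = 0x23;	/* typical __USER32_CS */
	uint64_t kernel_cs = 0x10;	/* typical __KERNEL_CS */
	uint64_t star = (user32_cs << 48) | (kernel_cs << 32);

	/* Prints: MSR_STAR = 0023001000000000 */
	printf("MSR_STAR = %016llx\n", (unsigned long long)star);
	return 0;
}
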
++ * A lot of state is already set up in PDA init for 64 bit ++ */ ++#ifdef CONFIG_X86_64 ++void __cpuinit cpu_init(void) + { +- struct desc_ptr gdt_descr; +- unsigned long va, frames[16]; +- int f; ++ int cpu = stack_smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); ++ unsigned long v; ++ char *estacks = NULL; ++ int i; ++#endif ++ struct task_struct *me; + +- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); +- gdt_descr.size = GDT_SIZE - 1; ++ /* CPU 0 is initialised in head64.c */ ++ if (cpu != 0) ++ pda_init(cpu); ++#ifndef CONFIG_X86_NO_TSS ++ else ++ estacks = boot_exception_stacks; ++#endif + +- for (va = gdt_descr.address, f = 0; +- va < gdt_descr.address + gdt_descr.size; +- va += PAGE_SIZE, f++) { +- frames[f] = virt_to_mfn(va); +- make_lowmem_page_readonly( +- (void *)va, XENFEAT_writable_descriptor_tables); ++ me = current; ++ ++ if (cpu_test_and_set(cpu, cpu_initialized)) ++ panic("CPU#%d already initialized!\n", cpu); ++ ++ printk(KERN_INFO "Initializing CPU#%d\n", cpu); ++ ++ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ ++ /* ++ * Initialize the per-CPU GDT with the boot GDT, ++ * and set up the GDT descriptor: ++ */ ++ ++ switch_to_new_gdt(); ++#ifndef CONFIG_X86_NO_IDT ++ load_idt((const struct desc_ptr *)&idt_descr); ++#endif ++ ++ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); ++ syscall_init(); ++ ++ wrmsrl(MSR_FS_BASE, 0); ++ wrmsrl(MSR_KERNEL_GS_BASE, 0); ++ barrier(); ++ ++ check_efer(); ++#ifndef CONFIG_XEN ++ if (cpu != 0 && x2apic) ++ enable_x2apic(); ++#endif ++ ++#ifndef CONFIG_X86_NO_TSS ++ /* ++ * set up and load the per-CPU TSS ++ */ ++ if (!orig_ist->ist[0]) { ++ static const unsigned int order[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, ++ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER ++ }; ++ for (v = 0; v < N_EXCEPTION_STACKS; v++) { ++ if (cpu) { ++ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); ++ if (!estacks) ++ panic("Cannot allocate exception " ++ "stack %ld %d\n", v, cpu); ++ } ++ estacks += PAGE_SIZE << order[v]; ++ orig_ist->ist[v] = t->x86_tss.ist[v] = ++ (unsigned long)estacks; ++ } + } +- if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8)) ++ ++ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++ /* ++ * <= is required because the CPU will access up to ++ * 8 bits beyond the end of the IO permission bitmap. ++ */ ++ for (i = 0; i <= IO_BITMAP_LONGS; i++) ++ t->io_bitmap[i] = ~0UL; ++#endif ++ ++ atomic_inc(&init_mm.mm_count); ++ me->active_mm = &init_mm; ++ if (me->mm) + BUG(); +- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); ++ enter_lazy_tlb(&init_mm, me); ++ ++ load_sp0(t, ¤t->thread); ++#ifndef CONFIG_X86_NO_TSS ++ set_tss_desc(cpu, t); ++ load_TR_desc(); ++#endif ++ load_LDT(&init_mm.context); ++ ++#ifdef CONFIG_KGDB ++ /* ++ * If the kgdb is connected no debug regs should be altered. This ++ * is only applicable when KGDB and a KGDB I/O module are built ++ * into the kernel and you are using early debugging with ++ * kgdbwait. KGDB will control the kernel HW breakpoint registers. 
++ */ ++ if (kgdb_connected && arch_kgdb_ops.correct_hw_break) ++ arch_kgdb_ops.correct_hw_break(); ++ else { ++#endif ++ /* ++ * Clear all 6 debug registers: ++ */ ++ ++ set_debugreg(0UL, 0); ++ set_debugreg(0UL, 1); ++ set_debugreg(0UL, 2); ++ set_debugreg(0UL, 3); ++ set_debugreg(0UL, 6); ++ set_debugreg(0UL, 7); ++#ifdef CONFIG_KGDB ++ /* If the kgdb is connected no debug regs should be altered. */ ++ } ++#endif ++ ++ fpu_init(); ++ ++#ifndef CONFIG_XEN ++ raw_local_save_flags(kernel_eflags); ++#else ++ asm ("pushfq; popq %0" : "=rm" (kernel_eflags)); ++ if (raw_irqs_disabled()) ++ kernel_eflags &= ~X86_EFLAGS_IF; ++#endif ++ ++ if (is_uv_system()) ++ uv_cpu_init(); + } + +-/* +- * cpu_init() initializes state that is per-CPU. Some data is already +- * initialized (naturally) in the bootstrap process, such as the GDT +- * and IDT. We reload them nevertheless, this function acts as a +- * 'CPU state barrier', nothing should get across. +- */ ++#else ++ + void __cpuinit cpu_init(void) + { + int cpu = smp_processor_id(); +@@ -745,19 +1207,21 @@ void __cpuinit cpu_init(void) + /* + * Force FPU initialization: + */ +- current_thread_info()->status = 0; ++ if (cpu_has_xsave) ++ current_thread_info()->status = TS_XSAVE; ++ else ++ current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); +-} + +-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN) +-void __cpuinit cpu_uninit(void) +-{ +- int cpu = raw_smp_processor_id(); +- cpu_clear(cpu, cpu_initialized); ++ /* ++ * Boot processor to setup the FP and extended state context info. ++ */ ++ if (smp_processor_id() == boot_cpu_id) ++ init_thread_xstate(); + +- /* lazy TLB state */ +- per_cpu(cpu_tlbstate, cpu).state = 0; +- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; ++ xsave_init(); + } ++ ++ + #endif +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,773 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#ifdef CONFIG_X86_LOCAL_APIC +-#include +-#include +-#include +-#elif defined(CONFIG_XEN) +-#include +-#endif +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "cpu.h" +- +-/* We need valid kernel segments for data and code in long mode too +- * IRET will check the segment types kkeil 2000/10/28 +- * Also sysret mandates a special GDT layout +- */ +-/* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) */ +-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, +- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +-} }; +-EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +- +-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +- +-/* Current gdt points %fs at the "master" per-cpu area: after this, +- * it's on the real one. 
*/ +-void switch_to_new_gdt(void) +-{ +-#ifndef CONFIG_XEN +- struct desc_ptr gdt_descr; +- +- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); +- gdt_descr.size = GDT_SIZE - 1; +- load_gdt(&gdt_descr); +-#else +- void *va, *gdt_addr = get_cpu_gdt_table(smp_processor_id()); +- unsigned long frames[16]; +- unsigned int f = 0; +- +- for (va = gdt_addr; va < gdt_addr + GDT_SIZE; va += PAGE_SIZE) { +- frames[f++] = virt_to_mfn(va); +- make_page_readonly(va, XENFEAT_writable_descriptor_tables); +- } +- if (HYPERVISOR_set_gdt(frames, GDT_SIZE / sizeof(struct desc_struct))) +- BUG(); +-#endif +-} +- +-struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +- +-static void __cpuinit default_init(struct cpuinfo_x86 *c) +-{ +- display_cacheinfo(c); +-} +- +-static struct cpu_dev __cpuinitdata default_cpu = { +- .c_init = default_init, +- .c_vendor = "Unknown", +-}; +-static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +- +-int __cpuinit get_model_name(struct cpuinfo_x86 *c) +-{ +- unsigned int *v; +- +- if (c->extended_cpuid_level < 0x80000004) +- return 0; +- +- v = (unsigned int *) c->x86_model_id; +- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); +- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); +- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); +- c->x86_model_id[48] = 0; +- return 1; +-} +- +- +-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +-{ +- unsigned int n, dummy, ebx, ecx, edx; +- +- n = c->extended_cpuid_level; +- +- if (n >= 0x80000005) { +- cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); +- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " +- "D cache %dK (%d bytes/line)\n", +- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); +- c->x86_cache_size = (ecx>>24) + (edx>>24); +- /* On K8 L1 TLB is inclusive, so don't count it */ +- c->x86_tlbsize = 0; +- } +- +- if (n >= 0x80000006) { +- cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); +- ecx = cpuid_ecx(0x80000006); +- c->x86_cache_size = ecx >> 16; +- c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +- +- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", +- c->x86_cache_size, ecx & 0xFF); +- } +-} +- +-void __cpuinit detect_ht(struct cpuinfo_x86 *c) +-{ +-#ifdef CONFIG_SMP +- u32 eax, ebx, ecx, edx; +- int index_msb, core_bits; +- +- cpuid(1, &eax, &ebx, &ecx, &edx); +- +- +- if (!cpu_has(c, X86_FEATURE_HT)) +- return; +- if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) +- goto out; +- +- smp_num_siblings = (ebx & 0xff0000) >> 16; +- +- if (smp_num_siblings == 1) { +- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); +- } else if (smp_num_siblings > 1) { +- +- if (smp_num_siblings > NR_CPUS) { +- printk(KERN_WARNING "CPU: Unsupported number of " +- "siblings %d", smp_num_siblings); +- smp_num_siblings = 1; +- return; +- } +- +- index_msb = get_count_order(smp_num_siblings); +- c->phys_proc_id = phys_pkg_id(index_msb); +- +- smp_num_siblings = smp_num_siblings / c->x86_max_cores; +- +- index_msb = get_count_order(smp_num_siblings); +- +- core_bits = get_count_order(c->x86_max_cores); +- +- c->cpu_core_id = phys_pkg_id(index_msb) & +- ((1 << core_bits) - 1); +- } +-out: +- if ((c->x86_max_cores * smp_num_siblings) > 1) { +- printk(KERN_INFO "CPU: Physical Processor ID: %d\n", +- c->phys_proc_id); +- printk(KERN_INFO "CPU: Processor Core ID: %d\n", +- c->cpu_core_id); +- } +- +-#endif +-} +- +-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) +-{ +- char *v = c->x86_vendor_id; +- int i; +- static int printed; +- +- for (i = 0; i < X86_VENDOR_NUM; i++) { +- if (cpu_devs[i]) { +- if (!strcmp(v, 
cpu_devs[i]->c_ident[0]) || +- (cpu_devs[i]->c_ident[1] && +- !strcmp(v, cpu_devs[i]->c_ident[1]))) { +- c->x86_vendor = i; +- this_cpu = cpu_devs[i]; +- return; +- } +- } +- } +- if (!printed) { +- printed++; +- printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); +- printk(KERN_ERR "CPU: Your system may be unstable.\n"); +- } +- c->x86_vendor = X86_VENDOR_UNKNOWN; +-} +- +-static void __init early_cpu_support_print(void) +-{ +- int i,j; +- struct cpu_dev *cpu_devx; +- +- printk("KERNEL supported cpus:\n"); +- for (i = 0; i < X86_VENDOR_NUM; i++) { +- cpu_devx = cpu_devs[i]; +- if (!cpu_devx) +- continue; +- for (j = 0; j < 2; j++) { +- if (!cpu_devx->c_ident[j]) +- continue; +- printk(" %s %s\n", cpu_devx->c_vendor, +- cpu_devx->c_ident[j]); +- } +- } +-} +- +-/* +- * The NOPL instruction is supposed to exist on all CPUs with +- * family >= 6, unfortunately, that's not true in practice because +- * of early VIA chips and (more importantly) broken virtualizers that +- * are not easy to detect. Hence, probe for it based on first +- * principles. +- * +- * Note: no 64-bit chip is known to lack these, but put the code here +- * for consistency with 32 bits, and to make it utterly trivial to +- * diagnose the problem should it ever surface. +- */ +-static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +-{ +- const u32 nopl_signature = 0x888c53b1; /* Random number */ +- u32 has_nopl = nopl_signature; +- +- clear_cpu_cap(c, X86_FEATURE_NOPL); +- if (c->x86 >= 6) { +- asm volatile("\n" +- "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ +- "2:\n" +- " .section .fixup,\"ax\"\n" +- "3: xor %0,%0\n" +- " jmp 2b\n" +- " .previous\n" +- _ASM_EXTABLE(1b,3b) +- : "+a" (has_nopl)); +- +- if (has_nopl == nopl_signature) +- set_cpu_cap(c, X86_FEATURE_NOPL); +- } +-} +- +-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); +- +-void __init early_cpu_init(void) +-{ +- struct cpu_vendor_dev *cvdev; +- +- for (cvdev = __x86cpuvendor_start ; +- cvdev < __x86cpuvendor_end ; +- cvdev++) +- cpu_devs[cvdev->vendor] = cvdev->cpu_dev; +- early_cpu_support_print(); +- early_identify_cpu(&boot_cpu_data); +-} +- +-/* Do some early cpuid on the boot CPU to get some parameter that are +- needed before check_bugs. Everything advanced is in identify_cpu +- below. */ +-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) +-{ +- u32 tfms, xlvl; +- +- c->loops_per_jiffy = loops_per_jiffy; +- c->x86_cache_size = -1; +- c->x86_vendor = X86_VENDOR_UNKNOWN; +- c->x86_model = c->x86_mask = 0; /* So far unknown... 
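
detect_nopl() above probes the instruction directly, because broken virtualizers advertise family >= 6 without implementing NOPL; the kernel recovers from the resulting #UD through an exception-table fixup. The same probe can be run from user space with SIGILL standing in for the fixup (a one-shot sketch, not a robust feature-detection library):

#include <stdio.h>
#include <signal.h>
#include <setjmp.h>

static sigjmp_buf env;

static void on_sigill(int sig)
{
	(void)sig;
	siglongjmp(env, 1);
}

int main(void)
{
	signal(SIGILL, on_sigill);

	if (sigsetjmp(env, 1) == 0) {
		asm volatile(".byte 0x0f, 0x1f, 0xc0");	/* nopl %eax */
		puts("NOPL supported");
	} else {
		puts("NOPL not supported (SIGILL)");
	}
	return 0;
}
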
*/ +- c->x86_vendor_id[0] = '\0'; /* Unset */ +- c->x86_model_id[0] = '\0'; /* Unset */ +- c->x86_clflush_size = 64; +- c->x86_cache_alignment = c->x86_clflush_size; +- c->x86_max_cores = 1; +- c->x86_coreid_bits = 0; +- c->extended_cpuid_level = 0; +- memset(&c->x86_capability, 0, sizeof c->x86_capability); +- +- /* Get vendor name */ +- cpuid(0x00000000, (unsigned int *)&c->cpuid_level, +- (unsigned int *)&c->x86_vendor_id[0], +- (unsigned int *)&c->x86_vendor_id[8], +- (unsigned int *)&c->x86_vendor_id[4]); +- +- get_cpu_vendor(c); +- +- /* Initialize the standard set of capabilities */ +- /* Note that the vendor-specific code below might override */ +- +- /* Intel-defined flags: level 0x00000001 */ +- if (c->cpuid_level >= 0x00000001) { +- __u32 misc; +- cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], +- &c->x86_capability[0]); +- c->x86 = (tfms >> 8) & 0xf; +- c->x86_model = (tfms >> 4) & 0xf; +- c->x86_mask = tfms & 0xf; +- if (c->x86 == 0xf) +- c->x86 += (tfms >> 20) & 0xff; +- if (c->x86 >= 0x6) +- c->x86_model += ((tfms >> 16) & 0xF) << 4; +- if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) +- c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; +- } else { +- /* Have CPUID level 0 only - unheard of */ +- c->x86 = 4; +- } +- +- c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +-#ifdef CONFIG_SMP +- c->phys_proc_id = c->initial_apicid; +-#endif +- /* AMD-defined flags: level 0x80000001 */ +- xlvl = cpuid_eax(0x80000000); +- c->extended_cpuid_level = xlvl; +- if ((xlvl & 0xffff0000) == 0x80000000) { +- if (xlvl >= 0x80000001) { +- c->x86_capability[1] = cpuid_edx(0x80000001); +- c->x86_capability[6] = cpuid_ecx(0x80000001); +- } +- if (xlvl >= 0x80000004) +- get_model_name(c); /* Default name */ +- } +- +- /* Transmeta-defined flags: level 0x80860001 */ +- xlvl = cpuid_eax(0x80860000); +- if ((xlvl & 0xffff0000) == 0x80860000) { +- /* Don't set x86_cpuid_level here for now to not confuse. */ +- if (xlvl >= 0x80860001) +- c->x86_capability[2] = cpuid_edx(0x80860001); +- } +- +- if (c->extended_cpuid_level >= 0x80000007) +- c->x86_power = cpuid_edx(0x80000007); +- +- if (c->extended_cpuid_level >= 0x80000008) { +- u32 eax = cpuid_eax(0x80000008); +- +- c->x86_virt_bits = (eax >> 8) & 0xff; +- c->x86_phys_bits = eax & 0xff; +- } +- +- detect_nopl(c); +- +- if (c->x86_vendor != X86_VENDOR_UNKNOWN && +- cpu_devs[c->x86_vendor]->c_early_init) +- cpu_devs[c->x86_vendor]->c_early_init(c); +- +- validate_pat_support(c); +-} +- +-/* +- * This does the hard work of actually picking apart the CPU stuff... +- */ +-static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +-{ +- int i; +- +- early_identify_cpu(c); +- +- init_scattered_cpuid_features(c); +- +-#ifndef CONFIG_XEN +- c->apicid = phys_pkg_id(0); +-#endif +- +- /* +- * Vendor-specific initialization. In this section we +- * canonicalize the feature flags, meaning if there are +- * features a certain CPU supports which CPUID doesn't +- * tell us, CPUID claiming incorrect flags, or other bugs, +- * we handle them here. +- * +- * At the end of this section, c->x86_capability better +- * indicate the features this CPU genuinely supports! +- */ +- if (this_cpu->c_init) +- this_cpu->c_init(c); +- +- detect_ht(c); +- +- /* +- * On SMP, boot_cpu_data holds the common feature set between +- * all CPUs; so make sure that we indicate which features are +- * common between the CPUs. The first time this routine gets +- * executed, c == &boot_cpu_data. 
+- */ +- if (c != &boot_cpu_data) { +- /* AND the already accumulated flags with these */ +- for (i = 0; i < NCAPINTS; i++) +- boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; +- } +- +- /* Clear all flags overriden by options */ +- for (i = 0; i < NCAPINTS; i++) +- c->x86_capability[i] &= ~cleared_cpu_caps[i]; +- +-#ifdef CONFIG_X86_MCE +- mcheck_init(c); +-#endif +- select_idle_routine(c); +- +-#ifdef CONFIG_NUMA +- numa_add_cpu(smp_processor_id()); +-#endif +- +-} +- +-void __cpuinit identify_boot_cpu(void) +-{ +- identify_cpu(&boot_cpu_data); +-} +- +-void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +-{ +- BUG_ON(c == &boot_cpu_data); +- identify_cpu(c); +- mtrr_ap_init(); +-} +- +-static __init int setup_noclflush(char *arg) +-{ +- setup_clear_cpu_cap(X86_FEATURE_CLFLSH); +- return 1; +-} +-__setup("noclflush", setup_noclflush); +- +-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +-{ +- if (c->x86_model_id[0]) +- printk(KERN_CONT "%s", c->x86_model_id); +- +- if (c->x86_mask || c->cpuid_level >= 0) +- printk(KERN_CONT " stepping %02x\n", c->x86_mask); +- else +- printk(KERN_CONT "\n"); +-} +- +-static __init int setup_disablecpuid(char *arg) +-{ +- int bit; +- if (get_option(&arg, &bit) && bit < NCAPINTS*32) +- setup_clear_cpu_cap(bit); +- else +- return 0; +- return 1; +-} +-__setup("clearcpuid=", setup_disablecpuid); +- +-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +- +-struct x8664_pda **_cpu_pda __read_mostly; +-EXPORT_SYMBOL(_cpu_pda); +- +-#ifndef CONFIG_X86_NO_IDT +-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; +-#endif +- +-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +- +-unsigned long __supported_pte_mask __read_mostly = ~0UL; +-EXPORT_SYMBOL_GPL(__supported_pte_mask); +- +-static int do_not_nx __cpuinitdata; +- +-/* noexec=on|off +-Control non executable mappings for 64bit processes. +- +-on Enable(default) +-off Disable +-*/ +-static int __init nonx_setup(char *str) +-{ +- if (!str) +- return -EINVAL; +- if (!strncmp(str, "on", 2)) { +- __supported_pte_mask |= _PAGE_NX; +- do_not_nx = 0; +- } else if (!strncmp(str, "off", 3)) { +- do_not_nx = 1; +- __supported_pte_mask &= ~_PAGE_NX; +- } +- return 0; +-} +-early_param("noexec", nonx_setup); +- +-int force_personality32; +- +-/* noexec32=on|off +-Control non executable heap for 32bit processes. 
+-To control the stack too use noexec=off +- +-on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +-off PROT_READ implies PROT_EXEC +-*/ +-static int __init nonx32_setup(char *str) +-{ +- if (!strcmp(str, "on")) +- force_personality32 &= ~READ_IMPLIES_EXEC; +- else if (!strcmp(str, "off")) +- force_personality32 |= READ_IMPLIES_EXEC; +- return 1; +-} +-__setup("noexec32=", nonx32_setup); +- +-static void __init_refok switch_pt(int cpu) +-{ +-#ifdef CONFIG_XEN +- if (cpu == 0) +- xen_init_pt(); +- xen_pt_switch(__pa_symbol(init_level4_pgt)); +- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); +-#endif +-} +- +-void pda_init(int cpu) +-{ +- struct x8664_pda *pda = cpu_pda(cpu); +- +- /* Setup up data that may be needed in __get_free_pages early */ +- loadsegment(fs, 0); +- loadsegment(gs, 0); +-#ifndef CONFIG_XEN +- /* Memory clobbers used to order PDA accessed */ +- mb(); +- wrmsrl(MSR_GS_BASE, pda); +- mb(); +-#else +- if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, +- (unsigned long)pda)) +- BUG(); +-#endif +- +- pda->cpunumber = cpu; +- pda->irqcount = -1; +- pda->kernelstack = (unsigned long)stack_thread_info() - +- PDA_STACKOFFSET + THREAD_SIZE; +- pda->active_mm = &init_mm; +- pda->mmu_state = 0; +- +- if (cpu == 0) { +- /* others are initialized in smpboot.c */ +- pda->pcurrent = &init_task; +- pda->irqstackptr = boot_cpu_stack; +- pda->irqstackptr += IRQSTACKSIZE - 64; +- } else { +- if (!pda->irqstackptr) { +- pda->irqstackptr = (char *) +- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); +- if (!pda->irqstackptr) +- panic("cannot allocate irqstack for cpu %d", +- cpu); +- pda->irqstackptr += IRQSTACKSIZE - 64; +- } +- +- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) +- pda->nodenumber = cpu_to_node(cpu); +- } +- +- switch_pt(cpu); +-} +- +-#ifndef CONFIG_X86_NO_TSS +-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + +- DEBUG_STKSZ] __page_aligned_bss; +-#endif +- +-extern asmlinkage void ignore_sysret(void); +- +-void __cpuinit syscall_init(void) +-{ +-#ifndef CONFIG_XEN +- /* +- * LSTAR and STAR live in a bit strange symbiosis. +- * They both write to the same internal register. STAR allows to +- * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. +- */ +- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); +- wrmsrl(MSR_LSTAR, system_call); +- wrmsrl(MSR_CSTAR, ignore_sysret); +- +- /* Flags to clear on syscall */ +- wrmsrl(MSR_SYSCALL_MASK, +- X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +-#endif +-#ifdef CONFIG_IA32_EMULATION +- syscall32_cpu_init(); +-#else +- static const struct callback_register __cpuinitconst cstar = { +- .type = CALLBACKTYPE_syscall32, +- .address = (unsigned long)ignore_sysret +- }; +- +- if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) +- printk(KERN_WARNING "Unable to register CSTAR callback\n"); +-#endif +-} +- +-void __cpuinit check_efer(void) +-{ +- unsigned long efer; +- +- rdmsrl(MSR_EFER, efer); +- if (!(efer & EFER_NX) || do_not_nx) +- __supported_pte_mask &= ~_PAGE_NX; +-} +- +-unsigned long kernel_eflags; +- +-#ifndef CONFIG_X86_NO_TSS +-/* +- * Copies of the original ist values from the tss are only accessed during +- * debugging, no special alignment required. +- */ +-DEFINE_PER_CPU(struct orig_ist, orig_ist); +-#endif +- +-/* +- * cpu_init() initializes state that is per-CPU. Some data is already +- * initialized (naturally) in the bootstrap process, such as the GDT +- * and IDT. 
We reload them nevertheless, this function acts as a +- * 'CPU state barrier', nothing should get across. +- * A lot of state is already set up in PDA init. +- */ +-void __cpuinit cpu_init(void) +-{ +- int cpu = stack_smp_processor_id(); +-#ifndef CONFIG_X86_NO_TSS +- struct tss_struct *t = &per_cpu(init_tss, cpu); +- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); +- unsigned long v; +- char *estacks = NULL; +- int i; +-#endif +- struct task_struct *me; +- +- /* CPU 0 is initialised in head64.c */ +- if (cpu != 0) +- pda_init(cpu); +-#ifndef CONFIG_X86_NO_TSS +- else +- estacks = boot_exception_stacks; +-#endif +- +- me = current; +- +- if (cpu_test_and_set(cpu, cpu_initialized)) +- panic("CPU#%d already initialized!\n", cpu); +- +- printk(KERN_INFO "Initializing CPU#%d\n", cpu); +- +- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); +- +- /* +- * Initialize the per-CPU GDT with the boot GDT, +- * and set up the GDT descriptor: +- */ +- +- switch_to_new_gdt(); +-#ifndef CONFIG_X86_NO_IDT +- load_idt((const struct desc_ptr *)&idt_descr); +-#endif +- +- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); +- syscall_init(); +- +- wrmsrl(MSR_FS_BASE, 0); +- wrmsrl(MSR_KERNEL_GS_BASE, 0); +- barrier(); +- +- check_efer(); +- +-#ifndef CONFIG_X86_NO_TSS +- /* +- * set up and load the per-CPU TSS +- */ +- if (!orig_ist->ist[0]) { +- static const unsigned int order[N_EXCEPTION_STACKS] = { +- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, +- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER +- }; +- for (v = 0; v < N_EXCEPTION_STACKS; v++) { +- if (cpu) { +- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); +- if (!estacks) +- panic("Cannot allocate exception " +- "stack %ld %d\n", v, cpu); +- } +- estacks += PAGE_SIZE << order[v]; +- orig_ist->ist[v] = t->x86_tss.ist[v] = +- (unsigned long)estacks; +- } +- } +- +- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); +- /* +- * <= is required because the CPU will access up to +- * 8 bits beyond the end of the IO permission bitmap. +- */ +- for (i = 0; i <= IO_BITMAP_LONGS; i++) +- t->io_bitmap[i] = ~0UL; +-#endif +- +- atomic_inc(&init_mm.mm_count); +- me->active_mm = &init_mm; +- if (me->mm) +- BUG(); +- enter_lazy_tlb(&init_mm, me); +- +- load_sp0(t, ¤t->thread); +-#ifndef CONFIG_X86_NO_TSS +- set_tss_desc(cpu, t); +- load_TR_desc(); +-#endif +- load_LDT(&init_mm.context); +- +-#ifdef CONFIG_KGDB +- /* +- * If the kgdb is connected no debug regs should be altered. This +- * is only applicable when KGDB and a KGDB I/O module are built +- * into the kernel and you are using early debugging with +- * kgdbwait. KGDB will control the kernel HW breakpoint registers. +- */ +- if (kgdb_connected && arch_kgdb_ops.correct_hw_break) +- arch_kgdb_ops.correct_hw_break(); +- else { +-#endif +- /* +- * Clear all 6 debug registers: +- */ +- +- set_debugreg(0UL, 0); +- set_debugreg(0UL, 1); +- set_debugreg(0UL, 2); +- set_debugreg(0UL, 3); +- set_debugreg(0UL, 6); +- set_debugreg(0UL, 7); +-#ifdef CONFIG_KGDB +- /* If the kgdb is connected no debug regs should be altered. 
*/ +- } +-#endif +- +- fpu_init(); +- +- asm ("pushfq; popq %0" : "=rm" (kernel_eflags)); +- if (raw_irqs_disabled()) +- kernel_eflags &= ~X86_EFLAGS_IF; +- +- if (is_uv_system()) +- uv_cpu_init(); +-} +--- head-2010-01-18.orig/arch/x86/kernel/dumpstack_64.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/dumpstack_64.c 2009-11-06 10:51:47.000000000 +0100 +@@ -20,6 +20,7 @@ + #include "dumpstack.h" + + ++#ifndef CONFIG_X86_NO_TSS + static char x86_stack_ids[][8] = { + [DEBUG_STACK - 1] = "#DB", + [NMI_STACK - 1] = "NMI", +@@ -31,6 +32,7 @@ static char x86_stack_ids[][8] = { + N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + #endif + }; ++#endif + + int x86_is_stack_id(int id, char *name) + { +@@ -40,6 +42,7 @@ int x86_is_stack_id(int id, char *name) + static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, + unsigned *usedp, char **idp) + { ++#ifndef CONFIG_X86_NO_TSS + unsigned k; + + /* +@@ -99,6 +102,7 @@ static unsigned long *in_exception_stack + } + #endif + } ++#endif /* CONFIG_X86_NO_TSS */ + return NULL; + } + +--- head-2010-01-18.orig/arch/x86/kernel/e820-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/e820-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -167,6 +167,9 @@ static void __init _e820_print_map(const + case E820_NVS: + printk(KERN_CONT "(ACPI NVS)\n"); + break; ++ case E820_UNUSABLE: ++ printk("(unusable)\n"); ++ break; + default: + printk(KERN_CONT "type %u\n", e820->map[i].type); + break; +@@ -1399,6 +1402,7 @@ static inline const char *e820_type_to_s + case E820_RAM: return "System RAM"; + case E820_ACPI: return "ACPI Tables"; + case E820_NVS: return "ACPI Non-volatile Storage"; ++ case E820_UNUSABLE: return "Unusable memory"; + default: return "reserved"; + } + } +@@ -1410,6 +1414,7 @@ static inline const char *e820_type_to_s + /* + * Mark e820 reserved areas as busy for the resource manager. 
+ */ ++static struct resource __initdata *e820_res; + void __init e820_reserve_resources(void) + { + int i; +@@ -1417,20 +1422,28 @@ void __init e820_reserve_resources(void) + u64 end; + + res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); ++ e820_res = res; + for (i = 0; i < e820.nr_map; i++) { + end = e820.map[i].addr + e820.map[i].size - 1; +-#ifndef CONFIG_RESOURCES_64BIT +- if (end > 0x100000000ULL) { ++ if (end != (resource_size_t)end) { + res++; + continue; + } +-#endif + res->name = e820_type_to_string(e820.map[i].type); + res->start = e820.map[i].addr; + res->end = end; + +- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; +- insert_resource(&iomem_resource, res); ++ res->flags = IORESOURCE_MEM; ++ ++ /* ++ * don't register the region that could be conflicted with ++ * pci device BAR resource and insert them later in ++ * pcibios_resource_survey() ++ */ ++ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { ++ res->flags |= IORESOURCE_BUSY; ++ insert_resource(&iomem_resource, res); ++ } + res++; + } + +@@ -1442,6 +1455,19 @@ void __init e820_reserve_resources(void) + } + } + ++void __init e820_reserve_resources_late(void) ++{ ++ int i; ++ struct resource *res; ++ ++ res = e820_res; ++ for (i = 0; i < e820.nr_map; i++) { ++ if (!res->parent && res->end) ++ insert_resource_expand_to_fit(&iomem_resource, res); ++ res++; ++ } ++} ++ + #undef e820 + + #ifndef CONFIG_XEN +--- head-2010-01-18.orig/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -3,10 +3,18 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + #include + #include + #include + #include ++#include ++#include ++#include ++#include + + #ifndef CONFIG_XEN + /* Simple VGA output */ +@@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; / + static int early_serial_putc(unsigned char ch) + { + unsigned timeout = 0xffff; ++ + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + outb(ch, early_serial_base + TXR); +@@ -111,7 +120,7 @@ static __init void early_serial_init(cha + if (!strncmp(s, "0x", 2)) { + early_serial_base = simple_strtoul(s, &e, 16); + } else { +- static int bases[] = { 0x3f8, 0x2f8 }; ++ static const int __initconst bases[] = { 0x3f8, 0x2f8 }; + + if (!strncmp(s, "ttyS", 4)) + s += 4; +@@ -180,6 +189,721 @@ static struct console early_serial_conso + .index = -1, + }; + ++#ifdef CONFIG_EARLY_PRINTK_DBGP ++ ++static struct ehci_caps __iomem *ehci_caps; ++static struct ehci_regs __iomem *ehci_regs; ++static struct ehci_dbg_port __iomem *ehci_debug; ++static unsigned int dbgp_endpoint_out; ++ ++struct ehci_dev { ++ u32 bus; ++ u32 slot; ++ u32 func; ++}; ++ ++static struct ehci_dev ehci_dev; ++ ++#define USB_DEBUG_DEVNUM 127 ++ ++#define DBGP_DATA_TOGGLE 0x8800 ++ ++static inline u32 dbgp_pid_update(u32 x, u32 tok) ++{ ++ return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); ++} ++ ++static inline u32 dbgp_len_update(u32 x, u32 len) ++{ ++ return (x & ~0x0f) | (len & 0x0f); ++} ++ ++/* ++ * USB Packet IDs (PIDs) ++ */ ++ ++/* token */ ++#define USB_PID_OUT 0xe1 ++#define USB_PID_IN 0x69 ++#define USB_PID_SOF 0xa5 ++#define USB_PID_SETUP 0x2d ++/* handshake */ ++#define USB_PID_ACK 0xd2 ++#define USB_PID_NAK 0x5a ++#define USB_PID_STALL 0x1e ++#define USB_PID_NYET 0x96 ++/* data */ ++#define USB_PID_DATA0 0xc3 ++#define USB_PID_DATA1 0x4b ++#define USB_PID_DATA2 0x87 ++#define USB_PID_MDATA 0x0f ++/* Special */ 
++#define USB_PID_PREAMBLE 0x3c ++#define USB_PID_ERR 0x3c ++#define USB_PID_SPLIT 0x78 ++#define USB_PID_PING 0xb4 ++#define USB_PID_UNDEF_0 0xf0 ++ ++#define USB_PID_DATA_TOGGLE 0x88 ++#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) ++ ++#define PCI_CAP_ID_EHCI_DEBUG 0xa ++ ++#define HUB_ROOT_RESET_TIME 50 /* times are in msec */ ++#define HUB_SHORT_RESET_TIME 10 ++#define HUB_LONG_RESET_TIME 200 ++#define HUB_RESET_TIMEOUT 500 ++ ++#define DBGP_MAX_PACKET 8 ++ ++static int dbgp_wait_until_complete(void) ++{ ++ u32 ctrl; ++ int loop = 0x100000; ++ ++ do { ++ ctrl = readl(&ehci_debug->control); ++ /* Stop when the transaction is finished */ ++ if (ctrl & DBGP_DONE) ++ break; ++ } while (--loop > 0); ++ ++ if (!loop) ++ return -1; ++ ++ /* ++ * Now that we have observed the completed transaction, ++ * clear the done bit. ++ */ ++ writel(ctrl | DBGP_DONE, &ehci_debug->control); ++ return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); ++} ++ ++static void dbgp_mdelay(int ms) ++{ ++ int i; ++ ++ while (ms--) { ++ for (i = 0; i < 1000; i++) ++ outb(0x1, 0x80); ++ } ++} ++ ++static void dbgp_breath(void) ++{ ++ /* Sleep to give the debug port a chance to breathe */ ++} ++ ++static int dbgp_wait_until_done(unsigned ctrl) ++{ ++ u32 pids, lpid; ++ int ret; ++ int loop = 3; ++ ++retry: ++ writel(ctrl | DBGP_GO, &ehci_debug->control); ++ ret = dbgp_wait_until_complete(); ++ pids = readl(&ehci_debug->pids); ++ lpid = DBGP_PID_GET(pids); ++ ++ if (ret < 0) ++ return ret; ++ ++ /* ++ * If the port is getting full or it has dropped data ++ * start pacing ourselves, not necessary but it's friendly. ++ */ ++ if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) ++ dbgp_breath(); ++ ++ /* If I get a NACK reissue the transmission */ ++ if (lpid == USB_PID_NAK) { ++ if (--loop > 0) ++ goto retry; ++ } ++ ++ return ret; ++} ++ ++static void dbgp_set_data(const void *buf, int size) ++{ ++ const unsigned char *bytes = buf; ++ u32 lo, hi; ++ int i; ++ ++ lo = hi = 0; ++ for (i = 0; i < 4 && i < size; i++) ++ lo |= bytes[i] << (8*i); ++ for (; i < 8 && i < size; i++) ++ hi |= bytes[i] << (8*(i - 4)); ++ writel(lo, &ehci_debug->data03); ++ writel(hi, &ehci_debug->data47); ++} ++ ++static void dbgp_get_data(void *buf, int size) ++{ ++ unsigned char *bytes = buf; ++ u32 lo, hi; ++ int i; ++ ++ lo = readl(&ehci_debug->data03); ++ hi = readl(&ehci_debug->data47); ++ for (i = 0; i < 4 && i < size; i++) ++ bytes[i] = (lo >> (8*i)) & 0xff; ++ for (; i < 8 && i < size; i++) ++ bytes[i] = (hi >> (8*(i - 4))) & 0xff; ++} ++ ++static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, ++ const char *bytes, int size) ++{ ++ u32 pids, addr, ctrl; ++ int ret; ++ ++ if (size > DBGP_MAX_PACKET) ++ return -1; ++ ++ addr = DBGP_EPADDR(devnum, endpoint); ++ ++ pids = readl(&ehci_debug->pids); ++ pids = dbgp_pid_update(pids, USB_PID_OUT); ++ ++ ctrl = readl(&ehci_debug->control); ++ ctrl = dbgp_len_update(ctrl, size); ++ ctrl |= DBGP_OUT; ++ ctrl |= DBGP_GO; ++ ++ dbgp_set_data(bytes, size); ++ writel(addr, &ehci_debug->address); ++ writel(pids, &ehci_debug->pids); ++ ++ ret = dbgp_wait_until_done(ctrl); ++ if (ret < 0) ++ return ret; ++ ++ return ret; ++} ++ ++static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, ++ int size) ++{ ++ u32 pids, addr, ctrl; ++ int ret; ++ ++ if (size > DBGP_MAX_PACKET) ++ return -1; ++ ++ addr = DBGP_EPADDR(devnum, endpoint); ++ ++ pids = readl(&ehci_debug->pids); ++ pids = dbgp_pid_update(pids, USB_PID_IN); ++ ++ ctrl = readl(&ehci_debug->control); ++ 
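
dbgp_set_data()/dbgp_get_data() above expose the debug port's 8-byte FIFO as two little-endian 32-bit registers (data03/data47); the shift loops are just a portable byte (de)serialization. A round-trip sketch ("DBGPORT" is an arbitrary demo payload):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const unsigned char msg[8] = "DBGPORT";	/* 7 chars + NUL */
	unsigned char out[8];
	uint32_t lo = 0, hi = 0;
	int i;

	/* Pack, as dbgp_set_data() does before writing data03/data47 */
	for (i = 0; i < 4; i++)
		lo |= (uint32_t)msg[i] << (8 * i);
	for (; i < 8; i++)
		hi |= (uint32_t)msg[i] << (8 * (i - 4));

	/* Unpack, as dbgp_get_data() does after reading them back */
	for (i = 0; i < 4; i++)
		out[i] = (lo >> (8 * i)) & 0xff;
	for (; i < 8; i++)
		out[i] = (hi >> (8 * (i - 4))) & 0xff;

	printf("%.7s -> %08x %08x -> %.7s\n", msg, lo, hi, out);
	return 0;
}
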
ctrl = dbgp_len_update(ctrl, size); ++ ctrl &= ~DBGP_OUT; ++ ctrl |= DBGP_GO; ++ ++ writel(addr, &ehci_debug->address); ++ writel(pids, &ehci_debug->pids); ++ ret = dbgp_wait_until_done(ctrl); ++ if (ret < 0) ++ return ret; ++ ++ if (size > ret) ++ size = ret; ++ dbgp_get_data(data, size); ++ return ret; ++} ++ ++static int dbgp_control_msg(unsigned devnum, int requesttype, int request, ++ int value, int index, void *data, int size) ++{ ++ u32 pids, addr, ctrl; ++ struct usb_ctrlrequest req; ++ int read; ++ int ret; ++ ++ read = (requesttype & USB_DIR_IN) != 0; ++ if (size > (read ? DBGP_MAX_PACKET:0)) ++ return -1; ++ ++ /* Compute the control message */ ++ req.bRequestType = requesttype; ++ req.bRequest = request; ++ req.wValue = cpu_to_le16(value); ++ req.wIndex = cpu_to_le16(index); ++ req.wLength = cpu_to_le16(size); ++ ++ pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); ++ addr = DBGP_EPADDR(devnum, 0); ++ ++ ctrl = readl(&ehci_debug->control); ++ ctrl = dbgp_len_update(ctrl, sizeof(req)); ++ ctrl |= DBGP_OUT; ++ ctrl |= DBGP_GO; ++ ++ /* Send the setup message */ ++ dbgp_set_data(&req, sizeof(req)); ++ writel(addr, &ehci_debug->address); ++ writel(pids, &ehci_debug->pids); ++ ret = dbgp_wait_until_done(ctrl); ++ if (ret < 0) ++ return ret; ++ ++ /* Read the result */ ++ return dbgp_bulk_read(devnum, 0, data, size); ++} ++ ++ ++/* Find a PCI capability */ ++static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) ++{ ++ u8 pos; ++ int bytes; ++ ++ if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & ++ PCI_STATUS_CAP_LIST)) ++ return 0; ++ ++ pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); ++ for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { ++ u8 id; ++ ++ pos &= ~3; ++ id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); ++ if (id == 0xff) ++ break; ++ if (id == cap) ++ return pos; ++ ++ pos = read_pci_config_byte(num, slot, func, ++ pos+PCI_CAP_LIST_NEXT); ++ } ++ return 0; ++} ++ ++static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) ++{ ++ u32 class; ++ ++ class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); ++ if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) ++ return 0; ++ ++ return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); ++} ++ ++static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) ++{ ++ u32 bus, slot, func; ++ ++ for (bus = 0; bus < 256; bus++) { ++ for (slot = 0; slot < 32; slot++) { ++ for (func = 0; func < 8; func++) { ++ unsigned cap; ++ ++ cap = __find_dbgp(bus, slot, func); ++ ++ if (!cap) ++ continue; ++ if (ehci_num-- != 0) ++ continue; ++ *rbus = bus; ++ *rslot = slot; ++ *rfunc = func; ++ return cap; ++ } ++ } ++ } ++ return 0; ++} ++ ++static int ehci_reset_port(int port) ++{ ++ u32 portsc; ++ u32 delay_time, delay; ++ int loop; ++ ++ /* Reset the usb debug port */ ++ portsc = readl(&ehci_regs->port_status[port - 1]); ++ portsc &= ~PORT_PE; ++ portsc |= PORT_RESET; ++ writel(portsc, &ehci_regs->port_status[port - 1]); ++ ++ delay = HUB_ROOT_RESET_TIME; ++ for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; ++ delay_time += delay) { ++ dbgp_mdelay(delay); ++ ++ portsc = readl(&ehci_regs->port_status[port - 1]); ++ if (portsc & PORT_RESET) { ++ /* force reset to complete */ ++ loop = 2; ++ writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), ++ &ehci_regs->port_status[port - 1]); ++ do { ++ portsc = readl(&ehci_regs->port_status[port-1]); ++ } while ((portsc & PORT_RESET) && (--loop > 0)); ++ } ++ ++ /* Device went away? 
*/
++ if (!(portsc & PORT_CONNECT))
++ return -ENOTCONN;
++
++ /* bomb out completely if something weird happened */
++ if ((portsc & PORT_CSC))
++ return -EINVAL;
++
++ /* If we've finished resetting, then break out of the loop */
++ if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
++ return 0;
++ }
++ return -EBUSY;
++}
++
++static int ehci_wait_for_port(int port)
++{
++ u32 status;
++ int ret, reps;
++
++ for (reps = 0; reps < 3; reps++) {
++ dbgp_mdelay(100);
++ status = readl(&ehci_regs->status);
++ if (status & STS_PCD) {
++ ret = ehci_reset_port(port);
++ if (ret == 0)
++ return 0;
++ }
++ }
++ return -ENOTCONN;
++}
++
++#ifdef DBGP_DEBUG
++# define dbgp_printk early_printk
++#else
++static inline void dbgp_printk(const char *fmt, ...) { }
++#endif
++
++typedef void (*set_debug_port_t)(int port);
++
++static void default_set_debug_port(int port)
++{
++}
++
++static set_debug_port_t set_debug_port = default_set_debug_port;
++
++static void nvidia_set_debug_port(int port)
++{
++ u32 dword;
++ dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
++ 0x74);
++ dword &= ~(0x0f<<12);
++ dword |= ((port & 0x0f)<<12);
++ write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
++ dword);
++ dbgp_printk("set debug port to %d\n", port);
++}
++
++static void __init detect_set_debug_port(void)
++{
++ u32 vendorid;
++
++ vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
++ 0x00);
++
++ if ((vendorid & 0xffff) == 0x10de) {
++ dbgp_printk("using nvidia set_debug_port\n");
++ set_debug_port = nvidia_set_debug_port;
++ }
++}
++
++static int __init ehci_setup(void)
++{
++ struct usb_debug_descriptor dbgp_desc;
++ u32 cmd, ctrl, status, portsc, hcs_params;
++ u32 debug_port, new_debug_port = 0, n_ports;
++ u32 devnum;
++ int ret, i;
++ int loop;
++ int port_map_tried;
++ int playtimes = 3;
++
++try_next_time:
++ port_map_tried = 0;
++
++try_next_port:
++
++ hcs_params = readl(&ehci_caps->hcs_params);
++ debug_port = HCS_DEBUG_PORT(hcs_params);
++ n_ports = HCS_N_PORTS(hcs_params);
++
++ dbgp_printk("debug_port: %d\n", debug_port);
++ dbgp_printk("n_ports: %d\n", n_ports);
++
++ for (i = 1; i <= n_ports; i++) {
++ portsc = readl(&ehci_regs->port_status[i-1]);
++ dbgp_printk("portstatus%d: %08x\n", i, portsc);
++ }
++
++ if (port_map_tried && (new_debug_port != debug_port)) {
++ if (--playtimes) {
++ set_debug_port(new_debug_port);
++ goto try_next_time;
++ }
++ return -1;
++ }
++
++ loop = 10;
++ /* Reset the EHCI controller */
++ cmd = readl(&ehci_regs->command);
++ cmd |= CMD_RESET;
++ writel(cmd, &ehci_regs->command);
++ do {
++ cmd = readl(&ehci_regs->command);
++ } while ((cmd & CMD_RESET) && (--loop > 0));
++
++ if (!loop) {
++ dbgp_printk("can not reset ehci\n");
++ return -1;
++ }
++ dbgp_printk("ehci reset done\n");
++
++ /* Claim ownership, but do not enable yet */
++ ctrl = readl(&ehci_debug->control);
++ ctrl |= DBGP_OWNER;
++ ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
++ writel(ctrl, &ehci_debug->control);
++
++ /* Start the ehci running */
++ cmd = readl(&ehci_regs->command);
++ cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
++ cmd |= CMD_RUN;
++ writel(cmd, &ehci_regs->command);
++
++ /* Ensure everything is routed to the EHCI */
++ writel(FLAG_CF, &ehci_regs->configured_flag);
++
++ /* Wait until the controller is no longer halted */
++ loop = 10;
++ do {
++ status = readl(&ehci_regs->status);
++ } while ((status & STS_HALT) && (--loop > 0));
++
++ if (!loop) {
++ dbgp_printk("ehci can not be started\n");
++ return -1;
++ }
++ dbgp_printk("ehci started\n");
++
++ /* Wait for a device to show up in the debug port */
++ ret = ehci_wait_for_port(debug_port);
++ if (ret < 0) {
++ dbgp_printk("No device found in debug port\n");
++ goto next_debug_port;
++ }
++ dbgp_printk("ehci wait for port done\n");
++
++ /* Enable the debug port */
++ ctrl = readl(&ehci_debug->control);
++ ctrl |= DBGP_CLAIM;
++ writel(ctrl, &ehci_debug->control);
++ ctrl = readl(&ehci_debug->control);
++ if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
++ dbgp_printk("No device in debug port\n");
++ writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
++ goto err;
++ }
++ dbgp_printk("debug port enabled\n");
++
++ /* Completely transfer the debug device to the debug controller */
++ portsc = readl(&ehci_regs->port_status[debug_port - 1]);
++ portsc &= ~PORT_PE;
++ writel(portsc, &ehci_regs->port_status[debug_port - 1]);
++
++ dbgp_mdelay(100);
++
++ /* Find the debug device and make it device number 127 */
++ for (devnum = 0; devnum <= 127; devnum++) {
++ ret = dbgp_control_msg(devnum,
++ USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
++ USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
++ &dbgp_desc, sizeof(dbgp_desc));
++ if (ret > 0)
++ break;
++ }
++ if (devnum > 127) {
++ dbgp_printk("Could not find attached debug device\n");
++ goto err;
++ }
++ if (ret < 0) {
++ dbgp_printk("Attached device is not a debug device\n");
++ goto err;
++ }
++ dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
++
++ /* Move the device to 127 if it isn't already there */
++ if (devnum != USB_DEBUG_DEVNUM) {
++ ret = dbgp_control_msg(devnum,
++ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
++ USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
++ if (ret < 0) {
++ dbgp_printk("Could not move attached device to %d\n",
++ USB_DEBUG_DEVNUM);
++ goto err;
++ }
++ devnum = USB_DEBUG_DEVNUM;
++ dbgp_printk("debug device renamed to 127\n");
++ }
++
++ /* Enable the debug interface */
++ ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
++ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
++ USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
++ if (ret < 0) {
++ dbgp_printk(" Could not enable the debug device\n");
++ goto err;
++ }
++ dbgp_printk("debug interface enabled\n");
++
++ /* Perform a small write to get the even/odd data state in sync
++ */
++ ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
++ if (ret < 0) {
++ dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
++ goto err;
++ }
++ dbgp_printk("small write done\n");
++
++ return 0;
++err:
++ /* Things didn't work so remove my claim */
++ ctrl = readl(&ehci_debug->control);
++ ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
++ writel(ctrl, &ehci_debug->control);
++ return -1;
++
++next_debug_port:
++ port_map_tried |= (1<<(debug_port - 1));
++ new_debug_port = ((debug_port-1+1)%n_ports) + 1;
++ if (port_map_tried != ((1<<n_ports) - 1)) {
++ set_debug_port(new_debug_port);
++ goto try_next_port;
++ }
++ if (--playtimes) {
++ set_debug_port(new_debug_port);
++ goto try_next_time;
++ }
++
++ return -1;
++}
++
++static int __init early_dbgp_init(char *s)
++{
++ u32 debug_port, bar, offset;
++ u32 bus, slot, func, cap;
++ void __iomem *ehci_bar;
++ u32 dbgp_num;
++ u32 bar_val;
++ char *e;
++ int ret;
++ u8 byte;
++
++ if (!early_pci_allowed())
++ return -1;
++
++ dbgp_num = 0;
++ if (*s)
++ dbgp_num = simple_strtoul(s, &e, 10);
++ dbgp_printk("dbgp_num: %d\n", dbgp_num);
++
++ cap = find_dbgp(dbgp_num, &bus, &slot, &func);
++ if (!cap)
++ return -1;
++
++ dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
++ func);
++
++ debug_port = read_pci_config(bus, slot, func, cap);
++ bar = (debug_port >> 29) & 0x7;
++ bar = (bar * 4) + 0xc;
++ offset = (debug_port >> 16) & 0xfff;
++ dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
++ if (bar != PCI_BASE_ADDRESS_0) {
++ dbgp_printk("only debug ports on bar 1 handled.\n");
++
++ return -1;
++ }
++
++ bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
++ dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
++ if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
++ dbgp_printk("only simple 32bit mmio bars supported\n");
++
++ return -1;
++ }
++
++ /* double check if the mem space is enabled */
++ byte = read_pci_config_byte(bus, slot, func, 0x04);
++ if (!(byte & 0x2)) {
++ byte |= 0x02;
++ write_pci_config_byte(bus, slot, func,
0x04, byte); ++ dbgp_printk("mmio for ehci enabled\n"); ++ } ++ ++ /* ++ * FIXME I don't have the bar size so just guess PAGE_SIZE is more ++ * than enough. 1K is the biggest I have seen. ++ */ ++ set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); ++ ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); ++ ehci_bar += bar_val & ~PAGE_MASK; ++ dbgp_printk("ehci_bar: %p\n", ehci_bar); ++ ++ ehci_caps = ehci_bar; ++ ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); ++ ehci_debug = ehci_bar + offset; ++ ehci_dev.bus = bus; ++ ehci_dev.slot = slot; ++ ehci_dev.func = func; ++ ++ detect_set_debug_port(); ++ ++ ret = ehci_setup(); ++ if (ret < 0) { ++ dbgp_printk("ehci_setup failed\n"); ++ ehci_debug = NULL; ++ ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static void early_dbgp_write(struct console *con, const char *str, u32 n) ++{ ++ int chunk, ret; ++ ++ if (!ehci_debug) ++ return; ++ while (n > 0) { ++ chunk = n; ++ if (chunk > DBGP_MAX_PACKET) ++ chunk = DBGP_MAX_PACKET; ++ ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, ++ dbgp_endpoint_out, str, chunk); ++ str += chunk; ++ n -= chunk; ++ } ++} ++ ++static struct console early_dbgp_console = { ++ .name = "earlydbg", ++ .write = early_dbgp_write, ++ .flags = CON_PRINTBUFFER, ++ .index = -1, ++}; ++#endif ++ + /* Console interface to a host file on AMD's SimNow! */ + + static int simnow_fd; +@@ -194,6 +918,7 @@ enum { + static noinline long simnow(long cmd, long a, long b, long c) + { + long ret; ++ + asm volatile("cpuid" : + "=a" (ret) : + "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); +@@ -203,6 +928,7 @@ static noinline long simnow(long cmd, lo + static void __init simnow_init(char *str) + { + char *fn = "klog"; ++ + if (*str == '=') + fn = ++str; + /* error ignored */ +@@ -223,7 +949,7 @@ static struct console simnow_console = { + + /* Direct interface for emergencies */ + static struct console *early_console = &early_vga_console; +-static int early_console_initialized; ++static int __initdata early_console_initialized; + + asmlinkage void early_printk(const char *fmt, ...) 
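
Higher up in this hunk, early_dbgp_write() drains the console string through 8-byte bulk transfers, because DBGP_MAX_PACKET is all the debug endpoint carries per transaction. The chunking loop in isolation, as a user-space sketch (fake_bulk_write() is a hypothetical stand-in for dbgp_bulk_write(), which needs the hardware):

    #include <stdio.h>
    #include <string.h>

    #define DBGP_MAX_PACKET 8

    /* Stand-in for dbgp_bulk_write(); it just prints each chunk so the
     * 8-byte framing is visible. */
    static int fake_bulk_write(const char *bytes, int size)
    {
        printf("packet(%d): %.*s\n", size, size, bytes);
        return size;
    }

    static void chunked_write(const char *str, int n)
    {
        while (n > 0) {
            int chunk = n > DBGP_MAX_PACKET ? DBGP_MAX_PACKET : n;

            fake_bulk_write(str, chunk);    /* one USB debug packet */
            str += chunk;
            n -= chunk;
        }
    }

    int main(void)
    {
        const char msg[] = "early console over EHCI debug port";

        chunked_write(msg, (int)strlen(msg));
        return 0;
    }
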
+ { +@@ -237,10 +963,11 @@ asmlinkage void early_printk(const char + va_end(ap); + } + +-static int __initdata keep_early; + + static int __init setup_early_printk(char *buf) + { ++ int keep_early; ++ + if (!buf) + return 0; + +@@ -248,8 +975,7 @@ static int __init setup_early_printk(cha + return 0; + early_console_initialized = 1; + +- if (strstr(buf, "keep")) +- keep_early = 1; ++ keep_early = (strstr(buf, "keep") != NULL); + + if (!strncmp(buf, "serial", 6)) { + early_serial_init(buf + 6); +@@ -269,6 +995,17 @@ static int __init setup_early_printk(cha + simnow_init(buf + 6); + early_console = &simnow_console; + keep_early = 1; ++#ifdef CONFIG_EARLY_PRINTK_DBGP ++ } else if (!strncmp(buf, "dbgp", 4)) { ++ if (early_dbgp_init(buf+4) < 0) ++ return 0; ++ early_console = &early_dbgp_console; ++ /* ++ * usb subsys will reset ehci controller, so don't keep ++ * that early console ++ */ ++ keep_early = 0; ++#endif + #ifdef CONFIG_XEN + } else if (!strncmp(buf, "xen", 3)) { + early_console = &xenboot_console; +@@ -282,4 +1019,5 @@ static int __init setup_early_printk(cha + register_console(early_console); + return 0; + } ++ + early_param("earlyprintk", setup_early_printk); +--- head-2010-01-18.orig/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:51:47.000000000 +0100 +@@ -700,7 +700,7 @@ ENTRY(interrupt) + ENTRY(irq_entries_start) + RING0_INT_FRAME + vector=0 +-.rept NR_IRQS ++.rept NR_VECTORS + ALIGN + .if vector + CFI_ADJUST_CFA_OFFSET -4 +@@ -805,6 +805,7 @@ error_code: + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es ++ TRACE_IRQS_OFF + movl %esp,%eax # pt_regs pointer + call *%edi + jmp ret_from_exception +@@ -974,22 +975,9 @@ ENTRY(device_not_available) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 +- SAVE_ALL +-#ifndef CONFIG_XEN +- GET_CR0_INTO_EAX +- testl $0x4, %eax # EM (math emulation bit) +- je device_available_emulate +- pushl $0 # temporary storage for ORIG_EIP ++ pushl $do_device_not_available + CFI_ADJUST_CFA_OFFSET 4 +- call math_emulate +- addl $4, %esp +- CFI_ADJUST_CFA_OFFSET -4 +- jmp ret_from_exception +-device_available_emulate: +-#endif +- preempt_stop(CLBR_ANY) +- call math_state_restore +- jmp ret_from_exception ++ jmp error_code + CFI_ENDPROC + END(device_not_available) + +@@ -1034,6 +1022,7 @@ debug_stack_correct: + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL ++ TRACE_IRQS_OFF + xorl %edx,%edx # error code 0 + movl %esp,%eax # pt_regs pointer + call do_debug +@@ -1079,6 +1068,7 @@ nmi_stack_correct: + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL ++ TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi +@@ -1119,6 +1109,7 @@ nmi_espfix_stack: + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL ++ TRACE_IRQS_OFF + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi +@@ -1162,6 +1153,7 @@ KPROBE_ENTRY(int3) + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL ++ TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_int3 +@@ -1303,24 +1295,10 @@ ENTRY(kernel_thread_helper) + CFI_ENDPROC + ENDPROC(kernel_thread_helper) + +-#ifdef CONFIG_FTRACE ++#ifdef CONFIG_FUNCTION_TRACER + #ifdef CONFIG_DYNAMIC_FTRACE + + ENTRY(mcount) +- pushl %eax +- pushl %ecx +- pushl %edx +- movl 0xc(%esp), %eax +- subl $MCOUNT_INSN_SIZE, %eax +- +-.globl mcount_call +-mcount_call: +- call ftrace_stub +- +- popl 
%edx +- popl %ecx +- popl %eax +- + ret + END(mcount) + +@@ -1372,7 +1350,7 @@ trace: + jmp ftrace_stub + END(mcount) + #endif /* CONFIG_DYNAMIC_FTRACE */ +-#endif /* CONFIG_FTRACE */ ++#endif /* CONFIG_FUNCTION_TRACER */ + + #include + +--- head-2010-01-18.orig/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:51:47.000000000 +0100 +@@ -66,35 +66,9 @@ + + .code64 + +-#ifdef CONFIG_FTRACE ++#ifdef CONFIG_FUNCTION_TRACER + #ifdef CONFIG_DYNAMIC_FTRACE + ENTRY(mcount) +- +- subq $0x38, %rsp +- movq %rax, (%rsp) +- movq %rcx, 8(%rsp) +- movq %rdx, 16(%rsp) +- movq %rsi, 24(%rsp) +- movq %rdi, 32(%rsp) +- movq %r8, 40(%rsp) +- movq %r9, 48(%rsp) +- +- movq 0x38(%rsp), %rdi +- subq $MCOUNT_INSN_SIZE, %rdi +- +-.globl mcount_call +-mcount_call: +- call ftrace_stub +- +- movq 48(%rsp), %r9 +- movq 40(%rsp), %r8 +- movq 32(%rsp), %rdi +- movq 24(%rsp), %rsi +- movq 16(%rsp), %rdx +- movq 8(%rsp), %rcx +- movq (%rsp), %rax +- addq $0x38, %rsp +- + retq + END(mcount) + +@@ -169,7 +143,7 @@ trace: + jmp ftrace_stub + END(mcount) + #endif /* CONFIG_DYNAMIC_FTRACE */ +-#endif /* CONFIG_FTRACE */ ++#endif /* CONFIG_FUNCTION_TRACER */ + + #ifndef CONFIG_PREEMPT + #define retint_kernel retint_restore_args +@@ -303,9 +277,9 @@ NMI_MASK = 0x80000000 + ENTRY(ret_from_fork) + CFI_DEFAULT_STACK + push kernel_eflags(%rip) +- CFI_ADJUST_CFA_OFFSET 4 ++ CFI_ADJUST_CFA_OFFSET 8 + popf # reset kernel eflags +- CFI_ADJUST_CFA_OFFSET -4 ++ CFI_ADJUST_CFA_OFFSET -8 + call schedule_tail + GET_THREAD_INFO(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) +@@ -869,6 +843,9 @@ END(spurious_interrupt) + .if \ist + movq %gs:pda_data_offset, %rbp + .endif ++ .if \irqtrace ++ TRACE_IRQS_OFF ++ .endif + movq %rsp,%rdi + movq ORIG_RAX(%rsp),%rsi + movq $-1,ORIG_RAX(%rsp) +@@ -1277,7 +1254,7 @@ ENTRY(simd_coprocessor_error) + END(simd_coprocessor_error) + + ENTRY(device_not_available) +- zeroentry math_state_restore ++ zeroentry do_device_not_available + END(device_not_available) + + /* runs on exception stack */ +@@ -1376,9 +1353,11 @@ ENTRY(divide_error) + zeroentry do_divide_error + END(divide_error) + ++#ifndef CONFIG_XEN + ENTRY(spurious_interrupt_bug) + zeroentry do_spurious_interrupt_bug + END(spurious_interrupt_bug) ++#endif + + #ifdef CONFIG_X86_MCE + /* runs on exception stack */ +--- head-2010-01-18.orig/arch/x86/kernel/fixup.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/fixup.c 2009-11-06 10:51:47.000000000 +0100 +@@ -37,7 +37,7 @@ + + #define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) + +-void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) ++dotraplinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) + { + static unsigned long printed = 0; + char info[100]; +--- head-2010-01-18.orig/arch/x86/kernel/head-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -36,6 +36,7 @@ void __init reserve_ebda_region(void) + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); ++ printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. 
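
For context on the reserve_ebda_region() hunk above: get_bios_ebda() (not shown in this patch) conventionally derives the EBDA address from the real-mode BIOS data area word at 0x40e, which stores a segment value. A user-space illustration of that arithmetic only; the segment value below is invented for the demo:

    #include <stdio.h>

    int main(void)
    {
        unsigned short ebda_segment = 0x9fc0;  /* hypothetical BDA[0x40e] */
        unsigned int ebda_addr = (unsigned int)ebda_segment << 4;

        printf("BIOS EBDA at: %08x\n", ebda_addr);  /* 0009fc00 */
        return 0;
    }
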
*/ +--- head-2010-01-18.orig/arch/x86/kernel/head64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head64-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -151,12 +151,11 @@ void __init x86_64_start_kernel(char * r + load_idt((const struct desc_ptr *)&idt_descr); + #endif + +- early_printk("Kernel alive\n"); ++ if (console_loglevel == 10) ++ early_printk("Kernel alive\n"); + + x86_64_init_pda(); + +- early_printk("Kernel really alive\n"); +- + x86_64_start_reservations(real_mode_data); + } + +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -0,0 +1,3949 @@ ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* time_after() */ ++#ifdef CONFIG_ACPI ++#include ++#endif ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XEN ++#include ++#include ++#include ++ ++/* Fake i8259 */ ++#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) ++#define disable_8259A_irq(_irq) ((void)0) ++#define i8259A_irq_pending(_irq) (0) ++ ++unsigned long io_apic_irqs; ++#endif /* CONFIG_XEN */ ++ ++#define __apicdebuginit(type) static type __init ++ ++/* ++ * Is the SiS APIC rmw bug present ? ++ * -1 = don't know, 0 = no, 1 = yes ++ */ ++int sis_apic_bug = -1; ++ ++static DEFINE_SPINLOCK(ioapic_lock); ++#ifndef CONFIG_XEN ++static DEFINE_SPINLOCK(vector_lock); ++#endif ++ ++/* ++ * # of IRQ routing registers ++ */ ++int nr_ioapic_registers[MAX_IO_APICS]; ++ ++/* I/O APIC entries */ ++struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; ++int nr_ioapics; ++ ++/* MP IRQ source entries */ ++struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* # of MP IRQ source entries */ ++int mp_irq_entries; ++ ++#if defined (CONFIG_MCA) || defined (CONFIG_EISA) ++int mp_bus_id_to_type[MAX_MP_BUSSES]; ++#endif ++ ++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); ++ ++int skip_ioapic_setup; ++ ++static int __init parse_noapic(char *str) ++{ ++ /* disable IO-APIC */ ++ disable_ioapic_setup(); ++ return 0; ++} ++early_param("noapic", parse_noapic); ++ ++struct irq_pin_list; ++struct irq_cfg { ++#ifndef CONFIG_XEN ++ unsigned int irq; ++ struct irq_pin_list *irq_2_pin; ++ cpumask_t domain; ++ cpumask_t old_domain; ++ unsigned move_cleanup_count; ++#endif ++ u8 vector; ++#ifndef CONFIG_XEN ++ u8 move_in_progress : 1; ++#endif ++}; ++ ++/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. 
*/ ++static struct irq_cfg irq_cfgx[NR_IRQS] = { ++ [0] = { .irq = 0 }, ++ [1] = { .irq = 1 }, ++ [2] = { .irq = 2 }, ++ [3] = { .irq = 3 }, ++ [4] = { .irq = 4 }, ++ [5] = { .irq = 5 }, ++ [6] = { .irq = 6 }, ++ [7] = { .irq = 7 }, ++ [8] = { .irq = 8 }, ++ [9] = { .irq = 9 }, ++ [10] = { .irq = 10 }, ++ [11] = { .irq = 11 }, ++ [12] = { .irq = 12 }, ++ [13] = { .irq = 13 }, ++ [14] = { .irq = 14 }, ++ [15] = { .irq = 15 }, ++}; ++ ++#define for_each_irq_cfg(irq, cfg) \ ++ for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) ++ ++static struct irq_cfg *irq_cfg(unsigned int irq) ++{ ++ return irq < nr_irqs ? irq_cfgx + irq : NULL; ++} ++ ++static struct irq_cfg *irq_cfg_alloc(unsigned int irq) ++{ ++ return irq_cfg(irq); ++} ++ ++#ifdef CONFIG_XEN ++#define irq_2_pin_init() ++#define add_pin_to_irq(irq, apic, pin) ++#else ++/* ++ * Rough estimation of how many shared IRQs there are, can be changed ++ * anytime. ++ */ ++#define MAX_PLUS_SHARED_IRQS NR_IRQS ++#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) ++ ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * the indexing order of this array favors 1:1 mappings ++ * between pins and IRQs. ++ */ ++ ++struct irq_pin_list { ++ int apic, pin; ++ struct irq_pin_list *next; ++}; ++ ++static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; ++static struct irq_pin_list *irq_2_pin_ptr; ++ ++static void __init irq_2_pin_init(void) ++{ ++ struct irq_pin_list *pin = irq_2_pin_head; ++ int i; ++ ++ for (i = 1; i < PIN_MAP_SIZE; i++) ++ pin[i-1].next = &pin[i]; ++ ++ irq_2_pin_ptr = &pin[0]; ++} ++ ++static struct irq_pin_list *get_one_free_irq_2_pin(void) ++{ ++ struct irq_pin_list *pin = irq_2_pin_ptr; ++ ++ if (!pin) ++ panic("can not get more irq_2_pin\n"); ++ ++ irq_2_pin_ptr = pin->next; ++ pin->next = NULL; ++ return pin; ++} ++ ++struct io_apic { ++ unsigned int index; ++ unsigned int unused[3]; ++ unsigned int data; ++}; ++ ++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) ++{ ++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) ++ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); ++} ++#endif ++ ++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) ++{ ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ return readl(&io_apic->data); ++#else ++ struct physdev_apic apic_op; ++ int ret; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; ++ apic_op.reg = reg; ++ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); ++ if (ret) ++ return ret; ++ return apic_op.value; ++#endif ++} ++ ++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++#else ++ struct physdev_apic apic_op; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; ++ apic_op.reg = reg; ++ apic_op.value = value; ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); ++#endif ++} ++ ++#ifdef CONFIG_XEN ++#define io_apic_modify io_apic_write ++#else ++/* ++ * Re-write a value: to be used for read-modify-write ++ * cycles where the read already set up the index register. 
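
The native io_apic_read()/io_apic_write() pair above uses the IO-APIC's classic index/data window: store a register number in the index cell, then access the data cell; the CONFIG_XEN variant replaces exactly this with a PHYSDEVOP_apic_read/write hypercall. A self-contained sketch of the index/data pattern, with an in-memory stand-in for the __iomem mapping (no real hardware access):

    #include <stdint.h>
    #include <stdio.h>

    /* Two 32-bit cells 16 bytes apart, as in the struct io_apic above. */
    struct io_apic_window {
        volatile uint32_t index;
        volatile uint32_t unused[3];
        volatile uint32_t data;
    };

    static uint32_t ioapic_read(struct io_apic_window *w, uint32_t reg)
    {
        w->index = reg;   /* select the register... */
        return w->data;   /* ...then read it through the data window */
    }

    int main(void)
    {
        static struct io_apic_window fake;  /* plain memory, demo only */

        fake.data = 0x00170011;             /* pretend register contents */
        printf("reg 1 = %08x\n", (unsigned)ioapic_read(&fake, 1));
        return 0;
    }
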
++ * ++ * Older SiS APIC requires we rewrite the index register ++ */ ++static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ ++ if (sis_apic_bug) ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++} ++ ++static bool io_apic_level_ack_pending(unsigned int irq) ++{ ++ struct irq_pin_list *entry; ++ unsigned long flags; ++ struct irq_cfg *cfg = irq_cfg(irq); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ entry = cfg->irq_2_pin; ++ for (;;) { ++ unsigned int reg; ++ int pin; ++ ++ if (!entry) ++ break; ++ pin = entry->pin; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ /* Is the remote IRR bit set? */ ++ if (reg & IO_APIC_REDIR_REMOTE_IRR) { ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ return true; ++ } ++ if (!entry->next) ++ break; ++ entry = entry->next; ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return false; ++} ++#endif /* CONFIG_XEN */ ++ ++union entry_union { ++ struct { u32 w1, w2; }; ++ struct IO_APIC_route_entry entry; ++}; ++ ++#ifndef CONFIG_XEN ++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); ++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ return eu.entry; ++} ++#endif ++ ++/* ++ * When we write a new IO APIC routing entry, we need to write the high ++ * word first! If the mask bit in the low word is clear, we will enable ++ * the interrupt, and we need to make sure the entry is fully populated ++ * before that happens. ++ */ ++static void ++__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ union entry_union eu; ++ eu.entry = e; ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++} ++ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, e); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * When we mask an IO APIC routing entry, we need to write the low ++ * word first, in order to set the mask bit before we change the ++ * high bits! ++ */ ++static void ioapic_mask_entry(int apic, int pin) ++{ ++ unsigned long flags; ++ union entry_union eu = { .entry.mask = 1 }; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#ifdef CONFIG_SMP ++static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) ++{ ++ int apic, pin; ++ struct irq_cfg *cfg; ++ struct irq_pin_list *entry; ++ ++ cfg = irq_cfg(irq); ++ entry = cfg->irq_2_pin; ++ for (;;) { ++ unsigned int reg; ++ ++ if (!entry) ++ break; ++ ++ apic = entry->apic; ++ pin = entry->pin; ++#ifdef CONFIG_INTR_REMAP ++ /* ++ * With interrupt-remapping, destination information comes ++ * from interrupt-remapping table entry. 
++ */ ++ if (!irq_remapped(irq)) ++ io_apic_write(apic, 0x11 + pin*2, dest); ++#else ++ io_apic_write(apic, 0x11 + pin*2, dest); ++#endif ++ reg = io_apic_read(apic, 0x10 + pin*2); ++ reg &= ~IO_APIC_REDIR_VECTOR_MASK; ++ reg |= vector; ++ io_apic_modify(apic, 0x10 + pin*2, reg); ++ if (!entry->next) ++ break; ++ entry = entry->next; ++ } ++} ++ ++static int assign_irq_vector(int irq, cpumask_t mask); ++ ++static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ unsigned long flags; ++ unsigned int dest; ++ cpumask_t tmp; ++ struct irq_desc *desc; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ /* ++ * Only the high 8 bits are valid. ++ */ ++ dest = SET_APIC_LOGICAL_ID(dest); ++ ++ desc = irq_to_desc(irq); ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __target_IO_APIC_irq(irq, dest, cfg->vector); ++ desc->affinity = mask; ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++#endif /* CONFIG_SMP */ ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. ++ */ ++static void add_pin_to_irq(unsigned int irq, int apic, int pin) ++{ ++ struct irq_cfg *cfg; ++ struct irq_pin_list *entry; ++ ++ /* first time to refer irq_cfg, so with new */ ++ cfg = irq_cfg_alloc(irq); ++ entry = cfg->irq_2_pin; ++ if (!entry) { ++ entry = get_one_free_irq_2_pin(); ++ cfg->irq_2_pin = entry; ++ entry->apic = apic; ++ entry->pin = pin; ++ return; ++ } ++ ++ while (entry->next) { ++ /* not again, please */ ++ if (entry->apic == apic && entry->pin == pin) ++ return; ++ ++ entry = entry->next; ++ } ++ ++ entry->next = get_one_free_irq_2_pin(); ++ entry = entry->next; ++ entry->apic = apic; ++ entry->pin = pin; ++} ++ ++/* ++ * Reroute an IRQ to a different pin. ++ */ ++static void __init replace_pin_at_irq(unsigned int irq, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_cfg *cfg = irq_cfg(irq); ++ struct irq_pin_list *entry = cfg->irq_2_pin; ++ int replaced = 0; ++ ++ while (entry) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ replaced = 1; ++ /* every one is different, right? */ ++ break; ++ } ++ entry = entry->next; ++ } ++ ++ /* why? call replace before add? 
*/ ++ if (!replaced) ++ add_pin_to_irq(irq, newapic, newpin); ++} ++ ++static inline void io_apic_modify_irq(unsigned int irq, ++ int mask_and, int mask_or, ++ void (*final)(struct irq_pin_list *entry)) ++{ ++ int pin; ++ struct irq_cfg *cfg; ++ struct irq_pin_list *entry; ++ ++ cfg = irq_cfg(irq); ++ for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { ++ unsigned int reg; ++ pin = entry->pin; ++ reg = io_apic_read(entry->apic, 0x10 + pin * 2); ++ reg &= mask_and; ++ reg |= mask_or; ++ io_apic_modify(entry->apic, 0x10 + pin * 2, reg); ++ if (final) ++ final(entry); ++ } ++} ++ ++static void __unmask_IO_APIC_irq(unsigned int irq) ++{ ++ io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); ++} ++ ++#ifdef CONFIG_X86_64 ++void io_apic_sync(struct irq_pin_list *entry) ++{ ++ /* ++ * Synchronize the IO-APIC and the CPU by doing ++ * a dummy read from the IO-APIC ++ */ ++ struct io_apic __iomem *io_apic; ++ io_apic = io_apic_base(entry->apic); ++ readl(&io_apic->data); ++} ++ ++static void __mask_IO_APIC_irq(unsigned int irq) ++{ ++ io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++} ++#else /* CONFIG_X86_32 */ ++static void __mask_IO_APIC_irq(unsigned int irq) ++{ ++ io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); ++} ++ ++static void __mask_and_edge_IO_APIC_irq(unsigned int irq) ++{ ++ io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, ++ IO_APIC_REDIR_MASKED, NULL); ++} ++ ++static void __unmask_and_level_IO_APIC_irq(unsigned int irq) ++{ ++ io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, ++ IO_APIC_REDIR_LEVEL_TRIGGER, NULL); ++} ++#endif /* CONFIG_X86_32 */ ++ ++static void mask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __mask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void unmask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ ++ /* Check delivery_mode to be sure we're not clearing an SMI pin */ ++ entry = ioapic_read_entry(apic, pin); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ /* ++ * Disable it in the IO-APIC irq-routing table: ++ */ ++ ioapic_mask_entry(apic, pin); ++} ++ ++static void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) ++void send_IPI_self(int vector) ++{ ++ unsigned int cfg; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; ++ /* ++ * Send the IPI. The write to APIC_ICR fires this off. ++ */ ++ apic_write(APIC_ICR, cfg); ++} ++#endif /* !CONFIG_SMP && CONFIG_X86_32*/ ++#endif /* CONFIG_XEN */ ++ ++#ifdef CONFIG_X86_32 ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. 
++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries [MAX_PIRQS]; ++static int pirqs_enabled; ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ pirqs_enabled = 1; ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++#endif /* CONFIG_X86_32 */ ++ ++#ifdef CONFIG_INTR_REMAP ++/* I/O APIC RTE contents at the OS boot up */ ++static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; ++ ++/* ++ * Saves and masks all the unmasked IO-APIC RTE's ++ */ ++int save_mask_IO_APIC_setup(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ int apic, pin; ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ early_ioapic_entries[apic] = ++ kzalloc(sizeof(struct IO_APIC_route_entry) * ++ nr_ioapic_registers[apic], GFP_KERNEL); ++ if (!early_ioapic_entries[apic]) ++ goto nomem; ++ } ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ ++ entry = early_ioapic_entries[apic][pin] = ++ ioapic_read_entry(apic, pin); ++ if (!entry.mask) { ++ entry.mask = 1; ++ ioapic_write_entry(apic, pin, entry); ++ } ++ } ++ ++ return 0; ++ ++nomem: ++ while (apic >= 0) ++ kfree(early_ioapic_entries[apic--]); ++ memset(early_ioapic_entries, 0, ++ ARRAY_SIZE(early_ioapic_entries)); ++ ++ return -ENOMEM; ++} ++ ++void restore_IO_APIC_setup(void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ if (!early_ioapic_entries[apic]) ++ break; ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ ioapic_write_entry(apic, pin, ++ early_ioapic_entries[apic][pin]); ++ kfree(early_ioapic_entries[apic]); ++ early_ioapic_entries[apic] = NULL; ++ } ++} ++ ++void reinit_intr_remapped_IO_APIC(int intr_remapping) ++{ ++ /* ++ * for now plain restore of previous settings. ++ * TBD: In the case of OS enabling interrupt-remapping, ++ * IO-APIC RTE's need to be setup to point to interrupt-remapping ++ * table entries. for now, do a plain restore, and wait for ++ * the setup_IO_APIC_irqs() to do proper initialization. ++ */ ++ restore_IO_APIC_setup(); ++} ++#endif ++ ++/* ++ * Find the IRQ entry number of a certain pin. 
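
save_mask_IO_APIC_setup()/restore_IO_APIC_setup() above implement a snapshot-then-mask pass so that no RTE can fire while interrupt remapping is being reconfigured. The same shape in a reduced, compilable form (struct rte and its field widths are simplifications for the sketch, not the hardware layout):

    #include <stdlib.h>
    #include <string.h>

    struct rte { unsigned mask : 1; unsigned vector : 8; };

    /* Snapshot every entry, then mask any that were live. */
    static int save_and_mask(struct rte *live, struct rte **saved, int nr)
    {
        int pin;

        *saved = malloc(nr * sizeof(**saved));
        if (!*saved)
            return -1;
        memcpy(*saved, live, nr * sizeof(**saved));
        for (pin = 0; pin < nr; pin++)
            if (!live[pin].mask)
                live[pin].mask = 1;     /* silence the pin */
        return 0;
    }

    /* Plain restore, as the patch's comment puts it. */
    static void restore(struct rte *live, struct rte *saved, int nr)
    {
        memcpy(live, saved, nr * sizeof(*saved));
        free(saved);
    }

    int main(void)
    {
        struct rte pins[4] = { { .mask = 0, .vector = 0x31 } };
        struct rte *snap;

        if (save_and_mask(pins, &snap, 4) == 0)
            restore(pins, snap, 4);
        return 0;
    }
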
++ */ ++static int find_irq_entry(int apic, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mp_irqtype == type && ++ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || ++ mp_irqs[i].mp_dstapic == MP_APIC_ALL) && ++ mp_irqs[i].mp_dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mp_srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].mp_irqtype == type) && ++ (mp_irqs[i].mp_srcbusirq == irq)) ++ ++ return mp_irqs[i].mp_dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mp_srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].mp_irqtype == type) && ++ (mp_irqs[i].mp_srcbusirq == irq)) ++ break; ++ } ++ if (i < mp_irq_entries) { ++ int apic; ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) ++ return apic; ++ } ++ } ++ ++ return -1; ++} ++#endif ++ ++/* ++ * Find a specific PCI IRQ entry. ++ * Not an __init, possibly needed by modules ++ */ ++static int pin_2_irq(int idx, int apic, int pin); ++ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int apic, i, best_guess = -1; ++ ++ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", ++ bus, slot, pin); ++ if (test_bit(bus, mp_bus_not_pci)) { ++ apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mp_srcbus; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || ++ mp_irqs[i].mp_dstapic == MP_APIC_ALL) ++ break; ++ ++ if (!test_bit(lbus, mp_bus_not_pci) && ++ !mp_irqs[i].mp_irqtype && ++ (bus == lbus) && ++ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { ++ int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); ++ ++ if (!(apic || IO_APIC_IRQ(irq))) ++ continue; ++ ++ if (pin == (mp_irqs[i].mp_srcbusirq & 3)) ++ return irq; ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. ++ */ ++ if (best_guess < 0) ++ best_guess = irq; ++ } ++ } ++ return best_guess; ++} ++ ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++#if defined(CONFIG_EISA) || defined(CONFIG_MCA) ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < 16) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++#endif ++ ++/* ISA interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (0) ++#define default_ISA_polarity(idx) (0) ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. 
If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) ++#define default_EISA_polarity(idx) default_ISA_polarity(idx) ++ ++/* PCI interrupts are always polarity one level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_PCI_trigger(idx) (1) ++#define default_PCI_polarity(idx) (1) ++ ++/* MCA interrupts are always polarity zero level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_MCA_trigger(idx) (1) ++#define default_MCA_polarity(idx) default_ISA_polarity(idx) ++ ++static int MPBIOS_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].mp_srcbus; ++ int polarity; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].mp_irqflag & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent polarity */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ polarity = default_ISA_polarity(idx); ++ else ++ polarity = default_PCI_polarity(idx); ++ break; ++ case 1: /* high active */ ++ { ++ polarity = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ case 3: /* low active */ ++ { ++ polarity = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ return polarity; ++} ++ ++static int MPBIOS_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].mp_srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch ((mp_irqs[idx].mp_irqflag>>2) & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ trigger = default_ISA_trigger(idx); ++ else ++ trigger = default_PCI_trigger(idx); ++#if defined(CONFIG_EISA) || defined(CONFIG_MCA) ++ switch (mp_bus_id_to_type[bus]) { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ /* set before the switch */ ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ trigger = default_EISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ /* set before the switch */ ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ trigger = default_MCA_trigger(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ } ++#endif ++ break; ++ case 1: /* edge */ ++ { ++ trigger = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ case 3: /* level */ ++ { ++ trigger = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 0; ++ break; ++ } ++ } ++ return trigger; ++} ++ ++static inline int irq_polarity(int idx) ++{ ++ return MPBIOS_polarity(idx); ++} ++ ++static inline int irq_trigger(int idx) ++{ ++ return MPBIOS_trigger(idx); ++} ++ ++int (*ioapic_renumber_irq)(int ioapic, int irq); ++static int pin_2_irq(int idx, int apic, int pin) ++{ ++ int irq, i; ++ int bus = mp_irqs[idx].mp_srcbus; ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! 
++ */ ++ if (mp_irqs[idx].mp_dstirq != pin) ++ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++ if (test_bit(bus, mp_bus_not_pci)) { ++ irq = mp_irqs[idx].mp_srcbusirq; ++ } else { ++ /* ++ * PCI IRQs are mapped in order ++ */ ++ i = irq = 0; ++ while (i < apic) ++ irq += nr_ioapic_registers[i++]; ++ irq += pin; ++ /* ++ * For MPS mode, so far only needed by ES7000 platform ++ */ ++ if (ioapic_renumber_irq) ++ irq = ioapic_renumber_irq(apic, irq); ++ } ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ } ++ } ++ } ++#endif ++ ++ return irq; ++} ++ ++#ifndef CONFIG_XEN ++void lock_vector_lock(void) ++{ ++ /* Used to the online set of cpus does not change ++ * during assign_irq_vector. ++ */ ++ spin_lock(&vector_lock); ++} ++ ++void unlock_vector_lock(void) ++{ ++ spin_unlock(&vector_lock); ++} ++#endif ++ ++static int assign_irq_vector(int irq, cpumask_t mask) ++{ ++ struct physdev_irq irq_op; ++ struct irq_cfg *cfg; ++ ++ if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) ++ return -EINVAL; ++ ++ cfg = irq_cfg(irq); ++ ++ if (cfg->vector) ++ return 0; ++ ++ irq_op.irq = irq; ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) ++ return -ENOSPC; ++ ++ cfg->vector = irq_op.vector; ++ ++ return 0; ++} ++ ++#ifndef CONFIG_XEN ++static void __clear_irq_vector(int irq) ++{ ++ struct irq_cfg *cfg; ++ cpumask_t mask; ++ int cpu, vector; ++ ++ cfg = irq_cfg(irq); ++ BUG_ON(!cfg->vector); ++ ++ vector = cfg->vector; ++ cpus_and(mask, cfg->domain, cpu_online_map); ++ for_each_cpu_mask_nr(cpu, mask) ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ ++ cfg->vector = 0; ++ cpus_clear(cfg->domain); ++ ++ if (likely(!cfg->move_in_progress)) ++ return; ++ cpus_and(mask, cfg->old_domain, cpu_online_map); ++ for_each_cpu_mask_nr(cpu, mask) { ++ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; ++ vector++) { ++ if (per_cpu(vector_irq, cpu)[vector] != irq) ++ continue; ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ break; ++ } ++ } ++ cfg->move_in_progress = 0; ++} ++ ++void __setup_vector_irq(int cpu) ++{ ++ /* Initialize vector_irq on a new cpu */ ++ /* This function must be called with vector_lock held */ ++ int irq, vector; ++ struct irq_cfg *cfg; ++ ++ /* Mark the inuse vectors */ ++ for_each_irq_cfg(irq, cfg) { ++ if (!cpu_isset(cpu, cfg->domain)) ++ continue; ++ vector = cfg->vector; ++ per_cpu(vector_irq, cpu)[vector] = irq; ++ } ++ /* Mark the free vectors */ ++ for (vector = 0; vector < NR_VECTORS; ++vector) { ++ irq = per_cpu(vector_irq, cpu)[vector]; ++ if (irq < 0) ++ continue; ++ ++ cfg = irq_cfg(irq); ++ if (!cpu_isset(cpu, cfg->domain)) ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ } ++} ++ ++static struct irq_chip ioapic_chip; ++#ifdef CONFIG_INTR_REMAP ++static struct irq_chip ir_ioapic_chip; ++#endif ++ ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++#ifdef CONFIG_X86_32 ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ int apic, idx, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ idx = find_irq_entry(apic, pin, mp_INT); ++ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) ++ return 
irq_trigger(idx); ++ } ++ } ++ /* ++ * nonexistent IRQs are edge default ++ */ ++ return 0; ++} ++#else ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ return 1; ++} ++#endif ++ ++static void ioapic_register_intr(int irq, unsigned long trigger) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ desc->status |= IRQ_LEVEL; ++ else ++ desc->status &= ~IRQ_LEVEL; ++ ++#ifdef CONFIG_INTR_REMAP ++ if (irq_remapped(irq)) { ++ desc->status |= IRQ_MOVE_PCNTXT; ++ if (trigger) ++ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, ++ handle_fasteoi_irq, ++ "fasteoi"); ++ else ++ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, ++ handle_edge_irq, "edge"); ++ return; ++ } ++#endif ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_fasteoi_irq, ++ "fasteoi"); ++ else ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_edge_irq, "edge"); ++} ++#else /* !CONFIG_XEN */ ++#define __clear_irq_vector(irq) ((void)(irq)) ++#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq) ++#endif ++ ++static int setup_ioapic_entry(int apic, int irq, ++ struct IO_APIC_route_entry *entry, ++ unsigned int destination, int trigger, ++ int polarity, int vector) ++{ ++ /* ++ * add it to the IO-APIC irq-routing table: ++ */ ++ memset(entry,0,sizeof(*entry)); ++ ++#ifdef CONFIG_INTR_REMAP ++ if (intr_remapping_enabled) { ++ struct intel_iommu *iommu = map_ioapic_to_ir(apic); ++ struct irte irte; ++ struct IR_IO_APIC_route_entry *ir_entry = ++ (struct IR_IO_APIC_route_entry *) entry; ++ int index; ++ ++ if (!iommu) ++ panic("No mapping iommu for ioapic %d\n", apic); ++ ++ index = alloc_irte(iommu, irq, 1); ++ if (index < 0) ++ panic("Failed to allocate IRTE for ioapic %d\n", apic); ++ ++ memset(&irte, 0, sizeof(irte)); ++ ++ irte.present = 1; ++ irte.dst_mode = INT_DEST_MODE; ++ irte.trigger_mode = trigger; ++ irte.dlvry_mode = INT_DELIVERY_MODE; ++ irte.vector = vector; ++ irte.dest_id = IRTE_DEST(destination); ++ ++ modify_irte(irq, &irte); ++ ++ ir_entry->index2 = (index >> 15) & 0x1; ++ ir_entry->zero = 0; ++ ir_entry->format = 1; ++ ir_entry->index = (index & 0x7fff); ++ } else ++#endif ++ { ++ entry->delivery_mode = INT_DELIVERY_MODE; ++ entry->dest_mode = INT_DEST_MODE; ++ entry->dest = destination; ++ } ++ ++ entry->mask = 0; /* enable IRQ */ ++ entry->trigger = trigger; ++ entry->polarity = polarity; ++ entry->vector = vector; ++ ++ /* Mask level triggered irqs. ++ * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 
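
setup_ioapic_entry() above fills in the routing entry and then re-masks it whenever the IRQ is level triggered, so the pin stays quiet until a handler is in place. The rule in miniature (field widths are illustrative, not the hardware layout):

    #include <stdio.h>

    struct route_entry {
        unsigned vector : 8;
        unsigned trigger : 1;   /* 0 = edge, 1 = level */
        unsigned polarity : 1;
        unsigned mask : 1;
    };

    static struct route_entry make_entry(int vector, int trigger, int polarity)
    {
        struct route_entry e = { 0 };

        e.vector = vector;
        e.trigger = trigger;
        e.polarity = polarity;
        e.mask = trigger ? 1 : 0;   /* mask level triggered irqs */
        return e;
    }

    int main(void)
    {
        struct route_entry e = make_entry(0x31, 1, 1);

        printf("vector %#x starts %s\n", (unsigned)e.vector,
               e.mask ? "masked" : "unmasked");
        return 0;
    }
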
++ */ ++ if (trigger) ++ entry->mask = 1; ++ return 0; ++} ++ ++static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, ++ int trigger, int polarity) ++{ ++ struct irq_cfg *cfg; ++ struct IO_APIC_route_entry entry; ++ cpumask_t mask; ++ ++ if (!IO_APIC_IRQ(irq)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ ++ mask = TARGET_CPUS; ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++#ifndef CONFIG_XEN ++ cpus_and(mask, cfg->domain, mask); ++#endif ++ ++ apic_printk(APIC_VERBOSE,KERN_DEBUG ++ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " ++ "IRQ %d Mode:%i Active:%i)\n", ++ apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, ++ irq, trigger, polarity); ++ ++ ++ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, ++ cpu_mask_to_apicid(mask), trigger, polarity, ++ cfg->vector)) { ++ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", ++ mp_ioapics[apic].mp_apicid, pin); ++ __clear_irq_vector(irq); ++ return; ++ } ++ ++ ioapic_register_intr(irq, trigger); ++ if (irq < 16) ++ disable_8259A_irq(irq); ++ ++ ioapic_write_entry(apic, pin, entry); ++} ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ int apic, pin, idx, irq; ++ int notcon = 0; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ ++ idx = find_irq_entry(apic, pin, mp_INT); ++ if (idx == -1) { ++ if (!notcon) { ++ notcon = 1; ++ apic_printk(APIC_VERBOSE, ++ KERN_DEBUG " %d-%d", ++ mp_ioapics[apic].mp_apicid, ++ pin); ++ } else ++ apic_printk(APIC_VERBOSE, " %d-%d", ++ mp_ioapics[apic].mp_apicid, ++ pin); ++ continue; ++ } ++ if (notcon) { ++ apic_printk(APIC_VERBOSE, ++ " (apicid-pin) not connected\n"); ++ notcon = 0; ++ } ++ ++ irq = pin_2_irq(idx, apic, pin); ++#if defined(CONFIG_XEN) ++ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) ++ continue; ++#elif defined(CONFIG_X86_32) ++ if (multi_timer_check(apic, irq)) ++ continue; ++#endif ++ add_pin_to_irq(irq, apic, pin); ++ ++ setup_IO_APIC_irq(apic, pin, irq, ++ irq_trigger(idx), irq_polarity(idx)); ++ } ++ } ++ ++ if (notcon) ++ apic_printk(APIC_VERBOSE, ++ " (apicid-pin) not connected\n"); ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * Set up the timer pin, possibly with the 8259A-master behind. ++ */ ++static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ++ int vector) ++{ ++ struct IO_APIC_route_entry entry; ++ ++#ifdef CONFIG_INTR_REMAP ++ if (intr_remapping_enabled) ++ return; ++#endif ++ ++ memset(&entry, 0, sizeof(entry)); ++ ++ /* ++ * We use logical delivery to get the timer IRQ ++ * to the first CPU. ++ */ ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 1; /* mask IRQ now */ ++ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.polarity = 0; ++ entry.trigger = 0; ++ entry.vector = vector; ++ ++ /* ++ * The timer IRQ doesn't have to know that behind the ++ * scene we may have a 8259A-master in AEOI mode ... 
++ */
++ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
++
++ /*
++ * Add it to the IO-APIC irq-routing table:
++ */
++ ioapic_write_entry(apic, pin, entry);
++}
++
++
++__apicdebuginit(void) print_IO_APIC(void)
++{
++ int apic, i;
++ union IO_APIC_reg_00 reg_00;
++ union IO_APIC_reg_01 reg_01;
++ union IO_APIC_reg_02 reg_02;
++ union IO_APIC_reg_03 reg_03;
++ unsigned long flags;
++ struct irq_cfg *cfg;
++ unsigned int irq;
++
++ if (apic_verbosity == APIC_QUIET)
++ return;
++
++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
++ for (i = 0; i < nr_ioapics; i++)
++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
++ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
++
++ /*
++ * We are a bit conservative about what we expect. We have to
++ * know about every hardware change ASAP.
++ */
++ printk(KERN_INFO "testing the IO APIC.......................\n");
++
++ for (apic = 0; apic < nr_ioapics; apic++) {
++
++ spin_lock_irqsave(&ioapic_lock, flags);
++ reg_00.raw = io_apic_read(apic, 0);
++ reg_01.raw = io_apic_read(apic, 1);
++ if (reg_01.bits.version >= 0x10)
++ reg_02.raw = io_apic_read(apic, 2);
++ if (reg_01.bits.version >= 0x20)
++ reg_03.raw = io_apic_read(apic, 3);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++
++ printk("\n");
++ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
++ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
++
++ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
++ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
++
++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
++ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
++
++ /*
++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
++ * but the value of reg_02 is read as the previous read register
++ * value, so ignore it if reg_02 == reg_01.
++ */
++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
++ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
++ }
++
++ /*
++ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
++ * or reg_03, but the value of reg_0[23] is read as the previous read
++ * register value, so ignore it if reg_03 == reg_0[12].
++ */
++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
++ reg_03.raw != reg_01.raw) {
++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
++ }
++
++ printk(KERN_DEBUG "....
IRQ redirection table:\n"); ++ ++ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" ++ " Stat Dmod Deli Vect: \n"); ++ ++ for (i = 0; i <= reg_01.bits.entries; i++) { ++ struct IO_APIC_route_entry entry; ++ ++ entry = ioapic_read_entry(apic, i); ++ ++ printk(KERN_DEBUG " %02x %03X ", ++ i, ++ entry.dest ++ ); ++ ++ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", ++ entry.mask, ++ entry.trigger, ++ entry.irr, ++ entry.polarity, ++ entry.delivery_status, ++ entry.dest_mode, ++ entry.delivery_mode, ++ entry.vector ++ ); ++ } ++ } ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for_each_irq_cfg(irq, cfg) { ++ struct irq_pin_list *entry = cfg->irq_2_pin; ++ if (!entry) ++ continue; ++ printk(KERN_DEBUG "IRQ%d ", irq); ++ for (;;) { ++ printk("-> %d:%d", entry->apic, entry->pin); ++ if (!entry->next) ++ break; ++ entry = entry->next; ++ } ++ printk("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... done.\n"); ++ ++ return; ++} ++ ++__apicdebuginit(void) print_APIC_bitfield(int base) ++{ ++ unsigned int v; ++ int i, j; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); ++ for (i = 0; i < 8; i++) { ++ v = apic_read(base + i*0x10); ++ for (j = 0; j < 32; j++) { ++ if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ ++ v = apic_read(APIC_ESR); ++ printk(KERN_DEBUG "... APIC ESR: %08x\n", v); ++ } ++ ++ icr = apic_icr_read(); ++ printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); ++ printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); ++ ++ v = apic_read(APIC_LVTT); ++ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); ++ printk("\n"); ++} ++ ++__apicdebuginit(void) print_all_local_APICs(void) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ for_each_online_cpu(cpu) ++ smp_call_function_single(cpu, print_local_APIC, NULL, 1); ++ preempt_enable(); ++} ++ ++__apicdebuginit(void) print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "\nprinting PIC contents\n"); ++ ++ spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ printk(KERN_DEBUG "... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ printk(KERN_DEBUG "... PIC IRR: %04x\n", v); ++ ++ outb(0x0b,0xa0); ++ outb(0x0b,0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a,0xa0); ++ outb(0x0a,0x20); ++ ++ spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ printk(KERN_DEBUG "... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); ++} ++ ++__apicdebuginit(int) print_all_ICs(void) ++{ ++ print_PIC(); ++ print_all_local_APICs(); ++ print_IO_APIC(); ++ ++ return 0; ++} ++ ++fs_initcall(print_all_ICs); ++ ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++#endif /* CONFIG_XEN */ ++ ++void __init enable_IO_APIC(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++#ifndef CONFIG_XEN ++ int i8259_apic, i8259_pin; ++#endif ++ int apic; ++ unsigned long flags; ++ ++#ifdef CONFIG_X86_32 ++ int i; ++ if (!pirqs_enabled) ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++#endif ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++#ifndef CONFIG_XEN ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ int pin; ++ /* See if any of the pins is in ExtINT mode */ ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ entry = ioapic_read_entry(apic, pin); ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. ++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++#endif ++} ++ ++#ifdef CONFIG_XEN ++#define disable_IO_APIC() ((void)0) ++#else ++/* ++ * Not an __init, needed by the reboot code ++ */ ++void disable_IO_APIC(void) ++{ ++ /* ++ * Clear the IO-APIC before rebooting: ++ */ ++ clear_IO_APIC(); ++ ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. 
++ */ ++ if (ioapic_i8259.pin != -1) { ++ struct IO_APIC_route_entry entry; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 0; /* Enabled */ ++ entry.trigger = 0; /* Edge */ ++ entry.irr = 0; ++ entry.polarity = 0; /* High */ ++ entry.delivery_status = 0; ++ entry.dest_mode = 0; /* Physical */ ++ entry.delivery_mode = dest_ExtINT; /* ExtInt */ ++ entry.vector = 0; ++ entry.dest = read_apic_id(); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); ++ } ++ ++ disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++ ++static void __init setup_ioapic_ids_from_mpc(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int apic; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) ++ return; ++ ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. ++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ return; ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ /* Read the register 0 value */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mp_ioapics[apic].mp_apicid; ++ ++ if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ apic, mp_ioapics[apic].mp_apicid); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ mp_ioapics[apic].mp_apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(phys_id_present_map, ++ mp_ioapics[apic].mp_apicid)) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ apic, mp_ioapics[apic].mp_apicid); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ mp_ioapics[apic].mp_apicid = i; ++ } else { ++ physid_mask_t tmp; ++ tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mp_ioapics[apic].mp_apicid); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. ++ */ ++ if (old_id != mp_ioapics[apic].mp_apicid) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mp_dstapic == old_id) ++ mp_irqs[i].mp_dstapic ++ = mp_ioapics[apic].mp_apicid; ++ ++ /* ++ * Read the right value from the MPC table and ++ * write it into the ID register. 
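The duplicate-ID fixup above is a first-fit scan over the map of claimed physical APIC ids: walk the ids below the broadcast value, take the first one not yet set, panic if none is free. A minimal userspace sketch of the same scan, assuming a toy 32-bit map and the hypothetical helper name first_free_id:

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the physid map: one bit per physical APIC id. */
static int first_free_id(uint32_t used, unsigned int max_ids)
{
	unsigned int i;

	for (i = 0; i < max_ids; i++)
		if (!(used & (1u << i)))
			return i;
	return -1;	/* mirrors the "Max APIC ID exceeded" panic */
}

int main(void)
{
	/* ids 0, 1 and 3 already claimed */
	printf("%d\n", first_free_id(0x0000000b, 16));	/* prints 2 */
	return 0;
}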
++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mp_ioapics[apic].mp_apicid); ++ ++ reg_00.bits.ID = mp_ioapics[apic].mp_apicid; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0, reg_00.raw); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) ++ printk("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++#endif ++ ++int no_timer_check __initdata; ++ ++static int __init notimercheck(char *s) ++{ ++ no_timer_check = 1; ++ return 1; ++} ++__setup("no_timer_check", notimercheck); ++ ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ unsigned long flags; ++ ++ if (no_timer_check) ++ return 1; ++ ++ local_save_flags(flags); ++ local_irq_enable(); ++ /* Let ten ticks pass... */ ++ mdelay((10 * 1000) / HZ); ++ local_irq_restore(flags); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ ++ /* jiffies wrap? */ ++ if (time_after(jiffies, t1 + 4)) ++ return 1; ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. ++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... 
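timer_irq_works() above sleeps for roughly ten ticks and then requires that more than four have elapsed, using time_after() so the test also holds across a jiffies wraparound. A standalone sketch of that comparison, assuming the standard signed-subtraction definition from include/linux/jiffies.h:

#include <stdio.h>

/* Wrap-safe "a is after b", as defined in include/linux/jiffies.h. */
#define time_after(a, b)	((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long t1 = (unsigned long)-10;	/* jiffies shortly before wrap */
	unsigned long now = 2;			/* twelve ticks later, after wrap */

	/* The naive "now > t1 + 4" is false across the wrap... */
	printf("naive: %d\n", now > t1 + 4);			/* prints 0 */
	/* ...while the signed-difference form stays correct. */
	printf("time_after: %d\n", time_after(now, t1 + 4));	/* prints 1 */
	return 0;
}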
++ */ ++ ++static unsigned int startup_ioapic_irq(unsigned int irq) ++{ ++ int was_pending = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < 16) { ++ disable_8259A_irq(irq); ++ if (i8259A_irq_pending(irq)) ++ was_pending = 1; ++ } ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++#ifdef CONFIG_X86_64 ++static int ioapic_retrigger_irq(unsigned int irq) ++{ ++ ++ struct irq_cfg *cfg = irq_cfg(irq); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return 1; ++} ++#else ++static int ioapic_retrigger_irq(unsigned int irq) ++{ ++ send_IPI_self(irq_cfg(irq)->vector); ++ ++ return 1; ++} ++#endif ++ ++/* ++ * Level and edge triggered IO-APIC interrupts need different handling, ++ * so we use two separate IRQ descriptors. Edge triggered IRQs can be ++ * handled with the level-triggered descriptor, but that one has slightly ++ * more overhead. Level-triggered interrupts cannot be handled with the ++ * edge-triggered handler, without risking IRQ storms and other ugly ++ * races. ++ */ ++ ++#ifdef CONFIG_SMP ++ ++#ifdef CONFIG_INTR_REMAP ++static void ir_irq_migration(struct work_struct *work); ++ ++static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); ++ ++/* ++ * Migrate the IO-APIC irq in the presence of intr-remapping. ++ * ++ * For edge triggered, irq migration is a simple atomic update(of vector ++ * and cpu destination) of IRTE and flush the hardware cache. ++ * ++ * For level triggered, we need to modify the io-apic RTE aswell with the update ++ * vector information, along with modifying IRTE with vector and destination. ++ * So irq migration for level triggered is little bit more complex compared to ++ * edge triggered migration. But the good news is, we use the same algorithm ++ * for level triggered migration as we have today, only difference being, ++ * we now initiate the irq migration from process context instead of the ++ * interrupt context. ++ * ++ * In future, when we do a directed EOI (combined with cpu EOI broadcast ++ * suppression) to the IO-APIC, level triggered irq migration will also be ++ * as simple as edge triggered migration and we can do the irq migration ++ * with a simple atomic update to IO-APIC RTE. ++ */ ++static void migrate_ioapic_irq(int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ cpumask_t tmp, cleanup_mask; ++ struct irte irte; ++ int modify_ioapic_rte; ++ unsigned int dest; ++ unsigned long flags; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ if (get_irte(irq, &irte)) ++ return; ++ ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ desc = irq_to_desc(irq); ++ modify_ioapic_rte = desc->status & IRQ_LEVEL; ++ if (modify_ioapic_rte) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __target_IO_APIC_irq(irq, dest, cfg->vector); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ ++ irte.vector = cfg->vector; ++ irte.dest_id = IRTE_DEST(dest); ++ ++ /* ++ * Modified the IRTE and flushes the Interrupt entry cache. 
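Every migration path in this hunk opens with the same guard: intersect the requested mask with the online CPUs, bail out if the intersection is empty, reassign the vector, and only then derive the destination APIC id from the resulting domain. A sketch of that pattern under the old cpumask_t API used here; clamp_to_online is a hypothetical name and the error values are illustrative, kernel context assumed:

/* Sketch of the recurring affinity-clamp guard (kernel context assumed). */
static int clamp_to_online(unsigned int irq, cpumask_t mask)
{
	cpumask_t tmp;

	cpus_and(tmp, mask, cpu_online_map);	/* drop offline CPUs */
	if (cpus_empty(tmp))
		return -EINVAL;			/* nothing left to target */

	if (assign_irq_vector(irq, mask))	/* may pick a new vector */
		return -EBUSY;

	cpus_and(tmp, irq_cfg(irq)->domain, mask);
	return cpu_mask_to_apicid(tmp);		/* program this as the dest */
}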
++ */ ++ modify_irte(irq, &irte); ++ ++ if (cfg->move_in_progress) { ++ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); ++ cfg->move_cleanup_count = cpus_weight(cleanup_mask); ++ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ cfg->move_in_progress = 0; ++ } ++ ++ desc->affinity = mask; ++} ++ ++static int migrate_irq_remapped_level(int irq) ++{ ++ int ret = -1; ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ mask_IO_APIC_irq(irq); ++ ++ if (io_apic_level_ack_pending(irq)) { ++ /* ++ * Interrupt in progress. Migrating irq now will change the ++ * vector information in the IO-APIC RTE and that will confuse ++ * the EOI broadcast performed by cpu. ++ * So, delay the irq migration to the next instance. ++ */ ++ schedule_delayed_work(&ir_migration_work, 1); ++ goto unmask; ++ } ++ ++ /* everthing is clear. we have right of way */ ++ migrate_ioapic_irq(irq, desc->pending_mask); ++ ++ ret = 0; ++ desc->status &= ~IRQ_MOVE_PENDING; ++ cpus_clear(desc->pending_mask); ++ ++unmask: ++ unmask_IO_APIC_irq(irq); ++ return ret; ++} ++ ++static void ir_irq_migration(struct work_struct *work) ++{ ++ unsigned int irq; ++ struct irq_desc *desc; ++ ++ for_each_irq_desc(irq, desc) { ++ if (desc->status & IRQ_MOVE_PENDING) { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&desc->lock, flags); ++ if (!desc->chip->set_affinity || ++ !(desc->status & IRQ_MOVE_PENDING)) { ++ desc->status &= ~IRQ_MOVE_PENDING; ++ spin_unlock_irqrestore(&desc->lock, flags); ++ continue; ++ } ++ ++ desc->chip->set_affinity(irq, desc->pending_mask); ++ spin_unlock_irqrestore(&desc->lock, flags); ++ } ++ } ++} ++ ++/* ++ * Migrates the IRQ destination in the process context. ++ */ ++static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc->status & IRQ_LEVEL) { ++ desc->status |= IRQ_MOVE_PENDING; ++ desc->pending_mask = mask; ++ migrate_irq_remapped_level(irq); ++ return; ++ } ++ ++ migrate_ioapic_irq(irq, mask); ++} ++#endif ++ ++asmlinkage void smp_irq_move_cleanup_interrupt(void) ++{ ++ unsigned vector, me; ++ ack_APIC_irq(); ++#ifdef CONFIG_X86_64 ++ exit_idle(); ++#endif ++ irq_enter(); ++ ++ me = smp_processor_id(); ++ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { ++ unsigned int irq; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ irq = __get_cpu_var(vector_irq)[vector]; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) ++ continue; ++ ++ cfg = irq_cfg(irq); ++ spin_lock(&desc->lock); ++ if (!cfg->move_cleanup_count) ++ goto unlock; ++ ++ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) ++ goto unlock; ++ ++ __get_cpu_var(vector_irq)[vector] = -1; ++ cfg->move_cleanup_count--; ++unlock: ++ spin_unlock(&desc->lock); ++ } ++ ++ irq_exit(); ++} ++ ++static void irq_complete_move(unsigned int irq) ++{ ++ struct irq_cfg *cfg = irq_cfg(irq); ++ unsigned vector, me; ++ ++ if (likely(!cfg->move_in_progress)) ++ return; ++ ++ vector = ~get_irq_regs()->orig_ax; ++ me = smp_processor_id(); ++ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { ++ cpumask_t cleanup_mask; ++ ++ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); ++ cfg->move_cleanup_count = cpus_weight(cleanup_mask); ++ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ cfg->move_in_progress = 0; ++ } ++} ++#else ++static inline void irq_complete_move(unsigned int irq) {} ++#endif ++#ifdef CONFIG_INTR_REMAP ++static void ack_x2apic_level(unsigned int irq) ++{ ++ ack_x2APIC_irq(); ++} ++ ++static void 
ack_x2apic_edge(unsigned int irq) ++{ ++ ack_x2APIC_irq(); ++} ++#endif ++ ++static void ack_apic_edge(unsigned int irq) ++{ ++ irq_complete_move(irq); ++ move_native_irq(irq); ++ ack_APIC_irq(); ++} ++ ++atomic_t irq_mis_count; ++ ++static void ack_apic_level(unsigned int irq) ++{ ++#ifdef CONFIG_X86_32 ++ unsigned long v; ++ int i; ++#endif ++ int do_unmask_irq = 0; ++ ++ irq_complete_move(irq); ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ /* If we are moving the irq we need to mask it */ ++ if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { ++ do_unmask_irq = 1; ++ mask_IO_APIC_irq(irq); ++ } ++#endif ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. --macro ++ */ ++ i = irq_cfg(irq)->vector; ++ ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++#endif ++ ++ /* ++ * We must acknowledge the irq before we move it or the acknowledge will ++ * not propagate properly. ++ */ ++ ack_APIC_irq(); ++ ++ /* Now we can move and renable the irq */ ++ if (unlikely(do_unmask_irq)) { ++ /* Only migrate the irq if the ack has been received. ++ * ++ * On rare occasions the broadcast level triggered ack gets ++ * delayed going to ioapics, and if we reprogram the ++ * vector while Remote IRR is still set the irq will never ++ * fire again. ++ * ++ * To prevent this scenario we read the Remote IRR bit ++ * of the ioapic. This has two effects. ++ * - On any sane system the read of the ioapic will ++ * flush writes (and acks) going to the ioapic from ++ * this cpu. ++ * - We get to see if the ACK has actually been delivered. ++ * ++ * Based on failed experiments of reprogramming the ++ * ioapic entry from outside of irq context starting ++ * with masking the ioapic entry and then polling until ++ * Remote IRR was clear before reprogramming the ++ * ioapic I don't trust the Remote IRR bit to be ++ * completey accurate. ++ * ++ * However there appears to be no other way to plug ++ * this race, so if the Remote IRR bit is not ++ * accurate and is causing problems then it is a hardware bug ++ * and you can go talk to the chipset vendor about it. 
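The TMR probe above relies on the local APIC exposing its 256 vector bits as eight 32-bit registers spaced 0x10 apart, so vector i lives at register offset (i & ~0x1f) >> 1 (that is, 0x10 * (i / 32)), bit i & 0x1f. A standalone check of that identity:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int i;

	for (i = 0; i < 256; i++)
		assert(((i & ~0x1f) >> 1) == (i / 32) * 0x10);

	/* e.g. vector 0x61 -> register offset 0x30, bit 1 */
	printf("0x%x bit %u\n", (0x61 & ~0x1f) >> 1, 0x61 & 0x1f);
	return 0;
}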
++ */ ++ if (!io_apic_level_ack_pending(irq)) ++ move_masked_irq(irq); ++ unmask_IO_APIC_irq(irq); ++ } ++ ++#ifdef CONFIG_X86_32 ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ spin_lock(&ioapic_lock); ++ __mask_and_edge_IO_APIC_irq(irq); ++ __unmask_and_level_IO_APIC_irq(irq); ++ spin_unlock(&ioapic_lock); ++ } ++#endif ++} ++ ++static struct irq_chip ioapic_chip __read_mostly = { ++ .name = "IO-APIC", ++ .startup = startup_ioapic_irq, ++ .mask = mask_IO_APIC_irq, ++ .unmask = unmask_IO_APIC_irq, ++ .ack = ack_apic_edge, ++ .eoi = ack_apic_level, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity_irq, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++#ifdef CONFIG_INTR_REMAP ++static struct irq_chip ir_ioapic_chip __read_mostly = { ++ .name = "IR-IO-APIC", ++ .startup = startup_ioapic_irq, ++ .mask = mask_IO_APIC_irq, ++ .unmask = unmask_IO_APIC_irq, ++ .ack = ack_x2apic_edge, ++ .eoi = ack_x2apic_level, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ir_ioapic_affinity_irq, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++#endif ++#endif /* CONFIG_XEN */ ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ int irq; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. ;) ++ */ ++ for_each_irq_cfg(irq, cfg) { ++#ifdef CONFIG_XEN ++ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) ++ continue; ++#endif ++ if (IO_APIC_IRQ(irq) && !cfg->vector) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < 16) ++ make_8259A_irq(irq); ++ else { ++ desc = irq_to_desc(irq); ++ /* Strange. Oh, well.. */ ++ desc->chip = &no_irq_chip; ++ } ++ } ++ } ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * The local APIC irq-chip implementation: ++ */ ++ ++static void mask_lapic_irq(unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void unmask_lapic_irq(unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq (unsigned int irq) ++{ ++ ack_APIC_irq(); ++} ++ ++static struct irq_chip lapic_chip __read_mostly = { ++ .name = "local-APIC", ++ .mask = mask_lapic_irq, ++ .unmask = unmask_lapic_irq, ++ .ack = ack_lapic_irq, ++}; ++ ++static void lapic_register_intr(int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ desc->status &= ~IRQ_LEVEL; ++ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, ++ "edge"); ++} ++ ++static void __init setup_nmi(void) ++{ ++ /* ++ * Dirty trick to enable the NMI watchdog ... ++ * We put the 8259A master into AEOI mode and ++ * unmask on all local APICs LVT0 as NMI. ++ * ++ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') ++ * is from Maciej W. Rozycki - so we do not have to EOI from ++ * the NMI handler or the timer interrupt. 
++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); ++ ++ enable_NMI_through_LVT0(); ++ ++ apic_printk(APIC_VERBOSE, " done.\n"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. --macro ++ */ ++static inline void __init unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ if (pin == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (apic == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ ++ entry0 = ioapic_read_entry(apic, pin); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = 0; /* physical delivery */ ++ entry1.mask = 0; /* unmask IRQ now */ ++ entry1.dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = 0; ++ entry1.vector = 0; ++ ++ ioapic_write_entry(apic, pin, entry1); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ ioapic_write_entry(apic, pin, entry0); ++} ++ ++static int disable_timer_pin_1 __initdata; ++/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ ++static int __init disable_timer_pin_setup(char *arg) ++{ ++ disable_timer_pin_1 = 1; ++ return 0; ++} ++early_param("disable_timer_pin_1", disable_timer_pin_setup); ++ ++int timer_through_8259 __initdata; ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ ++ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ * ++ * FIXME: really need to revamp this for all platforms. ++ */ ++static inline void __init check_timer(void) ++{ ++ struct irq_cfg *cfg = irq_cfg(0); ++ int apic1, pin1, apic2, pin2; ++ unsigned long flags; ++ unsigned int ver; ++ int no_pin1 = 0; ++ ++ local_irq_save(flags); ++ ++ ver = apic_read(APIC_LVR); ++ ver = GET_APIC_VERSION(ver); ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ disable_8259A_irq(0); ++ assign_irq_vector(0, TARGET_CPUS); ++ ++ /* ++ * As IRQ0 is to be enabled in the 8259A, the virtual ++ * wire has to be disabled in the local APIC. Also ++ * timer interrupts need to be acknowledged manually in ++ * the 8259A for the i82489DX when using the NMI ++ * watchdog as that APIC treats NMIs as level-triggered. ++ * The AEOI mode will finish them in the 8259A ++ * automatically. 
++ */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ init_8259A(1); ++#ifdef CONFIG_X86_32 ++ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); ++#endif ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " ++ "apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ cfg->vector, apic1, pin1, apic2, pin2); ++ ++ /* ++ * Some BIOS writers are clueless and report the ExtINTA ++ * I/O APIC input from the cascaded 8259A as the timer ++ * interrupt input. So just in case, if only one pin ++ * was found above, try it both directly and through the ++ * 8259A. ++ */ ++ if (pin1 == -1) { ++#ifdef CONFIG_INTR_REMAP ++ if (intr_remapping_enabled) ++ panic("BIOS bug: timer not connected to IO-APIC"); ++#endif ++ pin1 = pin2; ++ apic1 = apic2; ++ no_pin1 = 1; ++ } else if (pin2 == -1) { ++ pin2 = pin1; ++ apic2 = apic1; ++ } ++ ++ if (pin1 != -1) { ++ /* ++ * Ok, does IRQ0 through the IOAPIC work? ++ */ ++ if (no_pin1) { ++ add_pin_to_irq(0, apic1, pin1); ++ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); ++ } ++ unmask_IO_APIC_irq(0); ++ if (timer_irq_works()) { ++ if (nmi_watchdog == NMI_IO_APIC) { ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ goto out; ++ } ++#ifdef CONFIG_INTR_REMAP ++ if (intr_remapping_enabled) ++ panic("timer doesn't work through Interrupt-remapped IO-APIC"); ++#endif ++ clear_IO_APIC_pin(apic1, pin1); ++ if (!no_pin1) ++ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " ++ "8254 timer not connected to IO-APIC\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " ++ "(IRQ0) through the 8259A ...\n"); ++ apic_printk(APIC_QUIET, KERN_INFO ++ "..... (found apic %d pin %d) ...\n", apic2, pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ replace_pin_at_irq(0, apic1, pin1, apic2, pin2); ++ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); ++ unmask_IO_APIC_irq(0); ++ enable_8259A_irq(0); ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); ++ timer_through_8259 = 1; ++ if (nmi_watchdog == NMI_IO_APIC) { ++ disable_8259A_irq(0); ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ goto out; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ disable_8259A_irq(0); ++ clear_IO_APIC_pin(apic2, pin2); ++ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); ++ } ++ ++ if (nmi_watchdog == NMI_IO_APIC) { ++ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " ++ "through the IO-APIC - disabling NMI Watchdog!\n"); ++ nmi_watchdog = NMI_NONE; ++ } ++#ifdef CONFIG_X86_32 ++ timer_ack = 0; ++#endif ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as Virtual Wire IRQ...\n"); ++ ++ lapic_register_intr(0); ++ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ ++ enable_8259A_irq(0); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); ++ goto out; ++ } ++ disable_8259A_irq(0); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as ExtINT IRQ...\n"); ++ ++ init_8259A(0); ++ make_8259A_irq(0); ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... 
works.\n"); ++ goto out; ++ } ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option.\n"); ++out: ++ local_irq_restore(flags); ++} ++#else ++#define check_timer() ((void)0) ++#endif ++ ++/* ++ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available ++ * to devices. However there may be an I/O APIC pin available for ++ * this interrupt regardless. The pin may be left unconnected, but ++ * typically it will be reused as an ExtINT cascade interrupt for ++ * the master 8259A. In the MPS case such a pin will normally be ++ * reported as an ExtINT interrupt in the MP table. With ACPI ++ * there is no provision for ExtINT interrupts, and in the absence ++ * of an override it would be treated as an ordinary ISA I/O APIC ++ * interrupt, that is edge-triggered and unmasked by default. We ++ * used to do this, but it caused problems on some systems because ++ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using ++ * the same ExtINT cascade interrupt to drive the local APIC of the ++ * bootstrap processor. Therefore we refrain from routing IRQ2 to ++ * the I/O APIC in all cases now. No actual device should request ++ * it anyway. --macro ++ */ ++#define PIC_IRQS (1 << PIC_CASCADE_IR) ++ ++void __init setup_IO_APIC(void) ++{ ++ ++#if defined(CONFIG_X86_32) || defined(CONFIG_XEN) ++ enable_IO_APIC(); ++#else ++ /* ++ * calling enable_IO_APIC() is moved to setup_local_APIC for BP ++ */ ++#endif ++ ++ io_apic_irqs = ~PIC_IRQS; ++ ++ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++#ifndef CONFIG_XEN ++#ifdef CONFIG_X86_32 ++ if (!acpi_ioapic) ++ setup_ioapic_ids_from_mpc(); ++#endif ++ sync_Arb_IDs(); ++#endif ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ check_timer(); ++} ++ ++/* ++ * Called after all the initialization is done. If we didnt find any ++ * APIC bugs then we can allow the modify fast path ++ */ ++ ++static int __init io_apic_bug_finalize(void) ++{ ++ if (sis_apic_bug == -1) ++ sis_apic_bug = 0; ++#ifdef CONFIG_XEN ++ if (is_initial_xendomain()) { ++ struct xen_platform_op op = { .cmd = XENPF_platform_quirk }; ++ op.u.platform_quirk.quirk_id = sis_apic_bug ? 
++ QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL; ++ VOID(HYPERVISOR_platform_op(&op)); ++ } ++#endif ++ return 0; ++} ++ ++late_initcall(io_apic_bug_finalize); ++ ++#ifndef CONFIG_XEN ++struct sysfs_ioapic_data { ++ struct sys_device dev; ++ struct IO_APIC_route_entry entry[0]; ++}; ++static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; ++ ++static int ioapic_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) ++ *entry = ioapic_read_entry(dev->id, i); ++ ++ return 0; ++} ++ ++static int ioapic_resume(struct sys_device *dev) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(dev->id, 0); ++ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { ++ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; ++ io_apic_write(dev->id, 0, reg_00.raw); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ++ ioapic_write_entry(dev->id, i, entry[i]); ++ ++ return 0; ++} ++ ++static struct sysdev_class ioapic_sysdev_class = { ++ .name = "ioapic", ++ .suspend = ioapic_suspend, ++ .resume = ioapic_resume, ++}; ++ ++static int __init ioapic_init_sysfs(void) ++{ ++ struct sys_device * dev; ++ int i, size, error; ++ ++ error = sysdev_class_register(&ioapic_sysdev_class); ++ if (error) ++ return error; ++ ++ for (i = 0; i < nr_ioapics; i++ ) { ++ size = sizeof(struct sys_device) + nr_ioapic_registers[i] ++ * sizeof(struct IO_APIC_route_entry); ++ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); ++ if (!mp_ioapic_data[i]) { ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ dev = &mp_ioapic_data[i]->dev; ++ dev->id = i; ++ dev->cls = &ioapic_sysdev_class; ++ error = sysdev_register(dev); ++ if (error) { ++ kfree(mp_ioapic_data[i]); ++ mp_ioapic_data[i] = NULL; ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_sysfs); ++ ++/* ++ * Dynamic irq allocate and deallocation ++ */ ++unsigned int create_irq_nr(unsigned int irq_want) ++{ ++ /* Allocate an unused irq */ ++ unsigned int irq; ++ unsigned int new; ++ unsigned long flags; ++ struct irq_cfg *cfg_new; ++ ++ irq_want = nr_irqs - 1; ++ ++ irq = 0; ++ spin_lock_irqsave(&vector_lock, flags); ++ for (new = irq_want; new > 0; new--) { ++ if (platform_legacy_irq(new)) ++ continue; ++ cfg_new = irq_cfg(new); ++ if (cfg_new && cfg_new->vector != 0) ++ continue; ++ /* check if need to create one */ ++ if (!cfg_new) ++ cfg_new = irq_cfg_alloc(new); ++ if (__assign_irq_vector(new, TARGET_CPUS) == 0) ++ irq = new; ++ break; ++ } ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ if (irq > 0) { ++ dynamic_irq_init(irq); ++ } ++ return irq; ++} ++ ++int create_irq(void) ++{ ++ int irq; ++ ++ irq = create_irq_nr(nr_irqs - 1); ++ ++ if (irq == 0) ++ irq = -1; ++ ++ return irq; ++} ++ ++void destroy_irq(unsigned int irq) ++{ ++ unsigned long flags; ++ ++ dynamic_irq_cleanup(irq); ++ ++#ifdef CONFIG_INTR_REMAP ++ free_irte(irq); ++#endif ++ spin_lock_irqsave(&vector_lock, flags); ++ __clear_irq_vector(irq); ++ 
spin_unlock_irqrestore(&vector_lock, flags); ++} ++#endif /* CONFIG_XEN */ ++ ++/* ++ * MSI message composition ++ */ ++#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) ++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) ++{ ++ struct irq_cfg *cfg; ++ int err; ++ unsigned dest; ++ cpumask_t tmp; ++ ++ tmp = TARGET_CPUS; ++ err = assign_irq_vector(irq, tmp); ++ if (err) ++ return err; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, tmp); ++ dest = cpu_mask_to_apicid(tmp); ++ ++#ifdef CONFIG_INTR_REMAP ++ if (irq_remapped(irq)) { ++ struct irte irte; ++ int ir_index; ++ u16 sub_handle; ++ ++ ir_index = map_irq_to_irte_handle(irq, &sub_handle); ++ BUG_ON(ir_index == -1); ++ ++ memset (&irte, 0, sizeof(irte)); ++ ++ irte.present = 1; ++ irte.dst_mode = INT_DEST_MODE; ++ irte.trigger_mode = 0; /* edge */ ++ irte.dlvry_mode = INT_DELIVERY_MODE; ++ irte.vector = cfg->vector; ++ irte.dest_id = IRTE_DEST(dest); ++ ++ modify_irte(irq, &irte); ++ ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ msg->data = sub_handle; ++ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | ++ MSI_ADDR_IR_SHV | ++ MSI_ADDR_IR_INDEX1(ir_index) | ++ MSI_ADDR_IR_INDEX2(ir_index); ++ } else ++#endif ++ { ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ msg->address_lo = ++ MSI_ADDR_BASE_LO | ++ ((INT_DEST_MODE == 0) ? ++ MSI_ADDR_DEST_MODE_PHYSICAL: ++ MSI_ADDR_DEST_MODE_LOGICAL) | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ MSI_ADDR_REDIRECTION_CPU: ++ MSI_ADDR_REDIRECTION_LOWPRI) | ++ MSI_ADDR_DEST_ID(dest); ++ ++ msg->data = ++ MSI_DATA_TRIGGER_EDGE | ++ MSI_DATA_LEVEL_ASSERT | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ MSI_DATA_DELIVERY_FIXED: ++ MSI_DATA_DELIVERY_LOWPRI) | ++ MSI_DATA_VECTOR(cfg->vector); ++ } ++ return err; ++} ++ ++#ifdef CONFIG_SMP ++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ struct msi_msg msg; ++ unsigned int dest; ++ cpumask_t tmp; ++ struct irq_desc *desc; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ read_msi_msg(irq, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ write_msi_msg(irq, &msg); ++ desc = irq_to_desc(irq); ++ desc->affinity = mask; ++} ++ ++#ifdef CONFIG_INTR_REMAP ++/* ++ * Migrate the MSI irq to another cpumask. This migration is ++ * done in the process context using interrupt-remapping hardware. ++ */ ++static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ unsigned int dest; ++ cpumask_t tmp, cleanup_mask; ++ struct irte irte; ++ struct irq_desc *desc; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ if (get_irte(irq, &irte)) ++ return; ++ ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ irte.vector = cfg->vector; ++ irte.dest_id = IRTE_DEST(dest); ++ ++ /* ++ * atomically update the IRTE with the new destination and vector. ++ */ ++ modify_irte(irq, &irte); ++ ++ /* ++ * After this point, all the interrupts will start arriving ++ * at the new destination. So, time to cleanup the previous ++ * vector allocation. 
++ */ ++ if (cfg->move_in_progress) { ++ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); ++ cfg->move_cleanup_count = cpus_weight(cleanup_mask); ++ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ cfg->move_in_progress = 0; ++ } ++ ++ desc = irq_to_desc(irq); ++ desc->affinity = mask; ++} ++#endif ++#endif /* CONFIG_SMP */ ++ ++/* ++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, ++ * which implement the MSI or MSI-X Capability Structure. ++ */ ++static struct irq_chip msi_chip = { ++ .name = "PCI-MSI", ++ .unmask = unmask_msi_irq, ++ .mask = mask_msi_irq, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = set_msi_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++#ifdef CONFIG_INTR_REMAP ++static struct irq_chip msi_ir_chip = { ++ .name = "IR-PCI-MSI", ++ .unmask = unmask_msi_irq, ++ .mask = mask_msi_irq, ++ .ack = ack_x2apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = ir_set_msi_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++/* ++ * Map the PCI dev to the corresponding remapping hardware unit ++ * and allocate 'nvec' consecutive interrupt-remapping table entries ++ * in it. ++ */ ++static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) ++{ ++ struct intel_iommu *iommu; ++ int index; ++ ++ iommu = map_dev_to_ir(dev); ++ if (!iommu) { ++ printk(KERN_ERR ++ "Unable to map PCI %s to iommu\n", pci_name(dev)); ++ return -ENOENT; ++ } ++ ++ index = alloc_irte(iommu, irq, nvec); ++ if (index < 0) { ++ printk(KERN_ERR ++ "Unable to allocate %d IRTE for PCI %s\n", nvec, ++ pci_name(dev)); ++ return -ENOSPC; ++ } ++ return index; ++} ++#endif ++ ++static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) ++{ ++ int ret; ++ struct msi_msg msg; ++ ++ ret = msi_compose_msg(dev, irq, &msg); ++ if (ret < 0) ++ return ret; ++ ++ set_irq_msi(irq, desc); ++ write_msi_msg(irq, &msg); ++ ++#ifdef CONFIG_INTR_REMAP ++ if (irq_remapped(irq)) { ++ struct irq_desc *desc = irq_to_desc(irq); ++ /* ++ * irq migration in process context ++ */ ++ desc->status |= IRQ_MOVE_PCNTXT; ++ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); ++ } else ++#endif ++ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); ++ ++ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); ++ ++ return 0; ++} ++ ++static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) ++{ ++ unsigned int irq; ++ ++ irq = dev->bus->number; ++ irq <<= 8; ++ irq |= dev->devfn; ++ irq <<= 12; ++ ++ return irq; ++} ++ ++int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) ++{ ++ unsigned int irq; ++ int ret; ++ unsigned int irq_want; ++ ++ irq_want = build_irq_for_pci_dev(dev) + 0x100; ++ ++ irq = create_irq_nr(irq_want); ++ if (irq == 0) ++ return -1; ++ ++#ifdef CONFIG_INTR_REMAP ++ if (!intr_remapping_enabled) ++ goto no_ir; ++ ++ ret = msi_alloc_irte(dev, irq, 1); ++ if (ret < 0) ++ goto error; ++no_ir: ++#endif ++ ret = setup_msi_irq(dev, desc, irq); ++ if (ret < 0) { ++ destroy_irq(irq); ++ return ret; ++ } ++ return 0; ++ ++#ifdef CONFIG_INTR_REMAP ++error: ++ destroy_irq(irq); ++ return ret; ++#endif ++} ++ ++int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ++{ ++ unsigned int irq; ++ int ret, sub_handle; ++ struct msi_desc *desc; ++ unsigned int irq_want; ++ ++#ifdef CONFIG_INTR_REMAP ++ struct intel_iommu *iommu = 0; ++ int index = 0; ++#endif ++ ++ irq_want = build_irq_for_pci_dev(dev) + 0x100; ++ sub_handle = 0; ++ list_for_each_entry(desc, 
&dev->msi_list, list) { ++ irq = create_irq_nr(irq_want--); ++ if (irq == 0) ++ return -1; ++#ifdef CONFIG_INTR_REMAP ++ if (!intr_remapping_enabled) ++ goto no_ir; ++ ++ if (!sub_handle) { ++ /* ++ * allocate the consecutive block of IRTE's ++ * for 'nvec' ++ */ ++ index = msi_alloc_irte(dev, irq, nvec); ++ if (index < 0) { ++ ret = index; ++ goto error; ++ } ++ } else { ++ iommu = map_dev_to_ir(dev); ++ if (!iommu) { ++ ret = -ENOENT; ++ goto error; ++ } ++ /* ++ * setup the mapping between the irq and the IRTE ++ * base index, the sub_handle pointing to the ++ * appropriate interrupt remap table entry. ++ */ ++ set_irte_irq(irq, iommu, index, sub_handle); ++ } ++no_ir: ++#endif ++ ret = setup_msi_irq(dev, desc, irq); ++ if (ret < 0) ++ goto error; ++ sub_handle++; ++ } ++ return 0; ++ ++error: ++ destroy_irq(irq); ++ return ret; ++} ++ ++void arch_teardown_msi_irq(unsigned int irq) ++{ ++ destroy_irq(irq); ++} ++ ++#ifdef CONFIG_DMAR ++#ifdef CONFIG_SMP ++static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ struct msi_msg msg; ++ unsigned int dest; ++ cpumask_t tmp; ++ struct irq_desc *desc; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ dmar_msi_read(irq, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ dmar_msi_write(irq, &msg); ++ desc = irq_to_desc(irq); ++ desc->affinity = mask; ++} ++#endif /* CONFIG_SMP */ ++ ++struct irq_chip dmar_msi_type = { ++ .name = "DMAR_MSI", ++ .unmask = dmar_msi_unmask, ++ .mask = dmar_msi_mask, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = dmar_msi_set_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_dmar_msi(unsigned int irq) ++{ ++ int ret; ++ struct msi_msg msg; ++ ++ ret = msi_compose_msg(NULL, irq, &msg); ++ if (ret < 0) ++ return ret; ++ dmar_msi_write(irq, &msg); ++ set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, ++ "edge"); ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_HPET_TIMER ++ ++#ifdef CONFIG_SMP ++static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ struct msi_msg msg; ++ unsigned int dest; ++ cpumask_t tmp; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ hpet_msi_read(irq, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ hpet_msi_write(irq, &msg); ++ desc = irq_to_desc(irq); ++ desc->affinity = mask; ++} ++#endif /* CONFIG_SMP */ ++ ++struct irq_chip hpet_msi_type = { ++ .name = "HPET_MSI", ++ .unmask = hpet_msi_unmask, ++ .mask = hpet_msi_mask, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = hpet_msi_set_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_hpet_msi(unsigned int irq) ++{ ++ int ret; ++ struct msi_msg msg; ++ ++ ret = msi_compose_msg(NULL, irq, &msg); ++ if (ret < 0) ++ return ret; ++ ++ hpet_msi_write(irq, &msg); ++ set_irq_chip_and_handler_name(irq, 
&hpet_msi_type, handle_edge_irq, ++ "edge"); ++ ++ return 0; ++} ++#endif ++ ++#endif /* CONFIG_PCI_MSI */ ++/* ++ * Hypertransport interrupt support ++ */ ++#ifdef CONFIG_HT_IRQ ++ ++#ifdef CONFIG_SMP ++ ++static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) ++{ ++ struct ht_irq_msg msg; ++ fetch_ht_irq_msg(irq, &msg); ++ ++ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); ++ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); ++ ++ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); ++ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); ++ ++ write_ht_irq_msg(irq, &msg); ++} ++ ++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ struct irq_cfg *cfg; ++ unsigned int dest; ++ cpumask_t tmp; ++ struct irq_desc *desc; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ return; ++ ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, mask); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ target_ht_irq(irq, dest, cfg->vector); ++ desc = irq_to_desc(irq); ++ desc->affinity = mask; ++} ++#endif ++ ++static struct irq_chip ht_irq_chip = { ++ .name = "PCI-HT", ++ .mask = mask_ht_irq, ++ .unmask = unmask_ht_irq, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ht_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) ++{ ++ struct irq_cfg *cfg; ++ int err; ++ cpumask_t tmp; ++ ++ tmp = TARGET_CPUS; ++ err = assign_irq_vector(irq, tmp); ++ if (!err) { ++ struct ht_irq_msg msg; ++ unsigned dest; ++ ++ cfg = irq_cfg(irq); ++ cpus_and(tmp, cfg->domain, tmp); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); ++ ++ msg.address_lo = ++ HT_IRQ_LOW_BASE | ++ HT_IRQ_LOW_DEST_ID(dest) | ++ HT_IRQ_LOW_VECTOR(cfg->vector) | ++ ((INT_DEST_MODE == 0) ? ++ HT_IRQ_LOW_DM_PHYSICAL : ++ HT_IRQ_LOW_DM_LOGICAL) | ++ HT_IRQ_LOW_RQEOI_EDGE | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ HT_IRQ_LOW_MT_FIXED : ++ HT_IRQ_LOW_MT_ARBITRATED) | ++ HT_IRQ_LOW_IRQ_MASKED; ++ ++ write_ht_irq_msg(irq, &msg); ++ ++ set_irq_chip_and_handler_name(irq, &ht_irq_chip, ++ handle_edge_irq, "edge"); ++ ++ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); ++ } ++ return err; ++} ++#endif /* CONFIG_HT_IRQ */ ++ ++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) ++/* ++ * Re-target the irq to the specified CPU and enable the specified MMR located ++ * on the specified blade to allow the sending of MSIs to the specified CPU. 
++ */ ++int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, ++ unsigned long mmr_offset) ++{ ++ const cpumask_t *eligible_cpu = get_cpu_mask(cpu); ++ struct irq_cfg *cfg; ++ int mmr_pnode; ++ unsigned long mmr_value; ++ struct uv_IO_APIC_route_entry *entry; ++ unsigned long flags; ++ int err; ++ ++ err = assign_irq_vector(irq, *eligible_cpu); ++ if (err != 0) ++ return err; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, ++ irq_name); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ cfg = irq_cfg(irq); ++ ++ mmr_value = 0; ++ entry = (struct uv_IO_APIC_route_entry *)&mmr_value; ++ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); ++ ++ entry->vector = cfg->vector; ++ entry->delivery_mode = INT_DELIVERY_MODE; ++ entry->dest_mode = INT_DEST_MODE; ++ entry->polarity = 0; ++ entry->trigger = 0; ++ entry->mask = 0; ++ entry->dest = cpu_mask_to_apicid(*eligible_cpu); ++ ++ mmr_pnode = uv_blade_to_pnode(mmr_blade); ++ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); ++ ++ return irq; ++} ++ ++/* ++ * Disable the specified MMR located on the specified blade so that MSIs are ++ * longer allowed to be sent. ++ */ ++void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) ++{ ++ unsigned long mmr_value; ++ struct uv_IO_APIC_route_entry *entry; ++ int mmr_pnode; ++ ++ mmr_value = 0; ++ entry = (struct uv_IO_APIC_route_entry *)&mmr_value; ++ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); ++ ++ entry->mask = 1; ++ ++ mmr_pnode = uv_blade_to_pnode(mmr_blade); ++ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); ++} ++#endif /* CONFIG_X86_64 */ ++ ++int __init io_apic_get_redir_entries (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.entries; ++} ++ ++int __init probe_nr_irqs(void) ++{ ++ return NR_IRQS; ++} ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based IOAPIC Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++#ifdef CONFIG_X86_32 ++int __init io_apic_get_unique_id(int ioapic, int apic_id) ++{ ++#ifndef CONFIG_XEN ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. ++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. 
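arch_enable_uv_irq() above builds its MMR image by overlaying struct uv_IO_APIC_route_entry on a plain unsigned long and BUG_ON()s if the two sizes ever diverge. The same overlay idiom in miniature, with a hypothetical 64-bit register layout and a compile-time size check standing in for the runtime BUG_ON():

#include <stdint.h>
#include <stdio.h>

struct mmr_layout {			/* hypothetical register layout */
	uint64_t vector	: 8;
	uint64_t mask	: 1;
	uint64_t rsvd	: 55;
};

/* compile-time size check in place of the runtime BUG_ON() */
typedef char layout_fits[sizeof(struct mmr_layout) == sizeof(uint64_t) ? 1 : -1];

int main(void)
{
	uint64_t mmr_value = 0;
	struct mmr_layout *e = (struct mmr_layout *)&mmr_value;

	e->vector = 0x41;	/* fill fields through the overlay... */
	e->mask = 0;
	/* ...then write the raw image to the register in one go. */
	printf("mmr=%016llx\n", (unsigned long long)mmr_value);
	return 0;
}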
++ */ ++ if (check_apicid_used(apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!check_apicid_used(apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ tmp = apicid_to_cpu_present(apic_id); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++#endif /* !CONFIG_XEN */ ++ ++ return apic_id; ++} ++ ++int __init io_apic_get_version(int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++#endif ++ ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) ++{ ++#ifdef CONFIG_XEN ++ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) { ++ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n", ++ ioapic, irq); ++ return -EINVAL; ++ } ++#endif ++ ++ if (!IO_APIC_IRQ(irq)) { ++ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ++ ioapic); ++ return -EINVAL; ++ } ++ ++ /* ++ * IRQs < 16 are already in the irq_2_pin[] map ++ */ ++ if (irq >= 16) ++ add_pin_to_irq(irq, ioapic, pin); ++ ++ setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); ++ ++ return 0; ++} ++ ++ ++int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) ++{ ++ int i; ++ ++ if (skip_ioapic_setup) ++ return -1; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mp_irqtype == mp_INT && ++ mp_irqs[i].mp_srcbusirq == bus_irq) ++ break; ++ if (i >= mp_irq_entries) ++ return -1; ++ ++ *trigger = irq_trigger(i); ++ *polarity = irq_polarity(i); ++ return 0; ++} ++ ++#endif /* CONFIG_ACPI */ ++ ++#ifndef CONFIG_XEN ++/* ++ * This function currently is only a helper for the i386 smp boot process where ++ * we need to reprogram the ioredtbls to cater for the cpus which have come online ++ * so mask in all cases should simply be TARGET_CPUS ++ */ ++#ifdef CONFIG_SMP ++void __init setup_ioapic_dest(void) ++{ ++ int pin, ioapic, irq, irq_entry; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ cpumask_t mask; ++ ++ if (skip_ioapic_setup == 1) ++ return; ++ ++ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { ++ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { ++ irq_entry = find_irq_entry(ioapic, pin, mp_INT); ++ if (irq_entry == -1) ++ continue; ++ irq = pin_2_irq(irq_entry, ioapic, pin); ++ ++ /* setup_IO_APIC_irqs could fail to get vector for some device ++ * when you have too many devices, because at that time only boot ++ * cpu is online. 
++ */ ++ cfg = irq_cfg(irq); ++ if (!cfg->vector) { ++ setup_IO_APIC_irq(ioapic, pin, irq, ++ irq_trigger(irq_entry), ++ irq_polarity(irq_entry)); ++ continue; ++ ++ } ++ ++ /* ++ * Honour affinities which have been set in early boot ++ */ ++ desc = irq_to_desc(irq); ++ if (desc->status & ++ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) ++ mask = desc->affinity; ++ else ++ mask = TARGET_CPUS; ++ ++#ifdef CONFIG_INTR_REMAP ++ if (intr_remapping_enabled) ++ set_ir_ioapic_affinity_irq(irq, mask); ++ else ++#endif ++ set_ioapic_affinity_irq(irq, mask); ++ } ++ ++ } ++} ++#endif ++ ++#define IOAPIC_RESOURCE_NAME_SIZE 11 ++ ++static struct resource *ioapic_resources; ++ ++static struct resource * __init ioapic_setup_resources(void) ++{ ++ unsigned long n; ++ struct resource *res; ++ char *mem; ++ int i; ++ ++ if (nr_ioapics <= 0) ++ return NULL; ++ ++ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); ++ n *= nr_ioapics; ++ ++ mem = alloc_bootmem(n); ++ res = (void *)mem; ++ ++ if (mem != NULL) { ++ mem += sizeof(struct resource) * nr_ioapics; ++ ++ for (i = 0; i < nr_ioapics; i++) { ++ res[i].name = mem; ++ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ sprintf(mem, "IOAPIC %u", i); ++ mem += IOAPIC_RESOURCE_NAME_SIZE; ++ } ++ } ++ ++ ioapic_resources = res; ++ ++ return res; ++} ++ ++void __init ioapic_init_mappings(void) ++{ ++ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; ++ struct resource *ioapic_res; ++ int i; ++ ++ irq_2_pin_init(); ++ ioapic_res = ioapic_setup_resources(); ++ for (i = 0; i < nr_ioapics; i++) { ++ if (smp_found_config) { ++ ioapic_phys = mp_ioapics[i].mp_apicaddr; ++#ifdef CONFIG_X86_32 ++ if (!ioapic_phys) { ++ printk(KERN_ERR ++ "WARNING: bogus zero IO-APIC " ++ "address found in MPTABLE, " ++ "disabling IO/APIC support!\n"); ++ smp_found_config = 0; ++ skip_ioapic_setup = 1; ++ goto fake_ioapic_page; ++ } ++#endif ++ } else { ++#ifdef CONFIG_X86_32 ++fake_ioapic_page: ++#endif ++ ioapic_phys = (unsigned long) ++ alloc_bootmem_pages(PAGE_SIZE); ++ ioapic_phys = __pa(ioapic_phys); ++ } ++ set_fixmap_nocache(idx, ioapic_phys); ++ apic_printk(APIC_VERBOSE, ++ "mapped IOAPIC to %08lx (%08lx)\n", ++ __fix_to_virt(idx), ioapic_phys); ++ idx++; ++ ++ if (ioapic_res != NULL) { ++ ioapic_res->start = ioapic_phys; ++ ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ++ ioapic_res++; ++ } ++ } ++} ++ ++static int __init ioapic_insert_resources(void) ++{ ++ int i; ++ struct resource *r = ioapic_resources; ++ ++ if (!r) { ++ printk(KERN_ERR ++ "IO APIC resources could be not be allocated.\n"); ++ return -1; ++ } ++ ++ for (i = 0; i < nr_ioapics; i++) { ++ insert_resource(&iomem_resource, r); ++ r++; ++ } ++ ++ return 0; ++} ++ ++/* Insert the IO APIC resources after PCI initialization has occured to handle ++ * IO APICS that are mapped in on a BAR in PCI space. */ ++late_initcall(ioapic_insert_resources); ++#endif /* !CONFIG_XEN */ +--- head-2010-01-18.orig/arch/x86/kernel/io_apic_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,2985 +0,0 @@ +-/* +- * Intel IO-APIC support for multi-Pentium hosts. +- * +- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo +- * +- * Many thanks to Stig Venaas for trying out countless experimental +- * patches and reporting/debugging problems patiently! +- * +- * (c) 1999, Multiple IO-APIC support, developed by +- * Ken-ichi Yaku and +- * Hidemi Kishimoto , +- * further tested and cleaned up by Zach Brown +- * and Ingo Molnar +- * +- * Fixes +- * Maciej W. 
Rozycki : Bits for genuine 82489DX APICs; +- * thanks to Eric Gilmore +- * and Rolf G. Tews +- * for testing these extensively +- * Paul Diefenbaugh : Added full ACPI support +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include /* time_after() */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#ifdef CONFIG_XEN +-#include +-#include +-#include +- +-/* Fake i8259 */ +-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) +-#define disable_8259A_irq(_irq) ((void)0) +-#define i8259A_irq_pending(_irq) (0) +- +-unsigned long io_apic_irqs; +- +-#define clear_IO_APIC() ((void)0) +-#endif /* CONFIG_XEN */ +- +-int (*ioapic_renumber_irq)(int ioapic, int irq); +-atomic_t irq_mis_count; +- +-#ifndef CONFIG_XEN +-/* Where if anywhere is the i8259 connect in external int mode */ +-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; +-#endif +- +-static DEFINE_SPINLOCK(ioapic_lock); +-static DEFINE_SPINLOCK(vector_lock); +- +-int timer_through_8259 __initdata; +- +-/* +- * Is the SiS APIC rmw bug present ? +- * -1 = don't know, 0 = no, 1 = yes +- */ +-int sis_apic_bug = -1; +- +-/* +- * # of IRQ routing registers +- */ +-int nr_ioapic_registers[MAX_IO_APICS]; +- +-/* I/O APIC entries */ +-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +-int nr_ioapics; +- +-/* MP IRQ source entries */ +-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +- +-/* # of MP IRQ source entries */ +-int mp_irq_entries; +- +-#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +-int mp_bus_id_to_type[MAX_MP_BUSSES]; +-#endif +- +-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); +- +-static int disable_timer_pin_1 __initdata; +- +-/* +- * Rough estimation of how many shared IRQs there are, can +- * be changed anytime. +- */ +-#define MAX_PLUS_SHARED_IRQS NR_IRQS +-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +- +-/* +- * This is performance-critical, we want to do it O(1) +- * +- * the indexing order of this array favors 1:1 mappings +- * between pins and IRQs. 
+- */ +- +-static struct irq_pin_list { +- int apic, pin, next; +-} irq_2_pin[PIN_MAP_SIZE]; +- +-#ifndef CONFIG_XEN +-struct io_apic { +- unsigned int index; +- unsigned int unused[3]; +- unsigned int data; +-}; +- +-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +-{ +- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) +- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); +-} +-#endif +- +-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +-{ +-#ifndef CONFIG_XEN +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(reg, &io_apic->index); +- return readl(&io_apic->data); +-#else +- struct physdev_apic apic_op; +- int ret; +- +- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; +- apic_op.reg = reg; +- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); +- if (ret) +- return ret; +- return apic_op.value; +-#endif +-} +- +-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +-{ +-#ifndef CONFIG_XEN +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(reg, &io_apic->index); +- writel(value, &io_apic->data); +-#else +- struct physdev_apic apic_op; +- +- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; +- apic_op.reg = reg; +- apic_op.value = value; +- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); +-#endif +-} +- +-#ifndef CONFIG_XEN +-/* +- * Re-write a value: to be used for read-modify-write +- * cycles where the read already set up the index register. +- * +- * Older SiS APIC requires we rewrite the index register +- */ +-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +-{ +- volatile struct io_apic __iomem *io_apic = io_apic_base(apic); +- if (sis_apic_bug) +- writel(reg, &io_apic->index); +- writel(value, &io_apic->data); +-} +-#else +-#define io_apic_modify io_apic_write +-#endif +- +-union entry_union { +- struct { u32 w1, w2; }; +- struct IO_APIC_route_entry entry; +-}; +- +-#ifndef CONFIG_XEN +-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +-{ +- union entry_union eu; +- unsigned long flags; +- spin_lock_irqsave(&ioapic_lock, flags); +- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); +- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- return eu.entry; +-} +-#endif +- +-/* +- * When we write a new IO APIC routing entry, we need to write the high +- * word first! If the mask bit in the low word is clear, we will enable +- * the interrupt, and we need to make sure the entry is fully populated +- * before that happens. +- */ +-static void +-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +-{ +- union entry_union eu; +- eu.entry = e; +- io_apic_write(apic, 0x11 + 2*pin, eu.w2); +- io_apic_write(apic, 0x10 + 2*pin, eu.w1); +-} +- +-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +-{ +- unsigned long flags; +- spin_lock_irqsave(&ioapic_lock, flags); +- __ioapic_write_entry(apic, pin, e); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-#ifndef CONFIG_XEN +-/* +- * When we mask an IO APIC routing entry, we need to write the low +- * word first, in order to set the mask bit before we change the +- * high bits! 
+- */ +-static void ioapic_mask_entry(int apic, int pin) +-{ +- unsigned long flags; +- union entry_union eu = { .entry.mask = 1 }; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x10 + 2*pin, eu.w1); +- io_apic_write(apic, 0x11 + 2*pin, eu.w2); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +-#endif +- +-/* +- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are +- * shared ISA-space IRQs, so we have to support them. We are super +- * fast in the common case, and fast for shared ISA-space IRQs. +- */ +-static void add_pin_to_irq(unsigned int irq, int apic, int pin) +-{ +- static int first_free_entry = NR_IRQS; +- struct irq_pin_list *entry = irq_2_pin + irq; +- +- while (entry->next) +- entry = irq_2_pin + entry->next; +- +- if (entry->pin != -1) { +- entry->next = first_free_entry; +- entry = irq_2_pin + entry->next; +- if (++first_free_entry >= PIN_MAP_SIZE) +- panic("io_apic.c: whoops"); +- } +- entry->apic = apic; +- entry->pin = pin; +-} +- +-#ifndef CONFIG_XEN +-/* +- * Reroute an IRQ to a different pin. +- */ +-static void __init replace_pin_at_irq(unsigned int irq, +- int oldapic, int oldpin, +- int newapic, int newpin) +-{ +- struct irq_pin_list *entry = irq_2_pin + irq; +- +- while (1) { +- if (entry->apic == oldapic && entry->pin == oldpin) { +- entry->apic = newapic; +- entry->pin = newpin; +- } +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +-} +- +-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +-{ +- struct irq_pin_list *entry = irq_2_pin + irq; +- unsigned int pin, reg; +- +- for (;;) { +- pin = entry->pin; +- if (pin == -1) +- break; +- reg = io_apic_read(entry->apic, 0x10 + pin*2); +- reg &= ~disable; +- reg |= enable; +- io_apic_modify(entry->apic, 0x10 + pin*2, reg); +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +-} +- +-/* mask = 1 */ +-static void __mask_IO_APIC_irq(unsigned int irq) +-{ +- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); +-} +- +-/* mask = 0 */ +-static void __unmask_IO_APIC_irq(unsigned int irq) +-{ +- __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); +-} +- +-/* mask = 1, trigger = 0 */ +-static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +-{ +- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, +- IO_APIC_REDIR_LEVEL_TRIGGER); +-} +- +-/* mask = 0, trigger = 1 */ +-static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +-{ +- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, +- IO_APIC_REDIR_MASKED); +-} +- +-static void mask_IO_APIC_irq(unsigned int irq) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __mask_IO_APIC_irq(irq); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void unmask_IO_APIC_irq(unsigned int irq) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __unmask_IO_APIC_irq(irq); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +-{ +- struct IO_APIC_route_entry entry; +- +- /* Check delivery_mode to be sure we're not clearing an SMI pin */ +- entry = ioapic_read_entry(apic, pin); +- if (entry.delivery_mode == dest_SMI) +- return; +- +- /* +- * Disable it in the IO-APIC irq-routing table: +- */ +- ioapic_mask_entry(apic, pin); +-} +- +-static void clear_IO_APIC(void) +-{ +- int apic, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) +- clear_IO_APIC_pin(apic, pin); +-} +- 
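The irq_2_pin table that the helpers above walk is worth a note: it is a fixed array doing double duty as an intrusive singly linked list. Slots below NR_IRQS act as per-IRQ heads, and a shared overflow region, handed out through first_free_entry, chains extra entries whenever one IRQ fans out to several I/O APIC pins. Below is a minimal, self-contained userspace sketch of that pattern; the toy sizes, the toy_* names, and the sample pins in main() are hypothetical stand-ins for illustration, not kernel code.

#include <stdio.h>
#include <stdlib.h>

/* Toy sizes standing in for NR_IRQS and PIN_MAP_SIZE. */
#define TOY_NR_IRQS	8
#define TOY_PIN_MAP	16

/* Same shape as irq_2_pin: the first TOY_NR_IRQS slots are heads
 * (indexed by IRQ number), the rest is an overflow pool linked
 * through ->next; next == 0 marks the end of a chain. */
static struct toy_pin_entry {
	int apic, pin, next;
} pin_map[TOY_PIN_MAP];

static int first_free = TOY_NR_IRQS;	/* next unused overflow slot */

static void toy_add_pin(int irq, int apic, int pin)
{
	struct toy_pin_entry *e = &pin_map[irq];

	while (e->next)			/* walk to the chain tail */
		e = &pin_map[e->next];

	if (e->pin != -1) {		/* tail occupied: chain a new slot */
		if (first_free >= TOY_PIN_MAP) {
			fprintf(stderr, "pin map exhausted\n");
			exit(EXIT_FAILURE);	/* the kernel version panics here */
		}
		e->next = first_free++;
		e = &pin_map[e->next];
	}
	e->apic = apic;
	e->pin = pin;
}

/* The traversal that every mask/unmask helper repeats. */
static void toy_for_each_pin(int irq)
{
	struct toy_pin_entry *e = &pin_map[irq];

	for (;;) {
		if (e->pin == -1)
			break;
		printf("IRQ%d -> %d:%d\n", irq, e->apic, e->pin);
		if (!e->next)
			break;
		e = &pin_map[e->next];
	}
}

int main(void)
{
	int i;

	for (i = 0; i < TOY_PIN_MAP; i++) {
		pin_map[i].pin = -1;	/* same init that enable_IO_APIC() does */
		pin_map[i].next = 0;
	}
	toy_add_pin(5, 0, 19);		/* hypothetical IRQ 5 routed to two pins */
	toy_add_pin(5, 1, 3);
	toy_for_each_pin(5);
	return 0;
}

The same walk-the-chain loop reappears in __modify_IO_APIC_irq() above and in the affinity code below; keeping every entry in one flat array means those hot paths never allocate.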
+-#ifdef CONFIG_SMP +-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) +-{ +- unsigned long flags; +- int pin; +- struct irq_pin_list *entry = irq_2_pin + irq; +- unsigned int apicid_value; +- cpumask_t tmp; +- +- cpus_and(tmp, cpumask, cpu_online_map); +- if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(cpumask, tmp, CPU_MASK_ALL); +- +- apicid_value = cpu_mask_to_apicid(cpumask); +- /* Prepare to do the io_apic_write */ +- apicid_value = apicid_value << 24; +- spin_lock_irqsave(&ioapic_lock, flags); +- for (;;) { +- pin = entry->pin; +- if (pin == -1) +- break; +- io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +- irq_desc[irq].affinity = cpumask; +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-#if defined(CONFIG_IRQBALANCE) +-# include /* kernel_thread() */ +-# include /* kstat */ +-# include /* kmalloc() */ +-# include +- +-#define IRQBALANCE_CHECK_ARCH -999 +-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) +-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) +-#define BALANCED_IRQ_MORE_DELTA (HZ/10) +-#define BALANCED_IRQ_LESS_DELTA (HZ) +- +-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; +-static int physical_balance __read_mostly; +-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; +- +-static struct irq_cpu_info { +- unsigned long *last_irq; +- unsigned long *irq_delta; +- unsigned long irq; +-} irq_cpu_data[NR_CPUS]; +- +-#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) +-#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) +-#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) +- +-#define IDLE_ENOUGH(cpu,now) \ +- (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) +- +-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) +- +-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) +- +-static cpumask_t balance_irq_affinity[NR_IRQS] = { +- [0 ... NR_IRQS-1] = CPU_MASK_ALL +-}; +- +-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) +-{ +- balance_irq_affinity[irq] = mask; +-} +- +-static unsigned long move(int curr_cpu, cpumask_t allowed_mask, +- unsigned long now, int direction) +-{ +- int search_idle = 1; +- int cpu = curr_cpu; +- +- goto inside; +- +- do { +- if (unlikely(cpu == curr_cpu)) +- search_idle = 0; +-inside: +- if (direction == 1) { +- cpu++; +- if (cpu >= NR_CPUS) +- cpu = 0; +- } else { +- cpu--; +- if (cpu == -1) +- cpu = NR_CPUS-1; +- } +- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || +- (search_idle && !IDLE_ENOUGH(cpu, now))); +- +- return cpu; +-} +- +-static inline void balance_irq(int cpu, int irq) +-{ +- unsigned long now = jiffies; +- cpumask_t allowed_mask; +- unsigned int new_cpu; +- +- if (irqbalance_disabled) +- return; +- +- cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); +- new_cpu = move(cpu, allowed_mask, now, 1); +- if (cpu != new_cpu) +- set_pending_irq(irq, cpumask_of_cpu(new_cpu)); +-} +- +-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) +-{ +- int i, j; +- +- for_each_online_cpu(i) { +- for (j = 0; j < NR_IRQS; j++) { +- if (!irq_desc[j].action) +- continue; +- /* Is it a significant load ? 
*/ +- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < +- useful_load_threshold) +- continue; +- balance_irq(i, j); +- } +- } +- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, +- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); +- return; +-} +- +-static void do_irq_balance(void) +-{ +- int i, j; +- unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); +- unsigned long move_this_load = 0; +- int max_loaded = 0, min_loaded = 0; +- int load; +- unsigned long useful_load_threshold = balanced_irq_interval + 10; +- int selected_irq; +- int tmp_loaded, first_attempt = 1; +- unsigned long tmp_cpu_irq; +- unsigned long imbalance = 0; +- cpumask_t allowed_mask, target_cpu_mask, tmp; +- +- for_each_possible_cpu(i) { +- int package_index; +- CPU_IRQ(i) = 0; +- if (!cpu_online(i)) +- continue; +- package_index = CPU_TO_PACKAGEINDEX(i); +- for (j = 0; j < NR_IRQS; j++) { +- unsigned long value_now, delta; +- /* Is this an active IRQ or balancing disabled ? */ +- if (!irq_desc[j].action || irq_balancing_disabled(j)) +- continue; +- if (package_index == i) +- IRQ_DELTA(package_index, j) = 0; +- /* Determine the total count per processor per IRQ */ +- value_now = (unsigned long) kstat_cpu(i).irqs[j]; +- +- /* Determine the activity per processor per IRQ */ +- delta = value_now - LAST_CPU_IRQ(i, j); +- +- /* Update last_cpu_irq[][] for the next time */ +- LAST_CPU_IRQ(i, j) = value_now; +- +- /* Ignore IRQs whose rate is less than the clock */ +- if (delta < useful_load_threshold) +- continue; +- /* update the load for the processor or package total */ +- IRQ_DELTA(package_index, j) += delta; +- +- /* Keep track of the higher numbered sibling as well */ +- if (i != package_index) +- CPU_IRQ(i) += delta; +- /* +- * We have sibling A and sibling B in the package +- * +- * cpu_irq[A] = load for cpu A + load for cpu B +- * cpu_irq[B] = load for cpu B +- */ +- CPU_IRQ(package_index) += delta; +- } +- } +- /* Find the least loaded processor package */ +- for_each_online_cpu(i) { +- if (i != CPU_TO_PACKAGEINDEX(i)) +- continue; +- if (min_cpu_irq > CPU_IRQ(i)) { +- min_cpu_irq = CPU_IRQ(i); +- min_loaded = i; +- } +- } +- max_cpu_irq = ULONG_MAX; +- +-tryanothercpu: +- /* +- * Look for heaviest loaded processor. +- * We may come back to get the next heaviest loaded processor. +- * Skip processors with trivial loads. +- */ +- tmp_cpu_irq = 0; +- tmp_loaded = -1; +- for_each_online_cpu(i) { +- if (i != CPU_TO_PACKAGEINDEX(i)) +- continue; +- if (max_cpu_irq <= CPU_IRQ(i)) +- continue; +- if (tmp_cpu_irq < CPU_IRQ(i)) { +- tmp_cpu_irq = CPU_IRQ(i); +- tmp_loaded = i; +- } +- } +- +- if (tmp_loaded == -1) { +- /* +- * In the case of small number of heavy interrupt sources, +- * loading some of the cpus too much. We use Ingo's original +- * approach to rotate them around. +- */ +- if (!first_attempt && imbalance >= useful_load_threshold) { +- rotate_irqs_among_cpus(useful_load_threshold); +- return; +- } +- goto not_worth_the_effort; +- } +- +- first_attempt = 0; /* heaviest search */ +- max_cpu_irq = tmp_cpu_irq; /* load */ +- max_loaded = tmp_loaded; /* processor */ +- imbalance = (max_cpu_irq - min_cpu_irq) / 2; +- +- /* +- * if imbalance is less than approx 10% of max load, then +- * observe diminishing returns action. - quit +- */ +- if (imbalance < (max_cpu_irq >> 3)) +- goto not_worth_the_effort; +- +-tryanotherirq: +- /* if we select an IRQ to move that can't go where we want, then +- * see if there is another one to try. 
+- */ +- move_this_load = 0; +- selected_irq = -1; +- for (j = 0; j < NR_IRQS; j++) { +- /* Is this an active IRQ? */ +- if (!irq_desc[j].action) +- continue; +- if (imbalance <= IRQ_DELTA(max_loaded, j)) +- continue; +- /* Try to find the IRQ that is closest to the imbalance +- * without going over. +- */ +- if (move_this_load < IRQ_DELTA(max_loaded, j)) { +- move_this_load = IRQ_DELTA(max_loaded, j); +- selected_irq = j; +- } +- } +- if (selected_irq == -1) +- goto tryanothercpu; +- +- imbalance = move_this_load; +- +- /* For physical_balance case, we accumulated both load +- * values in the one of the siblings cpu_irq[], +- * to use the same code for physical and logical processors +- * as much as possible. +- * +- * NOTE: the cpu_irq[] array holds the sum of the load for +- * sibling A and sibling B in the slot for the lowest numbered +- * sibling (A), _AND_ the load for sibling B in the slot for +- * the higher numbered sibling. +- * +- * We seek the least loaded sibling by making the comparison +- * (A+B)/2 vs B +- */ +- load = CPU_IRQ(min_loaded) >> 1; +- for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { +- if (load > CPU_IRQ(j)) { +- /* This won't change cpu_sibling_map[min_loaded] */ +- load = CPU_IRQ(j); +- min_loaded = j; +- } +- } +- +- cpus_and(allowed_mask, +- cpu_online_map, +- balance_irq_affinity[selected_irq]); +- target_cpu_mask = cpumask_of_cpu(min_loaded); +- cpus_and(tmp, target_cpu_mask, allowed_mask); +- +- if (!cpus_empty(tmp)) { +- /* mark for change destination */ +- set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); +- +- /* Since we made a change, come back sooner to +- * check for more variation. +- */ +- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, +- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); +- return; +- } +- goto tryanotherirq; +- +-not_worth_the_effort: +- /* +- * if we did not find an IRQ to move, then adjust the time interval +- * upward +- */ +- balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, +- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); +- return; +-} +- +-static int balanced_irq(void *unused) +-{ +- int i; +- unsigned long prev_balance_time = jiffies; +- long time_remaining = balanced_irq_interval; +- +- /* push everything to CPU 0 to give us a starting point. */ +- for (i = 0 ; i < NR_IRQS ; i++) { +- irq_desc[i].pending_mask = cpumask_of_cpu(0); +- set_pending_irq(i, cpumask_of_cpu(0)); +- } +- +- set_freezable(); +- for ( ; ; ) { +- time_remaining = schedule_timeout_interruptible(time_remaining); +- try_to_freeze(); +- if (time_after(jiffies, +- prev_balance_time+balanced_irq_interval)) { +- preempt_disable(); +- do_irq_balance(); +- prev_balance_time = jiffies; +- time_remaining = balanced_irq_interval; +- preempt_enable(); +- } +- } +- return 0; +-} +- +-static int __init balanced_irq_init(void) +-{ +- int i; +- struct cpuinfo_x86 *c; +- cpumask_t tmp; +- +- cpus_shift_right(tmp, cpu_online_map, 2); +- c = &boot_cpu_data; +- /* When not overwritten by the command line ask subarchitecture. 
*/ +- if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) +- irqbalance_disabled = NO_BALANCE_IRQ; +- if (irqbalance_disabled) +- return 0; +- +- /* disable irqbalance completely if there is only one processor online */ +- if (num_online_cpus() < 2) { +- irqbalance_disabled = 1; +- return 0; +- } +- /* +- * Enable physical balance only if more than 1 physical processor +- * is present +- */ +- if (smp_num_siblings > 1 && !cpus_empty(tmp)) +- physical_balance = 1; +- +- for_each_online_cpu(i) { +- irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); +- irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); +- if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { +- printk(KERN_ERR "balanced_irq_init: out of memory"); +- goto failed; +- } +- } +- +- printk(KERN_INFO "Starting balanced_irq\n"); +- if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) +- return 0; +- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); +-failed: +- for_each_possible_cpu(i) { +- kfree(irq_cpu_data[i].irq_delta); +- irq_cpu_data[i].irq_delta = NULL; +- kfree(irq_cpu_data[i].last_irq); +- irq_cpu_data[i].last_irq = NULL; +- } +- return 0; +-} +- +-int __devinit irqbalance_disable(char *str) +-{ +- irqbalance_disabled = 1; +- return 1; +-} +- +-__setup("noirqbalance", irqbalance_disable); +- +-late_initcall(balanced_irq_init); +-#endif /* CONFIG_IRQBALANCE */ +-#endif /* CONFIG_SMP */ +-#endif +- +-#ifndef CONFIG_SMP +-void send_IPI_self(int vector) +-{ +-#ifndef CONFIG_XEN +- unsigned int cfg; +- +- /* +- * Wait for idle. +- */ +- apic_wait_icr_idle(); +- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-#endif +-} +-#endif /* !CONFIG_SMP */ +- +- +-/* +- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to +- * specific CPU-side IRQs. +- */ +- +-#define MAX_PIRQS 8 +-static int pirq_entries [MAX_PIRQS]; +-static int pirqs_enabled; +-int skip_ioapic_setup; +- +-static int __init ioapic_pirq_setup(char *str) +-{ +- int i, max; +- int ints[MAX_PIRQS+1]; +- +- get_options(str, ARRAY_SIZE(ints), ints); +- +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; +- +- pirqs_enabled = 1; +- apic_printk(APIC_VERBOSE, KERN_INFO +- "PIRQ redirection, working around broken MP-BIOS.\n"); +- max = MAX_PIRQS; +- if (ints[0] < MAX_PIRQS) +- max = ints[0]; +- +- for (i = 0; i < max; i++) { +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); +- /* +- * PIRQs are mapped upside down, usually. +- */ +- pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; +- } +- return 1; +-} +- +-__setup("pirq=", ioapic_pirq_setup); +- +-/* +- * Find the IRQ entry number of a certain pin. 
+- */ +-static int find_irq_entry(int apic, int pin, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == type && +- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) && +- mp_irqs[i].mp_dstirq == pin) +- return i; +- +- return -1; +-} +- +-#ifndef CONFIG_XEN +-/* +- * Find the pin to which IRQ[irq] (ISA) is connected +- */ +-static int __init find_isa_irq_pin(int irq, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) +- +- return mp_irqs[i].mp_dstirq; +- } +- return -1; +-} +- +-static int __init find_isa_irq_apic(int irq, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) +- break; +- } +- if (i < mp_irq_entries) { +- int apic; +- for (apic = 0; apic < nr_ioapics; apic++) { +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) +- return apic; +- } +- } +- +- return -1; +-} +-#endif +- +-/* +- * Find a specific PCI IRQ entry. +- * Not an __init, possibly needed by modules +- */ +-static int pin_2_irq(int idx, int apic, int pin); +- +-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +-{ +- int apic, i, best_guess = -1; +- +- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " +- "slot:%d, pin:%d.\n", bus, slot, pin); +- if (test_bit(bus, mp_bus_not_pci)) { +- printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); +- return -1; +- } +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- for (apic = 0; apic < nr_ioapics; apic++) +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) +- break; +- +- if (!test_bit(lbus, mp_bus_not_pci) && +- !mp_irqs[i].mp_irqtype && +- (bus == lbus) && +- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { +- int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); +- +- if (!(apic || IO_APIC_IRQ(irq))) +- continue; +- +- if (pin == (mp_irqs[i].mp_srcbusirq & 3)) +- return irq; +- /* +- * Use the first all-but-pin matching entry as a +- * best-guess fuzzy result for broken mptables. 
+- */ +- if (best_guess < 0) +- best_guess = irq; +- } +- } +- return best_guess; +-} +-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); +- +-/* +- * This function currently is only a helper for the i386 smp boot process where +- * we need to reprogram the ioredtbls to cater for the cpus which have come online +- * so mask in all cases should simply be TARGET_CPUS +- */ +-#ifdef CONFIG_SMP +-#ifndef CONFIG_XEN +-void __init setup_ioapic_dest(void) +-{ +- int pin, ioapic, irq, irq_entry; +- +- if (skip_ioapic_setup == 1) +- return; +- +- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { +- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { +- irq_entry = find_irq_entry(ioapic, pin, mp_INT); +- if (irq_entry == -1) +- continue; +- irq = pin_2_irq(irq_entry, ioapic, pin); +- set_ioapic_affinity_irq(irq, TARGET_CPUS); +- } +- +- } +-} +-#endif /* !CONFIG_XEN */ +-#endif +- +-#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +-/* +- * EISA Edge/Level control register, ELCR +- */ +-static int EISA_ELCR(unsigned int irq) +-{ +- if (irq < 16) { +- unsigned int port = 0x4d0 + (irq >> 3); +- return (inb(port) >> (irq & 7)) & 1; +- } +- apic_printk(APIC_VERBOSE, KERN_INFO +- "Broken MPtable reports ISA irq %d\n", irq); +- return 0; +-} +-#endif +- +-/* ISA interrupts are always polarity zero edge triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_ISA_trigger(idx) (0) +-#define default_ISA_polarity(idx) (0) +- +-/* EISA interrupts are always polarity zero and can be edge or level +- * trigger depending on the ELCR value. If an interrupt is listed as +- * EISA conforming in the MP table, that means its trigger type must +- * be read in from the ELCR */ +- +-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +-#define default_EISA_polarity(idx) default_ISA_polarity(idx) +- +-/* PCI interrupts are always polarity one level triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_PCI_trigger(idx) (1) +-#define default_PCI_polarity(idx) (1) +- +-/* MCA interrupts are always polarity zero level triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_MCA_trigger(idx) (1) +-#define default_MCA_polarity(idx) default_ISA_polarity(idx) +- +-static int MPBIOS_polarity(int idx) +-{ +- int bus = mp_irqs[idx].mp_srcbus; +- int polarity; +- +- /* +- * Determine IRQ line polarity (high active or low active): +- */ +- switch (mp_irqs[idx].mp_irqflag & 3) { +- case 0: /* conforms, ie. bus-type dependent polarity */ +- { +- polarity = test_bit(bus, mp_bus_not_pci)? +- default_ISA_polarity(idx): +- default_PCI_polarity(idx); +- break; +- } +- case 1: /* high active */ +- { +- polarity = 0; +- break; +- } +- case 2: /* reserved */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- case 3: /* low active */ +- { +- polarity = 1; +- break; +- } +- default: /* invalid */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- } +- return polarity; +-} +- +-static int MPBIOS_trigger(int idx) +-{ +- int bus = mp_irqs[idx].mp_srcbus; +- int trigger; +- +- /* +- * Determine IRQ trigger mode (edge or level sensitive): +- */ +- switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { +- case 0: /* conforms, ie. bus-type dependent */ +- { +- trigger = test_bit(bus, mp_bus_not_pci)? 
+- default_ISA_trigger(idx): +- default_PCI_trigger(idx); +-#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +- switch (mp_bus_id_to_type[bus]) { +- case MP_BUS_ISA: /* ISA pin */ +- { +- /* set before the switch */ +- break; +- } +- case MP_BUS_EISA: /* EISA pin */ +- { +- trigger = default_EISA_trigger(idx); +- break; +- } +- case MP_BUS_PCI: /* PCI pin */ +- { +- /* set before the switch */ +- break; +- } +- case MP_BUS_MCA: /* MCA pin */ +- { +- trigger = default_MCA_trigger(idx); +- break; +- } +- default: +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 1; +- break; +- } +- } +-#endif +- break; +- } +- case 1: /* edge */ +- { +- trigger = 0; +- break; +- } +- case 2: /* reserved */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 1; +- break; +- } +- case 3: /* level */ +- { +- trigger = 1; +- break; +- } +- default: /* invalid */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 0; +- break; +- } +- } +- return trigger; +-} +- +-static inline int irq_polarity(int idx) +-{ +- return MPBIOS_polarity(idx); +-} +- +-static inline int irq_trigger(int idx) +-{ +- return MPBIOS_trigger(idx); +-} +- +-static int pin_2_irq(int idx, int apic, int pin) +-{ +- int irq, i; +- int bus = mp_irqs[idx].mp_srcbus; +- +- /* +- * Debugging check, we are in big trouble if this message pops up! +- */ +- if (mp_irqs[idx].mp_dstirq != pin) +- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); +- +- if (test_bit(bus, mp_bus_not_pci)) +- irq = mp_irqs[idx].mp_srcbusirq; +- else { +- /* +- * PCI IRQs are mapped in order +- */ +- i = irq = 0; +- while (i < apic) +- irq += nr_ioapic_registers[i++]; +- irq += pin; +- +- /* +- * For MPS mode, so far only needed by ES7000 platform +- */ +- if (ioapic_renumber_irq) +- irq = ioapic_renumber_irq(apic, irq); +- } +- +- /* +- * PCI IRQ command line redirection. Yes, limits are hardcoded. +- */ +- if ((pin >= 16) && (pin <= 23)) { +- if (pirq_entries[pin-16] != -1) { +- if (!pirq_entries[pin-16]) { +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- "disabling PIRQ%d\n", pin-16); +- } else { +- irq = pirq_entries[pin-16]; +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- "using PIRQ%d -> IRQ %d\n", +- pin-16, irq); +- } +- } +- } +- return irq; +-} +- +-static inline int IO_APIC_irq_trigger(int irq) +-{ +- int apic, idx, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- idx = find_irq_entry(apic, pin, mp_INT); +- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) +- return irq_trigger(idx); +- } +- } +- /* +- * nonexistent IRQs are edge default +- */ +- return 0; +-} +- +-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ +-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ +- +-static int __assign_irq_vector(int irq) +-{ +- int vector; +- struct physdev_irq irq_op; +- +- BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); +- +- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) +- return -EINVAL; +- +- if (irq_vector[irq] > 0) +- return irq_vector[irq]; +- +- irq_op.irq = irq; +- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) +- return -ENOSPC; +- +- vector = irq_op.vector; +- irq_vector[irq] = vector; +- +- return vector; +-} +- +-static int assign_irq_vector(int irq) +-{ +- unsigned long flags; +- int vector; +- +- spin_lock_irqsave(&vector_lock, flags); +- vector = __assign_irq_vector(irq); +- spin_unlock_irqrestore(&vector_lock, flags); +- +- return vector; +-} +- +-#ifndef CONFIG_XEN +-static struct irq_chip ioapic_chip; +- +-#define IOAPIC_AUTO -1 +-#define IOAPIC_EDGE 0 +-#define IOAPIC_LEVEL 1 +- +-static void ioapic_register_intr(int irq, int vector, unsigned long trigger) +-{ +- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || +- trigger == IOAPIC_LEVEL) { +- irq_desc[irq].status |= IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &ioapic_chip, +- handle_fasteoi_irq, "fasteoi"); +- } else { +- irq_desc[irq].status &= ~IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &ioapic_chip, +- handle_edge_irq, "edge"); +- } +- set_intr_gate(vector, interrupt[irq]); +-} +-#else +-#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq) +-#endif +- +-static void __init setup_IO_APIC_irqs(void) +-{ +- struct IO_APIC_route_entry entry; +- int apic, pin, idx, irq, first_notcon = 1, vector; +- +- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- +- /* +- * add it to the IO-APIC irq-routing table: +- */ +- memset(&entry, 0, sizeof(entry)); +- +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.dest_mode = INT_DEST_MODE; +- entry.mask = 0; /* enable IRQ */ +- entry.dest.logical.logical_dest = +- cpu_mask_to_apicid(TARGET_CPUS); +- +- idx = find_irq_entry(apic, pin, mp_INT); +- if (idx == -1) { +- if (first_notcon) { +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- " IO-APIC (apicid-pin) %d-%d", +- mp_ioapics[apic].mp_apicid, +- pin); +- first_notcon = 0; +- } else +- apic_printk(APIC_VERBOSE, ", %d-%d", +- mp_ioapics[apic].mp_apicid, pin); +- continue; +- } +- +- if (!first_notcon) { +- apic_printk(APIC_VERBOSE, " not connected.\n"); +- first_notcon = 1; +- } +- +- entry.trigger = irq_trigger(idx); +- entry.polarity = irq_polarity(idx); +- +- if (irq_trigger(idx)) { +- entry.trigger = 1; +- entry.mask = 1; +- } +- +- irq = pin_2_irq(idx, apic, pin); +- /* +- * skip adding the timer int on secondary nodes, which causes +- * a small but painful rift in the time-space continuum +- */ +- if (multi_timer_check(apic, irq)) +- continue; +- else +- add_pin_to_irq(irq, apic, pin); +- +- if (/*!apic &&*/ !IO_APIC_IRQ(irq)) +- continue; +- +- if (IO_APIC_IRQ(irq)) { +- vector = assign_irq_vector(irq); +- entry.vector = vector; +- ioapic_register_intr(irq, vector, IOAPIC_AUTO); +- +- if (!apic && (irq < 16)) +- disable_8259A_irq(irq); +- } +- ioapic_write_entry(apic, pin, entry); +- } +- } +- +- if (!first_notcon) +- apic_printk(APIC_VERBOSE, " not connected.\n"); +-} +- +-#ifndef CONFIG_XEN +-/* +- * Set up the timer pin, possibly with the 8259A-master behind. 
+- */ +-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, +- int vector) +-{ +- struct IO_APIC_route_entry entry; +- +- memset(&entry, 0, sizeof(entry)); +- +- /* +- * We use logical delivery to get the timer IRQ +- * to the first CPU. +- */ +- entry.dest_mode = INT_DEST_MODE; +- entry.mask = 1; /* mask IRQ now */ +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.polarity = 0; +- entry.trigger = 0; +- entry.vector = vector; +- +- /* +- * The timer IRQ doesn't have to know that behind the +- * scene we may have a 8259A-master in AEOI mode ... +- */ +- ioapic_register_intr(0, vector, IOAPIC_EDGE); +- +- /* +- * Add it to the IO-APIC irq-routing table: +- */ +- ioapic_write_entry(apic, pin, entry); +-} +- +-void __init print_IO_APIC(void) +-{ +- int apic, i; +- union IO_APIC_reg_00 reg_00; +- union IO_APIC_reg_01 reg_01; +- union IO_APIC_reg_02 reg_02; +- union IO_APIC_reg_03 reg_03; +- unsigned long flags; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); +- for (i = 0; i < nr_ioapics; i++) +- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", +- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); +- +- /* +- * We are a bit conservative about what we expect. We have to +- * know about every hardware change ASAP. +- */ +- printk(KERN_INFO "testing the IO APIC.......................\n"); +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- reg_01.raw = io_apic_read(apic, 1); +- if (reg_01.bits.version >= 0x10) +- reg_02.raw = io_apic_read(apic, 2); +- if (reg_01.bits.version >= 0x20) +- reg_03.raw = io_apic_read(apic, 3); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); +- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); +- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); +- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); +- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); +- +- printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); +- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); +- +- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); +- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); +- +- /* +- * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, +- * but the value of reg_02 is read as the previous read register +- * value, so ignore it if reg_02 == reg_01. +- */ +- if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { +- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); +- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); +- } +- +- /* +- * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 +- * or reg_03, but the value of reg_0[23] is read as the previous read +- * register value, so ignore it if reg_03 == reg_0[12]. +- */ +- if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && +- reg_03.raw != reg_01.raw) { +- printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); +- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); +- } +- +- printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); +- +- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" +- " Stat Dest Deli Vect: \n"); +- +- for (i = 0; i <= reg_01.bits.entries; i++) { +- struct IO_APIC_route_entry entry; +- +- entry = ioapic_read_entry(apic, i); +- +- printk(KERN_DEBUG " %02x %03X %02X ", +- i, +- entry.dest.logical.logical_dest, +- entry.dest.physical.physical_dest +- ); +- +- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", +- entry.mask, +- entry.trigger, +- entry.irr, +- entry.polarity, +- entry.delivery_status, +- entry.dest_mode, +- entry.delivery_mode, +- entry.vector +- ); +- } +- } +- printk(KERN_DEBUG "IRQ to pin mappings:\n"); +- for (i = 0; i < NR_IRQS; i++) { +- struct irq_pin_list *entry = irq_2_pin + i; +- if (entry->pin < 0) +- continue; +- printk(KERN_DEBUG "IRQ%d ", i); +- for (;;) { +- printk("-> %d:%d", entry->apic, entry->pin); +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +- printk("\n"); +- } +- +- printk(KERN_INFO ".................................... done.\n"); +- +- return; +-} +- +-static void print_APIC_bitfield(int base) +-{ +- unsigned int v; +- int i, j; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); +- for (i = 0; i < 8; i++) { +- v = apic_read(base + i*0x10); +- for (j = 0; j < 32; j++) { +- if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ +- apic_write(APIC_ESR, 0); +- v = apic_read(APIC_ESR); +- printk(KERN_DEBUG "... APIC ESR: %08x\n", v); +- } +- +- v = apic_read(APIC_ICR); +- printk(KERN_DEBUG "... APIC ICR: %08x\n", v); +- v = apic_read(APIC_ICR2); +- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); +- +- v = apic_read(APIC_LVTT); +- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); +- +- if (maxlvt > 3) { /* PC is LVT#4. */ +- v = apic_read(APIC_LVTPC); +- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); +- } +- v = apic_read(APIC_LVT0); +- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); +- v = apic_read(APIC_LVT1); +- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); +- +- if (maxlvt > 2) { /* ERR is LVT#3. */ +- v = apic_read(APIC_LVTERR); +- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); +- } +- +- v = apic_read(APIC_TMICT); +- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); +- v = apic_read(APIC_TMCCT); +- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); +- v = apic_read(APIC_TDCR); +- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); +- printk("\n"); +-} +- +-void print_all_local_APICs(void) +-{ +- on_each_cpu(print_local_APIC, NULL, 1); +-} +- +-void /*__init*/ print_PIC(void) +-{ +- unsigned int v; +- unsigned long flags; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "\nprinting PIC contents\n"); +- +- spin_lock_irqsave(&i8259A_lock, flags); +- +- v = inb(0xa1) << 8 | inb(0x21); +- printk(KERN_DEBUG "... PIC IMR: %04x\n", v); +- +- v = inb(0xa0) << 8 | inb(0x20); +- printk(KERN_DEBUG "... PIC IRR: %04x\n", v); +- +- outb(0x0b, 0xa0); +- outb(0x0b, 0x20); +- v = inb(0xa0) << 8 | inb(0x20); +- outb(0x0a, 0xa0); +- outb(0x0a, 0x20); +- +- spin_unlock_irqrestore(&i8259A_lock, flags); +- +- printk(KERN_DEBUG "... PIC ISR: %04x\n", v); +- +- v = inb(0x4d1) << 8 | inb(0x4d0); +- printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); +-} +-#else +-void __init print_IO_APIC(void) {} +-#endif /* !CONFIG_XEN */ +- +-static void __init enable_IO_APIC(void) +-{ +- union IO_APIC_reg_01 reg_01; +-#ifndef CONFIG_XEN +- int i8259_apic, i8259_pin; +-#endif +- int i, apic; +- unsigned long flags; +- +- for (i = 0; i < PIN_MAP_SIZE; i++) { +- irq_2_pin[i].pin = -1; +- irq_2_pin[i].next = 0; +- } +- if (!pirqs_enabled) +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; +- +- /* +- * The number of IO-APIC IRQ registers (== #pins): +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(apic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- nr_ioapic_registers[apic] = reg_01.bits.entries+1; +- } +-#ifndef CONFIG_XEN +- for (apic = 0; apic < nr_ioapics; apic++) { +- int pin; +- /* See if any of the pins is in ExtINT mode */ +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- struct IO_APIC_route_entry entry; +- entry = ioapic_read_entry(apic, pin); +- +- +- /* If the interrupt line is enabled and in ExtInt mode +- * I have found the pin where the i8259 is connected. +- */ +- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { +- ioapic_i8259.apic = apic; +- ioapic_i8259.pin = pin; +- goto found_i8259; +- } +- } +- } +- found_i8259: +- /* Look to see if the MP table has reported the ExtINT */ +- /* If we could not find the appropriate pin by looking at the ioapic +- * the i8259 probably is not connected to the ioapic, but give the +- * mptable a chance anyway. +- */ +- i8259_pin = find_isa_irq_pin(0, mp_ExtINT); +- i8259_apic = find_isa_irq_apic(0, mp_ExtINT); +- /* Trust the MP table if nothing is set up in the hardware */ +- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { +- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); +- ioapic_i8259.pin = i8259_pin; +- ioapic_i8259.apic = i8259_apic; +- } +- /* Complain if the MP table and the hardware disagree */ +- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && +- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) +- { +- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); +- } +-#endif +- +- /* +- * Do not trust the IO-APIC being empty at bootup +- */ +- clear_IO_APIC(); +-} +- +-/* +- * Not an __init, needed by the reboot code +- */ +-void disable_IO_APIC(void) +-{ +- /* +- * Clear the IO-APIC before rebooting: +- */ +- clear_IO_APIC(); +- +-#ifndef CONFIG_XEN +- /* +- * If the i8259 is routed through an IOAPIC, +- * put that IOAPIC in virtual wire mode +- * so legacy interrupts can be delivered. +- */ +- if (ioapic_i8259.pin != -1) { +- struct IO_APIC_route_entry entry; +- +- memset(&entry, 0, sizeof(entry)); +- entry.mask = 0; /* Enabled */ +- entry.trigger = 0; /* Edge */ +- entry.irr = 0; +- entry.polarity = 0; /* High */ +- entry.delivery_status = 0; +- entry.dest_mode = 0; /* Physical */ +- entry.delivery_mode = dest_ExtINT; /* ExtInt */ +- entry.vector = 0; +- entry.dest.physical.physical_dest = +- GET_APIC_ID(read_apic_id()); +- +- /* +- * Add it to the IO-APIC irq-routing table: +- */ +- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); +- } +- disconnect_bsp_APIC(ioapic_i8259.pin != -1); +-#endif +-} +- +-/* +- * function to set the IO-APIC physical IDs based on the +- * values stored in the MPC table. 
+- * +- * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 +- */ +- +-#ifndef CONFIG_XEN +-static void __init setup_ioapic_ids_from_mpc(void) +-{ +- union IO_APIC_reg_00 reg_00; +- physid_mask_t phys_id_present_map; +- int apic; +- int i; +- unsigned char old_id; +- unsigned long flags; +- +-#ifdef CONFIG_X86_NUMAQ +- if (found_numaq) +- return; +-#endif +- +- /* +- * Don't check I/O APIC IDs for xAPIC systems. They have +- * no meaning without the serial APIC bus. +- */ +- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) +- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) +- return; +- /* +- * This is broken; anything with a real cpu count has to +- * circumvent this idiocy regardless. +- */ +- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); +- +- /* +- * Set the IOAPIC ID to the value stored in the MPC table. +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- +- /* Read the register 0 value */ +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- old_id = mp_ioapics[apic].mp_apicid; +- +- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { +- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", +- apic, mp_ioapics[apic].mp_apicid); +- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", +- reg_00.bits.ID); +- mp_ioapics[apic].mp_apicid = reg_00.bits.ID; +- } +- +- /* +- * Sanity check, is the ID really free? Every APIC in a +- * system must have a unique ID or we get lots of nice +- * 'stuck on smp_invalidate_needed IPI wait' messages. +- */ +- if (check_apicid_used(phys_id_present_map, +- mp_ioapics[apic].mp_apicid)) { +- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", +- apic, mp_ioapics[apic].mp_apicid); +- for (i = 0; i < get_physical_broadcast(); i++) +- if (!physid_isset(i, phys_id_present_map)) +- break; +- if (i >= get_physical_broadcast()) +- panic("Max APIC ID exceeded!\n"); +- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", +- i); +- physid_set(i, phys_id_present_map); +- mp_ioapics[apic].mp_apicid = i; +- } else { +- physid_mask_t tmp; +- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); +- apic_printk(APIC_VERBOSE, "Setting %d in the " +- "phys_id_present_map\n", +- mp_ioapics[apic].mp_apicid); +- physids_or(phys_id_present_map, phys_id_present_map, tmp); +- } +- +- +- /* +- * We need to adjust the IRQ routing table +- * if the ID changed. +- */ +- if (old_id != mp_ioapics[apic].mp_apicid) +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_dstapic == old_id) +- mp_irqs[i].mp_dstapic +- = mp_ioapics[apic].mp_apicid; +- +- /* +- * Read the right value from the MPC table and +- * write it into the ID register. 
+- */ +- apic_printk(APIC_VERBOSE, KERN_INFO +- "...changing IO-APIC physical APIC ID to %d ...", +- mp_ioapics[apic].mp_apicid); +- +- reg_00.bits.ID = mp_ioapics[apic].mp_apicid; +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0, reg_00.raw); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- /* +- * Sanity check +- */ +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) +- printk("could not set ID!\n"); +- else +- apic_printk(APIC_VERBOSE, " ok.\n"); +- } +-} +- +-int no_timer_check __initdata; +- +-static int __init notimercheck(char *s) +-{ +- no_timer_check = 1; +- return 1; +-} +-__setup("no_timer_check", notimercheck); +- +-/* +- * There is a nasty bug in some older SMP boards; their mptable lies +- * about the timer IRQ. We do the following to work around the situation: +- * +- * - timer IRQ defaults to IO-APIC IRQ +- * - if this function detects that timer IRQs are defunct, then we fall +- * back to ISA timer IRQs +- */ +-static int __init timer_irq_works(void) +-{ +- unsigned long t1 = jiffies; +- unsigned long flags; +- +- if (no_timer_check) +- return 1; +- +- local_save_flags(flags); +- local_irq_enable(); +- /* Let ten ticks pass... */ +- mdelay((10 * 1000) / HZ); +- local_irq_restore(flags); +- +- /* +- * Expect a few ticks at least, to be sure some possible +- * glue logic does not lock up after one or two first +- * ticks in a non-ExtINT mode. Also the local APIC +- * might have cached one ExtINT interrupt. Finally, at +- * least one tick may be lost due to delays. +- */ +- if (time_after(jiffies, t1 + 4)) +- return 1; +- +- return 0; +-} +- +-/* +- * In the SMP+IOAPIC case it might happen that an unspecified +- * number of pending IRQ events is left unhandled. These cases are very rare, +- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much +- * better to do it this way as thus we do not have to be aware of +- * 'pending' interrupts in the IRQ path, except at this point. +- */ +-/* +- * Edge-triggered IRQs need to resend any interrupt +- * that was delayed, but this is now handled in the device +- * independent code. +- */ +- +-/* +- * Startup quirk: +- * +- * Starting up an edge-triggered IO-APIC interrupt is +- * nasty - we need to make sure that we get the edge. +- * If it is already asserted for some reason, we need +- * to return 1 to indicate that it was pending. +- * +- * This is not complete - we should be able to fake +- * an edge even if it isn't on the 8259A... +- * +- * (We do this for level-triggered IRQs too - it cannot hurt.) +- */ +-static unsigned int startup_ioapic_irq(unsigned int irq) +-{ +- int was_pending = 0; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- if (irq < 16) { +- disable_8259A_irq(irq); +- if (i8259A_irq_pending(irq)) +- was_pending = 1; +- } +- __unmask_IO_APIC_irq(irq); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return was_pending; +-} +- +-static void ack_ioapic_irq(unsigned int irq) +-{ +- move_native_irq(irq); +- ack_APIC_irq(); +-} +- +-static void ack_ioapic_quirk_irq(unsigned int irq) +-{ +- unsigned long v; +- int i; +- +- move_native_irq(irq); +-/* +- * It appears there is an erratum which affects at least version 0x11 +- * of I/O APIC (that's the 82093AA and cores integrated into various +- * chipsets). 
Under certain conditions a level-triggered interrupt is +- * erroneously delivered as an edge-triggered one, but the respective IRR +- * bit gets set nevertheless. As a result the I/O unit expects an EOI +- * message but it will never arrive and further interrupts are blocked +- * from the source. The exact reason is so far unknown, but the +- * phenomenon was observed when two consecutive interrupt requests +- * from a given source get delivered to the same CPU and the source is +- * temporarily disabled in between. +- * +- * A workaround is to simulate an EOI message manually. We achieve it +- * by setting the trigger mode to edge and then to level when the edge +- * trigger mode gets detected in the TMR of a local APIC for a +- * level-triggered interrupt. We mask the source for the time of the +- * operation to prevent an edge-triggered interrupt escaping meanwhile. +- * The idea is from Manfred Spraul. --macro +- */ +- i = irq_vector[irq]; +- +- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); +- +- ack_APIC_irq(); +- +- if (!(v & (1 << (i & 0x1f)))) { +- atomic_inc(&irq_mis_count); +- spin_lock(&ioapic_lock); +- __mask_and_edge_IO_APIC_irq(irq); +- __unmask_and_level_IO_APIC_irq(irq); +- spin_unlock(&ioapic_lock); +- } +-} +- +-static int ioapic_retrigger_irq(unsigned int irq) +-{ +- send_IPI_self(irq_vector[irq]); +- +- return 1; +-} +- +-static struct irq_chip ioapic_chip __read_mostly = { +- .name = "IO-APIC", +- .startup = startup_ioapic_irq, +- .mask = mask_IO_APIC_irq, +- .unmask = unmask_IO_APIC_irq, +- .ack = ack_ioapic_irq, +- .eoi = ack_ioapic_quirk_irq, +-#ifdef CONFIG_SMP +- .set_affinity = set_ioapic_affinity_irq, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +-#endif /* !CONFIG_XEN */ +- +-static inline void init_IO_APIC_traps(void) +-{ +- int irq; +- +- /* +- * NOTE! The local APIC isn't very good at handling +- * multiple interrupts at the same interrupt level. +- * As the interrupt level is determined by taking the +- * vector number and shifting that right by 4, we +- * want to spread these out a bit so that they don't +- * all fall in the same interrupt level. +- * +- * Also, we've got to be careful not to trash gate +- * 0x80, because int 0x80 is hm, kind of importantish. ;) +- */ +- for (irq = 0; irq < NR_IRQS ; irq++) { +- if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { +- /* +- * Hmm.. We don't have an entry for this, +- * so default to an old-fashioned 8259 +- * interrupt if we can.. +- */ +- if (irq < 16) +- make_8259A_irq(irq); +-#ifndef CONFIG_XEN +- else +- /* Strange. Oh, well.. 
*/ +- irq_desc[irq].chip = &no_irq_chip; +-#endif +- } +- } +-} +- +-#ifndef CONFIG_XEN +-/* +- * The local APIC irq-chip implementation: +- */ +- +-static void ack_lapic_irq(unsigned int irq) +-{ +- ack_APIC_irq(); +-} +- +-static void mask_lapic_irq(unsigned int irq) +-{ +- unsigned long v; +- +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v | APIC_LVT_MASKED); +-} +- +-static void unmask_lapic_irq(unsigned int irq) +-{ +- unsigned long v; +- +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); +-} +- +-static struct irq_chip lapic_chip __read_mostly = { +- .name = "local-APIC", +- .mask = mask_lapic_irq, +- .unmask = unmask_lapic_irq, +- .ack = ack_lapic_irq, +-}; +- +-static void lapic_register_intr(int irq, int vector) +-{ +- irq_desc[irq].status &= ~IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, +- "edge"); +- set_intr_gate(vector, interrupt[irq]); +-} +- +-static void __init setup_nmi(void) +-{ +- /* +- * Dirty trick to enable the NMI watchdog ... +- * We put the 8259A master into AEOI mode and +- * unmask on all local APICs LVT0 as NMI. +- * +- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') +- * is from Maciej W. Rozycki - so we do not have to EOI from +- * the NMI handler or the timer interrupt. +- */ +- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); +- +- enable_NMI_through_LVT0(); +- +- apic_printk(APIC_VERBOSE, " done.\n"); +-} +- +-/* +- * This looks a bit hackish but it's about the only one way of sending +- * a few INTA cycles to 8259As and any associated glue logic. ICR does +- * not support the ExtINT mode, unfortunately. We need to send these +- * cycles as some i82489DX-based boards have glue logic that keeps the +- * 8259A interrupt line asserted until INTA. --macro +- */ +-static inline void __init unlock_ExtINT_logic(void) +-{ +- int apic, pin, i; +- struct IO_APIC_route_entry entry0, entry1; +- unsigned char save_control, save_freq_select; +- +- pin = find_isa_irq_pin(8, mp_INT); +- if (pin == -1) { +- WARN_ON_ONCE(1); +- return; +- } +- apic = find_isa_irq_apic(8, mp_INT); +- if (apic == -1) { +- WARN_ON_ONCE(1); +- return; +- } +- +- entry0 = ioapic_read_entry(apic, pin); +- clear_IO_APIC_pin(apic, pin); +- +- memset(&entry1, 0, sizeof(entry1)); +- +- entry1.dest_mode = 0; /* physical delivery */ +- entry1.mask = 0; /* unmask IRQ now */ +- entry1.dest.physical.physical_dest = hard_smp_processor_id(); +- entry1.delivery_mode = dest_ExtINT; +- entry1.polarity = entry0.polarity; +- entry1.trigger = 0; +- entry1.vector = 0; +- +- ioapic_write_entry(apic, pin, entry1); +- +- save_control = CMOS_READ(RTC_CONTROL); +- save_freq_select = CMOS_READ(RTC_FREQ_SELECT); +- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, +- RTC_FREQ_SELECT); +- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); +- +- i = 100; +- while (i-- > 0) { +- mdelay(10); +- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) +- i -= 10; +- } +- +- CMOS_WRITE(save_control, RTC_CONTROL); +- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); +- clear_IO_APIC_pin(apic, pin); +- +- ioapic_write_entry(apic, pin, entry0); +-} +- +-/* +- * This code may look a bit paranoid, but it's supposed to cooperate with +- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ +- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast +- * fanatically on his truly buggy board. 
+- */ +-static inline void __init check_timer(void) +-{ +- int apic1, pin1, apic2, pin2; +- int no_pin1 = 0; +- int vector; +- unsigned int ver; +- unsigned long flags; +- +- local_irq_save(flags); +- +- ver = apic_read(APIC_LVR); +- ver = GET_APIC_VERSION(ver); +- +- /* +- * get/set the timer IRQ vector: +- */ +- disable_8259A_irq(0); +- vector = assign_irq_vector(0); +- set_intr_gate(vector, interrupt[0]); +- +- /* +- * As IRQ0 is to be enabled in the 8259A, the virtual +- * wire has to be disabled in the local APIC. Also +- * timer interrupts need to be acknowledged manually in +- * the 8259A for the i82489DX when using the NMI +- * watchdog as that APIC treats NMIs as level-triggered. +- * The AEOI mode will finish them in the 8259A +- * automatically. +- */ +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); +- init_8259A(1); +- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +- +- pin1 = find_isa_irq_pin(0, mp_INT); +- apic1 = find_isa_irq_apic(0, mp_INT); +- pin2 = ioapic_i8259.pin; +- apic2 = ioapic_i8259.apic; +- +- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " +- "apic1=%d pin1=%d apic2=%d pin2=%d\n", +- vector, apic1, pin1, apic2, pin2); +- +- /* +- * Some BIOS writers are clueless and report the ExtINTA +- * I/O APIC input from the cascaded 8259A as the timer +- * interrupt input. So just in case, if only one pin +- * was found above, try it both directly and through the +- * 8259A. +- */ +- if (pin1 == -1) { +- pin1 = pin2; +- apic1 = apic2; +- no_pin1 = 1; +- } else if (pin2 == -1) { +- pin2 = pin1; +- apic2 = apic1; +- } +- +- if (pin1 != -1) { +- /* +- * Ok, does IRQ0 through the IOAPIC work? +- */ +- if (no_pin1) { +- add_pin_to_irq(0, apic1, pin1); +- setup_timer_IRQ0_pin(apic1, pin1, vector); +- } +- unmask_IO_APIC_irq(0); +- if (timer_irq_works()) { +- if (nmi_watchdog == NMI_IO_APIC) { +- setup_nmi(); +- enable_8259A_irq(0); +- } +- if (disable_timer_pin_1 > 0) +- clear_IO_APIC_pin(0, pin1); +- goto out; +- } +- clear_IO_APIC_pin(apic1, pin1); +- if (!no_pin1) +- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " +- "8254 timer not connected to IO-APIC\n"); +- +- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " +- "(IRQ0) through the 8259A ...\n"); +- apic_printk(APIC_QUIET, KERN_INFO +- "..... (found apic %d pin %d) ...\n", apic2, pin2); +- /* +- * legacy devices should be connected to IO APIC #0 +- */ +- replace_pin_at_irq(0, apic1, pin1, apic2, pin2); +- setup_timer_IRQ0_pin(apic2, pin2, vector); +- unmask_IO_APIC_irq(0); +- enable_8259A_irq(0); +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); +- timer_through_8259 = 1; +- if (nmi_watchdog == NMI_IO_APIC) { +- disable_8259A_irq(0); +- setup_nmi(); +- enable_8259A_irq(0); +- } +- goto out; +- } +- /* +- * Cleanup, just in case ... +- */ +- disable_8259A_irq(0); +- clear_IO_APIC_pin(apic2, pin2); +- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); +- } +- +- if (nmi_watchdog == NMI_IO_APIC) { +- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " +- "through the IO-APIC - disabling NMI Watchdog!\n"); +- nmi_watchdog = NMI_NONE; +- } +- timer_ack = 0; +- +- apic_printk(APIC_QUIET, KERN_INFO +- "...trying to set up timer as Virtual Wire IRQ...\n"); +- +- lapic_register_intr(0, vector); +- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ +- enable_8259A_irq(0); +- +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "..... 
works.\n"); +- goto out; +- } +- disable_8259A_irq(0); +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); +- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); +- +- apic_printk(APIC_QUIET, KERN_INFO +- "...trying to set up timer as ExtINT IRQ...\n"); +- +- init_8259A(0); +- make_8259A_irq(0); +- apic_write(APIC_LVT0, APIC_DM_EXTINT); +- +- unlock_ExtINT_logic(); +- +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +- goto out; +- } +- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); +- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " +- "report. Then try booting with the 'noapic' option.\n"); +-out: +- local_irq_restore(flags); +-} +-#else +-int timer_uses_ioapic_pin_0 = 0; +-#define check_timer() ((void)0) +-#endif +- +-/* +- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available +- * to devices. However there may be an I/O APIC pin available for +- * this interrupt regardless. The pin may be left unconnected, but +- * typically it will be reused as an ExtINT cascade interrupt for +- * the master 8259A. In the MPS case such a pin will normally be +- * reported as an ExtINT interrupt in the MP table. With ACPI +- * there is no provision for ExtINT interrupts, and in the absence +- * of an override it would be treated as an ordinary ISA I/O APIC +- * interrupt, that is edge-triggered and unmasked by default. We +- * used to do this, but it caused problems on some systems because +- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using +- * the same ExtINT cascade interrupt to drive the local APIC of the +- * bootstrap processor. Therefore we refrain from routing IRQ2 to +- * the I/O APIC in all cases now. No actual device should request +- * it anyway. --macro +- */ +-#define PIC_IRQS (1 << PIC_CASCADE_IR) +- +-void __init setup_IO_APIC(void) +-{ +-#ifndef CONFIG_XEN +- int i; +- +- /* Reserve all the system vectors. */ +- for (i = first_system_vector; i < NR_VECTORS; i++) +- set_bit(i, used_vectors); +-#endif +- +- enable_IO_APIC(); +- +- io_apic_irqs = ~PIC_IRQS; +- +- printk("ENABLING IO-APIC IRQs\n"); +- +-#ifndef CONFIG_XEN +- /* +- * Set up IO-APIC IRQ routing. +- */ +- if (!acpi_ioapic) +- setup_ioapic_ids_from_mpc(); +- sync_Arb_IDs(); +-#endif +- setup_IO_APIC_irqs(); +- init_IO_APIC_traps(); +- check_timer(); +- if (!acpi_ioapic) +- print_IO_APIC(); +-} +- +-/* +- * Called after all the initialization is done. If we didnt find any +- * APIC bugs then we can allow the modify fast path +- */ +- +-static int __init io_apic_bug_finalize(void) +-{ +- if (sis_apic_bug == -1) +- sis_apic_bug = 0; +- if (is_initial_xendomain()) { +- struct xen_platform_op op = { .cmd = XENPF_platform_quirk }; +- op.u.platform_quirk.quirk_id = sis_apic_bug ? 
+- QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL; +- VOID(HYPERVISOR_platform_op(&op)); +- } +- return 0; +-} +- +-late_initcall(io_apic_bug_finalize); +- +-#ifndef CONFIG_XEN +- +-struct sysfs_ioapic_data { +- struct sys_device dev; +- struct IO_APIC_route_entry entry[0]; +-}; +-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; +- +-static int ioapic_suspend(struct sys_device *dev, pm_message_t state) +-{ +- struct IO_APIC_route_entry *entry; +- struct sysfs_ioapic_data *data; +- int i; +- +- data = container_of(dev, struct sysfs_ioapic_data, dev); +- entry = data->entry; +- for (i = 0; i < nr_ioapic_registers[dev->id]; i++) +- entry[i] = ioapic_read_entry(dev->id, i); +- +- return 0; +-} +- +-static int ioapic_resume(struct sys_device *dev) +-{ +- struct IO_APIC_route_entry *entry; +- struct sysfs_ioapic_data *data; +- unsigned long flags; +- union IO_APIC_reg_00 reg_00; +- int i; +- +- data = container_of(dev, struct sysfs_ioapic_data, dev); +- entry = data->entry; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(dev->id, 0); +- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { +- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; +- io_apic_write(dev->id, 0, reg_00.raw); +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); +- for (i = 0; i < nr_ioapic_registers[dev->id]; i++) +- ioapic_write_entry(dev->id, i, entry[i]); +- +- return 0; +-} +- +-static struct sysdev_class ioapic_sysdev_class = { +- .name = "ioapic", +- .suspend = ioapic_suspend, +- .resume = ioapic_resume, +-}; +- +-static int __init ioapic_init_sysfs(void) +-{ +- struct sys_device *dev; +- int i, size, error = 0; +- +- error = sysdev_class_register(&ioapic_sysdev_class); +- if (error) +- return error; +- +- for (i = 0; i < nr_ioapics; i++) { +- size = sizeof(struct sys_device) + nr_ioapic_registers[i] +- * sizeof(struct IO_APIC_route_entry); +- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); +- if (!mp_ioapic_data[i]) { +- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); +- continue; +- } +- dev = &mp_ioapic_data[i]->dev; +- dev->id = i; +- dev->cls = &ioapic_sysdev_class; +- error = sysdev_register(dev); +- if (error) { +- kfree(mp_ioapic_data[i]); +- mp_ioapic_data[i] = NULL; +- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); +- continue; +- } +- } +- +- return 0; +-} +- +-device_initcall(ioapic_init_sysfs); +- +-/* +- * Dynamic irq allocate and deallocation +- */ +-int create_irq(void) +-{ +- /* Allocate an unused irq */ +- int irq, new, vector = 0; +- unsigned long flags; +- +- irq = -ENOSPC; +- spin_lock_irqsave(&vector_lock, flags); +- for (new = (NR_IRQS - 1); new >= 0; new--) { +- if (platform_legacy_irq(new)) +- continue; +- if (irq_vector[new] != 0) +- continue; +- vector = __assign_irq_vector(new); +- if (likely(vector > 0)) +- irq = new; +- break; +- } +- spin_unlock_irqrestore(&vector_lock, flags); +- +- if (irq >= 0) { +- set_intr_gate(vector, interrupt[irq]); +- dynamic_irq_init(irq); +- } +- return irq; +-} +- +-void destroy_irq(unsigned int irq) +-{ +- unsigned long flags; +- +- dynamic_irq_cleanup(irq); +- +- spin_lock_irqsave(&vector_lock, flags); +- clear_bit(irq_vector[irq], used_vectors); +- irq_vector[irq] = 0; +- spin_unlock_irqrestore(&vector_lock, flags); +-} +- +-#endif /* CONFIG_XEN */ +- +-/* +- * MSI message composition +- */ +-#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) +-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +-{ +- int vector; +- unsigned dest; +- +- vector = 
assign_irq_vector(irq); +- if (vector >= 0) { +- dest = cpu_mask_to_apicid(TARGET_CPUS); +- +- msg->address_hi = MSI_ADDR_BASE_HI; +- msg->address_lo = +- MSI_ADDR_BASE_LO | +- ((INT_DEST_MODE == 0) ? +-MSI_ADDR_DEST_MODE_PHYSICAL: +- MSI_ADDR_DEST_MODE_LOGICAL) | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- MSI_ADDR_REDIRECTION_CPU: +- MSI_ADDR_REDIRECTION_LOWPRI) | +- MSI_ADDR_DEST_ID(dest); +- +- msg->data = +- MSI_DATA_TRIGGER_EDGE | +- MSI_DATA_LEVEL_ASSERT | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +-MSI_DATA_DELIVERY_FIXED: +- MSI_DATA_DELIVERY_LOWPRI) | +- MSI_DATA_VECTOR(vector); +- } +- return vector; +-} +- +-#ifdef CONFIG_SMP +-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +-{ +- struct msi_msg msg; +- unsigned int dest; +- cpumask_t tmp; +- int vector; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- vector = assign_irq_vector(irq); +- if (vector < 0) +- return; +- +- dest = cpu_mask_to_apicid(mask); +- +- read_msi_msg(irq, &msg); +- +- msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(vector); +- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; +- msg.address_lo |= MSI_ADDR_DEST_ID(dest); +- +- write_msi_msg(irq, &msg); +- irq_desc[irq].affinity = mask; +-} +-#endif /* CONFIG_SMP */ +- +-/* +- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, +- * which implement the MSI or MSI-X Capability Structure. +- */ +-static struct irq_chip msi_chip = { +- .name = "PCI-MSI", +- .unmask = unmask_msi_irq, +- .mask = mask_msi_irq, +- .ack = ack_ioapic_irq, +-#ifdef CONFIG_SMP +- .set_affinity = set_msi_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +-{ +- struct msi_msg msg; +- int irq, ret; +- irq = create_irq(); +- if (irq < 0) +- return irq; +- +- ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) { +- destroy_irq(irq); +- return ret; +- } +- +- set_irq_msi(irq, desc); +- write_msi_msg(irq, &msg); +- +- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, +- "edge"); +- +- return 0; +-} +- +-void arch_teardown_msi_irq(unsigned int irq) +-{ +- destroy_irq(irq); +-} +- +-#endif /* CONFIG_PCI_MSI */ +- +-/* +- * Hypertransport interrupt support +- */ +-#ifdef CONFIG_HT_IRQ +- +-#ifdef CONFIG_SMP +- +-static void target_ht_irq(unsigned int irq, unsigned int dest) +-{ +- struct ht_irq_msg msg; +- fetch_ht_irq_msg(irq, &msg); +- +- msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); +- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); +- +- msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); +- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); +- +- write_ht_irq_msg(irq, &msg); +-} +- +-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +-{ +- unsigned int dest; +- cpumask_t tmp; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); +- +- dest = cpu_mask_to_apicid(mask); +- +- target_ht_irq(irq, dest); +- irq_desc[irq].affinity = mask; +-} +-#endif +- +-static struct irq_chip ht_irq_chip = { +- .name = "PCI-HT", +- .mask = mask_ht_irq, +- .unmask = unmask_ht_irq, +- .ack = ack_ioapic_irq, +-#ifdef CONFIG_SMP +- .set_affinity = set_ht_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +-{ +- int vector; +- +- vector = assign_irq_vector(irq); +- if (vector >= 0) { +- struct ht_irq_msg msg; +- unsigned dest; +- cpumask_t tmp; +- +- cpus_clear(tmp); +- 
cpu_set(vector >> 8, tmp); +- dest = cpu_mask_to_apicid(tmp); +- +- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +- +- msg.address_lo = +- HT_IRQ_LOW_BASE | +- HT_IRQ_LOW_DEST_ID(dest) | +- HT_IRQ_LOW_VECTOR(vector) | +- ((INT_DEST_MODE == 0) ? +- HT_IRQ_LOW_DM_PHYSICAL : +- HT_IRQ_LOW_DM_LOGICAL) | +- HT_IRQ_LOW_RQEOI_EDGE | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- HT_IRQ_LOW_MT_FIXED : +- HT_IRQ_LOW_MT_ARBITRATED) | +- HT_IRQ_LOW_IRQ_MASKED; +- +- write_ht_irq_msg(irq, &msg); +- +- set_irq_chip_and_handler_name(irq, &ht_irq_chip, +- handle_edge_irq, "edge"); +- } +- return vector; +-} +-#endif /* CONFIG_HT_IRQ */ +- +-/* -------------------------------------------------------------------------- +- ACPI-based IOAPIC Configuration +- -------------------------------------------------------------------------- */ +- +-#ifdef CONFIG_ACPI +- +-int __init io_apic_get_unique_id(int ioapic, int apic_id) +-{ +-#ifndef CONFIG_XEN +- union IO_APIC_reg_00 reg_00; +- static physid_mask_t apic_id_map = PHYSID_MASK_NONE; +- physid_mask_t tmp; +- unsigned long flags; +- int i = 0; +- +- /* +- * The P4 platform supports up to 256 APIC IDs on two separate APIC +- * buses (one for LAPICs, one for IOAPICs), where predecessors only +- * supports up to 16 on one shared APIC bus. +- * +- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full +- * advantage of new APIC bus architecture. +- */ +- +- if (physids_empty(apic_id_map)) +- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(ioapic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- if (apic_id >= get_physical_broadcast()) { +- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " +- "%d\n", ioapic, apic_id, reg_00.bits.ID); +- apic_id = reg_00.bits.ID; +- } +- +- /* +- * Every APIC in a system must have a unique ID or we get lots of nice +- * 'stuck on smp_invalidate_needed IPI wait' messages. 
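
io_apic_get_unique_id(), whose removal continues below, claims APIC IDs out of a bitmap of used IDs and falls back to the first free ID when the requested one is already taken. A compilable toy model of that claiming walk; MAX_APICS, id_map and claim_apic_id() are hypothetical stand-ins for get_physical_broadcast(), apic_id_map and the in-kernel logic:

    #include <stdio.h>

    #define MAX_APICS 16            /* stand-in for get_physical_broadcast() */
    static unsigned long id_map;    /* stand-in for the physid bitmap */

    static int claim_apic_id(int wanted)
    {
        int i;

        if (!(id_map & (1UL << wanted))) {
            id_map |= 1UL << wanted;
            return wanted;                  /* requested ID was free */
        }
        for (i = 0; i < MAX_APICS; i++)     /* fall back to first free ID */
            if (!(id_map & (1UL << i))) {
                id_map |= 1UL << i;
                return i;
            }
        return -1;                          /* exhausted: the kernel panics here */
    }

    int main(void)
    {
        id_map = 0x0b;                               /* IDs 0, 1 and 3 in use */
        printf("claimed %d\n", claim_apic_id(1));    /* 1 is taken, expect 2 */
        return 0;
    }
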
+- */ +- if (check_apicid_used(apic_id_map, apic_id)) { +- +- for (i = 0; i < get_physical_broadcast(); i++) { +- if (!check_apicid_used(apic_id_map, i)) +- break; +- } +- +- if (i == get_physical_broadcast()) +- panic("Max apic_id exceeded!\n"); +- +- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " +- "trying %d\n", ioapic, apic_id, i); +- +- apic_id = i; +- } +- +- tmp = apicid_to_cpu_present(apic_id); +- physids_or(apic_id_map, apic_id_map, tmp); +- +- if (reg_00.bits.ID != apic_id) { +- reg_00.bits.ID = apic_id; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(ioapic, 0, reg_00.raw); +- reg_00.raw = io_apic_read(ioapic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- /* Sanity check */ +- if (reg_00.bits.ID != apic_id) { +- printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); +- return -1; +- } +- } +- +- apic_printk(APIC_VERBOSE, KERN_INFO +- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); +-#endif /* !CONFIG_XEN */ +- +- return apic_id; +-} +- +- +-int __init io_apic_get_version(int ioapic) +-{ +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(ioapic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return reg_01.bits.version; +-} +- +- +-int __init io_apic_get_redir_entries(int ioapic) +-{ +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(ioapic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return reg_01.bits.entries; +-} +- +- +-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) +-{ +- struct IO_APIC_route_entry entry; +- +- if (!IO_APIC_IRQ(irq)) { +- printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", +- ioapic); +- return -EINVAL; +- } +- +- /* +- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. +- * Note that we mask (disable) IRQs now -- these get enabled when the +- * corresponding device driver registers for this IRQ. 
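
The rule the comment above states, program the redirection entry with its mask bit set and unmask only once a driver actually requests the IRQ, can be shown in isolation. A sketch under that assumption; route_entry is a simplified stand-in, not the real IO_APIC_route_entry layout:

    #include <string.h>

    struct route_entry {
        unsigned vector   : 8;
        unsigned trigger  : 1;   /* 0 = edge, 1 = level */
        unsigned polarity : 1;   /* 0 = active high, 1 = active low */
        unsigned mask     : 1;   /* 1 = interrupt disabled */
    };

    static struct route_entry make_masked_entry(int vector, int level, int low)
    {
        struct route_entry e;

        memset(&e, 0, sizeof(e));
        e.vector   = vector;
        e.trigger  = level;
        e.polarity = low;
        e.mask     = 1;          /* stays disabled until request_irq() time */
        return e;
    }

Programming masked-first means a spurious assertion on the pin cannot fire before a handler exists.
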
+- */ +- +- memset(&entry, 0, sizeof(entry)); +- +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.dest_mode = INT_DEST_MODE; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); +- entry.trigger = edge_level; +- entry.polarity = active_high_low; +- entry.mask = 1; +- +- /* +- * IRQs < 16 are already in the irq_2_pin[] map +- */ +- if (irq >= 16) +- add_pin_to_irq(irq, ioapic, pin); +- +- entry.vector = assign_irq_vector(irq); +- +- apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " +- "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, +- mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, +- edge_level, active_high_low); +- +- ioapic_register_intr(irq, entry.vector, edge_level); +- +- if (!ioapic && (irq < 16)) +- disable_8259A_irq(irq); +- +- ioapic_write_entry(ioapic, pin, entry); +- +- return 0; +-} +- +-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +-{ +- int i; +- +- if (skip_ioapic_setup) +- return -1; +- +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == mp_INT && +- mp_irqs[i].mp_srcbusirq == bus_irq) +- break; +- if (i >= mp_irq_entries) +- return -1; +- +- *trigger = irq_trigger(i); +- *polarity = irq_polarity(i); +- return 0; +-} +- +-#endif /* CONFIG_ACPI */ +- +-static int __init parse_disable_timer_pin_1(char *arg) +-{ +- disable_timer_pin_1 = 1; +- return 0; +-} +-early_param("disable_timer_pin_1", parse_disable_timer_pin_1); +- +-static int __init parse_enable_timer_pin_1(char *arg) +-{ +- disable_timer_pin_1 = -1; +- return 0; +-} +-early_param("enable_timer_pin_1", parse_enable_timer_pin_1); +- +-static int __init parse_noapic(char *arg) +-{ +- /* disable IO-APIC */ +- disable_ioapic_setup(); +- return 0; +-} +-early_param("noapic", parse_noapic); +- +-#ifndef CONFIG_XEN +-void __init ioapic_init_mappings(void) +-{ +- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; +- int i; +- +- for (i = 0; i < nr_ioapics; i++) { +- if (smp_found_config) { +- ioapic_phys = mp_ioapics[i].mp_apicaddr; +- if (!ioapic_phys) { +- printk(KERN_ERR +- "WARNING: bogus zero IO-APIC " +- "address found in MPTABLE, " +- "disabling IO/APIC support!\n"); +- smp_found_config = 0; +- skip_ioapic_setup = 1; +- goto fake_ioapic_page; +- } +- } else { +-fake_ioapic_page: +- ioapic_phys = (unsigned long) +- alloc_bootmem_pages(PAGE_SIZE); +- ioapic_phys = __pa(ioapic_phys); +- } +- set_fixmap_nocache(idx, ioapic_phys); +- printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", +- __fix_to_virt(idx), ioapic_phys); +- idx++; +- } +-} +-#endif +--- head-2010-01-18.orig/arch/x86/kernel/io_apic_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,2448 +0,0 @@ +-/* +- * Intel IO-APIC support for multi-Pentium hosts. +- * +- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo +- * +- * Many thanks to Stig Venaas for trying out countless experimental +- * patches and reporting/debugging problems patiently! +- * +- * (c) 1999, Multiple IO-APIC support, developed by +- * Ken-ichi Yaku and +- * Hidemi Kishimoto , +- * further tested and cleaned up by Zach Brown +- * and Ingo Molnar +- * +- * Fixes +- * Maciej W. Rozycki : Bits for genuine 82489DX APICs; +- * thanks to Eric Gilmore +- * and Rolf G. 
Tews +- * for testing these extensively +- * Paul Diefenbaugh : Added full ACPI support +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#ifdef CONFIG_ACPI +-#include +-#endif +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-struct irq_cfg { +-#ifndef CONFIG_XEN +- cpumask_t domain; +- cpumask_t old_domain; +-#endif +- unsigned move_cleanup_count; +- u8 vector; +- u8 move_in_progress : 1; +-}; +- +-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; +- +-static int assign_irq_vector(int irq, cpumask_t mask); +- +-#ifndef CONFIG_XEN +-int first_system_vector = 0xfe; +- +-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; +-#endif +- +-#define __apicdebuginit __init +- +-int sis_apic_bug; /* not actually supported, dummy for compile */ +- +-static int no_timer_check; +- +-static int disable_timer_pin_1 __initdata; +- +-#ifdef CONFIG_XEN +-#include +-#include +-#include +- +-/* Fake i8259 */ +-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) +-#define disable_8259A_irq(_irq) ((void)0) +-#define i8259A_irq_pending(_irq) (0) +- +-unsigned long io_apic_irqs; +- +-#define clear_IO_APIC() ((void)0) +-#else +-int timer_through_8259 __initdata; +- +-/* Where if anywhere is the i8259 connect in external int mode */ +-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; +-#endif +- +-static DEFINE_SPINLOCK(ioapic_lock); +-static DEFINE_SPINLOCK(vector_lock); +- +-/* +- * # of IRQ routing registers +- */ +-int nr_ioapic_registers[MAX_IO_APICS]; +- +-/* I/O APIC entries */ +-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +-int nr_ioapics; +- +-/* MP IRQ source entries */ +-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +- +-/* # of MP IRQ source entries */ +-int mp_irq_entries; +- +-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); +- +-/* +- * Rough estimation of how many shared IRQs there are, can +- * be changed anytime. +- */ +-#define MAX_PLUS_SHARED_IRQS NR_IRQS +-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +- +-/* +- * This is performance-critical, we want to do it O(1) +- * +- * the indexing order of this array favors 1:1 mappings +- * between pins and IRQs. 
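
The irq_2_pin scheme described above is a linked list threaded through one static array: the first NR_IRQS slots are indexed directly by IRQ number, and overflow slots are chained via next indices when a single IRQ fans out to several (apic, pin) pairs. A self-contained model with made-up sizes (none of these identifiers are kernel symbols):

    #include <stdio.h>

    #define NR_IRQS_MODEL  24    /* hypothetical sizes, not the kernel's */
    #define PIN_MAP_MODEL  48

    static struct pin_entry {
        short apic, pin, next;
    } pins[PIN_MAP_MODEL];

    static int first_free = NR_IRQS_MODEL;

    static void model_init(void)
    {
        int i;

        for (i = 0; i < PIN_MAP_MODEL; i++) {
            pins[i].pin = -1;    /* slot empty */
            pins[i].next = 0;    /* no chain */
        }
    }

    static void model_add_pin(int irq, int apic, int pin)
    {
        struct pin_entry *e = &pins[irq];

        while (e->next)          /* walk an existing chain to its tail */
            e = &pins[e->next];
        if (e->pin != -1) {      /* direct slot taken: chain an overflow slot */
            e->next = first_free++;    /* the kernel panics if this overflows */
            e = &pins[e->next];
        }
        e->apic = apic;
        e->pin = pin;
    }

    int main(void)
    {
        model_init();
        model_add_pin(9, 0, 9);     /* IRQ 9 from IO-APIC 0, pin 9 */
        model_add_pin(9, 0, 11);    /* shared: same IRQ also on pin 11 */
        printf("IRQ 9: pin %d then pin %d\n",
               pins[9].pin, pins[pins[9].next].pin);
        return 0;
    }
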
+- */ +- +-static struct irq_pin_list { +- short apic, pin, next; +-} irq_2_pin[PIN_MAP_SIZE]; +- +-#ifndef CONFIG_XEN +-struct io_apic { +- unsigned int index; +- unsigned int unused[3]; +- unsigned int data; +-}; +- +-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +-{ +- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) +- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); +-} +-#endif +- +-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +-{ +-#ifndef CONFIG_XEN +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(reg, &io_apic->index); +- return readl(&io_apic->data); +-#else +- struct physdev_apic apic_op; +- int ret; +- +- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; +- apic_op.reg = reg; +- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); +- if (ret) +- return ret; +- return apic_op.value; +-#endif +-} +- +-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +-{ +-#ifndef CONFIG_XEN +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(reg, &io_apic->index); +- writel(value, &io_apic->data); +-#else +- struct physdev_apic apic_op; +- +- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; +- apic_op.reg = reg; +- apic_op.value = value; +- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); +-#endif +-} +- +-#ifdef CONFIG_XEN +-#define io_apic_modify io_apic_write +-#else +-/* +- * Re-write a value: to be used for read-modify-write +- * cycles where the read already set up the index register. +- */ +-static inline void io_apic_modify(unsigned int apic, unsigned int value) +-{ +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(value, &io_apic->data); +-} +- +-static bool io_apic_level_ack_pending(unsigned int irq) +-{ +- struct irq_pin_list *entry; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- entry = irq_2_pin + irq; +- for (;;) { +- unsigned int reg; +- int pin; +- +- pin = entry->pin; +- if (pin == -1) +- break; +- reg = io_apic_read(entry->apic, 0x10 + pin*2); +- /* Is the remote IRR bit set? */ +- if (reg & IO_APIC_REDIR_REMOTE_IRR) { +- spin_unlock_irqrestore(&ioapic_lock, flags); +- return true; +- } +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return false; +-} +-#endif +- +-/* +- * Synchronize the IO-APIC and the CPU by doing +- * a dummy read from the IO-APIC +- */ +-static inline void io_apic_sync(unsigned int apic) +-{ +-#ifndef CONFIG_XEN +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- readl(&io_apic->data); +-#endif +-} +- +-union entry_union { +- struct { u32 w1, w2; }; +- struct IO_APIC_route_entry entry; +-}; +- +-#ifndef CONFIG_XEN +-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +-{ +- union entry_union eu; +- unsigned long flags; +- spin_lock_irqsave(&ioapic_lock, flags); +- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); +- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- return eu.entry; +-} +-#endif +- +-/* +- * When we write a new IO APIC routing entry, we need to write the high +- * word first! If the mask bit in the low word is clear, we will enable +- * the interrupt, and we need to make sure the entry is fully populated +- * before that happens. 
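
The ordering rule in this comment reduces to a two-word publish pattern, which is exactly what __ioapic_write_entry() below implements with the entry_union trick: because the mask bit lives in the low word, storing the high word first guarantees the entry is never visible unmasked but half-written. A minimal model, with regs[] standing in for the two IO-APIC window registers:

    #include <stdint.h>

    struct entry_model {
        uint32_t w1, w2;    /* w1 holds the mask bit, w2 the vector/destination */
    };

    static volatile uint32_t regs[2];    /* stand-in for the memory-mapped registers */

    static void write_entry_model(struct entry_model e)
    {
        regs[1] = e.w2;    /* high word first: entry still masked */
        regs[0] = e.w1;    /* low word last: may clear the mask bit */
    }
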
+- */ +-static void +-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +-{ +- union entry_union eu; +- eu.entry = e; +- io_apic_write(apic, 0x11 + 2*pin, eu.w2); +- io_apic_write(apic, 0x10 + 2*pin, eu.w1); +-} +- +-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +-{ +- unsigned long flags; +- spin_lock_irqsave(&ioapic_lock, flags); +- __ioapic_write_entry(apic, pin, e); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-#ifndef CONFIG_XEN +-/* +- * When we mask an IO APIC routing entry, we need to write the low +- * word first, in order to set the mask bit before we change the +- * high bits! +- */ +-static void ioapic_mask_entry(int apic, int pin) +-{ +- unsigned long flags; +- union entry_union eu = { .entry.mask = 1 }; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x10 + 2*pin, eu.w1); +- io_apic_write(apic, 0x11 + 2*pin, eu.w2); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-#ifdef CONFIG_SMP +-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +-{ +- int apic, pin; +- struct irq_pin_list *entry = irq_2_pin + irq; +- +- BUG_ON(irq >= NR_IRQS); +- for (;;) { +- unsigned int reg; +- apic = entry->apic; +- pin = entry->pin; +- if (pin == -1) +- break; +- io_apic_write(apic, 0x11 + pin*2, dest); +- reg = io_apic_read(apic, 0x10 + pin*2); +- reg &= ~IO_APIC_REDIR_VECTOR_MASK; +- reg |= vector; +- io_apic_modify(apic, reg); +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +-} +- +-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- unsigned long flags; +- unsigned int dest; +- cpumask_t tmp; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- return; +- +- if (assign_irq_vector(irq, mask)) +- return; +- +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); +- +- /* +- * Only the high 8 bits are valid. +- */ +- dest = SET_APIC_LOGICAL_ID(dest); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, cfg->vector); +- irq_desc[irq].affinity = mask; +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +-#endif +-#endif +- +-/* +- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are +- * shared ISA-space IRQs, so we have to support them. We are super +- * fast in the common case, and fast for shared ISA-space IRQs. +- */ +-static void add_pin_to_irq(unsigned int irq, int apic, int pin) +-{ +- static int first_free_entry = NR_IRQS; +- struct irq_pin_list *entry = irq_2_pin + irq; +- +- BUG_ON(irq >= NR_IRQS); +- while (entry->next) +- entry = irq_2_pin + entry->next; +- +- if (entry->pin != -1) { +- entry->next = first_free_entry; +- entry = irq_2_pin + entry->next; +- if (++first_free_entry >= PIN_MAP_SIZE) +- panic("io_apic.c: ran out of irq_2_pin entries!"); +- } +- entry->apic = apic; +- entry->pin = pin; +-} +- +-#ifndef CONFIG_XEN +-/* +- * Reroute an IRQ to a different pin. 
+- */ +-static void __init replace_pin_at_irq(unsigned int irq, +- int oldapic, int oldpin, +- int newapic, int newpin) +-{ +- struct irq_pin_list *entry = irq_2_pin + irq; +- +- while (1) { +- if (entry->apic == oldapic && entry->pin == oldpin) { +- entry->apic = newapic; +- entry->pin = newpin; +- } +- if (!entry->next) +- break; +- entry = irq_2_pin + entry->next; +- } +-} +- +-#define __DO_ACTION(R, ACTION, FINAL) \ +- \ +-{ \ +- int pin; \ +- struct irq_pin_list *entry = irq_2_pin + irq; \ +- \ +- BUG_ON(irq >= NR_IRQS); \ +- for (;;) { \ +- unsigned int reg; \ +- pin = entry->pin; \ +- if (pin == -1) \ +- break; \ +- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ +- reg ACTION; \ +- io_apic_modify(entry->apic, reg); \ +- FINAL; \ +- if (!entry->next) \ +- break; \ +- entry = irq_2_pin + entry->next; \ +- } \ +-} +- +-#define DO_ACTION(name,R,ACTION, FINAL) \ +- \ +- static void name##_IO_APIC_irq (unsigned int irq) \ +- __DO_ACTION(R, ACTION, FINAL) +- +-/* mask = 1 */ +-DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) +- +-/* mask = 0 */ +-DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) +- +-static void mask_IO_APIC_irq (unsigned int irq) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __mask_IO_APIC_irq(irq); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void unmask_IO_APIC_irq (unsigned int irq) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __unmask_IO_APIC_irq(irq); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +-{ +- struct IO_APIC_route_entry entry; +- +- /* Check delivery_mode to be sure we're not clearing an SMI pin */ +- entry = ioapic_read_entry(apic, pin); +- if (entry.delivery_mode == dest_SMI) +- return; +- /* +- * Disable it in the IO-APIC irq-routing table: +- */ +- ioapic_mask_entry(apic, pin); +-} +- +-static void clear_IO_APIC (void) +-{ +- int apic, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) +- clear_IO_APIC_pin(apic, pin); +-} +- +-#endif /* !CONFIG_XEN */ +- +-int skip_ioapic_setup; +-int ioapic_force; +- +-static int __init parse_noapic(char *str) +-{ +- disable_ioapic_setup(); +- return 0; +-} +-early_param("noapic", parse_noapic); +- +-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +-static int __init disable_timer_pin_setup(char *arg) +-{ +- disable_timer_pin_1 = 1; +- return 1; +-} +-__setup("disable_timer_pin_1", disable_timer_pin_setup); +- +- +-/* +- * Find the IRQ entry number of a certain pin. 
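
For reference, the DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) instantiation in the hunk above expands to roughly the following function. This is a paraphrase of the macro output, relying on the definitions earlier in this file rather than standalone code:

    static void __mask_IO_APIC_irq(unsigned int irq)
    {
        int pin;
        struct irq_pin_list *entry = irq_2_pin + irq;

        BUG_ON(irq >= NR_IRQS);
        for (;;) {
            unsigned int reg;

            pin = entry->pin;
            if (pin == -1)
                break;
            reg = io_apic_read(entry->apic, 0x10 + pin * 2);
            reg |= IO_APIC_REDIR_MASKED;        /* the ACTION argument */
            io_apic_modify(entry->apic, reg);
            io_apic_sync(entry->apic);          /* the FINAL argument */
            if (!entry->next)
                break;
            entry = irq_2_pin + entry->next;
        }
    }

The __unmask variant differs only in the ACTION (clearing the bit) and in having an empty FINAL, since unmasking needs no read-back to flush.
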
+- */ +-static int find_irq_entry(int apic, int pin, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == type && +- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) && +- mp_irqs[i].mp_dstirq == pin) +- return i; +- +- return -1; +-} +- +-#ifndef CONFIG_XEN +-/* +- * Find the pin to which IRQ[irq] (ISA) is connected +- */ +-static int __init find_isa_irq_pin(int irq, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) +- +- return mp_irqs[i].mp_dstirq; +- } +- return -1; +-} +- +-static int __init find_isa_irq_apic(int irq, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) +- break; +- } +- if (i < mp_irq_entries) { +- int apic; +- for(apic = 0; apic < nr_ioapics; apic++) { +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) +- return apic; +- } +- } +- +- return -1; +-} +-#endif +- +-/* +- * Find a specific PCI IRQ entry. +- * Not an __init, possibly needed by modules +- */ +-static int pin_2_irq(int idx, int apic, int pin); +- +-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +-{ +- int apic, i, best_guess = -1; +- +- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", +- bus, slot, pin); +- if (test_bit(bus, mp_bus_not_pci)) { +- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); +- return -1; +- } +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- for (apic = 0; apic < nr_ioapics; apic++) +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) +- break; +- +- if (!test_bit(lbus, mp_bus_not_pci) && +- !mp_irqs[i].mp_irqtype && +- (bus == lbus) && +- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { +- int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); +- +- if (!(apic || IO_APIC_IRQ(irq))) +- continue; +- +- if (pin == (mp_irqs[i].mp_srcbusirq & 3)) +- return irq; +- /* +- * Use the first all-but-pin matching entry as a +- * best-guess fuzzy result for broken mptables. +- */ +- if (best_guess < 0) +- best_guess = irq; +- } +- } +- BUG_ON(best_guess >= NR_IRQS); +- return best_guess; +-} +- +-/* ISA interrupts are always polarity zero edge triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_ISA_trigger(idx) (0) +-#define default_ISA_polarity(idx) (0) +- +-/* PCI interrupts are always polarity one level triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_PCI_trigger(idx) (1) +-#define default_PCI_polarity(idx) (1) +- +-static int MPBIOS_polarity(int idx) +-{ +- int bus = mp_irqs[idx].mp_srcbus; +- int polarity; +- +- /* +- * Determine IRQ line polarity (high active or low active): +- */ +- switch (mp_irqs[idx].mp_irqflag & 3) +- { +- case 0: /* conforms, ie. 
bus-type dependent polarity */ +- if (test_bit(bus, mp_bus_not_pci)) +- polarity = default_ISA_polarity(idx); +- else +- polarity = default_PCI_polarity(idx); +- break; +- case 1: /* high active */ +- { +- polarity = 0; +- break; +- } +- case 2: /* reserved */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- case 3: /* low active */ +- { +- polarity = 1; +- break; +- } +- default: /* invalid */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- } +- return polarity; +-} +- +-static int MPBIOS_trigger(int idx) +-{ +- int bus = mp_irqs[idx].mp_srcbus; +- int trigger; +- +- /* +- * Determine IRQ trigger mode (edge or level sensitive): +- */ +- switch ((mp_irqs[idx].mp_irqflag>>2) & 3) +- { +- case 0: /* conforms, ie. bus-type dependent */ +- if (test_bit(bus, mp_bus_not_pci)) +- trigger = default_ISA_trigger(idx); +- else +- trigger = default_PCI_trigger(idx); +- break; +- case 1: /* edge */ +- { +- trigger = 0; +- break; +- } +- case 2: /* reserved */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 1; +- break; +- } +- case 3: /* level */ +- { +- trigger = 1; +- break; +- } +- default: /* invalid */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 0; +- break; +- } +- } +- return trigger; +-} +- +-static inline int irq_polarity(int idx) +-{ +- return MPBIOS_polarity(idx); +-} +- +-static inline int irq_trigger(int idx) +-{ +- return MPBIOS_trigger(idx); +-} +- +-static int pin_2_irq(int idx, int apic, int pin) +-{ +- int irq, i; +- int bus = mp_irqs[idx].mp_srcbus; +- +- /* +- * Debugging check, we are in big trouble if this message pops up! +- */ +- if (mp_irqs[idx].mp_dstirq != pin) +- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); +- +- if (test_bit(bus, mp_bus_not_pci)) { +- irq = mp_irqs[idx].mp_srcbusirq; +- } else { +- /* +- * PCI IRQs are mapped in order +- */ +- i = irq = 0; +- while (i < apic) +- irq += nr_ioapic_registers[i++]; +- irq += pin; +- } +- BUG_ON(irq >= NR_IRQS); +- return irq; +-} +- +-void lock_vector_lock(void) +-{ +- /* Used to the online set of cpus does not change +- * during assign_irq_vector. 
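
pin_2_irq() above maps a PCI pin to a global IRQ by summing the redirection-register counts of all lower-numbered I/O APICs and adding the pin number. A worked, compilable example with hypothetical sizes:

    #include <stdio.h>

    int main(void)
    {
        int nr_regs[2] = { 24, 16 };    /* hypothetical nr_ioapic_registers[] */
        int apic = 1, pin = 5;          /* pin 5 on the second IO-APIC */
        int i = 0, irq = 0;

        while (i < apic)
            irq += nr_regs[i++];
        irq += pin;
        printf("irq = %d\n", irq);      /* prints: irq = 29 (24 + 5) */
        return 0;
    }
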
+- */ +- spin_lock(&vector_lock); +-} +- +-void unlock_vector_lock(void) +-{ +- spin_unlock(&vector_lock); +-} +- +-static int __assign_irq_vector(int irq, cpumask_t mask) +-{ +- struct physdev_irq irq_op; +- struct irq_cfg *cfg; +- +- BUG_ON((unsigned)irq >= NR_IRQS); +- +- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) +- return -EINVAL; +- +- cfg = &irq_cfg[irq]; +- +- if ((cfg->move_in_progress) || cfg->move_cleanup_count) +- return -EBUSY; +- +- if (cfg->vector) +- return 0; +- +- irq_op.irq = irq; +- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) +- return -ENOSPC; +- +- cfg->vector = irq_op.vector; +- +- return 0; +-} +- +-static int assign_irq_vector(int irq, cpumask_t mask) +-{ +- int err; +- unsigned long flags; +- +- spin_lock_irqsave(&vector_lock, flags); +- err = __assign_irq_vector(irq, mask); +- spin_unlock_irqrestore(&vector_lock, flags); +- return err; +-} +- +-#ifndef CONFIG_XEN +-static void __clear_irq_vector(int irq) +-{ +- struct irq_cfg *cfg; +- cpumask_t mask; +- int cpu, vector; +- +- BUG_ON((unsigned)irq >= NR_IRQS); +- cfg = &irq_cfg[irq]; +- BUG_ON(!cfg->vector); +- +- vector = cfg->vector; +- cpus_and(mask, cfg->domain, cpu_online_map); +- for_each_cpu_mask_nr(cpu, mask) +- per_cpu(vector_irq, cpu)[vector] = -1; +- +- cfg->vector = 0; +- cpus_clear(cfg->domain); +-} +- +-void __setup_vector_irq(int cpu) +-{ +- /* Initialize vector_irq on a new cpu */ +- /* This function must be called with vector_lock held */ +- int irq, vector; +- +- /* Mark the inuse vectors */ +- for (irq = 0; irq < NR_IRQS; ++irq) { +- if (!cpu_isset(cpu, irq_cfg[irq].domain)) +- continue; +- vector = irq_cfg[irq].vector; +- per_cpu(vector_irq, cpu)[vector] = irq; +- } +- /* Mark the free vectors */ +- for (vector = 0; vector < NR_VECTORS; ++vector) { +- irq = per_cpu(vector_irq, cpu)[vector]; +- if (irq < 0) +- continue; +- if (!cpu_isset(cpu, irq_cfg[irq].domain)) +- per_cpu(vector_irq, cpu)[vector] = -1; +- } +-} +- +-static struct irq_chip ioapic_chip; +- +-static void ioapic_register_intr(int irq, unsigned long trigger) +-{ +- if (trigger) { +- irq_desc[irq].status |= IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &ioapic_chip, +- handle_fasteoi_irq, "fasteoi"); +- } else { +- irq_desc[irq].status &= ~IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &ioapic_chip, +- handle_edge_irq, "edge"); +- } +-} +-#else +-#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq) +-#endif /* !CONFIG_XEN */ +- +-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, +- int trigger, int polarity) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- struct IO_APIC_route_entry entry; +- cpumask_t mask; +- +- if (!IO_APIC_IRQ(irq)) +- return; +- +- mask = TARGET_CPUS; +- if (assign_irq_vector(irq, mask)) +- return; +- +-#ifndef CONFIG_XEN +- cpus_and(mask, cfg->domain, mask); +-#endif +- +- apic_printk(APIC_VERBOSE,KERN_DEBUG +- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " +- "IRQ %d Mode:%i Active:%i)\n", +- apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, +- irq, trigger, polarity); +- +- /* +- * add it to the IO-APIC irq-routing table: +- */ +- memset(&entry,0,sizeof(entry)); +- +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.dest_mode = INT_DEST_MODE; +- entry.dest = cpu_mask_to_apicid(mask); +- entry.mask = 0; /* enable IRQ */ +- entry.trigger = trigger; +- entry.polarity = polarity; +- entry.vector = cfg->vector; +- +- /* Mask level triggered irqs. +- * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 
+- */ +- if (trigger) +- entry.mask = 1; +- +- ioapic_register_intr(irq, trigger); +- if (irq < 16) +- disable_8259A_irq(irq); +- +- ioapic_write_entry(apic, pin, entry); +-} +- +-static void __init setup_IO_APIC_irqs(void) +-{ +- int apic, pin, idx, irq, first_notcon = 1; +- +- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- +- idx = find_irq_entry(apic,pin,mp_INT); +- if (idx == -1) { +- if (first_notcon) { +- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); +- first_notcon = 0; +- } else +- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); +- continue; +- } +- if (!first_notcon) { +- apic_printk(APIC_VERBOSE, " not connected.\n"); +- first_notcon = 1; +- } +- +- irq = pin_2_irq(idx, apic, pin); +- add_pin_to_irq(irq, apic, pin); +- +- setup_IO_APIC_irq(apic, pin, irq, +- irq_trigger(idx), irq_polarity(idx)); +- } +- } +- +- if (!first_notcon) +- apic_printk(APIC_VERBOSE, " not connected.\n"); +-} +- +-#ifndef CONFIG_XEN +-/* +- * Set up the timer pin, possibly with the 8259A-master behind. +- */ +-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, +- int vector) +-{ +- struct IO_APIC_route_entry entry; +- +- memset(&entry, 0, sizeof(entry)); +- +- /* +- * We use logical delivery to get the timer IRQ +- * to the first CPU. +- */ +- entry.dest_mode = INT_DEST_MODE; +- entry.mask = 1; /* mask IRQ now */ +- entry.dest = cpu_mask_to_apicid(TARGET_CPUS); +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.polarity = 0; +- entry.trigger = 0; +- entry.vector = vector; +- +- /* +- * The timer IRQ doesn't have to know that behind the +- * scene we may have a 8259A-master in AEOI mode ... +- */ +- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); +- +- /* +- * Add it to the IO-APIC irq-routing table: +- */ +- ioapic_write_entry(apic, pin, entry); +-} +- +-void __apicdebuginit print_IO_APIC(void) +-{ +- int apic, i; +- union IO_APIC_reg_00 reg_00; +- union IO_APIC_reg_01 reg_01; +- union IO_APIC_reg_02 reg_02; +- unsigned long flags; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); +- for (i = 0; i < nr_ioapics; i++) +- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", +- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); +- +- /* +- * We are a bit conservative about what we expect. We have to +- * know about every hardware change ASAP. +- */ +- printk(KERN_INFO "testing the IO APIC.......................\n"); +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- reg_01.raw = io_apic_read(apic, 1); +- if (reg_01.bits.version >= 0x10) +- reg_02.raw = io_apic_read(apic, 2); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- printk("\n"); +- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); +- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); +- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); +- +- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); +- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); +- +- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); +- printk(KERN_DEBUG "....... 
: IO APIC version: %04X\n", reg_01.bits.version);
+-
+- if (reg_01.bits.version >= 0x10) {
+- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+- }
+-
+- printk(KERN_DEBUG ".... IRQ redirection table:\n");
+-
+- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+- " Stat Dmod Deli Vect: \n");
+-
+- for (i = 0; i <= reg_01.bits.entries; i++) {
+- struct IO_APIC_route_entry entry;
+-
+- entry = ioapic_read_entry(apic, i);
+-
+- printk(KERN_DEBUG " %02x %03X ",
+- i,
+- entry.dest
+- );
+-
+- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+- entry.mask,
+- entry.trigger,
+- entry.irr,
+- entry.polarity,
+- entry.delivery_status,
+- entry.dest_mode,
+- entry.delivery_mode,
+- entry.vector
+- );
+- }
+- }
+- printk(KERN_DEBUG "IRQ to pin mappings:\n");
+- for (i = 0; i < NR_IRQS; i++) {
+- struct irq_pin_list *entry = irq_2_pin + i;
+- if (entry->pin < 0)
+- continue;
+- printk(KERN_DEBUG "IRQ%d ", i);
+- for (;;) {
+- printk("-> %d:%d", entry->apic, entry->pin);
+- if (!entry->next)
+- break;
+- entry = irq_2_pin + entry->next;
+- }
+- printk("\n");
+- }
+-
+- printk(KERN_INFO ".................................... done.\n");
+-
+- return;
+-}
+-
+-static __apicdebuginit void print_APIC_bitfield (int base)
+-{
+- unsigned int v;
+- int i, j;
+-
+- if (apic_verbosity == APIC_QUIET)
+- return;
+-
+- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+- for (i = 0; i < 8; i++) {
+- v = apic_read(base + i*0x10);
+- for (j = 0; j < 32; j++) {
+- if (v & (1<<j))
+- printk("1");
+- else
+- printk("0");
+- }
+- printk("\n");
+- }
+-}
+-
+-__apicdebuginit void print_local_APIC(void *dummy)
+-{
+- unsigned int v, ver, maxlvt;
+-
+- if (apic_verbosity == APIC_QUIET)
+- return;
+-
+- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+- smp_processor_id(), hard_smp_processor_id());
+- v = apic_read(APIC_ID);
+- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+- v = apic_read(APIC_LVR);
+- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+- ver = GET_APIC_VERSION(v);
+- maxlvt = lapic_get_maxlvt();
+-
+- v = apic_read(APIC_TASKPRI);
+- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+-
+- v = apic_read(APIC_ARBPRI);
+- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, v & APIC_ARBPRI_MASK);
+-
+- v = apic_read(APIC_PROCPRI);
+- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+-
+- v = apic_read(APIC_EOI);
+- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+- v = apic_read(APIC_RRR);
+- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+- v = apic_read(APIC_LDR);
+- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+- v = apic_read(APIC_DFR);
+- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+- v = apic_read(APIC_SPIV);
+- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+-
+- printk(KERN_DEBUG "... APIC ISR field:\n");
+- print_APIC_bitfield(APIC_ISR);
+- printk(KERN_DEBUG "... APIC TMR field:\n");
+- print_APIC_bitfield(APIC_TMR);
+- printk(KERN_DEBUG "... APIC IRR field:\n");
+- print_APIC_bitfield(APIC_IRR);
+-
+- v = apic_read(APIC_ESR);
+- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+-
+- v = apic_read(APIC_ICR);
+- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+- v = apic_read(APIC_ICR2);
+- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+-
+- v = apic_read(APIC_LVTT);
+- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+-
+- if (maxlvt > 3) { /* PC is LVT#4. */
+- v = apic_read(APIC_LVTPC);
+- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+- }
+- v = apic_read(APIC_LVT0);
+- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+- v = apic_read(APIC_LVT1);
+- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+-
+- if (maxlvt > 2) { /* ERR is LVT#3. */
+- v = apic_read(APIC_LVTERR);
+- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+- }
+-
+- v = apic_read(APIC_TMICT);
+- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+- v = apic_read(APIC_TMCCT);
+- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+- v = apic_read(APIC_TDCR);
+- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+- printk("\n");
+-}
+-
+-void print_all_local_APICs (void)
+-{
+- on_each_cpu(print_local_APIC, NULL, 1);
+-}
+-
+-void __apicdebuginit print_PIC(void)
+-{
+- unsigned int v;
+- unsigned long flags;
+-
+- if (apic_verbosity == APIC_QUIET)
+- return;
+-
+- printk(KERN_DEBUG "\nprinting PIC contents\n");
+-
+- spin_lock_irqsave(&i8259A_lock, flags);
+-
+- v = inb(0xa1) << 8 | inb(0x21);
+- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+-
+- v = inb(0xa0) << 8 | inb(0x20);
+- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+-
+- outb(0x0b,0xa0);
+- outb(0x0b,0x20);
+- v = inb(0xa0) << 8 | inb(0x20);
+- outb(0x0a,0xa0);
+- outb(0x0a,0x20);
+-
+- spin_unlock_irqrestore(&i8259A_lock, flags);
+-
+- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+-
+- v = inb(0x4d1) << 8 | inb(0x4d0);
+- printk(KERN_DEBUG "...
PIC ELCR: %04x\n", v); +-} +-#else +-void __apicdebuginit print_IO_APIC(void) {} +-#endif /* !CONFIG_XEN */ +- +-void __init enable_IO_APIC(void) +-{ +- union IO_APIC_reg_01 reg_01; +-#ifndef CONFIG_XEN +- int i8259_apic, i8259_pin; +-#endif +- int i, apic; +- unsigned long flags; +- +- for (i = 0; i < PIN_MAP_SIZE; i++) { +- irq_2_pin[i].pin = -1; +- irq_2_pin[i].next = 0; +- } +- +- /* +- * The number of IO-APIC IRQ registers (== #pins): +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(apic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- nr_ioapic_registers[apic] = reg_01.bits.entries+1; +- } +-#ifndef CONFIG_XEN +- for(apic = 0; apic < nr_ioapics; apic++) { +- int pin; +- /* See if any of the pins is in ExtINT mode */ +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- struct IO_APIC_route_entry entry; +- entry = ioapic_read_entry(apic, pin); +- +- /* If the interrupt line is enabled and in ExtInt mode +- * I have found the pin where the i8259 is connected. +- */ +- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { +- ioapic_i8259.apic = apic; +- ioapic_i8259.pin = pin; +- goto found_i8259; +- } +- } +- } +- found_i8259: +- /* Look to see what if the MP table has reported the ExtINT */ +- i8259_pin = find_isa_irq_pin(0, mp_ExtINT); +- i8259_apic = find_isa_irq_apic(0, mp_ExtINT); +- /* Trust the MP table if nothing is setup in the hardware */ +- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { +- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); +- ioapic_i8259.pin = i8259_pin; +- ioapic_i8259.apic = i8259_apic; +- } +- /* Complain if the MP table and the hardware disagree */ +- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && +- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) +- { +- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); +- } +-#endif +- +- /* +- * Do not trust the IO-APIC being empty at bootup +- */ +- clear_IO_APIC(); +-} +- +-/* +- * Not an __init, needed by the reboot code +- */ +-void disable_IO_APIC(void) +-{ +- /* +- * Clear the IO-APIC before rebooting: +- */ +- clear_IO_APIC(); +- +-#ifndef CONFIG_XEN +- /* +- * If the i8259 is routed through an IOAPIC +- * Put that IOAPIC in virtual wire mode +- * so legacy interrupts can be delivered. +- */ +- if (ioapic_i8259.pin != -1) { +- struct IO_APIC_route_entry entry; +- +- memset(&entry, 0, sizeof(entry)); +- entry.mask = 0; /* Enabled */ +- entry.trigger = 0; /* Edge */ +- entry.irr = 0; +- entry.polarity = 0; /* High */ +- entry.delivery_status = 0; +- entry.dest_mode = 0; /* Physical */ +- entry.delivery_mode = dest_ExtINT; /* ExtInt */ +- entry.vector = 0; +- entry.dest = GET_APIC_ID(read_apic_id()); +- +- /* +- * Add it to the IO-APIC irq-routing table: +- */ +- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); +- } +- +- disconnect_bsp_APIC(ioapic_i8259.pin != -1); +-#endif +-} +- +-/* +- * There is a nasty bug in some older SMP boards, their mptable lies +- * about the timer IRQ. We do the following to work around the situation: +- * +- * - timer IRQ defaults to IO-APIC IRQ +- * - if this function detects that timer IRQs are defunct, then we fall +- * back to ISA timer IRQs +- */ +-#ifndef CONFIG_XEN +-static int __init timer_irq_works(void) +-{ +- unsigned long t1 = jiffies; +- unsigned long flags; +- +- local_save_flags(flags); +- local_irq_enable(); +- /* Let ten ticks pass... 
*/ +- mdelay((10 * 1000) / HZ); +- local_irq_restore(flags); +- +- /* +- * Expect a few ticks at least, to be sure some possible +- * glue logic does not lock up after one or two first +- * ticks in a non-ExtINT mode. Also the local APIC +- * might have cached one ExtINT interrupt. Finally, at +- * least one tick may be lost due to delays. +- */ +- +- /* jiffies wrap? */ +- if (time_after(jiffies, t1 + 4)) +- return 1; +- return 0; +-} +- +-/* +- * In the SMP+IOAPIC case it might happen that there are an unspecified +- * number of pending IRQ events unhandled. These cases are very rare, +- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much +- * better to do it this way as thus we do not have to be aware of +- * 'pending' interrupts in the IRQ path, except at this point. +- */ +-/* +- * Edge triggered needs to resend any interrupt +- * that was delayed but this is now handled in the device +- * independent code. +- */ +- +-/* +- * Starting up a edge-triggered IO-APIC interrupt is +- * nasty - we need to make sure that we get the edge. +- * If it is already asserted for some reason, we need +- * return 1 to indicate that is was pending. +- * +- * This is not complete - we should be able to fake +- * an edge even if it isn't on the 8259A... +- */ +- +-static unsigned int startup_ioapic_irq(unsigned int irq) +-{ +- int was_pending = 0; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- if (irq < 16) { +- disable_8259A_irq(irq); +- if (i8259A_irq_pending(irq)) +- was_pending = 1; +- } +- __unmask_IO_APIC_irq(irq); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return was_pending; +-} +- +-static int ioapic_retrigger_irq(unsigned int irq) +-{ +- struct irq_cfg *cfg = &irq_cfg[irq]; +- unsigned long flags; +- +- spin_lock_irqsave(&vector_lock, flags); +- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); +- spin_unlock_irqrestore(&vector_lock, flags); +- +- return 1; +-} +- +-/* +- * Level and edge triggered IO-APIC interrupts need different handling, +- * so we use two separate IRQ descriptors. Edge triggered IRQs can be +- * handled with the level-triggered descriptor, but that one has slightly +- * more overhead. Level-triggered interrupts cannot be handled with the +- * edge-triggered handler, without risking IRQ storms and other ugly +- * races. 
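
The two flows this comment contrasts are wired up by ioapic_register_intr() earlier in the file; stripped of context, the dispatch is just the following (same calls as the removed code, shown in isolation rather than as standalone compilable code):

    static void register_flow(int irq, int level_triggered)
    {
        if (level_triggered) {
            irq_desc[irq].status |= IRQ_LEVEL;
            set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                          handle_fasteoi_irq, "fasteoi");
        } else {
            irq_desc[irq].status &= ~IRQ_LEVEL;
            set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                          handle_edge_irq, "edge");
        }
    }
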
+- */ +- +-#ifdef CONFIG_SMP +-asmlinkage void smp_irq_move_cleanup_interrupt(void) +-{ +- unsigned vector, me; +- ack_APIC_irq(); +- exit_idle(); +- irq_enter(); +- +- me = smp_processor_id(); +- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { +- unsigned int irq; +- struct irq_desc *desc; +- struct irq_cfg *cfg; +- irq = __get_cpu_var(vector_irq)[vector]; +- if (irq >= NR_IRQS) +- continue; +- +- desc = irq_desc + irq; +- cfg = irq_cfg + irq; +- spin_lock(&desc->lock); +- if (!cfg->move_cleanup_count) +- goto unlock; +- +- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) +- goto unlock; +- +- __get_cpu_var(vector_irq)[vector] = -1; +- cfg->move_cleanup_count--; +-unlock: +- spin_unlock(&desc->lock); +- } +- +- irq_exit(); +-} +- +-static void irq_complete_move(unsigned int irq) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- unsigned vector, me; +- +- if (likely(!cfg->move_in_progress)) +- return; +- +- vector = ~get_irq_regs()->orig_ax; +- me = smp_processor_id(); +- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { +- cpumask_t cleanup_mask; +- +- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); +- cfg->move_cleanup_count = cpus_weight(cleanup_mask); +- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); +- cfg->move_in_progress = 0; +- } +-} +-#else +-static inline void irq_complete_move(unsigned int irq) {} +-#endif +- +-static void ack_apic_edge(unsigned int irq) +-{ +- irq_complete_move(irq); +- move_native_irq(irq); +- ack_APIC_irq(); +-} +- +-static void ack_apic_level(unsigned int irq) +-{ +- int do_unmask_irq = 0; +- +- irq_complete_move(irq); +-#ifdef CONFIG_GENERIC_PENDING_IRQ +- /* If we are moving the irq we need to mask it */ +- if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { +- do_unmask_irq = 1; +- mask_IO_APIC_irq(irq); +- } +-#endif +- +- /* +- * We must acknowledge the irq before we move it or the acknowledge will +- * not propagate properly. +- */ +- ack_APIC_irq(); +- +- /* Now we can move and renable the irq */ +- if (unlikely(do_unmask_irq)) { +- /* Only migrate the irq if the ack has been received. +- * +- * On rare occasions the broadcast level triggered ack gets +- * delayed going to ioapics, and if we reprogram the +- * vector while Remote IRR is still set the irq will never +- * fire again. +- * +- * To prevent this scenario we read the Remote IRR bit +- * of the ioapic. This has two effects. +- * - On any sane system the read of the ioapic will +- * flush writes (and acks) going to the ioapic from +- * this cpu. +- * - We get to see if the ACK has actually been delivered. +- * +- * Based on failed experiments of reprogramming the +- * ioapic entry from outside of irq context starting +- * with masking the ioapic entry and then polling until +- * Remote IRR was clear before reprogramming the +- * ioapic I don't trust the Remote IRR bit to be +- * completey accurate. +- * +- * However there appears to be no other way to plug +- * this race, so if the Remote IRR bit is not +- * accurate and is causing problems then it is a hardware bug +- * and you can go talk to the chipset vendor about it. 
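
Condensed, the migration protocol this comment justifies is: mask the pin, ack at the local APIC, and only reprogram the vector once the Remote IRR bit confirms the ack reached the I/O APIC. A sketch of that sequence reusing the helpers defined in this file; note the real ack_apic_level() below interleaves these steps with the IRQ_MOVE_PENDING check rather than running them in one helper:

    static void move_level_irq_safely(unsigned int irq)
    {
        mask_IO_APIC_irq(irq);                  /* stop further deliveries */
        ack_APIC_irq();                         /* let the level ack propagate */
        if (!io_apic_level_ack_pending(irq))    /* Remote IRR clear? */
            move_masked_irq(irq);               /* safe to change the vector */
        unmask_IO_APIC_irq(irq);
    }
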
+- */ +- if (!io_apic_level_ack_pending(irq)) +- move_masked_irq(irq); +- unmask_IO_APIC_irq(irq); +- } +-} +- +-static struct irq_chip ioapic_chip __read_mostly = { +- .name = "IO-APIC", +- .startup = startup_ioapic_irq, +- .mask = mask_IO_APIC_irq, +- .unmask = unmask_IO_APIC_irq, +- .ack = ack_apic_edge, +- .eoi = ack_apic_level, +-#ifdef CONFIG_SMP +- .set_affinity = set_ioapic_affinity_irq, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +-#endif /* !CONFIG_XEN */ +- +-static inline void init_IO_APIC_traps(void) +-{ +- int irq; +- +- /* +- * NOTE! The local APIC isn't very good at handling +- * multiple interrupts at the same interrupt level. +- * As the interrupt level is determined by taking the +- * vector number and shifting that right by 4, we +- * want to spread these out a bit so that they don't +- * all fall in the same interrupt level. +- * +- * Also, we've got to be careful not to trash gate +- * 0x80, because int 0x80 is hm, kind of importantish. ;) +- */ +- for (irq = 0; irq < NR_IRQS ; irq++) { +- if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { +- /* +- * Hmm.. We don't have an entry for this, +- * so default to an old-fashioned 8259 +- * interrupt if we can.. +- */ +- if (irq < 16) +- make_8259A_irq(irq); +-#ifndef CONFIG_XEN +- else +- /* Strange. Oh, well.. */ +- irq_desc[irq].chip = &no_irq_chip; +-#endif +- } +- } +-} +- +-#ifndef CONFIG_XEN +-static void unmask_lapic_irq(unsigned int irq) +-{ +- unsigned long v; +- +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); +-} +- +-static void mask_lapic_irq(unsigned int irq) +-{ +- unsigned long v; +- +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v | APIC_LVT_MASKED); +-} +- +-static void ack_lapic_irq (unsigned int irq) +-{ +- ack_APIC_irq(); +-} +- +-static struct irq_chip lapic_chip __read_mostly = { +- .name = "local-APIC", +- .mask = mask_lapic_irq, +- .unmask = unmask_lapic_irq, +- .ack = ack_lapic_irq, +-}; +- +-static void lapic_register_intr(int irq) +-{ +- irq_desc[irq].status &= ~IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, +- "edge"); +-} +- +-static void __init setup_nmi(void) +-{ +- /* +- * Dirty trick to enable the NMI watchdog ... +- * We put the 8259A master into AEOI mode and +- * unmask on all local APICs LVT0 as NMI. +- * +- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') +- * is from Maciej W. Rozycki - so we do not have to EOI from +- * the NMI handler or the timer interrupt. +- */ +- printk(KERN_INFO "activating NMI Watchdog ..."); +- +- enable_NMI_through_LVT0(); +- +- printk(" done.\n"); +-} +- +-/* +- * This looks a bit hackish but it's about the only one way of sending +- * a few INTA cycles to 8259As and any associated glue logic. ICR does +- * not support the ExtINT mode, unfortunately. We need to send these +- * cycles as some i82489DX-based boards have glue logic that keeps the +- * 8259A interrupt line asserted until INTA. 
--macro +- */ +-static inline void __init unlock_ExtINT_logic(void) +-{ +- int apic, pin, i; +- struct IO_APIC_route_entry entry0, entry1; +- unsigned char save_control, save_freq_select; +- +- pin = find_isa_irq_pin(8, mp_INT); +- apic = find_isa_irq_apic(8, mp_INT); +- if (pin == -1) +- return; +- +- entry0 = ioapic_read_entry(apic, pin); +- +- clear_IO_APIC_pin(apic, pin); +- +- memset(&entry1, 0, sizeof(entry1)); +- +- entry1.dest_mode = 0; /* physical delivery */ +- entry1.mask = 0; /* unmask IRQ now */ +- entry1.dest = hard_smp_processor_id(); +- entry1.delivery_mode = dest_ExtINT; +- entry1.polarity = entry0.polarity; +- entry1.trigger = 0; +- entry1.vector = 0; +- +- ioapic_write_entry(apic, pin, entry1); +- +- save_control = CMOS_READ(RTC_CONTROL); +- save_freq_select = CMOS_READ(RTC_FREQ_SELECT); +- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, +- RTC_FREQ_SELECT); +- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); +- +- i = 100; +- while (i-- > 0) { +- mdelay(10); +- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) +- i -= 10; +- } +- +- CMOS_WRITE(save_control, RTC_CONTROL); +- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); +- clear_IO_APIC_pin(apic, pin); +- +- ioapic_write_entry(apic, pin, entry0); +-} +- +-/* +- * This code may look a bit paranoid, but it's supposed to cooperate with +- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ +- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast +- * fanatically on his truly buggy board. +- * +- * FIXME: really need to revamp this for modern platforms only. +- */ +-static inline void __init check_timer(void) +-{ +- struct irq_cfg *cfg = irq_cfg + 0; +- int apic1, pin1, apic2, pin2; +- unsigned long flags; +- int no_pin1 = 0; +- +- local_irq_save(flags); +- +- /* +- * get/set the timer IRQ vector: +- */ +- disable_8259A_irq(0); +- assign_irq_vector(0, TARGET_CPUS); +- +- /* +- * As IRQ0 is to be enabled in the 8259A, the virtual +- * wire has to be disabled in the local APIC. +- */ +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); +- init_8259A(1); +- +- pin1 = find_isa_irq_pin(0, mp_INT); +- apic1 = find_isa_irq_apic(0, mp_INT); +- pin2 = ioapic_i8259.pin; +- apic2 = ioapic_i8259.apic; +- +- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " +- "apic1=%d pin1=%d apic2=%d pin2=%d\n", +- cfg->vector, apic1, pin1, apic2, pin2); +- +- /* +- * Some BIOS writers are clueless and report the ExtINTA +- * I/O APIC input from the cascaded 8259A as the timer +- * interrupt input. So just in case, if only one pin +- * was found above, try it both directly and through the +- * 8259A. +- */ +- if (pin1 == -1) { +- pin1 = pin2; +- apic1 = apic2; +- no_pin1 = 1; +- } else if (pin2 == -1) { +- pin2 = pin1; +- apic2 = apic1; +- } +- +- if (pin1 != -1) { +- /* +- * Ok, does IRQ0 through the IOAPIC work? +- */ +- if (no_pin1) { +- add_pin_to_irq(0, apic1, pin1); +- setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); +- } +- unmask_IO_APIC_irq(0); +- if (!no_timer_check && timer_irq_works()) { +- if (nmi_watchdog == NMI_IO_APIC) { +- setup_nmi(); +- enable_8259A_irq(0); +- } +- if (disable_timer_pin_1 > 0) +- clear_IO_APIC_pin(0, pin1); +- goto out; +- } +- clear_IO_APIC_pin(apic1, pin1); +- if (!no_pin1) +- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " +- "8254 timer not connected to IO-APIC\n"); +- +- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " +- "(IRQ0) through the 8259A ...\n"); +- apic_printk(APIC_QUIET, KERN_INFO +- "..... 
(found apic %d pin %d) ...\n", apic2, pin2); +- /* +- * legacy devices should be connected to IO APIC #0 +- */ +- replace_pin_at_irq(0, apic1, pin1, apic2, pin2); +- setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); +- unmask_IO_APIC_irq(0); +- enable_8259A_irq(0); +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); +- timer_through_8259 = 1; +- if (nmi_watchdog == NMI_IO_APIC) { +- disable_8259A_irq(0); +- setup_nmi(); +- enable_8259A_irq(0); +- } +- goto out; +- } +- /* +- * Cleanup, just in case ... +- */ +- disable_8259A_irq(0); +- clear_IO_APIC_pin(apic2, pin2); +- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); +- } +- +- if (nmi_watchdog == NMI_IO_APIC) { +- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " +- "through the IO-APIC - disabling NMI Watchdog!\n"); +- nmi_watchdog = NMI_NONE; +- } +- +- apic_printk(APIC_QUIET, KERN_INFO +- "...trying to set up timer as Virtual Wire IRQ...\n"); +- +- lapic_register_intr(0); +- apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ +- enable_8259A_irq(0); +- +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +- goto out; +- } +- disable_8259A_irq(0); +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); +- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); +- +- apic_printk(APIC_QUIET, KERN_INFO +- "...trying to set up timer as ExtINT IRQ...\n"); +- +- init_8259A(0); +- make_8259A_irq(0); +- apic_write(APIC_LVT0, APIC_DM_EXTINT); +- +- unlock_ExtINT_logic(); +- +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +- goto out; +- } +- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); +- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " +- "report. Then try booting with the 'noapic' option.\n"); +-out: +- local_irq_restore(flags); +-} +-#else +-#define check_timer() ((void)0) +-int timer_uses_ioapic_pin_0 = 0; +-#endif /* !CONFIG_XEN */ +- +-static int __init notimercheck(char *s) +-{ +- no_timer_check = 1; +- return 1; +-} +-__setup("no_timer_check", notimercheck); +- +-/* +- * +- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available +- * to devices. However there may be an I/O APIC pin available for +- * this interrupt regardless. The pin may be left unconnected, but +- * typically it will be reused as an ExtINT cascade interrupt for +- * the master 8259A. In the MPS case such a pin will normally be +- * reported as an ExtINT interrupt in the MP table. With ACPI +- * there is no provision for ExtINT interrupts, and in the absence +- * of an override it would be treated as an ordinary ISA I/O APIC +- * interrupt, that is edge-triggered and unmasked by default. We +- * used to do this, but it caused problems on some systems because +- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using +- * the same ExtINT cascade interrupt to drive the local APIC of the +- * bootstrap processor. Therefore we refrain from routing IRQ2 to +- * the I/O APIC in all cases now. No actual device should request +- * it anyway. 
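The cascade exclusion described above reduces to a single bitmask, which the code below establishes as io_apic_irqs = ~PIC_IRQS. A minimal user-space sketch of that bookkeeping (io_apic_handles() is an illustrative stand-in, not a kernel function):

#include <stdio.h>

/* Model of the PIC_IRQS / io_apic_irqs bookkeeping: IRQ2 stays with
 * the 8259A cascade, every other line is routed via the IO-APIC. */
#define PIC_IRQS (1U << 2)

static unsigned int io_apic_irqs;

static int io_apic_handles(unsigned int irq)
{
        return (io_apic_irqs & (1U << irq)) != 0;
}

int main(void)
{
        unsigned int irq;

        io_apic_irqs = ~PIC_IRQS;       /* everything except the cascade */
        for (irq = 0; irq < 16; irq++)
                printf("IRQ%-2u -> %s\n", irq,
                       io_apic_handles(irq) ? "IO-APIC" : "8259A cascade");
        return 0;
}

Only IRQ2 falls through to the 8259A; every other legacy line is claimed by the IO-APIC.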
--macro +- */ +-#define PIC_IRQS (1<<2) +- +-void __init setup_IO_APIC(void) +-{ +- enable_IO_APIC(); +- +- io_apic_irqs = ~PIC_IRQS; +- +- apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); +- +-#ifndef CONFIG_XEN +- sync_Arb_IDs(); +-#endif /* !CONFIG_XEN */ +- setup_IO_APIC_irqs(); +- init_IO_APIC_traps(); +- check_timer(); +- if (!acpi_ioapic) +- print_IO_APIC(); +-} +- +-#ifndef CONFIG_XEN +- +-struct sysfs_ioapic_data { +- struct sys_device dev; +- struct IO_APIC_route_entry entry[0]; +-}; +-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; +- +-static int ioapic_suspend(struct sys_device *dev, pm_message_t state) +-{ +- struct IO_APIC_route_entry *entry; +- struct sysfs_ioapic_data *data; +- int i; +- +- data = container_of(dev, struct sysfs_ioapic_data, dev); +- entry = data->entry; +- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) +- *entry = ioapic_read_entry(dev->id, i); +- +- return 0; +-} +- +-static int ioapic_resume(struct sys_device *dev) +-{ +- struct IO_APIC_route_entry *entry; +- struct sysfs_ioapic_data *data; +- unsigned long flags; +- union IO_APIC_reg_00 reg_00; +- int i; +- +- data = container_of(dev, struct sysfs_ioapic_data, dev); +- entry = data->entry; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(dev->id, 0); +- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { +- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; +- io_apic_write(dev->id, 0, reg_00.raw); +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); +- for (i = 0; i < nr_ioapic_registers[dev->id]; i++) +- ioapic_write_entry(dev->id, i, entry[i]); +- +- return 0; +-} +- +-static struct sysdev_class ioapic_sysdev_class = { +- .name = "ioapic", +- .suspend = ioapic_suspend, +- .resume = ioapic_resume, +-}; +- +-static int __init ioapic_init_sysfs(void) +-{ +- struct sys_device * dev; +- int i, size, error; +- +- error = sysdev_class_register(&ioapic_sysdev_class); +- if (error) +- return error; +- +- for (i = 0; i < nr_ioapics; i++ ) { +- size = sizeof(struct sys_device) + nr_ioapic_registers[i] +- * sizeof(struct IO_APIC_route_entry); +- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); +- if (!mp_ioapic_data[i]) { +- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); +- continue; +- } +- dev = &mp_ioapic_data[i]->dev; +- dev->id = i; +- dev->cls = &ioapic_sysdev_class; +- error = sysdev_register(dev); +- if (error) { +- kfree(mp_ioapic_data[i]); +- mp_ioapic_data[i] = NULL; +- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); +- continue; +- } +- } +- +- return 0; +-} +- +-device_initcall(ioapic_init_sysfs); +- +-/* +- * Dynamic irq allocate and deallocation +- */ +-int create_irq(void) +-{ +- /* Allocate an unused irq */ +- int irq; +- int new; +- unsigned long flags; +- +- irq = -ENOSPC; +- spin_lock_irqsave(&vector_lock, flags); +- for (new = (NR_IRQS - 1); new >= 0; new--) { +- if (platform_legacy_irq(new)) +- continue; +- if (irq_cfg[new].vector != 0) +- continue; +- if (__assign_irq_vector(new, TARGET_CPUS) == 0) +- irq = new; +- break; +- } +- spin_unlock_irqrestore(&vector_lock, flags); +- +- if (irq >= 0) { +- dynamic_irq_init(irq); +- } +- return irq; +-} +- +-void destroy_irq(unsigned int irq) +-{ +- unsigned long flags; +- +- dynamic_irq_cleanup(irq); +- +- spin_lock_irqsave(&vector_lock, flags); +- __clear_irq_vector(irq); +- spin_unlock_irqrestore(&vector_lock, flags); +-} +- +-#endif /* CONFIG_XEN */ +- +-/* +- * MSI message composition +- */ +-#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) +-static int 
msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- int err; +- unsigned dest; +- cpumask_t tmp; +- +- tmp = TARGET_CPUS; +- err = assign_irq_vector(irq, tmp); +- if (!err) { +- cpus_and(tmp, cfg->domain, tmp); +- dest = cpu_mask_to_apicid(tmp); +- +- msg->address_hi = MSI_ADDR_BASE_HI; +- msg->address_lo = +- MSI_ADDR_BASE_LO | +- ((INT_DEST_MODE == 0) ? +- MSI_ADDR_DEST_MODE_PHYSICAL: +- MSI_ADDR_DEST_MODE_LOGICAL) | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- MSI_ADDR_REDIRECTION_CPU: +- MSI_ADDR_REDIRECTION_LOWPRI) | +- MSI_ADDR_DEST_ID(dest); +- +- msg->data = +- MSI_DATA_TRIGGER_EDGE | +- MSI_DATA_LEVEL_ASSERT | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- MSI_DATA_DELIVERY_FIXED: +- MSI_DATA_DELIVERY_LOWPRI) | +- MSI_DATA_VECTOR(cfg->vector); +- } +- return err; +-} +- +-#ifdef CONFIG_SMP +-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- struct msi_msg msg; +- unsigned int dest; +- cpumask_t tmp; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- return; +- +- if (assign_irq_vector(irq, mask)) +- return; +- +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); +- +- read_msi_msg(irq, &msg); +- +- msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(cfg->vector); +- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; +- msg.address_lo |= MSI_ADDR_DEST_ID(dest); +- +- write_msi_msg(irq, &msg); +- irq_desc[irq].affinity = mask; +-} +-#endif /* CONFIG_SMP */ +- +-/* +- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, +- * which implement the MSI or MSI-X Capability Structure. +- */ +-static struct irq_chip msi_chip = { +- .name = "PCI-MSI", +- .unmask = unmask_msi_irq, +- .mask = mask_msi_irq, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = set_msi_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +-{ +- struct msi_msg msg; +- int irq, ret; +- irq = create_irq(); +- if (irq < 0) +- return irq; +- +- ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) { +- destroy_irq(irq); +- return ret; +- } +- +- set_irq_msi(irq, desc); +- write_msi_msg(irq, &msg); +- +- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +- +- return 0; +-} +- +-void arch_teardown_msi_irq(unsigned int irq) +-{ +- destroy_irq(irq); +-} +- +-#ifdef CONFIG_DMAR +-#ifdef CONFIG_SMP +-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- struct msi_msg msg; +- unsigned int dest; +- cpumask_t tmp; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- return; +- +- if (assign_irq_vector(irq, mask)) +- return; +- +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); +- +- dmar_msi_read(irq, &msg); +- +- msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(cfg->vector); +- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; +- msg.address_lo |= MSI_ADDR_DEST_ID(dest); +- +- dmar_msi_write(irq, &msg); +- irq_desc[irq].affinity = mask; +-} +-#endif /* CONFIG_SMP */ +- +-struct irq_chip dmar_msi_type = { +- .name = "DMAR_MSI", +- .unmask = dmar_msi_unmask, +- .mask = dmar_msi_mask, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = dmar_msi_set_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_dmar_msi(unsigned int irq) +-{ +- int ret; +- struct msi_msg msg; +- +- ret = 
msi_compose_msg(NULL, irq, &msg); +- if (ret < 0) +- return ret; +- dmar_msi_write(irq, &msg); +- set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, +- "edge"); +- return 0; +-} +-#endif +- +-#endif /* CONFIG_PCI_MSI */ +-/* +- * Hypertransport interrupt support +- */ +-#ifdef CONFIG_HT_IRQ +- +-#ifdef CONFIG_SMP +- +-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) +-{ +- struct ht_irq_msg msg; +- fetch_ht_irq_msg(irq, &msg); +- +- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); +- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); +- +- msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); +- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); +- +- write_ht_irq_msg(irq, &msg); +-} +- +-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- unsigned int dest; +- cpumask_t tmp; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- return; +- +- if (assign_irq_vector(irq, mask)) +- return; +- +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); +- +- target_ht_irq(irq, dest, cfg->vector); +- irq_desc[irq].affinity = mask; +-} +-#endif +- +-static struct irq_chip ht_irq_chip = { +- .name = "PCI-HT", +- .mask = mask_ht_irq, +- .unmask = unmask_ht_irq, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = set_ht_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +-{ +- struct irq_cfg *cfg = irq_cfg + irq; +- int err; +- cpumask_t tmp; +- +- tmp = TARGET_CPUS; +- err = assign_irq_vector(irq, tmp); +- if (!err) { +- struct ht_irq_msg msg; +- unsigned dest; +- +- cpus_and(tmp, cfg->domain, tmp); +- dest = cpu_mask_to_apicid(tmp); +- +- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +- +- msg.address_lo = +- HT_IRQ_LOW_BASE | +- HT_IRQ_LOW_DEST_ID(dest) | +- HT_IRQ_LOW_VECTOR(cfg->vector) | +- ((INT_DEST_MODE == 0) ? +- HT_IRQ_LOW_DM_PHYSICAL : +- HT_IRQ_LOW_DM_LOGICAL) | +- HT_IRQ_LOW_RQEOI_EDGE | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
+- HT_IRQ_LOW_MT_FIXED : +- HT_IRQ_LOW_MT_ARBITRATED) | +- HT_IRQ_LOW_IRQ_MASKED; +- +- write_ht_irq_msg(irq, &msg); +- +- set_irq_chip_and_handler_name(irq, &ht_irq_chip, +- handle_edge_irq, "edge"); +- } +- return err; +-} +-#endif /* CONFIG_HT_IRQ */ +- +-/* -------------------------------------------------------------------------- +- ACPI-based IOAPIC Configuration +- -------------------------------------------------------------------------- */ +- +-#ifdef CONFIG_ACPI +- +-#define IO_APIC_MAX_ID 0xFE +- +-int __init io_apic_get_redir_entries (int ioapic) +-{ +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(ioapic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return reg_01.bits.entries; +-} +- +- +-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) +-{ +- if (!IO_APIC_IRQ(irq)) { +- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", +- ioapic); +- return -EINVAL; +- } +- +- /* +- * IRQs < 16 are already in the irq_2_pin[] map +- */ +- if (irq >= 16) +- add_pin_to_irq(irq, ioapic, pin); +- +- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); +- +- return 0; +-} +- +- +-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +-{ +- int i; +- +- if (skip_ioapic_setup) +- return -1; +- +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == mp_INT && +- mp_irqs[i].mp_srcbusirq == bus_irq) +- break; +- if (i >= mp_irq_entries) +- return -1; +- +- *trigger = irq_trigger(i); +- *polarity = irq_polarity(i); +- return 0; +-} +- +-#endif /* CONFIG_ACPI */ +- +-#ifndef CONFIG_XEN +-/* +- * This function currently is only a helper for the i386 smp boot process where +- * we need to reprogram the ioredtbls to cater for the cpus which have come online +- * so mask in all cases should simply be TARGET_CPUS +- */ +-#ifdef CONFIG_SMP +-void __init setup_ioapic_dest(void) +-{ +- int pin, ioapic, irq, irq_entry; +- +- if (skip_ioapic_setup == 1) +- return; +- +- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { +- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { +- irq_entry = find_irq_entry(ioapic, pin, mp_INT); +- if (irq_entry == -1) +- continue; +- irq = pin_2_irq(irq_entry, ioapic, pin); +- +- /* setup_IO_APIC_irqs could fail to get vector for some device +- * when you have too many devices, because at that time only boot +- * cpu is online. 
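The branch that closes this helper below chooses between a full setup and a plain affinity refresh. A toy model of that decision, with irq_vector[] standing in for irq_cfg[irq].vector (names and values are made up for the sketch):

#include <stdio.h>

#define NR_PINS 4

/* irq_vector[pin] == 0 models an entry that could not be programmed
 * while only the boot CPU was online; non-zero means just the
 * destination mask needs refreshing. */
static unsigned char irq_vector[NR_PINS] = { 0x31, 0, 0x39, 0 };

int main(void)
{
        int pin;

        for (pin = 0; pin < NR_PINS; pin++) {
                if (!irq_vector[pin])
                        printf("pin %d: full setup_IO_APIC_irq() path\n", pin);
                else
                        printf("pin %d: set_ioapic_affinity_irq() only (vector 0x%02x)\n",
                               pin, irq_vector[pin]);
        }
        return 0;
}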
+- */ +- if (!irq_cfg[irq].vector) +- setup_IO_APIC_irq(ioapic, pin, irq, +- irq_trigger(irq_entry), +- irq_polarity(irq_entry)); +- else +- set_ioapic_affinity_irq(irq, TARGET_CPUS); +- } +- +- } +-} +-#endif +- +-#define IOAPIC_RESOURCE_NAME_SIZE 11 +- +-static struct resource *ioapic_resources; +- +-static struct resource * __init ioapic_setup_resources(void) +-{ +- unsigned long n; +- struct resource *res; +- char *mem; +- int i; +- +- if (nr_ioapics <= 0) +- return NULL; +- +- n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); +- n *= nr_ioapics; +- +- mem = alloc_bootmem(n); +- res = (void *)mem; +- +- if (mem != NULL) { +- mem += sizeof(struct resource) * nr_ioapics; +- +- for (i = 0; i < nr_ioapics; i++) { +- res[i].name = mem; +- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; +- sprintf(mem, "IOAPIC %u", i); +- mem += IOAPIC_RESOURCE_NAME_SIZE; +- } +- } +- +- ioapic_resources = res; +- +- return res; +-} +- +-void __init ioapic_init_mappings(void) +-{ +- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; +- struct resource *ioapic_res; +- int i; +- +- ioapic_res = ioapic_setup_resources(); +- for (i = 0; i < nr_ioapics; i++) { +- if (smp_found_config) { +- ioapic_phys = mp_ioapics[i].mp_apicaddr; +- } else { +- ioapic_phys = (unsigned long) +- alloc_bootmem_pages(PAGE_SIZE); +- ioapic_phys = __pa(ioapic_phys); +- } +- set_fixmap_nocache(idx, ioapic_phys); +- apic_printk(APIC_VERBOSE, +- "mapped IOAPIC to %016lx (%016lx)\n", +- __fix_to_virt(idx), ioapic_phys); +- idx++; +- +- if (ioapic_res != NULL) { +- ioapic_res->start = ioapic_phys; +- ioapic_res->end = ioapic_phys + (4 * 1024) - 1; +- ioapic_res++; +- } +- } +-} +- +-static int __init ioapic_insert_resources(void) +-{ +- int i; +- struct resource *r = ioapic_resources; +- +- if (!r) { +- printk(KERN_ERR +- "IO APIC resources could be not be allocated.\n"); +- return -1; +- } +- +- for (i = 0; i < nr_ioapics; i++) { +- insert_resource(&iomem_resource, r); +- r++; +- } +- +- return 0; +-} +- +-/* Insert the IO APIC resources after PCI initialization has occured to handle +- * IO APICS that are mapped in on a BAR in PCI space. */ +-late_initcall(ioapic_insert_resources); +-#endif /* !CONFIG_XEN */ +--- head-2010-01-18.orig/arch/x86/kernel/ioport-xen.c 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/ioport-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + + /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ +--- head-2010-01-18.orig/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -21,6 +21,8 @@ + #ifdef CONFIG_X86_32 + #ifndef CONFIG_XEN + #include ++#include ++ + /* + * the following functions deal with sending IPIs between CPUs. 
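Returning briefly to ioapic_setup_resources() above: it packs the struct resource array and the name strings for every entry into one bootmem block, so a single allocation covers everything. A user-space sketch of the same layout (setup_resources() and NAME_SIZE here are illustrative, not the kernel's definitions):

#include <stdio.h>
#include <stdlib.h>

#define NAME_SIZE 11                    /* fits "IOAPIC 255" plus NUL */

struct resource {
        const char *name;
        unsigned long start, end;
};

/* One allocation holds the resource array and, behind it, a NAME_SIZE
 * buffer per entry -- the layout the bootmem allocation above builds. */
static struct resource *setup_resources(int nr)
{
        char *mem = calloc(nr, sizeof(struct resource) + NAME_SIZE);
        struct resource *res = (struct resource *)mem;
        int i;

        if (!mem)
                return NULL;
        mem += nr * sizeof(struct resource);
        for (i = 0; i < nr; i++) {
                snprintf(mem, NAME_SIZE, "IOAPIC %u", i);
                res[i].name = mem;
                mem += NAME_SIZE;
        }
        return res;
}

int main(void)
{
        struct resource *res = setup_resources(3);
        int i;

        for (i = 0; res && i < 3; i++)
                printf("%s\n", res[i].name);
        free(res);
        return 0;
}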
+ * +@@ -197,10 +199,8 @@ void send_IPI_mask_sequence(cpumask_t ma + #endif + } + +-/* must come after the send_IPI functions above for inlining */ +-#include +- + #ifndef CONFIG_XEN ++/* must come after the send_IPI functions above for inlining */ + static int convert_apicid_to_cpu(int apic_id) + { + int i; +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/kernel/irq-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -0,0 +1,193 @@ ++/* ++ * Common interrupt code for 32 and 64 bit ++ */ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++atomic_t irq_err_count; ++ ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk(KERN_ERR "unexpected IRQ trap at irq %02x\n", irq); ++ ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) ++ /* ++ * Currently unexpected vectors happen only on SMP and APIC. ++ * We _must_ ack these because every local APIC has only N ++ * irq slots per priority level, and a 'hanging, unacked' IRQ ++ * holds up an irq slot - in excessive cases (when multiple ++ * unexpected vectors occur) that might lock up the APIC ++ * completely. ++ * But only ack when the APIC is enabled -AK ++ */ ++ if (cpu_has_apic) ++ ack_APIC_irq(); ++#endif ++} ++ ++#ifdef CONFIG_X86_32 ++# define irq_stats(x) (&per_cpu(irq_stat, x)) ++#else ++# define irq_stats(x) cpu_pda(x) ++#endif ++/* ++ * /proc/interrupts printing: ++ */ ++static int show_other_interrupts(struct seq_file *p) ++{ ++ int j; ++ ++ seq_printf(p, "NMI: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); ++ seq_printf(p, " Non-maskable interrupts\n"); ++#ifdef CONFIG_X86_LOCAL_APIC ++ seq_printf(p, "LOC: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); ++ seq_printf(p, " Local timer interrupts\n"); ++#endif ++#ifdef CONFIG_SMP ++ seq_printf(p, "RES: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); ++ seq_printf(p, " Rescheduling interrupts\n"); ++ seq_printf(p, "CAL: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); ++ seq_printf(p, " Function call interrupts\n"); ++#ifndef CONFIG_XEN ++ seq_printf(p, "TLB: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); ++ seq_printf(p, " TLB shootdowns\n"); ++#endif ++#endif ++#ifdef CONFIG_X86_MCE ++ seq_printf(p, "TRM: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); ++ seq_printf(p, " Thermal event interrupts\n"); ++# ifdef CONFIG_X86_64 ++ seq_printf(p, "THR: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); ++ seq_printf(p, " Threshold APIC interrupts\n"); ++# endif ++#endif ++#ifdef CONFIG_X86_LOCAL_APIC ++ seq_printf(p, "SPU: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); ++ seq_printf(p, " Spurious interrupts\n"); ++#endif ++ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); ++#if defined(CONFIG_X86_IO_APIC) ++ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); ++#endif ++ return 0; ++} ++ ++int show_interrupts(struct seq_file *p, void *v) ++{ ++ unsigned long flags, any_count = 0; ++ int i = *(loff_t *) v, j; ++ struct irqaction *action; ++ struct irq_desc *desc; ++ ++ if (i > nr_irqs) ++ return 0; ++ ++ if (i == nr_irqs) ++ return show_other_interrupts(p); ++ ++ 
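The remainder of show_interrupts() below renders the familiar /proc/interrupts table: a CPU header row, then one line per IRQ that has ever fired. The formatting can be previewed in plain user space (counts[] is fabricated data standing in for kstat_irqs_cpu()):

#include <stdio.h>

#define NCPUS 2
#define NIRQS 3

static unsigned int counts[NIRQS][NCPUS] = {
        { 1290350, 4 }, { 0, 0 }, { 812, 733 },
};

int main(void)
{
        int i, j;

        printf("    ");                         /* header row, one column per CPU */
        for (j = 0; j < NCPUS; j++)
                printf("CPU%-8d", j);
        putchar('\n');

        for (i = 0; i < NIRQS; i++) {
                unsigned int any_count = 0;

                for (j = 0; j < NCPUS; j++)     /* skip IRQs that never fired */
                        any_count |= counts[i][j];
                if (!any_count)
                        continue;
                printf("%3d: ", i);
                for (j = 0; j < NCPUS; j++)
                        printf("%10u ", counts[i][j]);
                putchar('\n');
        }
        return 0;
}

The kernel additionally keeps a silent IRQ visible when a handler is installed; the sketch drops that detail.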
/* print header */ ++ if (i == 0) { ++ seq_printf(p, " "); ++ for_each_online_cpu(j) ++ seq_printf(p, "CPU%-8d", j); ++ seq_putc(p, '\n'); ++ } ++ ++ desc = irq_to_desc(i); ++ spin_lock_irqsave(&desc->lock, flags); ++#ifndef CONFIG_SMP ++ any_count = kstat_irqs(i); ++#else ++ for_each_online_cpu(j) ++ any_count |= kstat_irqs_cpu(i, j); ++#endif ++ action = desc->action; ++ if (!action && !any_count) ++ goto out; ++ ++ seq_printf(p, "%3d: ", i); ++#ifndef CONFIG_SMP ++ seq_printf(p, "%10u ", kstat_irqs(i)); ++#else ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); ++#endif ++ seq_printf(p, " %8s", desc->chip->name); ++ seq_printf(p, "-%-8s", desc->name); ++ ++ if (action) { ++ seq_printf(p, " %s", action->name); ++ while ((action = action->next) != NULL) ++ seq_printf(p, ", %s", action->name); ++ } ++ ++ seq_putc(p, '\n'); ++out: ++ spin_unlock_irqrestore(&desc->lock, flags); ++ return 0; ++} ++ ++/* ++ * /proc/stat helpers ++ */ ++u64 arch_irq_stat_cpu(unsigned int cpu) ++{ ++ u64 sum = irq_stats(cpu)->__nmi_count; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ sum += irq_stats(cpu)->apic_timer_irqs; ++#endif ++#ifdef CONFIG_SMP ++ sum += irq_stats(cpu)->irq_resched_count; ++ sum += irq_stats(cpu)->irq_call_count; ++#ifndef CONFIG_XEN ++ sum += irq_stats(cpu)->irq_tlb_count; ++#endif ++#endif ++#ifdef CONFIG_X86_MCE ++ sum += irq_stats(cpu)->irq_thermal_count; ++# ifdef CONFIG_X86_64 ++ sum += irq_stats(cpu)->irq_threshold_count; ++#endif ++#endif ++#ifdef CONFIG_X86_LOCAL_APIC ++ sum += irq_stats(cpu)->irq_spurious_count; ++#endif ++ return sum; ++} ++ ++u64 arch_irq_stat(void) ++{ ++ u64 sum = atomic_read(&irq_err_count); ++ ++#ifdef CONFIG_X86_IO_APIC ++ sum += atomic_read(&irq_mis_count); ++#endif ++ return sum; ++} +--- head-2010-01-18.orig/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); + DEFINE_PER_CPU(struct pt_regs *, irq_regs); + EXPORT_PER_CPU_SYMBOL(irq_regs); + +-/* +- * 'what should we do if we get a hw irq event on an illegal vector'. +- * each architecture has to answer this themselves. +- */ +-void ack_bad_irq(unsigned int irq) +-{ +- printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); +- +-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) +- /* +- * Currently unexpected vectors happen only on SMP and APIC. +- * We _must_ ack these because every local APIC has only N +- * irq slots per priority level, and a 'hanging, unacked' IRQ +- * holds up an irq slot - in excessive cases (when multiple +- * unexpected vectors occur) that might lock up the APIC +- * completely. +- * But only ack when the APIC is enabled -AK +- */ +- if (cpu_has_apic) +- ack_APIC_irq(); +-#endif +-} +- + #ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 1KB free? 
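check_stack_overflow(), referenced just below, masks the stack pointer with THREAD_SIZE-1 and compares the remaining headroom against a 1KB red zone. A self-contained model of that test (the THREAD_SIZE, THREAD_INFO and STACK_WARN values are assumptions for the sketch, not the kernel's constants):

#include <stdio.h>

#define THREAD_SIZE     8192UL  /* assumed 32-bit kernel stack size */
#define THREAD_INFO     64UL    /* illustrative sizeof(struct thread_info) */
#define STACK_WARN      1024UL  /* the "less than 1KB free" red zone */

/* Masking the stack pointer with THREAD_SIZE-1 yields the offset into
 * the current stack; if it dips into thread_info plus the red zone,
 * an overflow is imminent. */
static int stack_low(unsigned long sp)
{
        return (sp & (THREAD_SIZE - 1)) < (THREAD_INFO + STACK_WARN);
}

int main(void)
{
        printf("sp offset 0x1f80 -> %s\n", stack_low(0x1f80) ? "LOW" : "ok");
        printf("sp offset 0x0200 -> %s\n", stack_low(0x0200) ? "LOW" : "ok");
        return 0;
}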
*/ + static int check_stack_overflow(void) +@@ -223,20 +200,22 @@ unsigned int do_IRQ(struct pt_regs *regs + { + struct pt_regs *old_regs; + /* high bit used in ret_from_ code */ +- int overflow, irq = ~regs->orig_ax; +- struct irq_desc *desc = irq_desc + irq; +- +- if (unlikely((unsigned)irq >= NR_IRQS)) { +- printk(KERN_EMERG "%s: cannot handle IRQ %d\n", +- __func__, irq); +- BUG(); +- } ++ int overflow; ++ unsigned irq = ~regs->orig_ax; ++ struct irq_desc *desc; + + old_regs = set_irq_regs(regs); + /*irq_enter();*/ + + overflow = check_stack_overflow(); + ++ desc = irq_to_desc(irq); ++ if (unlikely(!desc)) { ++ printk(KERN_EMERG "%s: cannot handle IRQ %d cpu %d\n", ++ __func__, irq, smp_processor_id()); ++ BUG(); ++ } ++ + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) + print_stack_overflow(); +@@ -248,170 +227,28 @@ unsigned int do_IRQ(struct pt_regs *regs + return 1; + } + +-/* +- * Interrupt statistics: +- */ +- +-atomic_t irq_err_count; +- +-/* +- * /proc/interrupts printing: +- */ +- +-int show_interrupts(struct seq_file *p, void *v) +-{ +- int i = *(loff_t *) v, j; +- struct irqaction * action; +- unsigned long flags; +- +- if (i == 0) { +- seq_printf(p, " "); +- for_each_online_cpu(j) +- seq_printf(p, "CPU%-8d",j); +- seq_putc(p, '\n'); +- } +- +- if (i < NR_IRQS) { +- unsigned any_count = 0; +- +- spin_lock_irqsave(&irq_desc[i].lock, flags); +-#ifndef CONFIG_SMP +- any_count = kstat_irqs(i); +-#else +- for_each_online_cpu(j) +- any_count |= kstat_cpu(j).irqs[i]; +-#endif +- action = irq_desc[i].action; +- if (!action && !any_count) +- goto skip; +- seq_printf(p, "%3d: ",i); +-#ifndef CONFIG_SMP +- seq_printf(p, "%10u ", kstat_irqs(i)); +-#else +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +-#endif +- seq_printf(p, " %8s", irq_desc[i].chip->name); +- seq_printf(p, "-%-8s", irq_desc[i].name); +- +- if (action) { +- seq_printf(p, " %s", action->name); +- while ((action = action->next) != NULL) +- seq_printf(p, ", %s", action->name); +- } +- +- seq_putc(p, '\n'); +-skip: +- spin_unlock_irqrestore(&irq_desc[i].lock, flags); +- } else if (i == NR_IRQS) { +- seq_printf(p, "NMI: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", nmi_count(j)); +- seq_printf(p, " Non-maskable interrupts\n"); +-#ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "LOC: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", +- per_cpu(irq_stat,j).apic_timer_irqs); +- seq_printf(p, " Local timer interrupts\n"); +-#endif +-#ifdef CONFIG_SMP +- seq_printf(p, "RES: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", +- per_cpu(irq_stat,j).irq_resched_count); +- seq_printf(p, " Rescheduling interrupts\n"); +- seq_printf(p, "CAL: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", +- per_cpu(irq_stat,j).irq_call_count); +- seq_printf(p, " function call interrupts\n"); +-#ifndef CONFIG_XEN +- seq_printf(p, "TLB: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", +- per_cpu(irq_stat,j).irq_tlb_count); +- seq_printf(p, " TLB shootdowns\n"); +-#endif +-#endif +-#ifdef CONFIG_X86_MCE +- seq_printf(p, "TRM: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", +- per_cpu(irq_stat,j).irq_thermal_count); +- seq_printf(p, " Thermal event interrupts\n"); +-#endif +-#ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "SPU: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", +- per_cpu(irq_stat,j).irq_spurious_count); +- seq_printf(p, " Spurious interrupts\n"); +-#endif +- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +-#if 
defined(CONFIG_X86_IO_APIC) +- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +-#endif +- } +- return 0; +-} +- +-/* +- * /proc/stat helpers +- */ +-u64 arch_irq_stat_cpu(unsigned int cpu) +-{ +- u64 sum = nmi_count(cpu); +- +-#ifdef CONFIG_X86_LOCAL_APIC +- sum += per_cpu(irq_stat, cpu).apic_timer_irqs; +-#endif +-#ifdef CONFIG_SMP +- sum += per_cpu(irq_stat, cpu).irq_resched_count; +- sum += per_cpu(irq_stat, cpu).irq_call_count; +-#ifndef CONFIG_XEN +- sum += per_cpu(irq_stat, cpu).irq_tlb_count; +-#endif +-#endif +-#ifdef CONFIG_X86_MCE +- sum += per_cpu(irq_stat, cpu).irq_thermal_count; +-#endif +-#ifdef CONFIG_X86_LOCAL_APIC +- sum += per_cpu(irq_stat, cpu).irq_spurious_count; +-#endif +- return sum; +-} +- +-u64 arch_irq_stat(void) +-{ +- u64 sum = atomic_read(&irq_err_count); +- +-#ifdef CONFIG_X86_IO_APIC +- sum += atomic_read(&irq_mis_count); +-#endif +- return sum; +-} +- + #ifdef CONFIG_HOTPLUG_CPU + + void fixup_irqs(cpumask_t map) + { + unsigned int irq; + static int warned; ++ struct irq_desc *desc; + +- for (irq = 0; irq < NR_IRQS; irq++) { ++ for_each_irq_desc(irq, desc) { + cpumask_t mask; ++ + if (irq == 2) + continue; + +- cpus_and(mask, irq_desc[irq].affinity, map); ++ cpus_and(mask, desc->affinity, map); + if (any_online_cpu(mask) == NR_CPUS) { + /*printk("Breaking affinity for irq %i\n", irq);*/ + mask = map; + } +- if (irq_desc[irq].chip->set_affinity) +- irq_desc[irq].chip->set_affinity(irq, mask); +- else if (irq_desc[irq].action && !(warned++)) ++ if (desc->chip->set_affinity) ++ desc->chip->set_affinity(irq, mask); ++ else if (desc->action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + +--- head-2010-01-18.orig/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -18,30 +18,6 @@ + #include + #include + +-atomic_t irq_err_count; +- +-/* +- * 'what should we do if we get a hw irq event on an illegal vector'. +- * each architecture has to answer this themselves. +- */ +-void ack_bad_irq(unsigned int irq) +-{ +- printk(KERN_WARNING "unexpected IRQ trap at irq %02x\n", irq); +-#ifdef CONFIG_X86_LOCAL_APIC +- /* +- * Currently unexpected vectors happen only on SMP and APIC. +- * We _must_ ack these because every local APIC has only N +- * irq slots per priority level, and a 'hanging, unacked' IRQ +- * holds up an irq slot - in excessive cases (when multiple +- * unexpected vectors occur) that might lock up the APIC +- * completely. +- * But don't ack when the APIC is disabled. 
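The 32-bit fixup_irqs() conversion above comes down to one mask computation per IRQ: intersect the descriptor's affinity with the CPUs that stay online, and break affinity back to the whole map when the intersection is empty. Modeled in isolation (cpumask as a plain bitmask; fixup_affinity() is an illustrative helper):

#include <stdio.h>

typedef unsigned int cpumask;           /* bit n set == CPU n */

static cpumask fixup_affinity(cpumask affinity, cpumask map)
{
        cpumask mask = affinity & map;

        return mask ? mask : map;       /* empty -> fall back to map */
}

int main(void)
{
        cpumask online = 0x5;           /* CPUs 0 and 2 survive an unplug */

        printf("old 0x2 -> new 0x%x (affinity broken)\n",
               fixup_affinity(0x2, online));
        printf("old 0x6 -> new 0x%x\n", fixup_affinity(0x6, online));
        return 0;
}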
-AK +- */ +- if (!disable_apic) +- ack_APIC_irq(); +-#endif +-} +- + #ifdef CONFIG_DEBUG_STACKOVERFLOW + /* + * Probabilistic stack overflow check: +@@ -67,130 +43,6 @@ static inline void stack_overflow_check( + #endif + + /* +- * Generic, controller-independent functions: +- */ +- +-int show_interrupts(struct seq_file *p, void *v) +-{ +- int i = *(loff_t *) v, j; +- struct irqaction * action; +- unsigned long flags; +- +- if (i == 0) { +- seq_printf(p, " "); +- for_each_online_cpu(j) +- seq_printf(p, "CPU%-8d",j); +- seq_putc(p, '\n'); +- } +- +- if (i < NR_IRQS) { +- unsigned any_count = 0; +- +- spin_lock_irqsave(&irq_desc[i].lock, flags); +-#ifndef CONFIG_SMP +- any_count = kstat_irqs(i); +-#else +- for_each_online_cpu(j) +- any_count |= kstat_cpu(j).irqs[i]; +-#endif +- action = irq_desc[i].action; +- if (!action && !any_count) +- goto skip; +- seq_printf(p, "%3d: ",i); +-#ifndef CONFIG_SMP +- seq_printf(p, "%10u ", kstat_irqs(i)); +-#else +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +-#endif +- seq_printf(p, " %8s", irq_desc[i].chip->name); +- seq_printf(p, "-%-8s", irq_desc[i].name); +- +- if (action) { +- seq_printf(p, " %s", action->name); +- while ((action = action->next) != NULL) +- seq_printf(p, ", %s", action->name); +- } +- seq_putc(p, '\n'); +-skip: +- spin_unlock_irqrestore(&irq_desc[i].lock, flags); +- } else if (i == NR_IRQS) { +- seq_printf(p, "NMI: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); +- seq_printf(p, " Non-maskable interrupts\n"); +-#ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "LOC: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); +- seq_printf(p, " Local timer interrupts\n"); +-#endif +-#ifdef CONFIG_SMP +- seq_printf(p, "RES: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); +- seq_printf(p, " Rescheduling interrupts\n"); +- seq_printf(p, "CAL: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); +- seq_printf(p, " function call interrupts\n"); +-#ifndef CONFIG_XEN +- seq_printf(p, "TLB: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); +- seq_printf(p, " TLB shootdowns\n"); +-#endif +-#endif +-#ifdef CONFIG_X86_MCE +- seq_printf(p, "TRM: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); +- seq_printf(p, " Thermal event interrupts\n"); +- seq_printf(p, "THR: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); +- seq_printf(p, " Threshold APIC interrupts\n"); +-#endif +-#ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "SPU: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); +- seq_printf(p, " Spurious interrupts\n"); +-#endif +- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +- } +- return 0; +-} +- +-/* +- * /proc/stat helpers +- */ +-u64 arch_irq_stat_cpu(unsigned int cpu) +-{ +- u64 sum = cpu_pda(cpu)->__nmi_count; +- +- sum += cpu_pda(cpu)->apic_timer_irqs; +-#ifdef CONFIG_SMP +- sum += cpu_pda(cpu)->irq_resched_count; +- sum += cpu_pda(cpu)->irq_call_count; +-#ifndef CONFIG_XEN +- sum += cpu_pda(cpu)->irq_tlb_count; +-#endif +-#endif +-#ifdef CONFIG_X86_MCE +- sum += cpu_pda(cpu)->irq_thermal_count; +- sum += cpu_pda(cpu)->irq_threshold_count; +-#endif +- sum += cpu_pda(cpu)->irq_spurious_count; +- return sum; +-} +- +-u64 arch_irq_stat(void) +-{ +- return atomic_read(&irq_err_count); +-} +- +-/* + * do_IRQ handles all normal 
device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). +@@ -198,6 +50,7 @@ u64 arch_irq_stat(void) + asmlinkage unsigned int do_IRQ(struct pt_regs *regs) + { + struct pt_regs *old_regs = set_irq_regs(regs); ++ struct irq_desc *desc; + + /* high bit used in ret_from_ code */ + unsigned irq = ~regs->orig_ax; +@@ -209,8 +62,9 @@ asmlinkage unsigned int do_IRQ(struct pt + stack_overflow_check(regs); + #endif + +- if (likely(irq < NR_IRQS)) +- generic_handle_irq(irq); ++ desc = irq_to_desc(irq); ++ if (likely(desc)) ++ generic_handle_irq_desc(irq, desc); + else { + #ifndef CONFIG_XEN + if (!disable_apic) +@@ -232,8 +86,9 @@ void fixup_irqs(cpumask_t map) + { + unsigned int irq; + static int warned; ++ struct irq_desc *desc; + +- for (irq = 0; irq < NR_IRQS; irq++) { ++ for_each_irq_desc(irq, desc) { + cpumask_t mask; + int break_affinity = 0; + int set_affinity = 1; +@@ -242,32 +97,32 @@ void fixup_irqs(cpumask_t map) + continue; + + /* interrupt's are disabled at this point */ +- spin_lock(&irq_desc[irq].lock); ++ spin_lock(&desc->lock); + + if (!irq_has_action(irq) || +- cpus_equal(irq_desc[irq].affinity, map)) { +- spin_unlock(&irq_desc[irq].lock); ++ cpus_equal(desc->affinity, map)) { ++ spin_unlock(&desc->lock); + continue; + } + +- cpus_and(mask, irq_desc[irq].affinity, map); ++ cpus_and(mask, desc->affinity, map); + if (cpus_empty(mask)) { + break_affinity = 1; + mask = map; + } + +- if (irq_desc[irq].chip->mask) +- irq_desc[irq].chip->mask(irq); ++ if (desc->chip->mask) ++ desc->chip->mask(irq); + +- if (irq_desc[irq].chip->set_affinity) +- irq_desc[irq].chip->set_affinity(irq, mask); ++ if (desc->chip->set_affinity) ++ desc->chip->set_affinity(irq, mask); + else if (!(warned++)) + set_affinity = 0; + +- if (irq_desc[irq].chip->unmask) +- irq_desc[irq].chip->unmask(irq); ++ if (desc->chip->unmask) ++ desc->chip->unmask(irq); + +- spin_unlock(&irq_desc[irq].lock); ++ spin_unlock(&desc->lock); + + if (break_affinity && set_affinity) + /*printk("Broke affinity for irq %i\n", irq)*/; +--- head-2010-01-18.orig/arch/x86/kernel/ldt-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/ldt-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_SMP + static void flush_ldt(void *current_mm) +--- head-2010-01-18.orig/arch/x86/kernel/microcode-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,214 +0,0 @@ +-/* +- * Intel CPU Microcode Update Driver for Linux +- * +- * Copyright (C) 2000-2006 Tigran Aivazian +- * 2006 Shaohua Li +- * +- * This driver allows to upgrade microcode on Intel processors +- * belonging to IA-32 family - PentiumPro, Pentium II, +- * Pentium III, Xeon, Pentium 4, etc. +- * +- * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture +- * Software Developer's Manual +- * Order Number 253668 or free download from: +- * +- * http://developer.intel.com/design/pentium4/manuals/253668.htm +- * +- * For more information, go to http://www.urbanmyth.org/microcode +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. 
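The deleted driver body that follows is built around one staging pattern: pull the whole update image into kernel memory, then hand it to the hypervisor in a single XENPF_microcode_update platform call. A user-space model with the Xen and uaccess plumbing stubbed out (hypervisor_platform_op() is a stand-in, not the real hypercall interface):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int hypervisor_platform_op(const void *data, size_t len)
{
        printf("microcode update hypercall: %zu bytes\n", len);
        return 0;
}

/* Stage the user buffer in kernel memory (vmalloc + copy_from_user in
 * the driver, malloc + memcpy here), then fire one hypercall. */
static int do_microcode_update(const void *ubuf, size_t len)
{
        void *kbuf = malloc(len);
        int err;

        if (!kbuf)
                return -1;              /* -ENOMEM in the driver */
        memcpy(kbuf, ubuf, len);
        err = hypervisor_platform_op(kbuf, len);
        free(kbuf);
        return err;
}

int main(void)
{
        char blob[48] = { 0 };          /* fake update image */

        return do_microcode_update(blob, sizeof(blob));
}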
+- */ +- +-//#define DEBUG /* pr_debug */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +-MODULE_AUTHOR("Tigran Aivazian "); +-MODULE_LICENSE("GPL"); +- +-static int verbose; +-module_param(verbose, int, 0644); +- +-#define MICROCODE_VERSION "1.14a-xen" +- +-#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ +-#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ +-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ +- +-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +-static DEFINE_MUTEX(microcode_mutex); +- +-#ifdef CONFIG_MICROCODE_OLD_INTERFACE +-static int do_microcode_update (const void __user *ubuf, size_t len) +-{ +- int err; +- void *kbuf; +- +- kbuf = vmalloc(len); +- if (!kbuf) +- return -ENOMEM; +- +- if (copy_from_user(kbuf, ubuf, len) == 0) { +- struct xen_platform_op op; +- +- op.cmd = XENPF_microcode_update; +- set_xen_guest_handle(op.u.microcode.data, kbuf); +- op.u.microcode.length = len; +- err = HYPERVISOR_platform_op(&op); +- } else +- err = -EFAULT; +- +- vfree(kbuf); +- +- return err; +-} +- +-static int microcode_open (struct inode *unused1, struct file *unused2) +-{ +- cycle_kernel_lock(); +- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +-} +- +-static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +-{ +- ssize_t ret; +- +- if (len < MC_HEADER_SIZE) { +- printk(KERN_ERR "microcode: not enough data\n"); +- return -EINVAL; +- } +- +- mutex_lock(µcode_mutex); +- +- ret = do_microcode_update(buf, len); +- if (!ret) +- ret = (ssize_t)len; +- +- mutex_unlock(µcode_mutex); +- +- return ret; +-} +- +-static const struct file_operations microcode_fops = { +- .owner = THIS_MODULE, +- .write = microcode_write, +- .open = microcode_open, +-}; +- +-static struct miscdevice microcode_dev = { +- .minor = MICROCODE_MINOR, +- .name = "microcode", +- .fops = µcode_fops, +-}; +- +-static int __init microcode_dev_init (void) +-{ +- int error; +- +- error = misc_register(µcode_dev); +- if (error) { +- printk(KERN_ERR +- "microcode: can't misc_register on minor=%d\n", +- MICROCODE_MINOR); +- return error; +- } +- +- return 0; +-} +- +-static void microcode_dev_exit (void) +-{ +- misc_deregister(µcode_dev); +-} +- +-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +-#else +-#define microcode_dev_init() 0 +-#define microcode_dev_exit() do { } while(0) +-#endif +- +-/* fake device for request_firmware */ +-static struct platform_device *microcode_pdev; +- +-static int request_microcode(void) +-{ +- char name[30]; +- const struct cpuinfo_x86 *c = &boot_cpu_data; +- const struct firmware *firmware; +- int error; +- struct xen_platform_op op; +- +- sprintf(name,"intel-ucode/%02x-%02x-%02x", +- c->x86, c->x86_model, c->x86_mask); +- error = request_firmware(&firmware, name, µcode_pdev->dev); +- if (error) { +- pr_debug("microcode: data file %s load failed\n", name); +- return error; +- } +- +- op.cmd = XENPF_microcode_update; +- set_xen_guest_handle(op.u.microcode.data, firmware->data); +- op.u.microcode.length = firmware->size; +- error = HYPERVISOR_platform_op(&op); +- +- release_firmware(firmware); +- +- if (error) +- pr_debug("ucode load failed\n"); +- +- return error; +-} +- +-static int __init microcode_init (void) +-{ +- int error; +- 
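request_microcode() above derives the firmware blob name from the boot CPU's family, model and stepping. The same formatting, runnable on its own (the CPUID values below are made up):

#include <stdio.h>

int main(void)
{
        /* Illustrative CPUID fields: family 6, model 0x17, stepping 10. */
        unsigned int family = 0x06, model = 0x17, stepping = 0x0a;
        char name[30];

        snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
                 family, model, stepping);
        printf("request_firmware(\"%s\")\n", name);
        return 0;
}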
+- printk(KERN_INFO +- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); +- +- error = microcode_dev_init(); +- if (error) +- return error; +- microcode_pdev = platform_device_register_simple("microcode", -1, +- NULL, 0); +- if (IS_ERR(microcode_pdev)) { +- microcode_dev_exit(); +- return PTR_ERR(microcode_pdev); +- } +- +- request_microcode(); +- +- return 0; +-} +- +-static void __exit microcode_exit (void) +-{ +- microcode_dev_exit(); +- platform_device_unregister(microcode_pdev); +-} +- +-module_init(microcode_init) +-module_exit(microcode_exit) +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/kernel/microcode_core-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -0,0 +1,225 @@ ++/* ++ * Intel CPU Microcode Update Driver for Linux ++ * ++ * Copyright (C) 2000-2006 Tigran Aivazian ++ * 2006 Shaohua Li ++ * ++ * This driver allows to upgrade microcode on Intel processors ++ * belonging to IA-32 family - PentiumPro, Pentium II, ++ * Pentium III, Xeon, Pentium 4, etc. ++ * ++ * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture ++ * Software Developer's Manual ++ * Order Number 253668 or free download from: ++ * ++ * http://developer.intel.com/design/pentium4/manuals/253668.htm ++ * ++ * For more information, go to http://www.urbanmyth.org/microcode ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Microcode Update Driver"); ++MODULE_AUTHOR("Tigran Aivazian "); ++MODULE_LICENSE("GPL"); ++ ++static int verbose; ++module_param(verbose, int, 0644); ++ ++#define MICROCODE_VERSION "2.00-xen" ++ ++/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ ++static DEFINE_MUTEX(microcode_mutex); ++ ++#ifdef CONFIG_MICROCODE_OLD_INTERFACE ++static int do_microcode_update(const void __user *ubuf, size_t len) ++{ ++ int err; ++ void *kbuf; ++ ++ kbuf = vmalloc(len); ++ if (!kbuf) ++ return -ENOMEM; ++ ++ if (copy_from_user(kbuf, ubuf, len) == 0) { ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_microcode_update; ++ set_xen_guest_handle(op.u.microcode.data, kbuf); ++ op.u.microcode.length = len; ++ err = HYPERVISOR_platform_op(&op); ++ } else ++ err = -EFAULT; ++ ++ vfree(kbuf); ++ ++ return err; ++} ++ ++static int microcode_open(struct inode *unused1, struct file *unused2) ++{ ++ cycle_kernel_lock(); ++ return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; ++} ++ ++static ssize_t microcode_write(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ ssize_t ret; ++ ++ if ((len >> PAGE_SHIFT) > num_physpages) { ++ printk(KERN_ERR "microcode: too much data (max %ld pages)\n", ++ num_physpages); ++ return -EINVAL; ++ } ++ ++ mutex_lock(µcode_mutex); ++ ++ ret = do_microcode_update(buf, len); ++ if (!ret) ++ ret = (ssize_t)len; ++ ++ mutex_unlock(µcode_mutex); ++ ++ return ret; ++} ++ ++static const struct file_operations microcode_fops = { ++ .owner = THIS_MODULE, ++ .write = microcode_write, ++ .open = microcode_open, ++}; ++ ++static struct miscdevice microcode_dev = { ++ .minor = MICROCODE_MINOR, ++ .name = "microcode", ++ .fops = µcode_fops, ++}; ++ ++static int __init microcode_dev_init(void) ++{ ++ int error; ++ ++ error = misc_register(µcode_dev); ++ if (error) { ++ printk(KERN_ERR ++ "microcode: can't misc_register on minor=%d\n", ++ MICROCODE_MINOR); ++ return error; ++ } ++ ++ return 0; ++} ++ ++static void microcode_dev_exit(void) ++{ ++ misc_deregister(µcode_dev); ++} ++ ++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); ++#else ++#define microcode_dev_init() 0 ++#define microcode_dev_exit() do { } while (0) ++#endif ++ ++/* fake device for request_firmware */ ++static struct platform_device *microcode_pdev; ++ ++static int request_microcode(const char *name) ++{ ++ const struct firmware *firmware; ++ int error; ++ struct xen_platform_op op; ++ ++ error = request_firmware(&firmware, name, µcode_pdev->dev); ++ if (error) { ++ pr_debug("microcode: data file %s load failed\n", name); ++ return error; ++ } ++ ++ op.cmd = XENPF_microcode_update; ++ set_xen_guest_handle(op.u.microcode.data, firmware->data); ++ op.u.microcode.length = firmware->size; ++ error = HYPERVISOR_platform_op(&op); ++ ++ release_firmware(firmware); ++ ++ if (error) ++ pr_debug("ucode load failed\n"); ++ ++ return error; ++} ++ ++static int __init microcode_init(void) ++{ ++ const struct cpuinfo_x86 *c = &boot_cpu_data; ++ char buf[32]; ++ const char *fw_name = buf; ++ int error; ++ ++ if (c->x86_vendor == X86_VENDOR_INTEL) ++ sprintf(buf, "intel-ucode/%02x-%02x-%02x", ++ c->x86, c->x86_model, c->x86_mask); ++ else if (c->x86_vendor == X86_VENDOR_AMD) ++ fw_name = "amd-ucode/microcode_amd.bin"; ++ else { ++ printk(KERN_ERR "microcode: no support for this CPU vendor\n"); ++ return -ENODEV; ++ } ++ ++ error = microcode_dev_init(); ++ if (error) ++ return error; ++ microcode_pdev = platform_device_register_simple("microcode", -1, ++ NULL, 0); ++ if (IS_ERR(microcode_pdev)) { ++ microcode_dev_exit(); ++ return PTR_ERR(microcode_pdev); ++ } ++ ++ request_microcode(fw_name); ++ ++ printk(KERN_INFO ++ "Microcode Update Driver: v" MICROCODE_VERSION ++ " ," ++ " Peter Oruba\n"); ++ ++ return 0; ++} ++ ++static void __exit microcode_exit(void) ++{ ++ microcode_dev_exit(); ++ platform_device_unregister(microcode_pdev); ++ ++ printk(KERN_INFO ++ "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); ++} ++ ++module_init(microcode_init); ++module_exit(microcode_exit); +--- head-2010-01-18.orig/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -406,7 +406,9 @@ static int __init smp_read_mpc(struct mp + generic_bigsmp_probe(); + #endif + ++#ifdef CONFIG_X86_32 + setup_apic_routing(); ++#endif + if (!num_processors) + printk(KERN_ERR "MPTABLE: no processors registered!\n"); + return num_processors; +@@ -611,6 +613,9 @@ static void __init 
__get_smp_config(unsi + printk(KERN_INFO "Using ACPI for processor (LAPIC) " + "configuration information\n"); + ++ if (!mpf) ++ return; ++ + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", + mpf->mpf_specification); + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN) +--- head-2010-01-18.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); + /* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to older i386. */ +-struct device fallback_dev = { ++struct device x86_dma_fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, +- .dma_mask = &fallback_dev.coherent_dma_mask, ++ .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, + }; ++EXPORT_SYMBOL(x86_dma_fallback_dev); + + int dma_set_mask(struct device *dev, u64 mask) + { +@@ -82,7 +83,7 @@ void __init dma32_reserve_bootmem(void) + * using 512M as goal + */ + align = 64ULL<<20; +- size = round_up(dma32_bootmem_size, align); ++ size = roundup(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + 512ULL<<20); + if (dma32_bootmem_ptr) +@@ -109,6 +110,8 @@ static void __init dma32_free_bootmem(vo + #endif + + static struct dma_mapping_ops swiotlb_dma_ops = { ++ .alloc_coherent = dma_generic_alloc_coherent, ++ .free_coherent = dma_generic_free_coherent, + .mapping_error = swiotlb_dma_mapping_error, + .map_single = swiotlb_map_single_phys, + .unmap_single = swiotlb_unmap_single, +@@ -147,13 +150,77 @@ void __init pci_iommu_alloc(void) + } + + #ifndef CONFIG_XEN +-unsigned long iommu_num_pages(unsigned long addr, unsigned long len) ++unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) + { + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; + } +-EXPORT_SYMBOL(iommu_num_pages); ++EXPORT_SYMBOL(iommu_nr_pages); ++#endif ++ ++void *dma_generic_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_addr, gfp_t flag) ++{ ++ unsigned long dma_mask; ++ struct page *page; ++#ifndef CONFIG_XEN ++ dma_addr_t addr; ++#else ++ void *memory; ++#endif ++ unsigned int order = get_order(size); ++ ++ dma_mask = dma_alloc_coherent_mask(dev, flag); ++ ++#ifndef CONFIG_XEN ++ flag |= __GFP_ZERO; ++again: ++#else ++ flag &= ~(__GFP_DMA | __GFP_DMA32); ++#endif ++ page = alloc_pages_node(dev_to_node(dev), flag, order); ++ if (!page) ++ return NULL; ++ ++#ifndef CONFIG_XEN ++ addr = page_to_phys(page); ++ if (!is_buffer_dma_capable(dma_mask, addr, size)) { ++ __free_pages(page, order); ++ ++ if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { ++ flag = (flag & ~GFP_DMA32) | GFP_DMA; ++ goto again; ++ } ++ ++ return NULL; ++ } ++ ++ *dma_addr = addr; ++ return page_address(page); ++#else ++ memory = page_address(page); ++ if (xen_create_contiguous_region((unsigned long)memory, order, ++ fls64(dma_mask))) { ++ __free_pages(page, order); ++ return NULL; ++ } ++ ++ *dma_addr = virt_to_bus(memory); ++ return memset(memory, 0, size); ++#endif ++} ++ ++#ifdef CONFIG_XEN ++void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, ++ dma_addr_t dma_addr) ++{ ++ unsigned int order = get_order(size); ++ unsigned long va = (unsigned long)vaddr; ++ ++ xen_destroy_contiguous_region(va, order); ++ free_pages(va, order); ++} + #endif + 
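dma_generic_alloc_coherent()'s Xen branch above allocates ordinary guest pages and then exchanges them for machine-contiguous memory below the device's DMA mask; the free path reverses the exchange before handing the pages back. A sketch of that pairing with the Xen calls stubbed out (dma_alloc() and the fixed page size are assumptions of the model):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* Stubs for the two Xen calls the kernel path uses. */
static int xen_create_contiguous_region(void *va, int order, int addr_bits)
{
        printf("exchange %lu page(s) for contiguous memory below 2^%d\n",
               1UL << order, addr_bits);
        return 0;                       /* 0 == success */
}

static void xen_destroy_contiguous_region(void *va, int order)
{
        (void)va; (void)order;
}

static void *dma_alloc(size_t size, int addr_bits, int *order)
{
        void *va;

        for (*order = 0; (PAGE_SIZE << *order) < size; (*order)++)
                ;                       /* get_order(size) */
        va = calloc(1, PAGE_SIZE << *order);
        if (va && xen_create_contiguous_region(va, *order, addr_bits)) {
                free(va);               /* exchange failed: give pages back */
                va = NULL;
        }
        return va;
}

int main(void)
{
        int order;
        void *buf = dma_alloc(10000, 32, &order);

        if (buf) {
                xen_destroy_contiguous_region(buf, order);
                free(buf);
        }
        return 0;
}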
+ /* +@@ -291,164 +358,6 @@ int dma_supported(struct device *dev, u6 + } + EXPORT_SYMBOL(dma_supported); + +-/* Allocate DMA memory on node near device */ +-static struct page * +-dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) +-{ +- int node; +- +- node = dev_to_node(dev); +- +- return alloc_pages_node(node, gfp, order); +-} +- +-/* +- * Allocate memory for a coherent mapping. +- */ +-void * +-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, +- gfp_t gfp) +-{ +-#ifndef CONFIG_XEN +- struct dma_mapping_ops *ops = get_dma_ops(dev); +-#endif +- void *memory = NULL; +- struct page *page; +- unsigned long dma_mask = 0; +- int noretry = 0; +- unsigned int order = get_order(size); +- +- /* ignore region specifiers */ +- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); +- +- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) +- return memory; +- +- if (!dev) { +- dev = &fallback_dev; +- gfp |= GFP_DMA; +- } +- dma_mask = dev->coherent_dma_mask; +- if (dma_mask == 0) +- dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; +- +- /* Device not DMA able */ +- if (dev->dma_mask == NULL) +- return NULL; +- +-#ifdef CONFIG_XEN +- gfp &= ~(__GFP_DMA | __GFP_DMA32); +-#else +- /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ +- if (gfp & __GFP_DMA) +- noretry = 1; +- +-#ifdef CONFIG_X86_64 +- /* Why <=? Even when the mask is smaller than 4GB it is often +- larger than 16MB and in this case we have a chance of +- finding fitting memory in the next higher zone first. If +- not retry with true GFP_DMA. -AK */ +- if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { +- gfp |= GFP_DMA32; +-#endif +- +- again: +-#endif +- page = dma_alloc_pages(dev, +- noretry ? gfp | __GFP_NORETRY : gfp, order); +- if (page == NULL) +- return NULL; +- +-#ifndef CONFIG_XEN +- { +- int high, mmu; +- dma_addr_t bus = page_to_phys(page); +- memory = page_address(page); +- high = (bus + size) >= dma_mask; +- mmu = high; +- if (force_iommu && !(gfp & GFP_DMA)) +- mmu = 1; +- else if (high) { +- free_pages((unsigned long)memory, order); +- +- /* Don't use the 16MB ZONE_DMA unless absolutely +- needed. It's better to use remapping first. */ +- if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { +- gfp = (gfp & ~GFP_DMA32) | GFP_DMA; +- goto again; +- } +- +- /* Let low level make its own zone decisions */ +- gfp &= ~(GFP_DMA32|GFP_DMA); +- +- if (ops->alloc_coherent) +- return ops->alloc_coherent(dev, size, +- dma_handle, gfp); +- return NULL; +- } +- +- memset(memory, 0, size); +- if (!mmu) { +- *dma_handle = bus; +- return memory; +- } +- } +- +- if (ops->alloc_coherent) { +- free_pages((unsigned long)memory, order); +- gfp &= ~(GFP_DMA|GFP_DMA32); +- return ops->alloc_coherent(dev, size, dma_handle, gfp); +- } +- +- if (ops->map_simple) { +- *dma_handle = ops->map_simple(dev, virt_to_bus(memory), +- size, +- PCI_DMA_BIDIRECTIONAL); +- if (*dma_handle != bad_dma_address) +- return memory; +- } +-#else +- memory = page_address(page); +- if (xen_create_contiguous_region((unsigned long)memory, order, +- fls64(dma_mask)) == 0) { +- memset(memory, 0, size); +- *dma_handle = virt_to_bus(memory); +- return memory; +- } +-#endif +- +- if (panic_on_overflow) +- panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", +- (unsigned long)size); +- free_pages((unsigned long)memory, order); +- return NULL; +-} +-EXPORT_SYMBOL(dma_alloc_coherent); +- +-/* +- * Unmap coherent memory. +- * The caller must ensure that the device has finished accessing the mapping. 
+- */ +-void dma_free_coherent(struct device *dev, size_t size, +- void *vaddr, dma_addr_t bus) +-{ +-#ifndef CONFIG_XEN +- struct dma_mapping_ops *ops = get_dma_ops(dev); +-#endif +- +- int order = get_order(size); +- WARN_ON(irqs_disabled()); /* for portability */ +- if (dma_release_from_coherent(dev, order, vaddr)) +- return; +-#ifndef CONFIG_XEN +- if (ops->unmap_single) +- ops->unmap_single(dev, bus, size, 0); +-#endif +- xen_destroy_contiguous_region((unsigned long)vaddr, order); +- free_pages((unsigned long)vaddr, order); +-} +-EXPORT_SYMBOL(dma_free_coherent); +- + static int __init pci_iommu_init(void) + { + calgary_iommu_init(); +--- head-2010-01-18.orig/arch/x86/kernel/pci-nommu-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-nommu-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -5,6 +5,7 @@ + + #include + ++#include + #include + #include + #include +@@ -36,7 +37,7 @@ gnttab_map_sg(struct device *hwdev, stru + gnttab_dma_map_page(sg_page(sg)) + sg->offset; + sg->dma_length = sg->length; + IOMMU_BUG_ON(address_needs_mapping( +- hwdev, sg->dma_address)); ++ hwdev, sg->dma_address, sg->length)); + IOMMU_BUG_ON(range_straddles_page_boundary( + page_to_pseudophys(sg_page(sg)) + sg->offset, + sg->length)); +@@ -67,7 +68,7 @@ gnttab_map_single(struct device *dev, ph + dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) + + offset_in_page(paddr); + IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size)); +- IOMMU_BUG_ON(address_needs_mapping(dev, dma)); ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma, size)); + + return dma; + } +@@ -79,7 +80,9 @@ gnttab_unmap_single(struct device *dev, + gnttab_dma_unmap_page(dma_addr); + } + +-static struct dma_mapping_ops nommu_dma_ops = { ++struct dma_mapping_ops nommu_dma_ops = { ++ .alloc_coherent = dma_generic_alloc_coherent, ++ .free_coherent = dma_generic_free_coherent, + .map_single = gnttab_map_single, + .unmap_single = gnttab_unmap_single, + .map_sg = gnttab_map_sg, +--- head-2010-01-18.orig/arch/x86/kernel/process-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -180,7 +180,8 @@ static void mwait_idle(void) + static void poll_idle(void) + { + local_irq_enable(); +- cpu_relax(); ++ while (!need_resched()) ++ cpu_relax(); + } + + #ifndef CONFIG_XEN +--- head-2010-01-18.orig/arch/x86/kernel/process_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_32-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -60,6 +61,8 @@ + #include + #include + #include ++#include ++#include + + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork"); +@@ -78,42 +81,12 @@ unsigned long thread_saved_pc(struct tas + return ((unsigned long *)tsk->thread.sp)[3]; + } + +-#ifdef CONFIG_HOTPLUG_CPU +-#ifndef CONFIG_XEN +-#include +- +-static void cpu_exit_clear(void) +-{ +- int cpu = raw_smp_processor_id(); +- +- idle_task_exit(); +- +- cpu_uninit(); +- irq_ctx_exit(cpu); +- +- cpu_clear(cpu, cpu_callout_map); +- cpu_clear(cpu, cpu_callin_map); +- +- numa_remove_cpu(cpu); +- c1e_remove_cpu(cpu); +-} +-#endif +- +-static inline void play_dead(void) +-{ +- idle_task_exit(); +- local_irq_disable(); +- cpu_clear(smp_processor_id(), cpu_initialized); +- preempt_enable_no_resched(); +- VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); +- 
cpu_bringup(); +-} +-#else ++#ifndef CONFIG_SMP + static inline void play_dead(void) + { + BUG(); + } +-#endif /* CONFIG_HOTPLUG_CPU */ ++#endif + + /* + * The idle thread. There's no useful work to be +@@ -155,12 +128,13 @@ void cpu_idle(void) + } + } + +-void __show_registers(struct pt_regs *regs, int all) ++void __show_regs(struct pt_regs *regs, int all) + { + unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; + unsigned long d0, d1, d2, d3, d6, d7; + unsigned long sp; + unsigned short ss, gs; ++ const char *board; + + if (user_mode_vm(regs)) { + sp = regs->sp; +@@ -173,11 +147,15 @@ void __show_registers(struct pt_regs *re + } + + printk("\n"); +- printk("Pid: %d, comm: %s %s (%s %.*s)\n", ++ ++ board = dmi_get_system_info(DMI_PRODUCT_NAME); ++ if (!board) ++ board = ""; ++ printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", + task_pid_nr(current), current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), +- init_utsname()->version); ++ init_utsname()->version, board); + + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + (u16)regs->cs, regs->ip, regs->flags, +@@ -216,7 +194,7 @@ void __show_registers(struct pt_regs *re + + void show_regs(struct pt_regs *regs) + { +- __show_registers(regs, 1); ++ __show_regs(regs, 1); + show_trace(NULL, regs, ®s->sp, regs->bp); + } + +@@ -269,6 +247,14 @@ void exit_thread(void) + t->io_bitmap_ptr = NULL; + clear_thread_flag(TIF_IO_BITMAP); + } ++#ifdef CONFIG_X86_DS ++ /* Free any DS contexts that have not been properly released. */ ++ if (unlikely(current->thread.ds_ctx)) { ++ /* we clear debugctl to make sure DS is not used. */ ++ update_debugctlmsr(0); ++ ds_free(current->thread.ds_ctx); ++ } ++#endif /* CONFIG_X86_DS */ + } + + void flush_thread(void) +@@ -434,6 +420,35 @@ int set_tsc_mode(unsigned int val) + return 0; + } + ++#ifdef CONFIG_X86_DS ++static int update_debugctl(struct thread_struct *prev, ++ struct thread_struct *next, unsigned long debugctl) ++{ ++ unsigned long ds_prev = 0; ++ unsigned long ds_next = 0; ++ ++ if (prev->ds_ctx) ++ ds_prev = (unsigned long)prev->ds_ctx->ds; ++ if (next->ds_ctx) ++ ds_next = (unsigned long)next->ds_ctx->ds; ++ ++ if (ds_next != ds_prev) { ++ /* we clear debugctl to make sure DS ++ * is not in use when we change it */ ++ debugctl = 0; ++ update_debugctlmsr(0); ++ wrmsr(MSR_IA32_DS_AREA, ds_next, 0); ++ } ++ return debugctl; ++} ++#else ++static int update_debugctl(struct thread_struct *prev, ++ struct thread_struct *next, unsigned long debugctl) ++{ ++ return debugctl; ++} ++#endif /* CONFIG_X86_DS */ ++ + static noinline void + __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) + { +@@ -443,14 +458,7 @@ __switch_to_xtra(struct task_struct *pre + prev = &prev_p->thread; + next = &next_p->thread; + +- debugctl = prev->debugctlmsr; +- if (next->ds_area_msr != prev->ds_area_msr) { +- /* we clear debugctl to make sure DS +- * is not in use when we change it */ +- debugctl = 0; +- update_debugctlmsr(0); +- wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); +- } ++ debugctl = update_debugctl(prev, next, prev->debugctlmsr); + + if (next->debugctlmsr != debugctl) + update_debugctlmsr(next->debugctlmsr); +@@ -474,13 +482,13 @@ __switch_to_xtra(struct task_struct *pre + hard_enable_TSC(); + } + +-#ifdef X86_BTS ++#ifdef CONFIG_X86_PTRACE_BTS + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, 
BTS_TASK_ARRIVES); +-#endif ++#endif /* CONFIG_X86_PTRACE_BTS */ + } + + /* +--- head-2010-01-18.orig/arch/x86/kernel/process_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_64-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -40,11 +40,11 @@ + #include + #include + #include ++#include ++#include + +-#include + #include + #include +-#include + #include + #include + #include +@@ -58,6 +58,7 @@ + #include + #include + #include ++#include + + #include + +@@ -71,6 +72,13 @@ void idle_notifier_register(struct notif + { + atomic_notifier_chain_register(&idle_notifier, n); + } ++EXPORT_SYMBOL_GPL(idle_notifier_register); ++ ++void idle_notifier_unregister(struct notifier_block *n) ++{ ++ atomic_notifier_chain_unregister(&idle_notifier, n); ++} ++EXPORT_SYMBOL_GPL(idle_notifier_unregister); + + void enter_idle(void) + { +@@ -94,25 +102,12 @@ void exit_idle(void) + __exit_idle(); + } + +-#ifdef CONFIG_HOTPLUG_CPU +-static inline void play_dead(void) +-{ +- idle_task_exit(); +-#ifndef CONFIG_XEN +- c1e_remove_cpu(raw_smp_processor_id()); +-#endif +- local_irq_disable(); +- cpu_clear(smp_processor_id(), cpu_initialized); +- preempt_enable_no_resched(); +- VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); +- cpu_bringup(); +-} +-#else ++#ifndef CONFIG_SMP + static inline void play_dead(void) + { + BUG(); + } +-#endif /* CONFIG_HOTPLUG_CPU */ ++#endif + + /* + * The idle thread. There's no useful work to be +@@ -157,63 +152,74 @@ void cpu_idle(void) + } + + /* Prints also some state that isn't saved in the pt_regs */ +-void __show_regs(struct pt_regs * regs) ++void __show_regs(struct pt_regs *regs, int all) + { +- unsigned long fs, gs, shadowgs; ++ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; + unsigned long d0, d1, d2, d3, d6, d7; + unsigned int fsindex, gsindex; + unsigned int ds, cs, es; + + printk("\n"); + print_modules(); +- printk("Pid: %d, comm: %.20s %s %s %.*s\n", ++ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); ++ printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk_address(regs->ip, 1); +- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, +- regs->flags); +- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", ++ printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, ++ regs->sp, regs->flags); ++ printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + regs->ax, regs->bx, regs->cx); +- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", ++ printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + regs->dx, regs->si, regs->di); +- printk("RBP: %016lx R08: %016lx R09: %016lx\n", ++ printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + regs->bp, regs->r8, regs->r9); +- printk("R10: %016lx R11: %016lx R12: %016lx\n", +- regs->r10, regs->r11, regs->r12); +- printk("R13: %016lx R14: %016lx R15: %016lx\n", +- regs->r13, regs->r14, regs->r15); +- +- asm("mov %%ds,%0" : "=r" (ds)); +- asm("mov %%cs,%0" : "=r" (cs)); +- asm("mov %%es,%0" : "=r" (es)); ++ printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", ++ regs->r10, regs->r11, regs->r12); ++ printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", ++ regs->r13, regs->r14, regs->r15); ++ ++ asm("movl %%ds,%0" : "=r" (ds)); ++ asm("movl %%cs,%0" : "=r" (cs)); ++ asm("movl %%es,%0" : "=r" 
(es)); + asm("mov %%fs,%0" : "=r" (fsindex)); + asm("mov %%gs,%0" : "=r" (gsindex)); + + rdmsrl(MSR_FS_BASE, fs); +- rdmsrl(MSR_GS_BASE, gs); +- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); ++ rdmsrl(MSR_GS_BASE, gs); ++ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + +- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", +- fs,fsindex,gs,gsindex,shadowgs); +- printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es); ++ if (!all) ++ return; ++ ++ cr0 = read_cr0(); ++ cr2 = read_cr2(); ++ cr3 = read_cr3(); ++ cr4 = read_cr4(); ++ ++ printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", ++ fs, fsindex, gs, gsindex, shadowgs); ++ printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, ++ es, cr0); ++ printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, ++ cr4); + + get_debugreg(d0, 0); + get_debugreg(d1, 1); + get_debugreg(d2, 2); +- printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); ++ printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + get_debugreg(d3, 3); + get_debugreg(d6, 6); + get_debugreg(d7, 7); +- printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); ++ printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + } + + void show_regs(struct pt_regs *regs) + { +- printk("CPU %d:", smp_processor_id()); +- __show_regs(regs); ++ printk(KERN_INFO "CPU %d:", smp_processor_id()); ++ __show_regs(regs, 1); + show_trace(NULL, regs, (void *)(regs + 1), regs->bp); + } + +@@ -250,6 +256,14 @@ void exit_thread(void) + #endif + t->io_bitmap_max = 0; + } ++#ifdef CONFIG_X86_DS ++ /* Free any DS contexts that have not been properly released. */ ++ if (unlikely(t->ds_ctx)) { ++ /* we clear debugctl to make sure DS is not used. */ ++ update_debugctlmsr(0); ++ ds_free(t->ds_ctx); ++ } ++#endif /* CONFIG_X86_DS */ + } + + void xen_load_gs_index(unsigned gs) +@@ -330,10 +344,10 @@ void prepare_to_copy(struct task_struct + + int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, + unsigned long unused, +- struct task_struct * p, struct pt_regs * regs) ++ struct task_struct *p, struct pt_regs *regs) + { + int err; +- struct pt_regs * childregs; ++ struct pt_regs *childregs; + struct task_struct *me = current; + + childregs = ((struct pt_regs *) +@@ -378,10 +392,10 @@ int copy_thread(int nr, unsigned long cl + if (test_thread_flag(TIF_IA32)) + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); +- else +-#endif +- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); +- if (err) ++ else ++#endif ++ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); ++ if (err) + goto out; + } + p->thread.iopl = current->thread.iopl; +@@ -489,13 +503,27 @@ static inline void __switch_to_xtra(stru + next = &next_p->thread; + + debugctl = prev->debugctlmsr; +- if (next->ds_area_msr != prev->ds_area_msr) { +- /* we clear debugctl to make sure DS +- * is not in use when we change it */ +- debugctl = 0; +- update_debugctlmsr(0); +- wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); ++ ++#ifdef CONFIG_X86_DS ++ { ++ unsigned long ds_prev = 0, ds_next = 0; ++ ++ if (prev->ds_ctx) ++ ds_prev = (unsigned long)prev->ds_ctx->ds; ++ if (next->ds_ctx) ++ ds_next = (unsigned long)next->ds_ctx->ds; ++ ++ if (ds_next != ds_prev) { ++ /* ++ * We clear debugctl to make sure DS ++ * is not in use when we change it: ++ */ ++ debugctl = 0; ++ update_debugctlmsr(0); ++ wrmsrl(MSR_IA32_DS_AREA, ds_next); ++ } + } ++#endif /* CONFIG_X86_DS */ + + if (next->debugctlmsr != debugctl) + update_debugctlmsr(next->debugctlmsr); +@@ -519,13 
+547,13 @@ static inline void __switch_to_xtra(stru + hard_enable_TSC(); + } + +-#ifdef X86_BTS ++#ifdef CONFIG_X86_PTRACE_BTS + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); +-#endif ++#endif /* CONFIG_X86_PTRACE_BTS */ + } + + /* +@@ -557,7 +585,7 @@ __switch_to(struct task_struct *prev_p, + multicall_entry_t _mcl[8], *mcl = _mcl; + + /* we're going to use this soon, after a few expensive things */ +- if (next_p->fpu_counter>5) ++ if (next_p->fpu_counter > 5) + prefetch(next->xstate); + + /* +@@ -638,12 +666,12 @@ __switch_to(struct task_struct *prev_p, + if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) + BUG(); + +- /* ++ /* + * Switch DS and ES. + * This won't pick up thread selector changes, but I guess that is ok. + */ + if (unlikely(next->es)) +- loadsegment(es, next->es); ++ loadsegment(es, next->es); + + if (unlikely(next->ds)) + loadsegment(ds, next->ds); +@@ -657,7 +685,7 @@ __switch_to(struct task_struct *prev_p, + */ + arch_leave_lazy_cpu_mode(); + +- /* ++ /* + * Switch FS and GS. + * + * Segment register != 0 always requires a reload. Also +@@ -676,12 +704,12 @@ __switch_to(struct task_struct *prev_p, + if (next->gs) + WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs)); + +- /* ++ /* + * Switch the PDA context. + */ + prev->usersp = read_pda(oldrsp); + write_pda(oldrsp, next->usersp); +- write_pda(pcurrent, next_p); ++ write_pda(pcurrent, next_p); + write_pda(kernelstack, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - PDA_STACKOFFSET); +@@ -722,7 +750,7 @@ long sys_execve(char __user *name, char + char __user * __user *envp, struct pt_regs *regs) + { + long error; +- char * filename; ++ char *filename; + + filename = getname(name); + error = PTR_ERR(filename); +@@ -780,56 +808,56 @@ asmlinkage long sys_vfork(struct pt_regs + unsigned long get_wchan(struct task_struct *p) + { + unsigned long stack; +- u64 fp,ip; ++ u64 fp, ip; + int count = 0; + +- if (!p || p == current || p->state==TASK_RUNNING) +- return 0; ++ if (!p || p == current || p->state == TASK_RUNNING) ++ return 0; + stack = (unsigned long)task_stack_page(p); +- if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) ++ if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + return 0; + fp = *(u64 *)(p->thread.sp); +- do { ++ do { + if (fp < (unsigned long)stack || +- fp > (unsigned long)stack+THREAD_SIZE) +- return 0; ++ fp >= (unsigned long)stack+THREAD_SIZE) ++ return 0; + ip = *(u64 *)(fp+8); + if (!in_sched_functions(ip)) + return ip; +- fp = *(u64 *)fp; +- } while (count++ < 16); ++ fp = *(u64 *)fp; ++ } while (count++ < 16); + return 0; + } + + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) +-{ +- int ret = 0; ++{ ++ int ret = 0; + int doit = task == current; + int cpu; + +- switch (code) { ++ switch (code) { + case ARCH_SET_GS: + if (addr >= TASK_SIZE_OF(task)) +- return -EPERM; ++ return -EPERM; + cpu = get_cpu(); +- /* handle small bases via the GDT because that's faster to ++ /* handle small bases via the GDT because that's faster to + switch. 
*/ +- if (addr <= 0xffffffff) { +- set_32bit_tls(task, GS_TLS, addr); +- if (doit) { ++ if (addr <= 0xffffffff) { ++ set_32bit_tls(task, GS_TLS, addr); ++ if (doit) { + load_TLS(&task->thread, cpu); +- load_gs_index(GS_TLS_SEL); ++ load_gs_index(GS_TLS_SEL); + } +- task->thread.gsindex = GS_TLS_SEL; ++ task->thread.gsindex = GS_TLS_SEL; + task->thread.gs = 0; +- } else { ++ } else { + task->thread.gsindex = 0; + task->thread.gs = addr; + if (doit) { + load_gs_index(0); + ret = HYPERVISOR_set_segment_base( + SEGBASE_GS_USER, addr); +- } ++ } + } + put_cpu(); + break; +@@ -884,8 +912,7 @@ long do_arch_prctl(struct task_struct *t + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gs; +- } +- else ++ } else + base = task->thread.gs; + ret = put_user(base, (unsigned long __user *)addr); + break; +--- head-2010-01-18.orig/arch/x86/kernel/quirks-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/quirks-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -352,9 +352,27 @@ static void ati_force_hpet_resume(void) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + } + ++static u32 ati_ixp4x0_rev(struct pci_dev *dev) ++{ ++ u32 d; ++ u8 b; ++ ++ pci_read_config_byte(dev, 0xac, &b); ++ b &= ~(1<<5); ++ pci_write_config_byte(dev, 0xac, b); ++ pci_read_config_dword(dev, 0x70, &d); ++ d |= 1<<8; ++ pci_write_config_dword(dev, 0x70, d); ++ pci_read_config_dword(dev, 0x8, &d); ++ d &= 0xff; ++ dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d); ++ return d; ++} ++ + static void ati_force_enable_hpet(struct pci_dev *dev) + { +- u32 uninitialized_var(val); ++ u32 d, val; ++ u8 b; + + if (hpet_address || force_hpet_address) + return; +@@ -364,14 +382,33 @@ static void ati_force_enable_hpet(struct + return; + } + ++ d = ati_ixp4x0_rev(dev); ++ if (d < 0x82) ++ return; ++ ++ /* base address */ + pci_write_config_dword(dev, 0x14, 0xfed00000); + pci_read_config_dword(dev, 0x14, &val); ++ ++ /* enable interrupt */ ++ outb(0x72, 0xcd6); b = inb(0xcd7); ++ b |= 0x1; ++ outb(0x72, 0xcd6); outb(b, 0xcd7); ++ outb(0x72, 0xcd6); b = inb(0xcd7); ++ if (!(b & 0x1)) ++ return; ++ pci_read_config_dword(dev, 0x64, &d); ++ d |= (1<<10); ++ pci_write_config_dword(dev, 0x64, d); ++ pci_read_config_dword(dev, 0x64, &d); ++ if (!(d & (1<<10))) ++ return; ++ + force_hpet_address = val; + force_hpet_resume_type = ATI_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", + force_hpet_address); + cached_dev = dev; +- return; + } + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, + ati_force_enable_hpet); +--- head-2010-01-18.orig/arch/x86/kernel/setup-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -261,6 +261,9 @@ unsigned long saved_video_mode; + #define RAMDISK_LOAD_FLAG 0x4000 + + static char __initdata command_line[COMMAND_LINE_SIZE]; ++#ifdef CONFIG_CMDLINE_BOOL ++static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; ++#endif + + #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + struct edd edd; +@@ -339,7 +342,7 @@ static void __init relocate_initrd(void) + if (clen > MAX_MAP_CHUNK-slop) + clen = MAX_MAP_CHUNK-slop; + mapaddr = ramdisk_image & PAGE_MASK; +- p = early_ioremap(mapaddr, clen+slop); ++ p = early_memremap(mapaddr, clen+slop); + memcpy(q, p+slop, clen); + early_iounmap(p, clen+slop); + q += clen; +@@ -430,7 +433,7 @@ static void __init parse_setup_data(void + return; + pa_data = 
boot_params.hdr.setup_data; + while (pa_data) { +- data = early_ioremap(pa_data, PAGE_SIZE); ++ data = early_memremap(pa_data, PAGE_SIZE); + switch (data->type) { + case SETUP_E820_EXT: + parse_e820_ext(data, pa_data); +@@ -455,7 +458,7 @@ static void __init e820_reserve_setup_da + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { +- data = early_ioremap(pa_data, sizeof(*data)); ++ data = early_memremap(pa_data, sizeof(*data)); + e820_update_range(pa_data, sizeof(*data)+data->len, + E820_RAM, E820_RESERVED_KERN); + found = 1; +@@ -483,7 +486,7 @@ static void __init reserve_early_setup_d + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { +- data = early_ioremap(pa_data, sizeof(*data)); ++ data = early_memremap(pa_data, sizeof(*data)); + sprintf(buf, "setup data %x", data->type); + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); + pa_data = data->next; +@@ -625,7 +628,13 @@ static void __init reserve_standard_io_r + + } + +-#ifdef CONFIG_PROC_VMCORE ++/* ++ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by ++ * is_kdump_kernel() to determine if we are booting after a panic. Hence ++ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. ++ */ ++ ++#ifdef CONFIG_CRASH_DUMP + /* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. This option will be passed + * by kexec loader to the capture kernel. +@@ -646,6 +655,190 @@ static struct x86_quirks default_x86_qui + struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + + /* ++ * Some BIOSes seem to corrupt the low 64k of memory during events ++ * like suspend/resume and unplugging an HDMI cable. Reserve all ++ * remaining free memory in that area and fill it with a distinct ++ * pattern. ++ */ ++#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION ++#define MAX_SCAN_AREAS 8 ++ ++static int __read_mostly memory_corruption_check = -1; ++ ++static unsigned __read_mostly corruption_check_size = 64*1024; ++static unsigned __read_mostly corruption_check_period = 60; /* seconds */ ++ ++static struct e820entry scan_areas[MAX_SCAN_AREAS]; ++static int num_scan_areas; ++ ++ ++static int set_corruption_check(char *arg) ++{ ++ char *end; ++ ++ memory_corruption_check = simple_strtol(arg, &end, 10); ++ ++ return (*end == 0) ? 0 : -EINVAL; ++} ++early_param("memory_corruption_check", set_corruption_check); ++ ++static int set_corruption_check_period(char *arg) ++{ ++ char *end; ++ ++ corruption_check_period = simple_strtoul(arg, &end, 10); ++ ++ return (*end == 0) ? 0 : -EINVAL; ++} ++early_param("memory_corruption_check_period", set_corruption_check_period); ++ ++static int set_corruption_check_size(char *arg) ++{ ++ char *end; ++ unsigned size; ++ ++ size = memparse(arg, &end); ++ ++ if (*end == '\0') ++ corruption_check_size = size; ++ ++ return (size == corruption_check_size) ? 
0 : -EINVAL; ++} ++early_param("memory_corruption_check_size", set_corruption_check_size); ++ ++ ++static void __init setup_bios_corruption_check(void) ++{ ++ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ ++ ++ if (memory_corruption_check == -1) { ++ memory_corruption_check = ++#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK ++ 1 ++#else ++ 0 ++#endif ++ ; ++ } ++ ++ if (corruption_check_size == 0) ++ memory_corruption_check = 0; ++ ++ if (!memory_corruption_check) ++ return; ++ ++ corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); ++ ++ while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { ++ u64 size; ++ addr = find_e820_area_size(addr, &size, PAGE_SIZE); ++ ++ if (addr == 0) ++ break; ++ ++ if ((addr + size) > corruption_check_size) ++ size = corruption_check_size - addr; ++ ++ if (size == 0) ++ break; ++ ++ e820_update_range(addr, size, E820_RAM, E820_RESERVED); ++ scan_areas[num_scan_areas].addr = addr; ++ scan_areas[num_scan_areas].size = size; ++ num_scan_areas++; ++ ++ /* Assume we've already mapped this early memory */ ++ memset(__va(addr), 0, size); ++ ++ addr += size; ++ } ++ ++ printk(KERN_INFO "Scanning %d areas for low memory corruption\n", ++ num_scan_areas); ++ update_e820(); ++} ++ ++static struct timer_list periodic_check_timer; ++ ++void check_for_bios_corruption(void) ++{ ++ int i; ++ int corruption = 0; ++ ++ if (!memory_corruption_check) ++ return; ++ ++ for(i = 0; i < num_scan_areas; i++) { ++ unsigned long *addr = __va(scan_areas[i].addr); ++ unsigned long size = scan_areas[i].size; ++ ++ for(; size; addr++, size -= sizeof(unsigned long)) { ++ if (!*addr) ++ continue; ++ printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", ++ addr, __pa(addr), *addr); ++ corruption = 1; ++ *addr = 0; ++ } ++ } ++ ++ WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); ++} ++ ++static void periodic_check_for_corruption(unsigned long data) ++{ ++ check_for_bios_corruption(); ++ mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); ++} ++ ++void start_periodic_check_for_corruption(void) ++{ ++ if (!memory_corruption_check || corruption_check_period == 0) ++ return; ++ ++ printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", ++ corruption_check_period); ++ ++ init_timer(&periodic_check_timer); ++ periodic_check_timer.function = &periodic_check_for_corruption; ++ periodic_check_for_corruption(0); ++} ++#endif ++ ++static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) ++{ ++ printk(KERN_NOTICE ++ "%s detected: BIOS may corrupt low RAM, working it around.\n", ++ d->ident); ++ ++ e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); ++ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); ++ ++ return 0; ++} ++ ++/* List of systems that have known low memory corruption BIOS problems */ ++static struct dmi_system_id __initdata bad_bios_dmi_table[] = { ++#ifdef CONFIG_X86_RESERVE_LOW_64K ++ { ++ .callback = dmi_low_memory_corruption, ++ .ident = "AMI BIOS", ++ .matches = { ++ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), ++ }, ++ }, ++ { ++ .callback = dmi_low_memory_corruption, ++ .ident = "Phoenix BIOS", ++ .matches = { ++ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), ++ }, ++ }, ++#endif ++ {} ++}; ++ ++/* + * Determine if we were loaded by an EFI loader. If so, then we have also been + * passed the efi memmap, systab, etc., so we should use these data structures + * for initialization. 
Note, the efi init code path is determined by the +@@ -691,6 +884,9 @@ void __init setup_arch(char **cmdline_p) + printk(KERN_INFO "Command line: %s\n", boot_command_line); + #endif + ++ /* VMI may relocate the fixmap; do this before touching ioremap area */ ++ vmi_init(); ++ + early_cpu_init(); + early_ioremap_init(); + +@@ -785,6 +981,19 @@ void __init setup_arch(char **cmdline_p) + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; + ++#ifdef CONFIG_CMDLINE_BOOL ++#ifdef CONFIG_CMDLINE_OVERRIDE ++ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); ++#else ++ if (builtin_cmdline[0]) { ++ /* append boot loader cmdline to builtin */ ++ strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); ++ strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); ++ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); ++ } ++#endif ++#endif ++ + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + +@@ -794,13 +1003,8 @@ void __init setup_arch(char **cmdline_p) + check_efer(); + #endif + +-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) +- /* +- * Must be before kernel pagetables are setup +- * or fixmap area is touched. +- */ +- vmi_init(); +-#endif ++ /* Must be before kernel pagetables are setup */ ++ vmi_activate(); + + /* after early param, so could get panic from serial */ + reserve_early_setup_data(); +@@ -819,10 +1023,15 @@ void __init setup_arch(char **cmdline_p) + + finish_e820_parsing(); + ++ if (is_initial_xendomain()) { ++ dmi_scan_machine(); ++ ++ dmi_check_system(bad_bios_dmi_table); ++ + #ifdef CONFIG_X86_32 +- if (is_initial_xendomain()) + probe_roms(); + #endif ++ } + + #ifndef CONFIG_XEN + /* after parse_early_param, so could debug it */ +@@ -868,6 +1077,10 @@ void __init setup_arch(char **cmdline_p) + num_physpages = max_pfn; + max_mapnr = max_pfn; + ++#ifndef CONFIG_XEN ++ if (cpu_has_x2apic) ++ check_x2apic(); ++#endif + + /* How many end-of-memory variables you have, grandma! */ + /* need this before calling reserve_initrd */ +@@ -879,6 +1092,10 @@ void __init setup_arch(char **cmdline_p) + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + #endif + ++#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION ++ setup_bios_corruption_check(); ++#endif ++ + /* max_pfn_mapped is updated here */ + max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< + #include ++#include + + #include + #include +@@ -406,14 +407,9 @@ unsigned long profile_pc(struct pt_regs + unsigned long pc = instruction_pointer(regs); + + #if defined(CONFIG_SMP) || defined(__x86_64__) +-# ifdef __i386__ +- if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) +-# else +- if (!user_mode(regs) +-# endif +- && in_lock_functions(pc)) { ++ if (!user_mode_vm(regs) && in_lock_functions(pc)) { + # ifdef CONFIG_FRAME_POINTER +- return ((unsigned long *)regs->bp)[1]; ++ return *(unsigned long *)(regs->bp + sizeof(long)); + # else + # ifdef __i386__ + unsigned long *sp = (unsigned long *)®s->sp; +@@ -565,6 +561,7 @@ irqreturn_t timer_interrupt(int irq, voi + run_local_timers(); + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs())); ++ printk_tick(); + scheduler_tick(); + run_posix_cpu_timers(current); + profile_tick(CPU_PROFILING); +@@ -794,7 +791,8 @@ static void stop_hz_timer(void) + smp_mb(); + + /* Leave ourselves in tick mode if rcu or softirq or timer pending. 
*/ +- if (rcu_needs_cpu(cpu) || local_softirq_pending() || ++ if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || ++ local_softirq_pending() || + (j = get_next_timer_interrupt(jiffies), + time_before_eq(j, jiffies))) { + cpu_clear(cpu, nohz_cpu_mask); +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/kernel/traps-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -0,0 +1,1022 @@ ++/* ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs ++ * ++ * Pentium III FXSR, SSE support ++ * Gareth Hughes , May 2000 ++ */ ++ ++/* ++ * Handle hardware traps and faults. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_EISA ++#include ++#include ++#endif ++ ++#ifdef CONFIG_MCA ++#include ++#endif ++ ++#if defined(CONFIG_EDAC) ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifdef CONFIG_X86_64 ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "cpu/mcheck/mce.h" ++ ++#ifndef CONFIG_XEN ++DECLARE_BITMAP(used_vectors, NR_VECTORS); ++EXPORT_SYMBOL_GPL(used_vectors); ++#endif ++ ++asmlinkage int system_call(void); ++ ++/* Do we ignore FPU interrupts ? */ ++char ignore_fpu_irq; ++ ++#ifndef CONFIG_X86_NO_IDT ++/* ++ * The IDT has to be page-aligned to simplify the Pentium ++ * F0 0F bug workaround.. We have a special link segment ++ * for this. ++ */ ++gate_desc idt_table[256] ++ __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; ++#endif ++#endif ++ ++static int ignore_nmis; ++ ++static inline void conditional_sti(struct pt_regs *regs) ++{ ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_enable(); ++} ++ ++static inline void preempt_conditional_sti(struct pt_regs *regs) ++{ ++ inc_preempt_count(); ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_enable(); ++} ++ ++static inline void preempt_conditional_cli(struct pt_regs *regs) ++{ ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_disable(); ++ dec_preempt_count(); ++} ++ ++#ifdef CONFIG_X86_32 ++static inline void ++die_if_kernel(const char *str, struct pt_regs *regs, long err) ++{ ++ if (!user_mode_vm(regs)) ++ die(str, regs, err); ++} ++ ++/* ++ * Perform the lazy TSS's I/O bitmap copy. If the TSS has an ++ * invalid offset set (the LAZY one) and the faulting thread has ++ * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, ++ * we set the offset field correctly and return 1. ++ */ ++static int lazy_iobitmap_copy(void) ++{ ++#ifndef CONFIG_XEN ++ struct thread_struct *thread; ++ struct tss_struct *tss; ++ int cpu; ++ ++ cpu = get_cpu(); ++ tss = &per_cpu(init_tss, cpu); ++ thread = ¤t->thread; ++ ++ if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && ++ thread->io_bitmap_ptr) { ++ memcpy(tss->io_bitmap, thread->io_bitmap_ptr, ++ thread->io_bitmap_max); ++ /* ++ * If the previously set map was extending to higher ports ++ * than the current one, pad extra space with 0xff (no access). 
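++ * (Editorial worked example, not in the original patch: each
++ * bitmap byte covers 8 ports and a 1 bit means "no access". If
++ * the map previously loaded in the TSS covered ports 0-1023
++ * (128 bytes) while the faulting thread's map covers only ports
++ * 0-255 (32 bytes), the memset below fills bytes 32-127 with
++ * 0xff so accesses to ports 256-1023 still fault.)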
++ */ ++ if (thread->io_bitmap_max < tss->io_bitmap_max) { ++ memset((char *) tss->io_bitmap + ++ thread->io_bitmap_max, 0xff, ++ tss->io_bitmap_max - thread->io_bitmap_max); ++ } ++ tss->io_bitmap_max = thread->io_bitmap_max; ++ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; ++ tss->io_bitmap_owner = thread; ++ put_cpu(); ++ ++ return 1; ++ } ++ put_cpu(); ++#endif ++ ++ return 0; ++} ++#endif ++ ++static void __kprobes ++do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, ++ long error_code, siginfo_t *info) ++{ ++ struct task_struct *tsk = current; ++ ++#ifdef CONFIG_X86_32 ++ if (regs->flags & X86_VM_MASK) { ++ /* ++ * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. ++ * On nmi (interrupt 2), do_trap should not be called. ++ */ ++ if (trapnr < 6) ++ goto vm86_trap; ++ goto trap_signal; ++ } ++#endif ++ ++ if (!user_mode(regs)) ++ goto kernel_trap; ++ ++#ifdef CONFIG_X86_32 ++trap_signal: ++#endif ++ /* ++ * We want error_code and trap_no set for userspace faults and ++ * kernelspace faults which result in die(), but not ++ * kernelspace faults which are fixed up. die() gives the ++ * process no chance to handle the signal and notice the ++ * kernel fault information, so that won't result in polluting ++ * the information about previously queued, but not yet ++ * delivered, faults. See also do_general_protection below. ++ */ ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = trapnr; ++ ++#ifdef CONFIG_X86_64 ++ if (show_unhandled_signals && unhandled_signal(tsk, signr) && ++ printk_ratelimit()) { ++ printk(KERN_INFO ++ "%s[%d] trap %s ip:%lx sp:%lx error:%lx", ++ tsk->comm, tsk->pid, str, ++ regs->ip, regs->sp, error_code); ++ print_vma_addr(" in ", regs->ip); ++ printk("\n"); ++ } ++#endif ++ ++ if (info) ++ force_sig_info(signr, info, tsk); ++ else ++ force_sig(signr, tsk); ++ return; ++ ++kernel_trap: ++ if (!fixup_exception(regs)) { ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = trapnr; ++ die(str, regs, error_code); ++ } ++ return; ++ ++#ifdef CONFIG_X86_32 ++vm86_trap: ++ if (handle_vm86_trap((struct kernel_vm86_regs *) regs, ++ error_code, trapnr)) ++ goto trap_signal; ++ return; ++#endif ++} ++ ++#define DO_ERROR(trapnr, signr, str, name) \ ++dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ ++{ \ ++ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ ++ == NOTIFY_STOP) \ ++ return; \ ++ conditional_sti(regs); \ ++ do_trap(trapnr, signr, str, regs, error_code, NULL); \ ++} ++ ++#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ ++dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ ++{ \ ++ siginfo_t info; \ ++ info.si_signo = signr; \ ++ info.si_errno = 0; \ ++ info.si_code = sicode; \ ++ info.si_addr = (void __user *)siaddr; \ ++ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ ++ == NOTIFY_STOP) \ ++ return; \ ++ conditional_sti(regs); \ ++ do_trap(trapnr, signr, str, regs, error_code, &info); \ ++} ++ ++DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) ++DO_ERROR(4, SIGSEGV, "overflow", overflow) ++DO_ERROR(5, SIGSEGV, "bounds", bounds) ++DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) ++DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) ++DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) ++DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) ++#ifdef CONFIG_X86_32 ++DO_ERROR(12, SIGBUS, "stack segment", stack_segment) ++#endif ++DO_ERROR_INFO(17, 
SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) ++ ++#ifdef CONFIG_X86_64 ++/* Runs on IST stack */ ++dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) ++{ ++ if (notify_die(DIE_TRAP, "stack segment", regs, error_code, ++ 12, SIGBUS) == NOTIFY_STOP) ++ return; ++ preempt_conditional_sti(regs); ++ do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); ++ preempt_conditional_cli(regs); ++} ++ ++dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) ++{ ++ static const char str[] = "double fault"; ++ struct task_struct *tsk = current; ++ ++ /* Return not checked because double check cannot be ignored */ ++ notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); ++ ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 8; ++ ++ /* This is always a kernel trap and never fixable (and thus must ++ never return). */ ++ for (;;) ++ die(str, regs, error_code); ++} ++#endif ++ ++dotraplinkage void __kprobes ++do_general_protection(struct pt_regs *regs, long error_code) ++{ ++ struct task_struct *tsk; ++ ++ conditional_sti(regs); ++ ++#ifdef CONFIG_X86_32 ++ if (lazy_iobitmap_copy()) { ++ /* restart the faulting instruction */ ++ return; ++ } ++ ++ if (regs->flags & X86_VM_MASK) ++ goto gp_in_vm86; ++#endif ++ ++ tsk = current; ++ if (!user_mode(regs)) ++ goto gp_in_kernel; ++ ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 13; ++ ++ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && ++ printk_ratelimit()) { ++ printk(KERN_INFO ++ "%s[%d] general protection ip:%lx sp:%lx error:%lx", ++ tsk->comm, task_pid_nr(tsk), ++ regs->ip, regs->sp, error_code); ++ print_vma_addr(" in ", regs->ip); ++ printk("\n"); ++ } ++ ++ force_sig(SIGSEGV, tsk); ++ return; ++ ++#ifdef CONFIG_X86_32 ++gp_in_vm86: ++ local_irq_enable(); ++ handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); ++ return; ++#endif ++ ++gp_in_kernel: ++ if (fixup_exception(regs)) ++ return; ++ ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 13; ++ if (notify_die(DIE_GPF, "general protection fault", regs, ++ error_code, 13, SIGSEGV) == NOTIFY_STOP) ++ return; ++ die("general protection fault", regs, error_code); ++} ++ ++static notrace __kprobes void ++mem_parity_error(unsigned char reason, struct pt_regs *regs) ++{ ++ printk(KERN_EMERG ++ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", ++ reason, smp_processor_id()); ++ ++ printk(KERN_EMERG ++ "You have some hardware problem, likely on the PCI bus.\n"); ++ ++#if defined(CONFIG_EDAC) ++ if (edac_handler_set()) { ++ edac_atomic_assert_error(); ++ return; ++ } ++#endif ++ ++ if (panic_on_unrecovered_nmi) ++ panic("NMI: Not continuing"); ++ ++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); ++ ++ /* Clear and disable the memory parity error line. 
*/ ++ clear_mem_error(reason); ++} ++ ++static notrace __kprobes void ++io_check_error(unsigned char reason, struct pt_regs *regs) ++{ ++ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); ++ show_registers(regs); ++ ++ /* Re-enable the IOCK line, wait for a few seconds */ ++ clear_io_check_error(reason); ++} ++ ++static notrace __kprobes void ++unknown_nmi_error(unsigned char reason, struct pt_regs *regs) ++{ ++ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == ++ NOTIFY_STOP) ++ return; ++#ifdef CONFIG_MCA ++ /* ++ * Might actually be able to figure out what the guilty party ++ * is: ++ */ ++ if (MCA_bus) { ++ mca_handle_nmi(); ++ return; ++ } ++#endif ++ printk(KERN_EMERG ++ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", ++ reason, smp_processor_id()); ++ ++ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); ++ if (panic_on_unrecovered_nmi) ++ panic("NMI: Not continuing"); ++ ++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); ++} ++ ++static notrace __kprobes void default_do_nmi(struct pt_regs *regs) ++{ ++ unsigned char reason = 0; ++ int cpu; ++ ++ cpu = smp_processor_id(); ++ ++ /* Only the BSP gets external NMIs from the system. */ ++ if (!cpu) ++ reason = get_nmi_reason(); ++ ++ if (!(reason & 0xc0)) { ++ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) ++ == NOTIFY_STOP) ++ return; ++#ifdef CONFIG_X86_LOCAL_APIC ++ /* ++ * Ok, so this is none of the documented NMI sources, ++ * so it must be the NMI watchdog. ++ */ ++ if (nmi_watchdog_tick(regs, reason)) ++ return; ++ if (!do_nmi_callback(regs, cpu)) ++ unknown_nmi_error(reason, regs); ++#else ++ unknown_nmi_error(reason, regs); ++#endif ++ ++ return; ++ } ++ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) ++ return; ++ ++ /* AK: following checks seem to be broken on modern chipsets. FIXME */ ++ if (reason & 0x80) ++ mem_parity_error(reason, regs); ++ if (reason & 0x40) ++ io_check_error(reason, regs); ++#ifdef CONFIG_X86_32 ++ /* ++ * Reassert NMI in case it became active meanwhile ++ * as it's edge-triggered: ++ */ ++ reassert_nmi(); ++#endif ++} ++ ++dotraplinkage notrace __kprobes void ++do_nmi(struct pt_regs *regs, long error_code) ++{ ++ nmi_enter(); ++ ++#ifdef CONFIG_X86_32 ++ { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } ++#else ++ add_pda(__nmi_count, 1); ++#endif ++ ++ if (!ignore_nmis) ++ default_do_nmi(regs); ++ ++ nmi_exit(); ++} ++ ++void stop_nmi(void) ++{ ++ acpi_nmi_disable(); ++ ignore_nmis++; ++} ++ ++void restart_nmi(void) ++{ ++ ignore_nmis--; ++ acpi_nmi_enable(); ++} ++ ++/* May run on IST stack. */ ++dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) ++{ ++#ifdef CONFIG_KPROBES ++ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) ++ == NOTIFY_STOP) ++ return; ++#else ++ if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) ++ == NOTIFY_STOP) ++ return; ++#endif ++ ++ preempt_conditional_sti(regs); ++ do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); ++ preempt_conditional_cli(regs); ++} ++ ++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) ++/* Help handler running on IST stack to switch back to user stack ++ for scheduling or signal handling. 
The actual stack switch is done in ++ entry.S */ ++asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) ++{ ++ struct pt_regs *regs = eregs; ++ /* Did already sync */ ++ if (eregs == (struct pt_regs *)eregs->sp) ++ ; ++ /* Exception from user space */ ++ else if (user_mode(eregs)) ++ regs = task_pt_regs(current); ++ /* Exception from kernel and interrupts are enabled. Move to ++ kernel process stack. */ ++ else if (eregs->flags & X86_EFLAGS_IF) ++ regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); ++ if (eregs != regs) ++ *regs = *eregs; ++ return regs; ++} ++#endif ++ ++/* ++ * Our handling of the processor debug registers is non-trivial. ++ * We do not clear them on entry and exit from the kernel. Therefore ++ * it is possible to get a watchpoint trap here from inside the kernel. ++ * However, the code in ./ptrace.c has ensured that the user can ++ * only set watchpoints on userspace addresses. Therefore the in-kernel ++ * watchpoint trap can only occur in code which is reading/writing ++ * from user space. Such code must not hold kernel locks (since it ++ * can equally take a page fault), therefore it is safe to call ++ * force_sig_info even though that claims and releases locks. ++ * ++ * Code in ./signal.c ensures that the debug control register ++ * is restored before we deliver any signal, and therefore that ++ * user code runs with the correct debug control register even though ++ * we clear it here. ++ * ++ * Being careful here means that we don't have to be as careful in a ++ * lot of more complicated places (task switching can be a bit lazy ++ * about restoring all the debug state, and ptrace doesn't have to ++ * find every occurrence of the TF bit that could be saved away even ++ * by user code) ++ * ++ * May run on IST stack. ++ */ ++dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) ++{ ++ struct task_struct *tsk = current; ++ unsigned long condition; ++ int si_code; ++ ++ get_debugreg(condition, 6); ++ ++ /* ++ * The processor cleared BTF, so don't mark that we need it set. ++ */ ++ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); ++ tsk->thread.debugctlmsr = 0; ++ ++ if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, ++ SIGTRAP) == NOTIFY_STOP) ++ return; ++ ++ /* It's safe to allow irq's after DR6 has been saved */ ++ preempt_conditional_sti(regs); ++ ++ /* Mask out spurious debug traps due to lazy DR7 setting */ ++ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { ++ if (!tsk->thread.debugreg7) ++ goto clear_dr7; ++ } ++ ++#ifdef CONFIG_X86_32 ++ if (regs->flags & X86_VM_MASK) ++ goto debug_vm86; ++#endif ++ ++ /* Save debug status register where ptrace can see it */ ++ tsk->thread.debugreg6 = condition; ++ ++ /* ++ * Single-stepping through TF: make sure we ignore any events in ++ * kernel space (but re-enable TF when returning to user mode). ++ */ ++ if (condition & DR_STEP) { ++ if (!user_mode(regs)) ++ goto clear_TF_reenable; ++ } ++ ++ si_code = get_si_code(condition); ++ /* Ok, finally something we can handle */ ++ send_sigtrap(tsk, regs, error_code, si_code); ++ ++ /* ++ * Disable additional traps. They'll be re-enabled when ++ * the signal is delivered. 
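++ * (Editorial note, not in the original patch: set_debugreg(0, 7)
++ * below zeroes DR7, i.e. clears every breakpoint-enable bit, so
++ * all four hardware watchpoints stay off; as described above,
++ * ./signal.c restores the debug control state before the signal
++ * reaches user code.)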
++ */ ++clear_dr7: ++ set_debugreg(0, 7); ++ preempt_conditional_cli(regs); ++ return; ++ ++#ifdef CONFIG_X86_32 ++debug_vm86: ++ handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); ++ preempt_conditional_cli(regs); ++ return; ++#endif ++ ++clear_TF_reenable: ++ set_tsk_thread_flag(tsk, TIF_SINGLESTEP); ++ regs->flags &= ~X86_EFLAGS_TF; ++ preempt_conditional_cli(regs); ++ return; ++} ++ ++#ifdef CONFIG_X86_64 ++static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) ++{ ++ if (fixup_exception(regs)) ++ return 1; ++ ++ notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); ++ /* Illegal floating point operation in the kernel */ ++ current->thread.trap_no = trapnr; ++ die(str, regs, 0); ++ return 0; ++} ++#endif ++ ++/* ++ * Note that we play around with the 'TS' bit in an attempt to get ++ * the correct behaviour even in the presence of the asynchronous ++ * IRQ13 behaviour ++ */ ++void math_error(void __user *ip) ++{ ++ struct task_struct *task; ++ siginfo_t info; ++ unsigned short cwd, swd; ++ ++ /* ++ * Save the info for the exception handler and clear the error. ++ */ ++ task = current; ++ save_init_fpu(task); ++ task->thread.trap_no = 16; ++ task->thread.error_code = 0; ++ info.si_signo = SIGFPE; ++ info.si_errno = 0; ++ info.si_code = __SI_FAULT; ++ info.si_addr = ip; ++ /* ++ * (~cwd & swd) will mask out exceptions that are not set to unmasked ++ * status. 0x3f is the exception bits in these regs, 0x200 is the ++ * C1 reg you need in case of a stack fault, 0x040 is the stack ++ * fault bit. We should only be taking one exception at a time, ++ * so if this combination doesn't produce any single exception, ++ * then we have a bad program that isn't synchronizing its FPU usage ++ * and it will suffer the consequences since we won't be able to ++ * fully reproduce the context of the exception ++ */ ++ cwd = get_fpu_cwd(task); ++ swd = get_fpu_swd(task); ++ switch (swd & ~cwd & 0x3f) { ++ case 0x000: /* No unmasked exception */ ++#ifdef CONFIG_X86_32 ++ return; ++#endif ++ default: /* Multiple exceptions */ ++ break; ++ case 0x001: /* Invalid Op */ ++ /* ++ * swd & 0x240 == 0x040: Stack Underflow ++ * swd & 0x240 == 0x240: Stack Overflow ++ * User must clear the SF bit (0x40) if set ++ */ ++ info.si_code = FPE_FLTINV; ++ break; ++ case 0x002: /* Denormalize */ ++ case 0x010: /* Underflow */ ++ info.si_code = FPE_FLTUND; ++ break; ++ case 0x004: /* Zero Divide */ ++ info.si_code = FPE_FLTDIV; ++ break; ++ case 0x008: /* Overflow */ ++ info.si_code = FPE_FLTOVF; ++ break; ++ case 0x020: /* Precision */ ++ info.si_code = FPE_FLTRES; ++ break; ++ } ++ force_sig_info(SIGFPE, &info, task); ++} ++ ++dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) ++{ ++ conditional_sti(regs); ++ ++#ifdef CONFIG_X86_32 ++ ignore_fpu_irq = 1; ++#else ++ if (!user_mode(regs) && ++ kernel_math_error(regs, "kernel x87 math error", 16)) ++ return; ++#endif ++ ++ math_error((void __user *)regs->ip); ++} ++ ++static void simd_math_error(void __user *ip) ++{ ++ struct task_struct *task; ++ siginfo_t info; ++ unsigned short mxcsr; ++ ++ /* ++ * Save the info for the exception handler and clear the error. ++ */ ++ task = current; ++ save_init_fpu(task); ++ task->thread.trap_no = 19; ++ task->thread.error_code = 0; ++ info.si_signo = SIGFPE; ++ info.si_errno = 0; ++ info.si_code = __SI_FAULT; ++ info.si_addr = ip; ++ /* ++ * The SIMD FPU exceptions are handled a little differently, as there ++ * is only a single status/control register. 
Thus, to determine which ++ * unmasked exception was caught we must mask the exception mask bits ++ * at 0x1f80, and then use these to mask the exception bits at 0x3f. ++ */ ++ mxcsr = get_fpu_mxcsr(task); ++ switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { ++ case 0x000: ++ default: ++ break; ++ case 0x001: /* Invalid Op */ ++ info.si_code = FPE_FLTINV; ++ break; ++ case 0x002: /* Denormalize */ ++ case 0x010: /* Underflow */ ++ info.si_code = FPE_FLTUND; ++ break; ++ case 0x004: /* Zero Divide */ ++ info.si_code = FPE_FLTDIV; ++ break; ++ case 0x008: /* Overflow */ ++ info.si_code = FPE_FLTOVF; ++ break; ++ case 0x020: /* Precision */ ++ info.si_code = FPE_FLTRES; ++ break; ++ } ++ force_sig_info(SIGFPE, &info, task); ++} ++ ++dotraplinkage void ++do_simd_coprocessor_error(struct pt_regs *regs, long error_code) ++{ ++ conditional_sti(regs); ++ ++#ifdef CONFIG_X86_32 ++ if (cpu_has_xmm) { ++ /* Handle SIMD FPU exceptions on PIII+ processors. */ ++ ignore_fpu_irq = 1; ++ simd_math_error((void __user *)regs->ip); ++ return; ++ } ++ /* ++ * Handle strange cache flush from user space exception ++ * in all other cases. This is undocumented behaviour. ++ */ ++ if (regs->flags & X86_VM_MASK) { ++ handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); ++ return; ++ } ++ current->thread.trap_no = 19; ++ current->thread.error_code = error_code; ++ die_if_kernel("cache flush denied", regs, error_code); ++ force_sig(SIGSEGV, current); ++#else ++ if (!user_mode(regs) && ++ kernel_math_error(regs, "kernel simd math error", 19)) ++ return; ++ simd_math_error((void __user *)regs->ip); ++#endif ++} ++ ++#ifndef CONFIG_XEN ++dotraplinkage void ++do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) ++{ ++ conditional_sti(regs); ++#if 0 ++ /* No need to warn about this any longer. */ ++ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); ++#endif ++} ++ ++#ifdef CONFIG_X86_32 ++unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) ++{ ++ struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); ++ unsigned long base = (kesp - uesp) & -THREAD_SIZE; ++ unsigned long new_kesp = kesp - base; ++ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; ++ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; ++ ++ /* Set up base for espfix segment */ ++ desc &= 0x00f0ff0000000000ULL; ++ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | ++ ((((__u64)base) << 32) & 0xff00000000000000ULL) | ++ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | ++ (lim_pages & 0xffff); ++ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; ++ ++ return new_kesp; ++} ++#else ++asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) ++{ ++} ++ ++asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) ++{ ++} ++#endif ++#endif /* CONFIG_XEN */ ++ ++/* ++ * 'math_state_restore()' saves the current math information in the ++ * old math state array, and gets the new ones from the current task ++ * ++ * Careful.. There are problems with IBM-designed IRQ13 behaviour. ++ * Don't touch unless you *really* know how it works. ++ * ++ * Must be called with kernel preemption disabled (in this case, ++ * local interrupts are disabled at the call-site in entry.S). 
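++ * (Editorial note, not in the original patch: on native x86 this
++ * #NM handler would have to execute 'clts' to clear CR0.TS before
++ * touching FPU state; under Xen the hypervisor clears TS while
++ * delivering the virtual trap, which is why no clts appears in
++ * the body below.)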
++ */ ++asmlinkage void math_state_restore(void) ++{ ++ struct thread_info *thread = current_thread_info(); ++ struct task_struct *tsk = thread->task; ++ ++ if (!tsk_used_math(tsk)) { ++ local_irq_enable(); ++ /* ++ * does a slab alloc which can sleep ++ */ ++ if (init_fpu(tsk)) { ++ /* ++ * ran out of memory! ++ */ ++ do_group_exit(SIGKILL); ++ return; ++ } ++ local_irq_disable(); ++ } ++ ++ /* NB. 'clts' is done for us by Xen during virtual trap. */ ++#ifdef CONFIG_X86_32 ++ restore_fpu(tsk); ++#else ++ /* ++ * Paranoid restore. send a SIGSEGV if we fail to restore the state. ++ */ ++ if (unlikely(restore_fpu_checking(tsk))) { ++ stts(); ++ force_sig(SIGSEGV, tsk); ++ return; ++ } ++#endif ++ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ ++ tsk->fpu_counter++; ++} ++EXPORT_SYMBOL_GPL(math_state_restore); ++ ++#ifndef CONFIG_MATH_EMULATION ++asmlinkage void math_emulate(long arg) ++{ ++ printk(KERN_EMERG ++ "math-emulation not enabled and no coprocessor found.\n"); ++ printk(KERN_EMERG "killing %s.\n", current->comm); ++ force_sig(SIGFPE, current); ++ schedule(); ++} ++#endif /* CONFIG_MATH_EMULATION */ ++ ++dotraplinkage void __kprobes ++do_device_not_available(struct pt_regs *regs, long error) ++{ ++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) ++ if (read_cr0() & X86_CR0_EM) { ++ conditional_sti(regs); ++ math_emulate(0); ++ } else { ++ math_state_restore(); /* interrupts still off */ ++ conditional_sti(regs); ++ } ++#else ++ math_state_restore(); ++#endif ++} ++ ++#ifdef CONFIG_X86_32 ++dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) ++{ ++ siginfo_t info; ++ local_irq_enable(); ++ ++ info.si_signo = SIGILL; ++ info.si_errno = 0; ++ info.si_code = ILL_BADSTK; ++ info.si_addr = 0; ++ if (notify_die(DIE_TRAP, "iret exception", ++ regs, error_code, 32, SIGILL) == NOTIFY_STOP) ++ return; ++ do_trap(32, SIGILL, "iret exception", regs, error_code, &info); ++} ++#endif ++ ++/* ++ * NB. All these are "trap gates" (i.e. events_mask isn't set) except ++ * for those that specify |4 in the second field. 
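++ * (Editorial decoding aid, not in the original patch, assuming
++ * Xen's trap_info_t flag layout with the DPL in bits 0-1 and
++ * bit 2 requesting that event delivery be masked on entry: the
++ * int3 entry "3|4" means callable from ring 3 with events
++ * masked, while "0|X" gives interrupt-gate behaviour on 64-bit
++ * (X=4) and a plain trap gate on 32-bit (X=0).)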
++ */ ++static const trap_info_t __cpuinitconst trap_table[] = { ++#ifdef CONFIG_X86_32 ++#define X 0 ++#else ++#define X 4 ++#endif ++ { 0, 0|X, __KERNEL_CS, (unsigned long)divide_error }, ++ { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, ++ { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, ++ { 4, 3|X, __KERNEL_CS, (unsigned long)overflow }, ++ { 5, 0|X, __KERNEL_CS, (unsigned long)bounds }, ++ { 6, 0|X, __KERNEL_CS, (unsigned long)invalid_op }, ++ { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available }, ++ { 9, 0|X, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, ++ { 10, 0|X, __KERNEL_CS, (unsigned long)invalid_TSS }, ++ { 11, 0|X, __KERNEL_CS, (unsigned long)segment_not_present }, ++ { 12, 0|X, __KERNEL_CS, (unsigned long)stack_segment }, ++ { 13, 0|X, __KERNEL_CS, (unsigned long)general_protection }, ++ { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault }, ++ { 16, 0|X, __KERNEL_CS, (unsigned long)coprocessor_error }, ++ { 17, 0|X, __KERNEL_CS, (unsigned long)alignment_check }, ++#ifdef CONFIG_X86_MCE ++ { 18, 0|X, __KERNEL_CS, (unsigned long)machine_check }, ++#endif ++ { 19, 0|X, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, ++#ifdef CONFIG_X86_32 ++ { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment }, ++ { SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)system_call }, ++#elif defined(CONFIG_IA32_EMULATION) ++ { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall }, ++#endif ++ { 0, 0, 0, 0 } ++}; ++ ++void __init trap_init(void) ++{ ++ int ret; ++ ++ ret = HYPERVISOR_set_trap_table(trap_table); ++ if (ret) ++ printk("HYPERVISOR_set_trap_table failed (%d)\n", ret); ++ ++#ifdef CONFIG_X86_32 ++ if (cpu_has_fxsr) { ++ printk(KERN_INFO "Enabling fast FPU save and restore... "); ++ set_in_cr4(X86_CR4_OSFXSR); ++ printk("done.\n"); ++ } ++ if (cpu_has_xmm) { ++ printk(KERN_INFO ++ "Enabling unmasked SIMD FPU exception support... "); ++ set_in_cr4(X86_CR4_OSXMMEXCPT); ++ printk("done.\n"); ++ } ++ ++#endif ++ /* ++ * Should be a barrier for any external CPU state: ++ */ ++ cpu_init(); ++} ++ ++void __cpuinit smp_trap_init(trap_info_t *trap_ctxt) ++{ ++ const trap_info_t *t = trap_table; ++ ++ for (t = trap_table; t->address; t++) { ++ trap_ctxt[t->vector].flags = t->flags; ++ trap_ctxt[t->vector].cs = t->cs; ++ trap_ctxt[t->vector].address = t->address; ++ } ++} +--- head-2010-01-18.orig/arch/x86/kernel/traps_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,1222 +0,0 @@ +-/* +- * Copyright (C) 1991, 1992 Linus Torvalds +- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs +- * +- * Pentium III FXSR, SSE support +- * Gareth Hughes , May 2000 +- */ +- +-/* +- * 'Traps.c' handles hardware traps and faults after we have saved some +- * state in 'asm.s'. +- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_EISA +-#include +-#include +-#endif +- +-#ifdef CONFIG_MCA +-#include +-#endif +- +-#if defined(CONFIG_EDAC) +-#include +-#endif +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "mach_traps.h" +- +-#ifndef CONFIG_XEN +-DECLARE_BITMAP(used_vectors, NR_VECTORS); +-EXPORT_SYMBOL_GPL(used_vectors); +-#endif +- +-asmlinkage int system_call(void); +- +-/* Do we ignore FPU interrupts ? 
*/ +-char ignore_fpu_irq; +- +-#ifndef CONFIG_X86_NO_IDT +-/* +- * The IDT has to be page-aligned to simplify the Pentium +- * F0 0F bug workaround.. We have a special link segment +- * for this. +- */ +-gate_desc idt_table[256] +- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; +-#endif +- +-int panic_on_unrecovered_nmi; +-int kstack_depth_to_print = 24; +-static unsigned int code_bytes = 64; +-static int ignore_nmis; +-static int die_counter; +- +-void printk_address(unsigned long address, int reliable) +-{ +-#ifdef CONFIG_KALLSYMS +- unsigned long offset = 0; +- unsigned long symsize; +- const char *symname; +- char *modname; +- char *delim = ":"; +- char namebuf[KSYM_NAME_LEN]; +- char reliab[4] = ""; +- +- symname = kallsyms_lookup(address, &symsize, &offset, +- &modname, namebuf); +- if (!symname) { +- printk(" [<%08lx>]\n", address); +- return; +- } +- if (!reliable) +- strcpy(reliab, "? "); +- +- if (!modname) +- modname = delim = ""; +- printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", +- address, reliab, delim, modname, delim, symname, offset, symsize); +-#else +- printk(" [<%08lx>]\n", address); +-#endif +-} +- +-static inline int valid_stack_ptr(struct thread_info *tinfo, +- void *p, unsigned int size) +-{ +- void *t = tinfo; +- return p > t && p <= t + THREAD_SIZE - size; +-} +- +-/* The form of the top of the frame on the stack */ +-struct stack_frame { +- struct stack_frame *next_frame; +- unsigned long return_address; +-}; +- +-static inline unsigned long +-print_context_stack(struct thread_info *tinfo, +- unsigned long *stack, unsigned long bp, +- const struct stacktrace_ops *ops, void *data) +-{ +- struct stack_frame *frame = (struct stack_frame *)bp; +- +- while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { +- unsigned long addr; +- +- addr = *stack; +- if (__kernel_text_address(addr)) { +- if ((unsigned long) stack == bp + 4) { +- ops->address(data, addr, 1); +- frame = frame->next_frame; +- bp = (unsigned long) frame; +- } else { +- ops->address(data, addr, bp == 0); +- } +- } +- stack++; +- } +- return bp; +-} +- +-void dump_trace(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, unsigned long bp, +- const struct stacktrace_ops *ops, void *data) +-{ +- if (!task) +- task = current; +- +- if (!stack) { +- unsigned long dummy; +- stack = &dummy; +- if (task != current) +- stack = (unsigned long *)task->thread.sp; +- } +- +-#ifdef CONFIG_FRAME_POINTER +- if (!bp) { +- if (task == current) { +- /* Grab bp right from our regs */ +- asm("movl %%ebp, %0" : "=r" (bp) :); +- } else { +- /* bp is the last reg pushed by switch_to */ +- bp = *(unsigned long *) task->thread.sp; +- } +- } +-#endif +- +- for (;;) { +- struct thread_info *context; +- +- context = (struct thread_info *) +- ((unsigned long)stack & (~(THREAD_SIZE - 1))); +- bp = print_context_stack(context, stack, bp, ops, data); +- /* +- * Should be after the line below, but somewhere +- * in early boot context comes out corrupted and we +- * can't reference it: +- */ +- if (ops->stack(data, "IRQ") < 0) +- break; +- stack = (unsigned long *)context->previous_esp; +- if (!stack) +- break; +- touch_nmi_watchdog(); +- } +-} +-EXPORT_SYMBOL(dump_trace); +- +-static void +-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +-{ +- printk(data); +- print_symbol(msg, symbol); +- printk("\n"); +-} +- +-static void print_trace_warning(void *data, char *msg) +-{ +- printk("%s%s\n", (char *)data, msg); +-} +- +-static int print_trace_stack(void *data, char *name) +-{ 
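+- /* (Editorial note, not in the original patch: returning 0 lets
+-  * dump_trace() keep walking; a negative return from ops->stack
+-  * would stop the backtrace at this stack transition.) */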
+- return 0; +-} +- +-/* +- * Print one address/symbol entries per line. +- */ +-static void print_trace_address(void *data, unsigned long addr, int reliable) +-{ +- printk("%s [<%08lx>] ", (char *)data, addr); +- if (!reliable) +- printk("? "); +- print_symbol("%s\n", addr); +- touch_nmi_watchdog(); +-} +- +-static const struct stacktrace_ops print_trace_ops = { +- .warning = print_trace_warning, +- .warning_symbol = print_trace_warning_symbol, +- .stack = print_trace_stack, +- .address = print_trace_address, +-}; +- +-static void +-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, unsigned long bp, char *log_lvl) +-{ +- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +- printk("%s =======================\n", log_lvl); +-} +- +-void show_trace(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, unsigned long bp) +-{ +- show_trace_log_lvl(task, regs, stack, bp, ""); +-} +- +-static void +-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, +- unsigned long *sp, unsigned long bp, char *log_lvl) +-{ +- unsigned long *stack; +- int i; +- +- if (sp == NULL) { +- if (task) +- sp = (unsigned long *)task->thread.sp; +- else +- sp = (unsigned long *)&sp; +- } +- +- stack = sp; +- for (i = 0; i < kstack_depth_to_print; i++) { +- if (kstack_end(stack)) +- break; +- if (i && ((i % 8) == 0)) +- printk("\n%s ", log_lvl); +- printk("%08lx ", *stack++); +- } +- printk("\n%sCall Trace:\n", log_lvl); +- +- show_trace_log_lvl(task, regs, sp, bp, log_lvl); +-} +- +-void show_stack(struct task_struct *task, unsigned long *sp) +-{ +- printk(" "); +- show_stack_log_lvl(task, NULL, sp, 0, ""); +-} +- +-/* +- * The architecture-independent dump_stack generator +- */ +-void dump_stack(void) +-{ +- unsigned long bp = 0; +- unsigned long stack; +- +-#ifdef CONFIG_FRAME_POINTER +- if (!bp) +- asm("movl %%ebp, %0" : "=r" (bp):); +-#endif +- +- printk("Pid: %d, comm: %.20s %s %s %.*s\n", +- current->pid, current->comm, print_tainted(), +- init_utsname()->release, +- (int)strcspn(init_utsname()->version, " "), +- init_utsname()->version); +- +- show_trace(current, NULL, &stack, bp); +-} +- +-EXPORT_SYMBOL(dump_stack); +- +-void show_registers(struct pt_regs *regs) +-{ +- int i; +- +- print_modules(); +- __show_registers(regs, 0); +- +- printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", +- TASK_COMM_LEN, current->comm, task_pid_nr(current), +- current_thread_info(), current, task_thread_info(current)); +- /* +- * When in-kernel, we also print out the stack and code at the +- * time of the fault.. 
+- */
+- if (!user_mode_vm(regs)) {
+- unsigned int code_prologue = code_bytes * 43 / 64;
+- unsigned int code_len = code_bytes;
+- unsigned char c;
+- u8 *ip;
+-
+- printk("\n" KERN_EMERG "Stack: ");
+- show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
+-
+- printk(KERN_EMERG "Code: ");
+-
+- ip = (u8 *)regs->ip - code_prologue;
+- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+- /* try starting at EIP */
+- ip = (u8 *)regs->ip;
+- code_len = code_len - code_prologue + 1;
+- }
+- for (i = 0; i < code_len; i++, ip++) {
+- if (ip < (u8 *)PAGE_OFFSET ||
+- probe_kernel_address(ip, c)) {
+- printk(" Bad EIP value.");
+- break;
+- }
+- if (ip == (u8 *)regs->ip)
+- printk("<%02x> ", c);
+- else
+- printk("%02x ", c);
+- }
+- }
+- printk("\n");
+-}
+-
+-int is_valid_bugaddr(unsigned long ip)
+-{
+- unsigned short ud2;
+-
+- if (ip < PAGE_OFFSET)
+- return 0;
+- if (probe_kernel_address((unsigned short *)ip, ud2))
+- return 0;
+-
+- return ud2 == 0x0b0f;
+-}
+-
+-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+-static int die_owner = -1;
+-static unsigned int die_nest_count;
+-
+-unsigned __kprobes long oops_begin(void)
+-{
+- unsigned long flags;
+-
+- oops_enter();
+-
+- if (die_owner != raw_smp_processor_id()) {
+- console_verbose();
+- raw_local_irq_save(flags);
+- __raw_spin_lock(&die_lock);
+- die_owner = smp_processor_id();
+- die_nest_count = 0;
+- bust_spinlocks(1);
+- } else {
+- raw_local_irq_save(flags);
+- }
+- die_nest_count++;
+- return flags;
+-}
+-
+-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+-{
+- bust_spinlocks(0);
+- die_owner = -1;
+- add_taint(TAINT_DIE);
+- __raw_spin_unlock(&die_lock);
+- raw_local_irq_restore(flags);
+-
+- if (!regs)
+- return;
+-
+- if (kexec_should_crash(current))
+- crash_kexec(regs);
+-
+- if (in_interrupt())
+- panic("Fatal exception in interrupt");
+-
+- if (panic_on_oops)
+- panic("Fatal exception");
+-
+- oops_exit();
+- do_exit(signr);
+-}
+-
+-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+-{
+- unsigned short ss;
+- unsigned long sp;
+-
+- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+-#ifdef CONFIG_PREEMPT
+- printk("PREEMPT ");
+-#endif
+-#ifdef CONFIG_SMP
+- printk("SMP ");
+-#endif
+-#ifdef CONFIG_DEBUG_PAGEALLOC
+- printk("DEBUG_PAGEALLOC");
+-#endif
+- printk("\n");
+- if (notify_die(DIE_OOPS, str, regs, err,
+- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+- return 1;
+-
+- show_registers(regs);
+- /* Executive summary in case the oops scrolled away */
+- sp = (unsigned long) (&regs->sp);
+- savesegment(ss, ss);
+- if (user_mode(regs)) {
+- sp = regs->sp;
+- ss = regs->ss & 0xffff;
+- }
+- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+- print_symbol("%s", regs->ip);
+- printk(" SS:ESP %04x:%08lx\n", ss, sp);
+- return 0;
+-}
+-
+-/*
+- * This is gone through when something in the kernel has done something bad
+- * and is about to be terminated:
+- */
+-void die(const char *str, struct pt_regs *regs, long err)
+-{
+- unsigned long flags = oops_begin();
+-
+- if (die_nest_count < 3) {
+- report_bug(regs->ip, regs);
+-
+- if (__die(str, regs, err))
+- regs = NULL;
+- } else {
+- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
+- }
+-
+- oops_end(flags, regs, SIGSEGV);
+-}
+-
+-static inline void
+-die_if_kernel(const char *str, struct pt_regs *regs, long err)
+-{
+- if (!user_mode_vm(regs))
+- die(str, regs, err);
+-}
+-
+-static void __kprobes
+-do_trap(int trapnr, int signr, char *str, int vm86,
struct pt_regs *regs, +- long error_code, siginfo_t *info) +-{ +- struct task_struct *tsk = current; +- +- if (regs->flags & X86_VM_MASK) { +- if (vm86) +- goto vm86_trap; +- goto trap_signal; +- } +- +- if (!user_mode(regs)) +- goto kernel_trap; +- +-trap_signal: +- /* +- * We want error_code and trap_no set for userspace faults and +- * kernelspace faults which result in die(), but not +- * kernelspace faults which are fixed up. die() gives the +- * process no chance to handle the signal and notice the +- * kernel fault information, so that won't result in polluting +- * the information about previously queued, but not yet +- * delivered, faults. See also do_general_protection below. +- */ +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = trapnr; +- +- if (info) +- force_sig_info(signr, info, tsk); +- else +- force_sig(signr, tsk); +- return; +- +-kernel_trap: +- if (!fixup_exception(regs)) { +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = trapnr; +- die(str, regs, error_code); +- } +- return; +- +-vm86_trap: +- if (handle_vm86_trap((struct kernel_vm86_regs *) regs, +- error_code, trapnr)) +- goto trap_signal; +- return; +-} +- +-#define DO_ERROR(trapnr, signr, str, name) \ +-void do_##name(struct pt_regs *regs, long error_code) \ +-{ \ +- trace_hardirqs_fixup(); \ +- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ +- == NOTIFY_STOP) \ +- return; \ +- do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ +-} +- +-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ +-void do_##name(struct pt_regs *regs, long error_code) \ +-{ \ +- siginfo_t info; \ +- if (irq) \ +- local_irq_enable(); \ +- info.si_signo = signr; \ +- info.si_errno = 0; \ +- info.si_code = sicode; \ +- info.si_addr = (void __user *)siaddr; \ +- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ +- == NOTIFY_STOP) \ +- return; \ +- do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ +-} +- +-#define DO_VM86_ERROR(trapnr, signr, str, name) \ +-void do_##name(struct pt_regs *regs, long error_code) \ +-{ \ +- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ +- == NOTIFY_STOP) \ +- return; \ +- do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ +-} +- +-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +-void do_##name(struct pt_regs *regs, long error_code) \ +-{ \ +- siginfo_t info; \ +- info.si_signo = signr; \ +- info.si_errno = 0; \ +- info.si_code = sicode; \ +- info.si_addr = (void __user *)siaddr; \ +- trace_hardirqs_fixup(); \ +- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ +- == NOTIFY_STOP) \ +- return; \ +- do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ +-} +- +-DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +-#ifndef CONFIG_KPROBES +-DO_VM86_ERROR(3, SIGTRAP, "int3", int3) +-#endif +-DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) +-DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) +-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) +-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +-DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +-DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) +- +-void __kprobes +-do_general_protection(struct 
pt_regs *regs, long error_code)
+-{
+- struct task_struct *tsk;
+- struct thread_struct *thread;
+-
+- thread = &current->thread;
+-
+- if (regs->flags & X86_VM_MASK)
+- goto gp_in_vm86;
+-
+- tsk = current;
+- if (!user_mode(regs))
+- goto gp_in_kernel;
+-
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = 13;
+-
+- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+- printk_ratelimit()) {
+- printk(KERN_INFO
+- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
+- tsk->comm, task_pid_nr(tsk),
+- regs->ip, regs->sp, error_code);
+- print_vma_addr(" in ", regs->ip);
+- printk("\n");
+- }
+-
+- force_sig(SIGSEGV, tsk);
+- return;
+-
+-gp_in_vm86:
+- local_irq_enable();
+- handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+- return;
+-
+-gp_in_kernel:
+- if (fixup_exception(regs))
+- return;
+-
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = 13;
+- if (notify_die(DIE_GPF, "general protection fault", regs,
+- error_code, 13, SIGSEGV) == NOTIFY_STOP)
+- return;
+- die("general protection fault", regs, error_code);
+-}
+-
+-static notrace __kprobes void
+-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+-{
+- printk(KERN_EMERG
+- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+- reason, smp_processor_id());
+-
+- printk(KERN_EMERG
+- "You have some hardware problem, likely on the PCI bus.\n");
+-
+-#if defined(CONFIG_EDAC)
+- if (edac_handler_set()) {
+- edac_atomic_assert_error();
+- return;
+- }
+-#endif
+-
+- if (panic_on_unrecovered_nmi)
+- panic("NMI: Not continuing");
+-
+- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+-
+- /* Clear and disable the memory parity error line. */
+- clear_mem_error(reason);
+-}
+-
+-static notrace __kprobes void
+-io_check_error(unsigned char reason, struct pt_regs *regs)
+-{
+- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+- show_registers(regs);
+-
+- /* Re-enable the IOCK line, wait for a few seconds */
+- clear_io_check_error(reason);
+-}
+-
+-static notrace __kprobes void
+-unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
+-{
+- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+- return;
+-#ifdef CONFIG_MCA
+- /*
+- * Might actually be able to figure out what the guilty party
+- * is:
+- */
+- if (MCA_bus) {
+- mca_handle_nmi();
+- return;
+- }
+-#endif
+- printk(KERN_EMERG
+- "Uhhuh.
NMI received for unknown reason %02x on CPU %d.\n", +- reason, smp_processor_id()); +- +- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); +- if (panic_on_unrecovered_nmi) +- panic("NMI: Not continuing"); +- +- printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); +-} +- +-static DEFINE_SPINLOCK(nmi_print_lock); +- +-void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +-{ +- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) +- return; +- +- spin_lock(&nmi_print_lock); +- /* +- * We are in trouble anyway, lets at least try +- * to get a message out: +- */ +- bust_spinlocks(1); +- printk(KERN_EMERG "%s", str); +- printk(" on CPU%d, ip %08lx, registers:\n", +- smp_processor_id(), regs->ip); +- show_registers(regs); +- if (do_panic) +- panic("Non maskable interrupt"); +- console_silent(); +- spin_unlock(&nmi_print_lock); +- bust_spinlocks(0); +- +- /* +- * If we are in kernel we are probably nested up pretty bad +- * and might aswell get out now while we still can: +- */ +- if (!user_mode_vm(regs)) { +- current->thread.trap_no = 2; +- crash_kexec(regs); +- } +- +- do_exit(SIGSEGV); +-} +- +-static notrace __kprobes void default_do_nmi(struct pt_regs *regs) +-{ +- unsigned char reason = 0; +- int cpu; +- +- cpu = smp_processor_id(); +- +- /* Only the BSP gets external NMIs from the system. */ +- if (!cpu) +- reason = get_nmi_reason(); +- +- if (!(reason & 0xc0)) { +- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) +- == NOTIFY_STOP) +- return; +-#ifdef CONFIG_X86_LOCAL_APIC +- /* +- * Ok, so this is none of the documented NMI sources, +- * so it must be the NMI watchdog. +- */ +- if (nmi_watchdog_tick(regs, reason)) +- return; +- if (!do_nmi_callback(regs, cpu)) +- unknown_nmi_error(reason, regs); +-#else +- unknown_nmi_error(reason, regs); +-#endif +- +- return; +- } +- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) +- return; +- +- /* AK: following checks seem to be broken on modern chipsets. FIXME */ +- if (reason & 0x80) +- mem_parity_error(reason, regs); +- if (reason & 0x40) +- io_check_error(reason, regs); +- /* +- * Reassert NMI in case it became active meanwhile +- * as it's edge-triggered: +- */ +- reassert_nmi(); +-} +- +-notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) +-{ +- int cpu; +- +- nmi_enter(); +- +- cpu = smp_processor_id(); +- +- ++nmi_count(cpu); +- +- if (!ignore_nmis) +- default_do_nmi(regs); +- +- nmi_exit(); +-} +- +-void stop_nmi(void) +-{ +- acpi_nmi_disable(); +- ignore_nmis++; +-} +- +-void restart_nmi(void) +-{ +- ignore_nmis--; +- acpi_nmi_enable(); +-} +- +-#ifdef CONFIG_KPROBES +-void __kprobes do_int3(struct pt_regs *regs, long error_code) +-{ +- trace_hardirqs_fixup(); +- +- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) +- == NOTIFY_STOP) +- return; +- /* +- * This is an interrupt gate, because kprobes wants interrupts +- * disabled. Normal trap handlers don't. +- */ +- restore_interrupts(regs); +- +- do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); +-} +-#endif +- +-/* +- * Our handling of the processor debug registers is non-trivial. +- * We do not clear them on entry and exit from the kernel. Therefore +- * it is possible to get a watchpoint trap here from inside the kernel. +- * However, the code in ./ptrace.c has ensured that the user can +- * only set watchpoints on userspace addresses. 
Therefore the in-kernel +- * watchpoint trap can only occur in code which is reading/writing +- * from user space. Such code must not hold kernel locks (since it +- * can equally take a page fault), therefore it is safe to call +- * force_sig_info even though that claims and releases locks. +- * +- * Code in ./signal.c ensures that the debug control register +- * is restored before we deliver any signal, and therefore that +- * user code runs with the correct debug control register even though +- * we clear it here. +- * +- * Being careful here means that we don't have to be as careful in a +- * lot of more complicated places (task switching can be a bit lazy +- * about restoring all the debug state, and ptrace doesn't have to +- * find every occurrence of the TF bit that could be saved away even +- * by user code) +- */ +-void __kprobes do_debug(struct pt_regs *regs, long error_code) +-{ +- struct task_struct *tsk = current; +- unsigned int condition; +- +- trace_hardirqs_fixup(); +- +- get_debugreg(condition, 6); +- +- /* +- * The processor cleared BTF, so don't mark that we need it set. +- */ +- clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); +- tsk->thread.debugctlmsr = 0; +- +- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, +- SIGTRAP) == NOTIFY_STOP) +- return; +- /* It's safe to allow irq's after DR6 has been saved */ +- if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); +- +- /* Mask out spurious debug traps due to lazy DR7 setting */ +- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { +- if (!tsk->thread.debugreg7) +- goto clear_dr7; +- } +- +- if (regs->flags & X86_VM_MASK) +- goto debug_vm86; +- +- /* Save debug status register where ptrace can see it */ +- tsk->thread.debugreg6 = condition; +- +- /* +- * Single-stepping through TF: make sure we ignore any events in +- * kernel space (but re-enable TF when returning to user mode). +- */ +- if (condition & DR_STEP) { +- /* +- * We already checked v86 mode above, so we can +- * check for kernel mode by just checking the CPL +- * of CS. +- */ +- if (!user_mode(regs)) +- goto clear_TF_reenable; +- } +- +- /* Ok, finally something we can handle */ +- send_sigtrap(tsk, regs, error_code); +- +- /* +- * Disable additional traps. They'll be re-enabled when +- * the signal is delivered. +- */ +-clear_dr7: +- set_debugreg(0, 7); +- return; +- +-debug_vm86: +- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); +- return; +- +-clear_TF_reenable: +- set_tsk_thread_flag(tsk, TIF_SINGLESTEP); +- regs->flags &= ~X86_EFLAGS_TF; +- return; +-} +- +-/* +- * Note that we play around with the 'TS' bit in an attempt to get +- * the correct behaviour even in the presence of the asynchronous +- * IRQ13 behaviour +- */ +-void math_error(void __user *ip) +-{ +- struct task_struct *task; +- siginfo_t info; +- unsigned short cwd, swd; +- +- /* +- * Save the info for the exception handler and clear the error. +- */ +- task = current; +- save_init_fpu(task); +- task->thread.trap_no = 16; +- task->thread.error_code = 0; +- info.si_signo = SIGFPE; +- info.si_errno = 0; +- info.si_code = __SI_FAULT; +- info.si_addr = ip; +- /* +- * (~cwd & swd) will mask out exceptions that are not set to unmasked +- * status. 0x3f is the exception bits in these regs, 0x200 is the +- * C1 reg you need in case of a stack fault, 0x040 is the stack +- * fault bit. 
We should only be taking one exception at a time, +- * so if this combination doesn't produce any single exception, +- * then we have a bad program that isn't synchronizing its FPU usage +- * and it will suffer the consequences since we won't be able to +- * fully reproduce the context of the exception +- */ +- cwd = get_fpu_cwd(task); +- swd = get_fpu_swd(task); +- switch (swd & ~cwd & 0x3f) { +- case 0x000: /* No unmasked exception */ +- return; +- default: /* Multiple exceptions */ +- break; +- case 0x001: /* Invalid Op */ +- /* +- * swd & 0x240 == 0x040: Stack Underflow +- * swd & 0x240 == 0x240: Stack Overflow +- * User must clear the SF bit (0x40) if set +- */ +- info.si_code = FPE_FLTINV; +- break; +- case 0x002: /* Denormalize */ +- case 0x010: /* Underflow */ +- info.si_code = FPE_FLTUND; +- break; +- case 0x004: /* Zero Divide */ +- info.si_code = FPE_FLTDIV; +- break; +- case 0x008: /* Overflow */ +- info.si_code = FPE_FLTOVF; +- break; +- case 0x020: /* Precision */ +- info.si_code = FPE_FLTRES; +- break; +- } +- force_sig_info(SIGFPE, &info, task); +-} +- +-void do_coprocessor_error(struct pt_regs *regs, long error_code) +-{ +- ignore_fpu_irq = 1; +- math_error((void __user *)regs->ip); +-} +- +-static void simd_math_error(void __user *ip) +-{ +- struct task_struct *task; +- siginfo_t info; +- unsigned short mxcsr; +- +- /* +- * Save the info for the exception handler and clear the error. +- */ +- task = current; +- save_init_fpu(task); +- task->thread.trap_no = 19; +- task->thread.error_code = 0; +- info.si_signo = SIGFPE; +- info.si_errno = 0; +- info.si_code = __SI_FAULT; +- info.si_addr = ip; +- /* +- * The SIMD FPU exceptions are handled a little differently, as there +- * is only a single status/control register. Thus, to determine which +- * unmasked exception was caught we must mask the exception mask bits +- * at 0x1f80, and then use these to mask the exception bits at 0x3f. +- */ +- mxcsr = get_fpu_mxcsr(task); +- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { +- case 0x000: +- default: +- break; +- case 0x001: /* Invalid Op */ +- info.si_code = FPE_FLTINV; +- break; +- case 0x002: /* Denormalize */ +- case 0x010: /* Underflow */ +- info.si_code = FPE_FLTUND; +- break; +- case 0x004: /* Zero Divide */ +- info.si_code = FPE_FLTDIV; +- break; +- case 0x008: /* Overflow */ +- info.si_code = FPE_FLTOVF; +- break; +- case 0x020: /* Precision */ +- info.si_code = FPE_FLTRES; +- break; +- } +- force_sig_info(SIGFPE, &info, task); +-} +- +-void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) +-{ +- if (cpu_has_xmm) { +- /* Handle SIMD FPU exceptions on PIII+ processors. */ +- ignore_fpu_irq = 1; +- simd_math_error((void __user *)regs->ip); +- return; +- } +- /* +- * Handle strange cache flush from user space exception +- * in all other cases. This is undocumented behaviour. +- */ +- if (regs->flags & X86_VM_MASK) { +- handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); +- return; +- } +- current->thread.trap_no = 19; +- current->thread.error_code = error_code; +- die_if_kernel("cache flush denied", regs, error_code); +- force_sig(SIGSEGV, current); +-} +- +-#ifndef CONFIG_XEN +-void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) +-{ +-#if 0 +- /* No need to warn about this any longer. 
*/ +- printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); +-#endif +-} +- +-unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) +-{ +- struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); +- unsigned long base = (kesp - uesp) & -THREAD_SIZE; +- unsigned long new_kesp = kesp - base; +- unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; +- __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; +- +- /* Set up base for espfix segment */ +- desc &= 0x00f0ff0000000000ULL; +- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | +- ((((__u64)base) << 32) & 0xff00000000000000ULL) | +- ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | +- (lim_pages & 0xffff); +- *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; +- +- return new_kesp; +-} +-#endif +- +-/* +- * 'math_state_restore()' saves the current math information in the +- * old math state array, and gets the new ones from the current task +- * +- * Careful.. There are problems with IBM-designed IRQ13 behaviour. +- * Don't touch unless you *really* know how it works. +- * +- * Must be called with kernel preemption disabled (in this case, +- * local interrupts are disabled at the call-site in entry.S). +- */ +-asmlinkage void math_state_restore(void) +-{ +- struct thread_info *thread = current_thread_info(); +- struct task_struct *tsk = thread->task; +- +- if (!tsk_used_math(tsk)) { +- local_irq_enable(); +- /* +- * does a slab alloc which can sleep +- */ +- if (init_fpu(tsk)) { +- /* +- * ran out of memory! +- */ +- do_group_exit(SIGKILL); +- return; +- } +- local_irq_disable(); +- } +- +- /* NB. 'clts' is done for us by Xen during virtual trap. */ +- restore_fpu(tsk); +- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ +- tsk->fpu_counter++; +-} +-EXPORT_SYMBOL_GPL(math_state_restore); +- +-#ifndef CONFIG_MATH_EMULATION +- +-asmlinkage void math_emulate(long arg) +-{ +- printk(KERN_EMERG +- "math-emulation not enabled and no coprocessor found.\n"); +- printk(KERN_EMERG "killing %s.\n", current->comm); +- force_sig(SIGFPE, current); +- schedule(); +-} +- +-#endif /* CONFIG_MATH_EMULATION */ +- +-/* +- * NB. All these are "trap gates" (i.e. events_mask isn't set) except +- * for those that specify |4 in the second field. 
+- */ +-static const trap_info_t __cpuinitconst trap_table[] = { +- { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, +- { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, +- { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, +- { 4, 3, __KERNEL_CS, (unsigned long)overflow }, +- { 5, 0, __KERNEL_CS, (unsigned long)bounds }, +- { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, +- { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available }, +- { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, +- { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, +- { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, +- { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, +- { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, +- { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault }, +- { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment }, +- { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, +- { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, +-#ifdef CONFIG_X86_MCE +- { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, +-#endif +- { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, +- { SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)system_call }, +- { 0, 0, 0, 0 } +-}; +- +-void __init trap_init(void) +-{ +- int ret; +- +- ret = HYPERVISOR_set_trap_table(trap_table); +- if (ret) +- printk("HYPERVISOR_set_trap_table failed: error %d\n", ret); +- +- if (cpu_has_fxsr) { +- printk(KERN_INFO "Enabling fast FPU save and restore... "); +- set_in_cr4(X86_CR4_OSFXSR); +- printk("done.\n"); +- } +- if (cpu_has_xmm) { +- printk(KERN_INFO +- "Enabling unmasked SIMD FPU exception support... "); +- set_in_cr4(X86_CR4_OSXMMEXCPT); +- printk("done.\n"); +- } +- +- init_thread_xstate(); +- /* +- * Should be a barrier for any external CPU state: +- */ +- cpu_init(); +-} +- +-void __cpuinit smp_trap_init(trap_info_t *trap_ctxt) +-{ +- const trap_info_t *t = trap_table; +- +- for (t = trap_table; t->address; t++) { +- trap_ctxt[t->vector].flags = t->flags; +- trap_ctxt[t->vector].cs = t->cs; +- trap_ctxt[t->vector].address = t->address; +- } +-} +- +-static int __init kstack_setup(char *s) +-{ +- kstack_depth_to_print = simple_strtoul(s, NULL, 0); +- +- return 1; +-} +-__setup("kstack=", kstack_setup); +- +-static int __init code_bytes_setup(char *s) +-{ +- code_bytes = simple_strtoul(s, NULL, 0); +- if (code_bytes > 8192) +- code_bytes = 8192; +- +- return 1; +-} +-__setup("code_bytes=", code_bytes_setup); +--- head-2010-01-18.orig/arch/x86/kernel/traps_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ /dev/null 1970-01-01 00:00:00.000000000 +0000 +@@ -1,1238 +0,0 @@ +-/* +- * Copyright (C) 1991, 1992 Linus Torvalds +- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs +- * +- * Pentium III FXSR, SSE support +- * Gareth Hughes , May 2000 +- */ +- +-/* +- * 'Traps.c' handles hardware traps and faults after we have saved some +- * state in 'entry.S'. 
+- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#if defined(CONFIG_EDAC) +-#include +-#endif +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +-int panic_on_unrecovered_nmi; +-int kstack_depth_to_print = 12; +-static unsigned int code_bytes = 64; +-static int ignore_nmis; +-static int die_counter; +- +-static inline void conditional_sti(struct pt_regs *regs) +-{ +- if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); +-} +- +-static inline void preempt_conditional_sti(struct pt_regs *regs) +-{ +- inc_preempt_count(); +- if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); +-} +- +-static inline void preempt_conditional_cli(struct pt_regs *regs) +-{ +- if (regs->flags & X86_EFLAGS_IF) +- local_irq_disable(); +- /* Make sure to not schedule here because we could be running +- on an exception stack. */ +- dec_preempt_count(); +-} +- +-void printk_address(unsigned long address, int reliable) +-{ +- printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); +-} +- +-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, +- unsigned *usedp, char **idp) +-{ +-#ifndef CONFIG_X86_NO_TSS +- static char ids[][8] = { +- [DEBUG_STACK - 1] = "#DB", +- [NMI_STACK - 1] = "NMI", +- [DOUBLEFAULT_STACK - 1] = "#DF", +- [STACKFAULT_STACK - 1] = "#SS", +- [MCE_STACK - 1] = "#MC", +-#if DEBUG_STKSZ > EXCEPTION_STKSZ +- [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" +-#endif +- }; +- unsigned k; +- +- /* +- * Iterate over all exception stacks, and figure out whether +- * 'stack' is in one of them: +- */ +- for (k = 0; k < N_EXCEPTION_STACKS; k++) { +- unsigned long end = per_cpu(orig_ist, cpu).ist[k]; +- /* +- * Is 'stack' above this exception frame's end? +- * If yes then skip to the next frame. +- */ +- if (stack >= end) +- continue; +- /* +- * Is 'stack' above this exception frame's start address? +- * If yes then we found the right frame. +- */ +- if (stack >= end - EXCEPTION_STKSZ) { +- /* +- * Make sure we only iterate through an exception +- * stack once. If it comes up for the second time +- * then there's something wrong going on - just +- * break out and return NULL: +- */ +- if (*usedp & (1U << k)) +- break; +- *usedp |= 1U << k; +- *idp = ids[k]; +- return (unsigned long *)end; +- } +- /* +- * If this is a debug stack, and if it has a larger size than +- * the usual exception stacks, then 'stack' might still +- * be within the lower portion of the debug stack: +- */ +-#if DEBUG_STKSZ > EXCEPTION_STKSZ +- if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { +- unsigned j = N_EXCEPTION_STACKS - 1; +- +- /* +- * Black magic. A large debug stack is composed of +- * multiple exception stack entries, which we +- * iterate through now. 
Dont look: +- */ +- do { +- ++j; +- end -= EXCEPTION_STKSZ; +- ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); +- } while (stack < end - EXCEPTION_STKSZ); +- if (*usedp & (1U << j)) +- break; +- *usedp |= 1U << j; +- *idp = ids[j]; +- return (unsigned long *)end; +- } +-#endif +- } +-#endif +- return NULL; +-} +- +-/* +- * x86-64 can have up to three kernel stacks: +- * process stack +- * interrupt stack +- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack +- */ +- +-static inline int valid_stack_ptr(struct thread_info *tinfo, +- void *p, unsigned int size, void *end) +-{ +- void *t = tinfo; +- if (end) { +- if (p < end && p >= (end-THREAD_SIZE)) +- return 1; +- else +- return 0; +- } +- return p > t && p < t + THREAD_SIZE - size; +-} +- +-/* The form of the top of the frame on the stack */ +-struct stack_frame { +- struct stack_frame *next_frame; +- unsigned long return_address; +-}; +- +-static inline unsigned long +-print_context_stack(struct thread_info *tinfo, +- unsigned long *stack, unsigned long bp, +- const struct stacktrace_ops *ops, void *data, +- unsigned long *end) +-{ +- struct stack_frame *frame = (struct stack_frame *)bp; +- +- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { +- unsigned long addr; +- +- addr = *stack; +- if (__kernel_text_address(addr)) { +- if ((unsigned long) stack == bp + 8) { +- ops->address(data, addr, 1); +- frame = frame->next_frame; +- bp = (unsigned long) frame; +- } else { +- ops->address(data, addr, bp == 0); +- } +- } +- stack++; +- } +- return bp; +-} +- +-void dump_trace(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, unsigned long bp, +- const struct stacktrace_ops *ops, void *data) +-{ +- const unsigned cpu = get_cpu(); +- unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; +- unsigned used = 0; +- struct thread_info *tinfo; +- +- if (!task) +- task = current; +- +- if (!stack) { +- unsigned long dummy; +- stack = &dummy; +- if (task && task != current) +- stack = (unsigned long *)task->thread.sp; +- } +- +-#ifdef CONFIG_FRAME_POINTER +- if (!bp) { +- if (task == current) { +- /* Grab bp right from our regs */ +- asm("movq %%rbp, %0" : "=r" (bp) :); +- } else { +- /* bp is the last reg pushed by switch_to */ +- bp = *(unsigned long *) task->thread.sp; +- } +- } +-#endif +- +- /* +- * Print function call entries in all stacks, starting at the +- * current stack address. 
If the stacks consist of nested +- * exceptions +- */ +- tinfo = task_thread_info(task); +- for (;;) { +- char *id; +- unsigned long *estack_end; +- estack_end = in_exception_stack(cpu, (unsigned long)stack, +- &used, &id); +- +- if (estack_end) { +- if (ops->stack(data, id) < 0) +- break; +- +- bp = print_context_stack(tinfo, stack, bp, ops, +- data, estack_end); +- ops->stack(data, ""); +- /* +- * We link to the next stack via the +- * second-to-last pointer (index -2 to end) in the +- * exception stack: +- */ +- stack = (unsigned long *) estack_end[-2]; +- continue; +- } +- if (irqstack_end) { +- unsigned long *irqstack; +- irqstack = irqstack_end - +- (IRQSTACKSIZE - 64) / sizeof(*irqstack); +- +- if (stack >= irqstack && stack < irqstack_end) { +- if (ops->stack(data, "IRQ") < 0) +- break; +- bp = print_context_stack(tinfo, stack, bp, +- ops, data, irqstack_end); +- /* +- * We link to the next stack (which would be +- * the process stack normally) the last +- * pointer (index -1 to end) in the IRQ stack: +- */ +- stack = (unsigned long *) (irqstack_end[-1]); +- irqstack_end = NULL; +- ops->stack(data, "EOI"); +- continue; +- } +- } +- break; +- } +- +- /* +- * This handles the process stack: +- */ +- bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); +- put_cpu(); +-} +-EXPORT_SYMBOL(dump_trace); +- +-static void +-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +-{ +- print_symbol(msg, symbol); +- printk("\n"); +-} +- +-static void print_trace_warning(void *data, char *msg) +-{ +- printk("%s\n", msg); +-} +- +-static int print_trace_stack(void *data, char *name) +-{ +- printk(" <%s> ", name); +- return 0; +-} +- +-static void print_trace_address(void *data, unsigned long addr, int reliable) +-{ +- touch_nmi_watchdog(); +- printk_address(addr, reliable); +-} +- +-static const struct stacktrace_ops print_trace_ops = { +- .warning = print_trace_warning, +- .warning_symbol = print_trace_warning_symbol, +- .stack = print_trace_stack, +- .address = print_trace_address, +-}; +- +-static void +-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, unsigned long bp, char *log_lvl) +-{ +- printk("\nCall Trace:\n"); +- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +- printk("\n"); +-} +- +-void show_trace(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, unsigned long bp) +-{ +- show_trace_log_lvl(task, regs, stack, bp, ""); +-} +- +-static void +-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, +- unsigned long *sp, unsigned long bp, char *log_lvl) +-{ +- unsigned long *stack; +- int i; +- const int cpu = smp_processor_id(); +- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); +- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); +- +- // debugging aid: "show_stack(NULL, NULL);" prints the +- // back trace for this cpu. 
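/*
 * A minimal sketch, not code from this patch: the deleted 64-bit
 * dump_trace() above hops between kernel stacks through link
 * pointers saved near the top of each stack -- the IRQ stack keeps
 * the interrupted stack pointer in its last word (irqstack_end[-1]),
 * an exception stack in its second-to-last word (estack_end[-2]).
 * next_stack() is a hypothetical helper illustrating that linkage:
 */
static unsigned long *next_stack(unsigned long *stack_end, int is_irq_stack)
{
	/* irqstack_end[-1] vs. estack_end[-2] in the code above */
	return (unsigned long *)(is_irq_stack ? stack_end[-1]
					      : stack_end[-2]);
}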
+- +- if (sp == NULL) { +- if (task) +- sp = (unsigned long *)task->thread.sp; +- else +- sp = (unsigned long *)&sp; +- } +- +- stack = sp; +- for (i = 0; i < kstack_depth_to_print; i++) { +- if (stack >= irqstack && stack <= irqstack_end) { +- if (stack == irqstack_end) { +- stack = (unsigned long *) (irqstack_end[-1]); +- printk(" "); +- } +- } else { +- if (((long) stack & (THREAD_SIZE-1)) == 0) +- break; +- } +- if (i && ((i % 4) == 0)) +- printk("\n"); +- printk(" %016lx", *stack++); +- touch_nmi_watchdog(); +- } +- show_trace_log_lvl(task, regs, sp, bp, log_lvl); +-} +- +-void show_stack(struct task_struct *task, unsigned long *sp) +-{ +- show_stack_log_lvl(task, NULL, sp, 0, ""); +-} +- +-/* +- * The architecture-independent dump_stack generator +- */ +-void dump_stack(void) +-{ +- unsigned long bp = 0; +- unsigned long stack; +- +-#ifdef CONFIG_FRAME_POINTER +- if (!bp) +- asm("movq %%rbp, %0" : "=r" (bp):); +-#endif +- +- printk("Pid: %d, comm: %.20s %s %s %.*s\n", +- current->pid, current->comm, print_tainted(), +- init_utsname()->release, +- (int)strcspn(init_utsname()->version, " "), +- init_utsname()->version); +- show_trace(NULL, NULL, &stack, bp); +-} +- +-EXPORT_SYMBOL(dump_stack); +- +-void show_registers(struct pt_regs *regs) +-{ +- int i; +- unsigned long sp; +- const int cpu = smp_processor_id(); +- struct task_struct *cur = cpu_pda(cpu)->pcurrent; +- +- sp = regs->sp; +- printk("CPU %d ", cpu); +- __show_regs(regs); +- printk("Process %s (pid: %d, threadinfo %p, task %p)\n", +- cur->comm, cur->pid, task_thread_info(cur), cur); +- +- /* +- * When in-kernel, we also print out the stack and code at the +- * time of the fault.. +- */ +- if (!user_mode(regs)) { +- unsigned int code_prologue = code_bytes * 43 / 64; +- unsigned int code_len = code_bytes; +- unsigned char c; +- u8 *ip; +- +- printk("Stack: "); +- show_stack_log_lvl(NULL, regs, (unsigned long *)sp, +- regs->bp, ""); +- printk("\n"); +- +- printk(KERN_EMERG "Code: "); +- +- ip = (u8 *)regs->ip - code_prologue; +- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { +- /* try starting at RIP */ +- ip = (u8 *)regs->ip; +- code_len = code_len - code_prologue + 1; +- } +- for (i = 0; i < code_len; i++, ip++) { +- if (ip < (u8 *)PAGE_OFFSET || +- probe_kernel_address(ip, c)) { +- printk(" Bad RIP value."); +- break; +- } +- if (ip == (u8 *)regs->ip) +- printk("<%02x> ", c); +- else +- printk("%02x ", c); +- } +- } +- printk("\n"); +-} +- +-int is_valid_bugaddr(unsigned long ip) +-{ +- unsigned short ud2; +- +- if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) +- return 0; +- +- return ud2 == 0x0b0f; +-} +- +-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +-static int die_owner = -1; +-static unsigned int die_nest_count; +- +-unsigned __kprobes long oops_begin(void) +-{ +- int cpu; +- unsigned long flags; +- +- oops_enter(); +- +- /* racy, but better than risking deadlock. */ +- raw_local_irq_save(flags); +- cpu = smp_processor_id(); +- if (!__raw_spin_trylock(&die_lock)) { +- if (cpu == die_owner) +- /* nested oops. should stop eventually */; +- else +- __raw_spin_lock(&die_lock); +- } +- die_nest_count++; +- die_owner = cpu; +- console_verbose(); +- bust_spinlocks(1); +- return flags; +-} +- +-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +-{ +- die_owner = -1; +- bust_spinlocks(0); +- die_nest_count--; +- if (!die_nest_count) +- /* Nest count reaches zero, release the lock. 
*/ +- __raw_spin_unlock(&die_lock); +- raw_local_irq_restore(flags); +- if (!regs) { +- oops_exit(); +- return; +- } +- if (panic_on_oops) +- panic("Fatal exception"); +- oops_exit(); +- do_exit(signr); +-} +- +-int __kprobes __die(const char *str, struct pt_regs *regs, long err) +-{ +- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); +-#ifdef CONFIG_PREEMPT +- printk("PREEMPT "); +-#endif +-#ifdef CONFIG_SMP +- printk("SMP "); +-#endif +-#ifdef CONFIG_DEBUG_PAGEALLOC +- printk("DEBUG_PAGEALLOC"); +-#endif +- printk("\n"); +- if (notify_die(DIE_OOPS, str, regs, err, +- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) +- return 1; +- +- show_registers(regs); +- add_taint(TAINT_DIE); +- /* Executive summary in case the oops scrolled away */ +- printk(KERN_ALERT "RIP "); +- printk_address(regs->ip, 1); +- printk(" RSP <%016lx>\n", regs->sp); +- if (kexec_should_crash(current)) +- crash_kexec(regs); +- return 0; +-} +- +-void die(const char *str, struct pt_regs *regs, long err) +-{ +- unsigned long flags = oops_begin(); +- +- if (!user_mode(regs)) +- report_bug(regs->ip, regs); +- +- if (__die(str, regs, err)) +- regs = NULL; +- oops_end(flags, regs, SIGSEGV); +-} +- +-#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL) +-notrace __kprobes void +-die_nmi(char *str, struct pt_regs *regs, int do_panic) +-{ +- unsigned long flags; +- +- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) +- return; +- +- flags = oops_begin(); +- /* +- * We are in trouble anyway, lets at least try +- * to get a message out. +- */ +- printk(KERN_EMERG "%s", str); +- printk(" on CPU%d, ip %08lx, registers:\n", +- smp_processor_id(), regs->ip); +- show_registers(regs); +- if (kexec_should_crash(current)) +- crash_kexec(regs); +- if (do_panic || panic_on_oops) +- panic("Non maskable interrupt"); +- oops_end(flags, NULL, SIGBUS); +- nmi_exit(); +- local_irq_enable(); +- do_exit(SIGBUS); +-} +-#endif +- +-static void __kprobes +-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, +- long error_code, siginfo_t *info) +-{ +- struct task_struct *tsk = current; +- +- if (!user_mode(regs)) +- goto kernel_trap; +- +- /* +- * We want error_code and trap_no set for userspace faults and +- * kernelspace faults which result in die(), but not +- * kernelspace faults which are fixed up. die() gives the +- * process no chance to handle the signal and notice the +- * kernel fault information, so that won't result in polluting +- * the information about previously queued, but not yet +- * delivered, faults. See also do_general_protection below. 
+- */ +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = trapnr; +- +- if (show_unhandled_signals && unhandled_signal(tsk, signr) && +- printk_ratelimit()) { +- printk(KERN_INFO +- "%s[%d] trap %s ip:%lx sp:%lx error:%lx", +- tsk->comm, tsk->pid, str, +- regs->ip, regs->sp, error_code); +- print_vma_addr(" in ", regs->ip); +- printk("\n"); +- } +- +- if (info) +- force_sig_info(signr, info, tsk); +- else +- force_sig(signr, tsk); +- return; +- +-kernel_trap: +- if (!fixup_exception(regs)) { +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = trapnr; +- die(str, regs, error_code); +- } +- return; +-} +- +-#define DO_ERROR(trapnr, signr, str, name) \ +-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +-{ \ +- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ +- == NOTIFY_STOP) \ +- return; \ +- conditional_sti(regs); \ +- do_trap(trapnr, signr, str, regs, error_code, NULL); \ +-} +- +-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +-{ \ +- siginfo_t info; \ +- info.si_signo = signr; \ +- info.si_errno = 0; \ +- info.si_code = sicode; \ +- info.si_addr = (void __user *)siaddr; \ +- trace_hardirqs_fixup(); \ +- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ +- == NOTIFY_STOP) \ +- return; \ +- conditional_sti(regs); \ +- do_trap(trapnr, signr, str, regs, error_code, &info); \ +-} +- +-DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +-DO_ERROR(4, SIGSEGV, "overflow", overflow) +-DO_ERROR(5, SIGSEGV, "bounds", bounds) +-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) +-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +- +-/* Runs on IST stack */ +-asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) +-{ +- if (notify_die(DIE_TRAP, "stack segment", regs, error_code, +- 12, SIGBUS) == NOTIFY_STOP) +- return; +- preempt_conditional_sti(regs); +- do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); +- preempt_conditional_cli(regs); +-} +- +-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) +-{ +- static const char str[] = "double fault"; +- struct task_struct *tsk = current; +- +- /* Return not checked because double check cannot be ignored */ +- notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); +- +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = 8; +- +- /* This is always a kernel trap and never fixable (and thus must +- never return). 
*/ +- for (;;) +- die(str, regs, error_code); +-} +- +-asmlinkage void __kprobes +-do_general_protection(struct pt_regs *regs, long error_code) +-{ +- struct task_struct *tsk; +- +- conditional_sti(regs); +- +- tsk = current; +- if (!user_mode(regs)) +- goto gp_in_kernel; +- +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = 13; +- +- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && +- printk_ratelimit()) { +- printk(KERN_INFO +- "%s[%d] general protection ip:%lx sp:%lx error:%lx", +- tsk->comm, tsk->pid, +- regs->ip, regs->sp, error_code); +- print_vma_addr(" in ", regs->ip); +- printk("\n"); +- } +- +- force_sig(SIGSEGV, tsk); +- return; +- +-gp_in_kernel: +- if (fixup_exception(regs)) +- return; +- +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = 13; +- if (notify_die(DIE_GPF, "general protection fault", regs, +- error_code, 13, SIGSEGV) == NOTIFY_STOP) +- return; +- die("general protection fault", regs, error_code); +-} +- +-static notrace __kprobes void +-mem_parity_error(unsigned char reason, struct pt_regs *regs) +-{ +- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", +- reason); +- printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); +- +-#if defined(CONFIG_EDAC) +- if (edac_handler_set()) { +- edac_atomic_assert_error(); +- return; +- } +-#endif +- +- if (panic_on_unrecovered_nmi) +- panic("NMI: Not continuing"); +- +- printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); +- +- /* Clear and disable the memory parity error line. */ +- clear_mem_error(reason); +-} +- +-static notrace __kprobes void +-io_check_error(unsigned char reason, struct pt_regs *regs) +-{ +- printk("NMI: IOCK error (debug interrupt?)\n"); +- show_registers(regs); +- +- /* Re-enable the IOCK line, wait for a few seconds */ +- clear_io_check_error(reason); +-} +- +-static notrace __kprobes void +-unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +-{ +- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) +- return; +- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", +- reason); +- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); +- +- if (panic_on_unrecovered_nmi) +- panic("NMI: Not continuing"); +- +- printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); +-} +- +-/* Runs on IST stack. This code must keep interrupts off all the time. +- Nested NMIs are prevented by the CPU. */ +-asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) +-{ +- unsigned char reason = 0; +- int cpu; +- +- cpu = smp_processor_id(); +- +- /* Only the BSP gets external NMIs from the system. */ +- if (!cpu) +- reason = get_nmi_reason(); +- +- if (!(reason & 0xc0)) { +- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) +- == NOTIFY_STOP) +- return; +-#ifdef CONFIG_X86_LOCAL_APIC +- /* +- * Ok, so this is none of the documented NMI sources, +- * so it must be the NMI watchdog. +- */ +- if (nmi_watchdog_tick(regs, reason)) +- return; +-#endif +- if (!do_nmi_callback(regs, cpu)) +- unknown_nmi_error(reason, regs); +- +- return; +- } +- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) +- return; +- +- /* AK: following checks seem to be broken on modern chipsets. 
FIXME */ +- if (reason & 0x80) +- mem_parity_error(reason, regs); +- if (reason & 0x40) +- io_check_error(reason, regs); +-} +- +-asmlinkage notrace __kprobes void +-do_nmi(struct pt_regs *regs, long error_code) +-{ +- nmi_enter(); +- +- add_pda(__nmi_count, 1); +- +- if (!ignore_nmis) +- default_do_nmi(regs); +- +- nmi_exit(); +-} +- +-void stop_nmi(void) +-{ +- acpi_nmi_disable(); +- ignore_nmis++; +-} +- +-void restart_nmi(void) +-{ +- ignore_nmis--; +- acpi_nmi_enable(); +-} +- +-/* runs on IST stack. */ +-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) +-{ +- trace_hardirqs_fixup(); +- +- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) +- == NOTIFY_STOP) +- return; +- +- preempt_conditional_sti(regs); +- do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); +- preempt_conditional_cli(regs); +-} +- +-/* Help handler running on IST stack to switch back to user stack +- for scheduling or signal handling. The actual stack switch is done in +- entry.S */ +-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +-{ +- struct pt_regs *regs = eregs; +- /* Did already sync */ +- if (eregs == (struct pt_regs *)eregs->sp) +- ; +- /* Exception from user space */ +- else if (user_mode(eregs)) +- regs = task_pt_regs(current); +- /* Exception from kernel and interrupts are enabled. Move to +- kernel process stack. */ +- else if (eregs->flags & X86_EFLAGS_IF) +- regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); +- if (eregs != regs) +- *regs = *eregs; +- return regs; +-} +- +-/* runs on IST stack. */ +-asmlinkage void __kprobes do_debug(struct pt_regs * regs, +- unsigned long error_code) +-{ +- struct task_struct *tsk = current; +- unsigned long condition; +- siginfo_t info; +- +- trace_hardirqs_fixup(); +- +- get_debugreg(condition, 6); +- +- /* +- * The processor cleared BTF, so don't mark that we need it set. +- */ +- clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); +- tsk->thread.debugctlmsr = 0; +- +- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, +- SIGTRAP) == NOTIFY_STOP) +- return; +- +- preempt_conditional_sti(regs); +- +- /* Mask out spurious debug traps due to lazy DR7 setting */ +- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { +- if (!tsk->thread.debugreg7) +- goto clear_dr7; +- } +- +- tsk->thread.debugreg6 = condition; +- +- /* +- * Single-stepping through TF: make sure we ignore any events in +- * kernel space (but re-enable TF when returning to user mode). +- */ +- if (condition & DR_STEP) { +- if (!user_mode(regs)) +- goto clear_TF_reenable; +- } +- +- /* Ok, finally something we can handle */ +- tsk->thread.trap_no = 1; +- tsk->thread.error_code = error_code; +- info.si_signo = SIGTRAP; +- info.si_errno = 0; +- info.si_code = TRAP_BRKPT; +- info.si_addr = user_mode(regs) ? 
(void __user *)regs->ip : NULL; +- force_sig_info(SIGTRAP, &info, tsk); +- +-clear_dr7: +- set_debugreg(0, 7); +- preempt_conditional_cli(regs); +- return; +- +-clear_TF_reenable: +- set_tsk_thread_flag(tsk, TIF_SINGLESTEP); +- regs->flags &= ~X86_EFLAGS_TF; +- preempt_conditional_cli(regs); +- return; +-} +- +-static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) +-{ +- if (fixup_exception(regs)) +- return 1; +- +- notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); +- /* Illegal floating point operation in the kernel */ +- current->thread.trap_no = trapnr; +- die(str, regs, 0); +- return 0; +-} +- +-/* +- * Note that we play around with the 'TS' bit in an attempt to get +- * the correct behaviour even in the presence of the asynchronous +- * IRQ13 behaviour +- */ +-asmlinkage void do_coprocessor_error(struct pt_regs *regs) +-{ +- void __user *ip = (void __user *)(regs->ip); +- struct task_struct *task; +- siginfo_t info; +- unsigned short cwd, swd; +- +- conditional_sti(regs); +- if (!user_mode(regs) && +- kernel_math_error(regs, "kernel x87 math error", 16)) +- return; +- +- /* +- * Save the info for the exception handler and clear the error. +- */ +- task = current; +- save_init_fpu(task); +- task->thread.trap_no = 16; +- task->thread.error_code = 0; +- info.si_signo = SIGFPE; +- info.si_errno = 0; +- info.si_code = __SI_FAULT; +- info.si_addr = ip; +- /* +- * (~cwd & swd) will mask out exceptions that are not set to unmasked +- * status. 0x3f is the exception bits in these regs, 0x200 is the +- * C1 reg you need in case of a stack fault, 0x040 is the stack +- * fault bit. We should only be taking one exception at a time, +- * so if this combination doesn't produce any single exception, +- * then we have a bad program that isn't synchronizing its FPU usage +- * and it will suffer the consequences since we won't be able to +- * fully reproduce the context of the exception +- */ +- cwd = get_fpu_cwd(task); +- swd = get_fpu_swd(task); +- switch (swd & ~cwd & 0x3f) { +- case 0x000: /* No unmasked exception */ +- default: /* Multiple exceptions */ +- break; +- case 0x001: /* Invalid Op */ +- /* +- * swd & 0x240 == 0x040: Stack Underflow +- * swd & 0x240 == 0x240: Stack Overflow +- * User must clear the SF bit (0x40) if set +- */ +- info.si_code = FPE_FLTINV; +- break; +- case 0x002: /* Denormalize */ +- case 0x010: /* Underflow */ +- info.si_code = FPE_FLTUND; +- break; +- case 0x004: /* Zero Divide */ +- info.si_code = FPE_FLTDIV; +- break; +- case 0x008: /* Overflow */ +- info.si_code = FPE_FLTOVF; +- break; +- case 0x020: /* Precision */ +- info.si_code = FPE_FLTRES; +- break; +- } +- force_sig_info(SIGFPE, &info, task); +-} +- +-asmlinkage void bad_intr(void) +-{ +- printk("bad interrupt"); +-} +- +-asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) +-{ +- void __user *ip = (void __user *)(regs->ip); +- struct task_struct *task; +- siginfo_t info; +- unsigned short mxcsr; +- +- conditional_sti(regs); +- if (!user_mode(regs) && +- kernel_math_error(regs, "kernel simd math error", 19)) +- return; +- +- /* +- * Save the info for the exception handler and clear the error. +- */ +- task = current; +- save_init_fpu(task); +- task->thread.trap_no = 19; +- task->thread.error_code = 0; +- info.si_signo = SIGFPE; +- info.si_errno = 0; +- info.si_code = __SI_FAULT; +- info.si_addr = ip; +- /* +- * The SIMD FPU exceptions are handled a little differently, as there +- * is only a single status/control register. 
Thus, to determine which +- * unmasked exception was caught we must mask the exception mask bits +- * at 0x1f80, and then use these to mask the exception bits at 0x3f. +- */ +- mxcsr = get_fpu_mxcsr(task); +- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { +- case 0x000: +- default: +- break; +- case 0x001: /* Invalid Op */ +- info.si_code = FPE_FLTINV; +- break; +- case 0x002: /* Denormalize */ +- case 0x010: /* Underflow */ +- info.si_code = FPE_FLTUND; +- break; +- case 0x004: /* Zero Divide */ +- info.si_code = FPE_FLTDIV; +- break; +- case 0x008: /* Overflow */ +- info.si_code = FPE_FLTOVF; +- break; +- case 0x020: /* Precision */ +- info.si_code = FPE_FLTRES; +- break; +- } +- force_sig_info(SIGFPE, &info, task); +-} +- +-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) +-{ +-} +- +-#if 0 +-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) +-{ +-} +-#endif +- +-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) +-{ +-} +- +-/* +- * 'math_state_restore()' saves the current math information in the +- * old math state array, and gets the new ones from the current task +- * +- * Careful.. There are problems with IBM-designed IRQ13 behaviour. +- * Don't touch unless you *really* know how it works. +- */ +-asmlinkage void math_state_restore(void) +-{ +- struct task_struct *me = current; +- +- if (!used_math()) { +- local_irq_enable(); +- /* +- * does a slab alloc which can sleep +- */ +- if (init_fpu(me)) { +- /* +- * ran out of memory! +- */ +- do_group_exit(SIGKILL); +- return; +- } +- local_irq_disable(); +- } +- +- /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */ +- +- /* +- * Paranoid restore. send a SIGSEGV if we fail to restore the state. +- */ +- if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { +- stts(); +- force_sig(SIGSEGV, me); +- return; +- } +- task_thread_info(me)->status |= TS_USEDFPU; +- me->fpu_counter++; +-} +-EXPORT_SYMBOL_GPL(math_state_restore); +- +- +-/* +- * NB. All these are "interrupt gates" (i.e. events_mask is set) because we +- * specify |4 in the second field. 
+- */ +-static const trap_info_t __cpuinitconst trap_table[] = { +- { 0, 0|4, __KERNEL_CS, (unsigned long)divide_error }, +- { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, +- { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, +- { 4, 3|4, __KERNEL_CS, (unsigned long)overflow }, +- { 5, 0|4, __KERNEL_CS, (unsigned long)bounds }, +- { 6, 0|4, __KERNEL_CS, (unsigned long)invalid_op }, +- { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available }, +- { 9, 0|4, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun}, +- { 10, 0|4, __KERNEL_CS, (unsigned long)invalid_TSS }, +- { 11, 0|4, __KERNEL_CS, (unsigned long)segment_not_present }, +- { 12, 0|4, __KERNEL_CS, (unsigned long)stack_segment }, +- { 13, 0|4, __KERNEL_CS, (unsigned long)general_protection }, +- { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault }, +- { 15, 0|4, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, +- { 16, 0|4, __KERNEL_CS, (unsigned long)coprocessor_error }, +- { 17, 0|4, __KERNEL_CS, (unsigned long)alignment_check }, +-#ifdef CONFIG_X86_MCE +- { 18, 0|4, __KERNEL_CS, (unsigned long)machine_check }, +-#endif +- { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, +-#ifdef CONFIG_IA32_EMULATION +- { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall}, +-#endif +- { 0, 0, 0, 0 } +-}; +- +-void __init trap_init(void) +-{ +- int ret; +- +- ret = HYPERVISOR_set_trap_table(trap_table); +- if (ret) +- printk("HYPERVISOR_set_trap_table failed: error %d\n", ret); +- /* +- * initialize the per thread extended state: +- */ +- init_thread_xstate(); +- /* +- * Should be a barrier for any external CPU state: +- */ +- cpu_init(); +-} +- +-void __cpuinit smp_trap_init(trap_info_t *trap_ctxt) +-{ +- const trap_info_t *t = trap_table; +- +- for (t = trap_table; t->address; t++) { +- trap_ctxt[t->vector].flags = t->flags; +- trap_ctxt[t->vector].cs = t->cs; +- trap_ctxt[t->vector].address = t->address; +- } +-} +- +-static int __init oops_setup(char *s) +-{ +- if (!s) +- return -EINVAL; +- if (!strcmp(s, "panic")) +- panic_on_oops = 1; +- return 0; +-} +-early_param("oops", oops_setup); +- +-static int __init kstack_setup(char *s) +-{ +- if (!s) +- return -EINVAL; +- kstack_depth_to_print = simple_strtoul(s, NULL, 0); +- return 0; +-} +-early_param("kstack", kstack_setup); +- +-static int __init code_bytes_setup(char *s) +-{ +- code_bytes = simple_strtoul(s, NULL, 0); +- if (code_bytes > 8192) +- code_bytes = 8192; +- +- return 1; +-} +-__setup("code_bytes=", code_bytes_setup); +--- head-2010-01-18.orig/arch/x86/mm/fault-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/fault-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + /* + * Page fault error code bits +@@ -370,8 +371,6 @@ static int is_errata100(struct pt_regs * + return 0; + } + +-void do_invalid_op(struct pt_regs *, unsigned long); +- + static int is_f00f_bug(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_F00F_BUG +@@ -609,11 +608,6 @@ void __kprobes do_page_fault(struct pt_r + unsigned long flags; + #endif + +- /* +- * We can fault from pretty much anywhere, with unknown IRQ state. +- */ +- trace_hardirqs_fixup(); +- + /* Set the "privileged fault" bit to something sane. */ + if (user_mode_vm(regs)) + error_code |= PF_USER; +@@ -677,24 +671,23 @@ void __kprobes do_page_fault(struct pt_r + } + + +-#ifdef CONFIG_X86_32 +- /* It's safe to allow irq's after cr2 has been saved and the vmalloc +- fault has been handled. 
*/ +- if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK)) +- local_irq_enable(); +- + /* +- * If we're in an interrupt, have no user context or are running in an +- * atomic region then we must not take the fault. ++ * It's safe to allow irq's after cr2 has been saved and the ++ * vmalloc fault has been handled. ++ * ++ * User-mode registers count as a user access even for any ++ * potential system fault or CPU buglet. + */ +- if (in_atomic() || !mm) +- goto bad_area_nosemaphore; +-#else /* CONFIG_X86_64 */ +- if (likely(regs->flags & X86_EFLAGS_IF)) ++ if (user_mode_vm(regs)) { ++ local_irq_enable(); ++ error_code |= PF_USER; ++ } else if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); + ++#ifdef CONFIG_X86_64 + if (unlikely(error_code & PF_RSVD)) + pgtable_bad(address, regs, error_code); ++#endif + + /* + * If we're in an interrupt, have no user context or are running in an +@@ -703,15 +696,9 @@ void __kprobes do_page_fault(struct pt_r + if (unlikely(in_atomic() || !mm)) + goto bad_area_nosemaphore; + +- /* +- * User-mode registers count as a user access even for any +- * potential system fault or CPU buglet. +- */ +- if (user_mode_vm(regs)) +- error_code |= PF_USER; + again: +-#endif +- /* When running in the kernel we expect faults to occur only to ++ /* ++ * When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an + * erroneous fault occurring in a code path which already holds mmap_sem +@@ -774,9 +761,6 @@ good_area: + goto bad_area; + } + +-#ifdef CONFIG_X86_32 +-survive: +-#endif + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -911,12 +895,11 @@ out_of_memory: + up_read(&mm->mmap_sem); + if (is_global_init(tsk)) { + yield(); +-#ifdef CONFIG_X86_32 +- down_read(&mm->mmap_sem); +- goto survive; +-#else ++ /* ++ * Re-lookup the vma - in theory the vma tree might ++ * have changed: ++ */ + goto again; +-#endif + } + + printk("VM: killing process %s\n", tsk->comm); +@@ -946,14 +929,15 @@ LIST_HEAD(pgd_list); + + void vmalloc_sync_all(void) + { +-#ifdef CONFIG_X86_32 +- unsigned long address = VMALLOC_START & PGDIR_MASK; ++ unsigned long address; + ++#ifdef CONFIG_X86_32 + if (SHARED_KERNEL_PMD) + return; + +- BUILD_BUG_ON(TASK_SIZE & ~PMD_MASK); +- for (; address < hypervisor_virt_start; address += PMD_SIZE) { ++ for (address = VMALLOC_START & PMD_MASK; ++ address >= TASK_SIZE && address < FIXADDR_TOP; ++ address += PMD_SIZE) { + unsigned long flags; + struct page *page; + +@@ -966,10 +950,8 @@ void vmalloc_sync_all(void) + spin_unlock_irqrestore(&pgd_lock, flags); + } + #else /* CONFIG_X86_64 */ +- unsigned long start = VMALLOC_START & PGDIR_MASK; +- unsigned long address; +- +- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { ++ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; ++ address += PGDIR_SIZE) { + const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; + struct page *page; +--- head-2010-01-18.orig/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/highmem_32-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -137,6 +137,7 @@ void *kmap_atomic_pfn(unsigned long pfn, + + return (void*) vaddr; + } ++EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ + + struct page *kmap_atomic_to_page(void *ptr) + { +--- 
head-2010-01-18.orig/arch/x86/mm/init_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_32-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -34,6 +34,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -51,6 +52,7 @@ + #include + #include + #include ++#include + + unsigned int __VMALLOC_RESERVE = 128 << 20; + +@@ -206,11 +208,32 @@ static void __init kernel_physical_mappi + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; +- unsigned pages_2m = 0, pages_4k = 0; ++ unsigned pages_2m, pages_4k; ++ int mapping_iter; + +- if (!cpu_has_pse) ++ /* ++ * First iteration will setup identity mapping using large/small pages ++ * based on use_pse, with other attributes same as set by ++ * the early code in head_32.S ++ * ++ * Second iteration will setup the appropriate attributes (NX, GLOBAL..) ++ * as desired for the kernel identity mapping. ++ * ++ * This two pass mechanism conforms to the TLB app note which says: ++ * ++ * "Software should not write to a paging-structure entry in a way ++ * that would change, for any linear address, both the page size ++ * and either the page frame or attributes." ++ */ ++ mapping_iter = 1; ++ ++ if (!cpu_has_pse) { + use_pse = 0; ++ mapping_iter = 0; ++ } + ++repeat: ++ pages_2m = pages_4k = 0; + pfn = start_pfn; + pgd_idx = pgd_index((pfn<= xen_start_info->nr_pages || pte_present(*pte)) +@@ -279,12 +317,34 @@ static void __init kernel_physical_mappi + prot = PAGE_KERNEL_EXEC; + + pages_4k++; +- set_pte(pte, pfn_pte(pfn, prot)); ++ if (mapping_iter == 1) ++ set_pte(pte, pfn_pte(pfn, init_prot)); ++ else ++ set_pte(pte, pfn_pte(pfn, prot)); + } + } + } +- update_page_count(PG_LEVEL_2M, pages_2m); +- update_page_count(PG_LEVEL_4K, pages_4k); ++ if (mapping_iter <= 1) { ++ /* ++ * update direct mapping page count only in the first ++ * iteration. ++ */ ++ update_page_count(PG_LEVEL_2M, pages_2m); ++ update_page_count(PG_LEVEL_4K, pages_4k); ++ } ++ if (mapping_iter == 1) { ++ /* ++ * local global flush tlb, which will flush the previous ++ * mappings present in both small and large page TLB's. ++ */ ++ __flush_tlb_all(); ++ ++ /* ++ * Second iteration will set the actual desired PTE attributes. 
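The two-pass scheme the comment above describes boils down to: write every entry with the boot-time attributes first, flush the TLB once globally, then rewrite the same entries with the final attributes, so that no single update changes page size and attributes together. A toy skeleton of that control flow in the spirit of kernel_physical_mapping_init(); set_entry semantics, the attribute constants and the stub flush are all invented for illustration:

    #include <stdio.h>

    #define NPAGES     8
    #define ATTR_BOOT  0x1  /* attributes inherited from early boot code */
    #define ATTR_FINAL 0x2  /* desired final attributes (NX, GLOBAL, ...) */

    static unsigned int pgtable[NPAGES];    /* toy page table */

    static void flush_tlb_all_stub(void)
    {
            puts("global TLB flush between the two passes");
    }

    /*
     * Pass 1 writes boot-time attributes, then one global flush drops
     * any stale large/small-page translations, then pass 2 rewrites
     * the final attributes; size and attributes never change at once.
     */
    static void map_two_pass(void)
    {
            int mapping_iter = 1, i;

    repeat:
            for (i = 0; i < NPAGES; i++)
                    pgtable[i] = (mapping_iter == 1) ? ATTR_BOOT : ATTR_FINAL;

            if (mapping_iter == 1) {
                    flush_tlb_all_stub();
                    mapping_iter = 2;
                    goto repeat;
            }
    }

    int main(void)
    {
            map_two_pass();
            printf("entry 0 attributes: %#x\n", pgtable[0]); /* 0x2 */
            return 0;
    }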
++ */ ++ mapping_iter = 2; ++ goto repeat; ++ } + } + + /* +@@ -306,7 +366,6 @@ int devmem_is_allowed(unsigned long page + return 0; + } + +-#ifdef CONFIG_HIGHMEM + pte_t *kmap_pte; + pgprot_t kmap_prot; + +@@ -329,6 +388,7 @@ static void __init kmap_init(void) + kmap_prot = PAGE_KERNEL; + } + ++#ifdef CONFIG_HIGHMEM + static void __init permanent_kmaps_init(pgd_t *pgd_base) + { + unsigned long vaddr; +@@ -416,7 +476,6 @@ static void __init set_highmem_pages_ini + #endif /* !CONFIG_NUMA */ + + #else +-# define kmap_init() do { } while (0) + # define permanent_kmaps_init(pgd_base) do { } while (0) + # define set_highmem_pages_init() do { } while (0) + #endif /* CONFIG_HIGHMEM */ +@@ -775,7 +834,7 @@ static unsigned long __init extend_init_ + return start_pfn; + } + +-static void __init find_early_table_space(unsigned long end) ++static void __init find_early_table_space(unsigned long end, int use_pse) + { + unsigned long puds, pmds, ptes, tables; + +@@ -785,7 +844,7 @@ static void __init find_early_table_spac + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); + +- if (cpu_has_pse) { ++ if (use_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +@@ -818,12 +877,22 @@ unsigned long __init_refok init_memory_m + pgd_t *pgd_base = swapper_pg_dir; + unsigned long start_pfn, end_pfn; + unsigned long big_page_start; ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ /* ++ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. ++ * This will simplify cpa(), which otherwise needs to support splitting ++ * large pages into small in interrupt context, etc. ++ */ ++ int use_pse = 0; ++#else ++ int use_pse = cpu_has_pse; ++#endif + + /* + * Find space for the kernel direct mapping tables. + */ + if (!after_init_bootmem) +- find_early_table_space(end); ++ find_early_table_space(end, use_pse); + + #ifdef CONFIG_X86_PAE + set_nx(); +@@ -869,7 +938,7 @@ unsigned long __init_refok init_memory_m + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, +- cpu_has_pse); ++ use_pse); + + /* tail is not big page alignment ? */ + start_pfn = end_pfn; +@@ -954,6 +1023,8 @@ void __init mem_init(void) + + pci_iommu_alloc(); + ++ start_periodic_check_for_corruption(); ++ + #ifdef CONFIG_FLATMEM + BUG_ON(!mem_map); + #endif +@@ -1038,7 +1109,6 @@ void __init mem_init(void) + if (boot_cpu_data.wp_works_ok < 0) + test_wp_bit(); + +- cpa_init(); + save_pg_dir(); + zap_low_mappings(); + +--- head-2010-01-18.orig/arch/x86/mm/init_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/init_64-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -34,6 +34,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -157,6 +158,62 @@ static unsigned long __meminitdata table + static unsigned long __meminitdata table_cur; + static unsigned long __meminitdata table_top; + ++pteval_t __supported_pte_mask __read_mostly = ~0UL; ++EXPORT_SYMBOL_GPL(__supported_pte_mask); ++ ++static int do_not_nx __cpuinitdata; ++ ++/* ++ * noexec=on|off ++ * Control non-executable mappings for 64-bit processes. 
++ * ++ * on Enable (default) ++ * off Disable ++ */ ++static int __init nonx_setup(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ if (!strncmp(str, "on", 2)) { ++ __supported_pte_mask |= _PAGE_NX; ++ do_not_nx = 0; ++ } else if (!strncmp(str, "off", 3)) { ++ do_not_nx = 1; ++ __supported_pte_mask &= ~_PAGE_NX; ++ } ++ return 0; ++} ++early_param("noexec", nonx_setup); ++ ++void __cpuinit check_efer(void) ++{ ++ unsigned long efer; ++ ++ rdmsrl(MSR_EFER, efer); ++ if (!(efer & EFER_NX) || do_not_nx) ++ __supported_pte_mask &= ~_PAGE_NX; ++} ++ ++int force_personality32; ++ ++/* ++ * noexec32=on|off ++ * Control non executable heap for 32bit processes. ++ * To control the stack too use noexec=off ++ * ++ * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) ++ * off PROT_READ implies PROT_EXEC ++ */ ++static int __init nonx32_setup(char *str) ++{ ++ if (!strcmp(str, "on")) ++ force_personality32 &= ~READ_IMPLIES_EXEC; ++ else if (!strcmp(str, "off")) ++ force_personality32 |= READ_IMPLIES_EXEC; ++ return 1; ++} ++__setup("noexec32=", nonx32_setup); ++ + /* + * NOTE: This function is marked __ref because it calls __init function + * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. +@@ -214,14 +271,6 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + } + + pte = pte_offset_kernel(pmd, vaddr); +- if (!pte_none(*pte) && __pte_val(new_pte) && +-#ifdef CONFIG_ACPI +- /* __acpi_map_table() fails to properly call clear_fixmap() */ +- (vaddr < __fix_to_virt(FIX_ACPI_END) || +- vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) && +-#endif +- __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask)) +- pte_ERROR(*pte); + set_pte(pte, new_pte); + + /* +@@ -306,7 +355,7 @@ void __init init_extra_mapping_uc(unsign + void __init cleanup_highmap(void) + { + unsigned long vaddr = __START_KERNEL_map; +- unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; ++ unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; + pmd_t *pmd = level2_kernel_pgt; + pmd_t *last_pmd = pmd + PTRS_PER_PMD; + +@@ -336,7 +385,7 @@ static __ref void *alloc_low_page(unsign + if (pfn >= table_top) + panic("alloc_low_page: ran out of memory"); + +- adr = early_ioremap(pfn_to_mfn(pfn) * PAGE_SIZE, PAGE_SIZE); ++ adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); + memset(adr, 0, PAGE_SIZE); + *phys = pfn * PAGE_SIZE; + return adr; +@@ -382,7 +431,8 @@ static inline int __meminit make_readonl + } + + static unsigned long __meminit +-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) ++phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, ++ pgprot_t prot) + { + unsigned pages = 0; + unsigned long last_map_addr = end; +@@ -391,49 +441,58 @@ phys_pte_init(pte_t *pte_page, unsigned + pte_t *pte = pte_page + pte_index(addr); + + for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { +- unsigned long pteval = addr | __PAGE_KERNEL; ++ unsigned long pteval = addr | pgprot_val(prot); + + if (addr >= end || + (!after_bootmem && + (addr >> PAGE_SHIFT) >= xen_start_info->nr_pages)) + break; + +- if (__pte_val(*pte)) ++ /* ++ * We will re-use the existing mapping. ++ * Xen for example has some special requirements, like mapping ++ * pagetable pages as RO. So assume someone who pre-setup ++ * these mappings are more intelligent. 
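The reason __supported_pte_mask, introduced a little above, works as a single global switch is that every PTE value is filtered through it on the way into the page table; phys_pte_init() below stores __pte(pteval & __supported_pte_mask). A toy model of that filtering, assuming 64-bit longs; the bit values and make_pte() are invented for illustration:

    #include <stdio.h>

    #define PAGE_PRESENT 0x001UL
    #define PAGE_RW      0x002UL
    #define PAGE_NX      (1UL << 63)        /* stand-in for _PAGE_NX */

    static unsigned long supported_pte_mask = ~0UL;

    /* Models __pte(pteval & __supported_pte_mask) in phys_pte_init(). */
    static unsigned long make_pte(unsigned long paddr, unsigned long prot)
    {
            return (paddr | prot) & supported_pte_mask;
    }

    int main(void)
    {
            unsigned long prot = PAGE_PRESENT | PAGE_RW | PAGE_NX;

            printf("pte = %#lx\n", make_pte(0x1000, prot)); /* NX kept */

            /* what check_efer() does when EFER.NX is absent or noexec=off: */
            supported_pte_mask &= ~PAGE_NX;
            printf("pte = %#lx\n", make_pte(0x1000, prot)); /* NX stripped */
            return 0;
    }

Clearing the bit once at boot, as nonx_setup() and check_efer() do above, is therefore enough to strip NX from every mapping created afterwards.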
++ */ ++ if (__pte_val(*pte)) { ++ pages++; + continue; ++ } + + if (make_readonly(addr)) + pteval &= ~_PAGE_RW; + if (0) + printk(" pte=%p addr=%lx pte=%016lx\n", + pte, addr, pteval); ++ pages++; + if (!after_bootmem) + *pte = __pte(pteval & __supported_pte_mask); + else + set_pte(pte, __pte(pteval & __supported_pte_mask)); + last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; +- pages++; + } ++ + update_page_count(PG_LEVEL_4K, pages); + + return last_map_addr; + } + + static unsigned long __meminit +-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) ++phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end, ++ pgprot_t prot) + { + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + + BUG_ON(!max_pfn_mapped); +- return phys_pte_init(pte, address, end); ++ return phys_pte_init(pte, address, end, prot); + } + + static unsigned long __meminit + phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, +- unsigned long page_size_mask) ++ unsigned long page_size_mask, pgprot_t prot) + { + unsigned long pages = 0; + unsigned long last_map_addr = end; +- unsigned long start = address; + + int i = pmd_index(address); + +@@ -441,6 +500,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned + unsigned long pte_phys; + pmd_t *pmd = pmd_page + pmd_index(address); + pte_t *pte; ++ pgprot_t new_prot = prot; + + if (address >= end) + break; +@@ -449,27 +509,42 @@ phys_pmd_init(pmd_t *pmd_page, unsigned + if (!pmd_large(*pmd)) { + spin_lock(&init_mm.page_table_lock); + last_map_addr = phys_pte_update(pmd, address, +- end); ++ end, prot); + spin_unlock(&init_mm.page_table_lock); ++ continue; + } +- /* Count entries we're using from level2_ident_pgt */ +- if (start == 0) ++ /* ++ * If we are ok with PG_LEVEL_2M mapping, then we will ++ * use the existing mapping, ++ * ++ * Otherwise, we will split the large page mapping but ++ * use the same existing protection bits except for ++ * large page, so that we don't violate Intel's TLB ++ * Application note (317080) which says, while changing ++ * the page sizes, new and old translations should ++ * not differ with respect to page frame and ++ * attributes. 
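The preservation rule the comment above spells out, reuse or split but never let new translations differ from the old ones in anything except page size, reduces to deriving the 4k protections from the large entry with only the PSE bit cleared, which is what new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)) does. A standalone sketch with an invented bit layout:

    #include <stdio.h>

    #define PAGE_PRESENT 0x001UL
    #define PAGE_RW      0x002UL
    #define PAGE_PSE     0x080UL    /* large-page bit, stand-in for _PAGE_PSE */
    #define PAGE_GLOBAL  0x100UL
    #define FLAGS_MASK   0xfffUL    /* low attribute bits of an entry */

    /*
     * Keep every attribute bit of the large entry and clear only the
     * page-size bit, so the 4k PTEs that replace a 2M entry differ
     * from it in size alone (per Intel's TLB application note).
     */
    static unsigned long prot_for_split(unsigned long large_entry)
    {
            return (large_entry & FLAGS_MASK) & ~PAGE_PSE;
    }

    int main(void)
    {
            unsigned long pmd_entry = 0x200000UL | PAGE_PRESENT | PAGE_RW |
                                      PAGE_PSE | PAGE_GLOBAL;

            printf("inherited prot = %#lx\n", prot_for_split(pmd_entry));
            /* 0x103: PRESENT|RW|GLOBAL survive, PSE is gone */
            return 0;
    }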
++ */ ++ if (page_size_mask & (1 << PG_LEVEL_2M)) { + pages++; +- continue; ++ continue; ++ } ++ new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); + } + + if (page_size_mask & (1<<PG_LEVEL_2M)) { + pages++; + spin_lock(&init_mm.page_table_lock); + set_pte((pte_t *)pmd, +- pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); ++ pfn_pte(address >> PAGE_SHIFT, ++ __pgprot(pgprot_val(prot) | _PAGE_PSE))); + spin_unlock(&init_mm.page_table_lock); + last_map_addr = (address & PMD_MASK) + PMD_SIZE; + continue; + } + + pte = alloc_low_page(&pte_phys); +- last_map_addr = phys_pte_init(pte, address, end); ++ last_map_addr = phys_pte_init(pte, address, end, new_prot); + unmap_low_page(pte); + + if (!after_bootmem) { +@@ -490,13 +565,13 @@ phys_pmd_init(pmd_t *pmd_page, unsigned + + static unsigned long __meminit + phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, +- unsigned long page_size_mask) ++ unsigned long page_size_mask, pgprot_t prot) + { + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long last_map_addr; + + BUG_ON(!max_pfn_mapped); +- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); ++ last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot); + __flush_tlb_all(); + return last_map_addr; + } +@@ -513,15 +588,34 @@ phys_pud_init(pud_t *pud_page, unsigned + unsigned long pmd_phys; + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; ++ pgprot_t prot = PAGE_KERNEL; + + if (addr >= end) + break; + + if (__pud_val(*pud)) { +- if (!pud_large(*pud)) ++ if (!pud_large(*pud)) { + last_map_addr = phys_pmd_update(pud, addr, end, +- page_size_mask); +- continue; ++ page_size_mask, prot); ++ continue; ++ } ++ /* ++ * If we are ok with PG_LEVEL_1G mapping, then we will ++ * use the existing mapping. ++ * ++ * Otherwise, we will split the gbpage mapping but use ++ * the same existing protection bits except for large ++ * page, so that we don't violate Intel's TLB ++ * Application note (317080) which says, while changing ++ * the page sizes, new and old translations should ++ * not differ with respect to page frame and ++ * attributes. ++ */ ++ if (page_size_mask & (1 << PG_LEVEL_1G)) { ++ pages++; ++ continue; ++ } ++ prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); + } + + if (page_size_mask & (1<> PAGE_SHIFT); + +@@ -825,11 +923,13 @@ unsigned long __init_refok init_memory_m + unsigned long last_map_addr = 0; + unsigned long page_size_mask = 0; + unsigned long start_pfn, end_pfn; ++ unsigned long pos; + + struct map_range mr[NR_RANGE_MR]; + int nr_range, i; ++ int use_pse, use_gbpages; + +- printk(KERN_INFO "init_memory_mapping\n"); ++ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); + + /* + * Find space for the kernel direct mapping tables. +@@ -841,9 +941,21 @@ unsigned long __init_refok init_memory_m + if (!after_bootmem) + init_gbpages(); + +- if (direct_gbpages) ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ /* ++ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. ++ * This will simplify cpa(), which otherwise needs to support splitting ++ * large pages into small in interrupt context, etc. 
++ */ ++ use_pse = use_gbpages = 0; ++#else ++ use_pse = cpu_has_pse; ++ use_gbpages = direct_gbpages; ++#endif ++ ++ if (use_gbpages) + page_size_mask |= 1 << PG_LEVEL_1G; +- if (cpu_has_pse) ++ if (use_pse) + page_size_mask |= 1 << PG_LEVEL_2M; + + memset(mr, 0, sizeof(mr)); +@@ -851,35 +963,50 @@ unsigned long __init_refok init_memory_m + + /* head if not big page alignment ?*/ + start_pfn = start >> PAGE_SHIFT; +- end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) ++ pos = start_pfn << PAGE_SHIFT; ++ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); ++ pos = end_pfn << PAGE_SHIFT; ++ } + + /* big page (2M) range*/ +- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) ++ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +- end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) ++ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); +- if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) +- end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, +- page_size_mask & (1< ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) ++ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, ++ page_size_mask & (1<>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, ++ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) ++ << (PUD_SHIFT - PAGE_SHIFT); ++ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, +- page_size_mask & (1<>PMD_SHIFT) ++ << (PMD_SHIFT - PAGE_SHIFT); ++ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, ++ page_size_mask & (1<>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + +@@ -904,7 +1031,7 @@ unsigned long __init_refok init_memory_m + (mr[i].page_size_mask & (1<> PAGE_SHIFT; + int ret; + +- last_mapped_pfn = init_memory_mapping(start, start + size-1); ++ last_mapped_pfn = init_memory_mapping(start, start + size); + if (last_mapped_pfn > max_pfn_mapped) + max_pfn_mapped = last_mapped_pfn; + + ret = __add_pages(zone, start_pfn, nr_pages); +- WARN_ON(1); ++ WARN_ON_ONCE(ret); + + return ret; + } +@@ -1062,8 +1189,11 @@ static struct kcore_list kcore_mem, kcor + void __init mem_init(void) + { + long codesize, reservedpages, datasize, initsize; ++ unsigned long absent_pages; + unsigned long pfn; + ++ start_periodic_check_for_corruption(); ++ + pci_iommu_alloc(); + + /* clear_bss() already clear the empty_zero_page */ +@@ -1076,13 +1206,15 @@ void __init mem_init(void) + #else + totalram_pages = free_all_bootmem(); + #endif ++ + /* XEN: init pages outside initial allocation. 
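The head/body/tail carving that the init_memory_mapping() hunks above perform is pure alignment arithmetic: only the 2M-aligned (or 1G-aligned) middle of a region may use large pages, and unaligned ends fall back to 4k. A simplified two-level version; save_range() and the sample pfn values are illustrative stand-ins for save_mr() and the kernel's map_range bookkeeping:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PMD_SHIFT  21                                /* 2M pages */
    #define PMD_PAGES  (1UL << (PMD_SHIFT - PAGE_SHIFT)) /* 512 pfns */

    static void save_range(unsigned long s, unsigned long e, int big)
    {
            printf("pfns [%#lx, %#lx) -> %s pages\n", s, e, big ? "2M" : "4k");
    }

    /* Simplified head/body/tail split in the spirit of the hunks above. */
    static void split_ranges(unsigned long start_pfn, unsigned long end_pfn)
    {
            unsigned long pos = start_pfn;
            unsigned long head_end = (pos + PMD_PAGES - 1) & ~(PMD_PAGES - 1);
            unsigned long body_end = end_pfn & ~(PMD_PAGES - 1);

            if (head_end > end_pfn)
                    head_end = end_pfn;
            if (pos < head_end) {           /* unaligned head */
                    save_range(pos, head_end, 0);
                    pos = head_end;
            }
            if (pos < body_end) {           /* 2M-aligned body */
                    save_range(pos, body_end, 1);
                    pos = body_end;
            }
            if (pos < end_pfn)              /* unaligned tail */
                    save_range(pos, end_pfn, 0);
    }

    int main(void)
    {
            split_ranges(0x1f3, 0x1234);    /* unaligned at both ends */
            return 0;
    }

The "if (start_pfn < end_pfn)" guards added by the patch serve exactly the same purpose as the three conditionals here: an empty sub-range must not be recorded.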
*/ + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + ClearPageReserved(pfn_to_page(pfn)); + init_page_count(pfn_to_page(pfn)); + } +- reservedpages = max_pfn - totalram_pages - +- absent_pages_in_range(0, max_pfn); ++ ++ absent_pages = absent_pages_in_range(0, max_pfn); ++ reservedpages = max_pfn - totalram_pages - absent_pages; + after_bootmem = 1; + + codesize = (unsigned long) &_etext - (unsigned long) &_text; +@@ -1099,15 +1231,14 @@ void __init mem_init(void) + VSYSCALL_END - VSYSCALL_START); + + printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " +- "%ldk reserved, %ldk data, %ldk init)\n", ++ "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + max_pfn << (PAGE_SHIFT-10), + codesize >> 10, ++ absent_pages << (PAGE_SHIFT-10), + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10); +- +- cpa_init(); + } + + void free_init_pages(char *what, unsigned long begin, unsigned long end) +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/mm/iomap_32-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright © 2008 Ingo Molnar ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++ ++#include ++#include ++#include ++ ++/* Map 'mfn' using fixed map 'type' and protections 'prot' ++ */ ++void * ++iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot) ++{ ++ enum fixed_addresses idx; ++ unsigned long vaddr; ++ ++ pagefault_disable(); ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++ pgprot_val(prot) |= _PAGE_IOMAP; ++ set_pte_at(&init_mm, vaddr, kmap_pte-idx, pfn_pte_ma(mfn, prot)); ++ /*arch_flush_lazy_mmu_mode()*/; ++ ++ return (void*) vaddr; ++} ++EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); ++ ++void ++iounmap_atomic(void *kvaddr, enum km_type type) ++{ ++ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; ++ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); ++ ++ /* ++ * Force other mappings to Oops if they'll try to access this pte ++ * without first remap it. Keeping stale mappings around is a bad idea ++ * also, in case the page changes cacheability attributes or becomes ++ * a protected page in a hypervisor. 
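iomap_atomic_prot_pfn() above never allocates anything: the virtual address is fully determined by the KM type and the CPU id, which is what makes it safe in atomic context. The index arithmetic, modeled standalone; KM_TYPE_NR, FIXMAP_TOP and the layout are invented numbers, not the kernel's:

    #include <stdio.h>

    #define KM_TYPE_NR 20           /* slots per CPU; invented for the sketch */
    #define PAGE_SIZE  4096UL
    #define FIXMAP_TOP 0xfffff000UL /* invented top of the fixmap region */

    /* Models idx = type + KM_TYPE_NR * smp_processor_id() above. */
    static unsigned long kmap_slot_vaddr(int type, int cpu)
    {
            int idx = type + KM_TYPE_NR * cpu;

            /* fixmap slots grow downwards from the top of the address space */
            return FIXMAP_TOP - (unsigned long)idx * PAGE_SIZE;
    }

    int main(void)
    {
            /* the same KM type yields a private window on each CPU */
            printf("cpu0: %#lx\n", kmap_slot_vaddr(5, 0));
            printf("cpu1: %#lx\n", kmap_slot_vaddr(5, 1));
            return 0;
    }

Because pagefault_disable() pins the task to its CPU for the duration, no one else can claim the same window before iounmap_atomic() clears it.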
++ */ ++ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) ++ kpte_clear_flush(kmap_pte-idx, vaddr); ++ ++ /*arch_flush_lazy_mmu_mode();*/ ++ pagefault_enable(); ++} ++EXPORT_SYMBOL_GPL(iounmap_atomic); +--- head-2010-01-18.orig/arch/x86/mm/ioremap-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/ioremap-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -25,20 +25,51 @@ + + #ifdef CONFIG_X86_64 + +-#ifndef CONFIG_XEN ++static inline int phys_addr_valid(unsigned long addr) ++{ ++ return addr < (1UL << boot_cpu_data.x86_phys_bits); ++} ++ ++#define phys_base 0 ++ + unsigned long __phys_addr(unsigned long x) + { +- if (x >= __START_KERNEL_map) +- return x - __START_KERNEL_map + phys_base; +- return x - PAGE_OFFSET; ++ if (x >= __START_KERNEL_map) { ++ x -= __START_KERNEL_map; ++ VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); ++ x += phys_base; ++ } else { ++ VIRTUAL_BUG_ON(x < PAGE_OFFSET); ++ x -= PAGE_OFFSET; ++ VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : ++ !phys_addr_valid(x)); ++ } ++ return x; + } + EXPORT_SYMBOL(__phys_addr); +-#endif + +-static inline int phys_addr_valid(unsigned long addr) ++bool __virt_addr_valid(unsigned long x) + { +- return addr < (1UL << boot_cpu_data.x86_phys_bits); ++ if (x >= __START_KERNEL_map) { ++ x -= __START_KERNEL_map; ++ if (x >= KERNEL_IMAGE_SIZE) ++ return false; ++ x += phys_base; ++ } else { ++ if (x < PAGE_OFFSET) ++ return false; ++ x -= PAGE_OFFSET; ++ if (system_state == SYSTEM_BOOTING ? ++ x > MAXMEM : !phys_addr_valid(x)) { ++ return false; ++ } ++ } ++ ++ return pfn_valid(x >> PAGE_SHIFT); + } ++EXPORT_SYMBOL(__virt_addr_valid); ++ ++#undef phys_base + + #else + +@@ -47,6 +78,28 @@ static inline int phys_addr_valid(unsign + return 1; + } + ++#ifdef CONFIG_DEBUG_VIRTUAL ++unsigned long __phys_addr(unsigned long x) ++{ ++ /* VMALLOC_* aren't constants; not available at the boot time */ ++ VIRTUAL_BUG_ON(x < PAGE_OFFSET); ++ VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && ++ is_vmalloc_addr((void *) x)); ++ return x - PAGE_OFFSET; ++} ++EXPORT_SYMBOL(__phys_addr); ++#endif ++ ++bool __virt_addr_valid(unsigned long x) ++{ ++ if (x < PAGE_OFFSET) ++ return false; ++ if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) ++ return false; ++ return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); ++} ++EXPORT_SYMBOL(__virt_addr_valid); ++ + #endif + + static int direct_remap_area_pte_fn(pte_t *pte, +@@ -103,7 +156,7 @@ static int __direct_remap_pfn_range(stru + * Fill in the machine address: PTE ptr is done later by + * apply_to_page_range(). + */ +- pgprot_val(prot) |= _PAGE_IO; ++ pgprot_val(prot) |= _PAGE_IOMAP; + v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot))); + + mfn++; +@@ -240,6 +293,25 @@ int page_is_ram(unsigned long pagenr) + return 0; + } + ++int pagerange_is_ram(unsigned long start, unsigned long end) ++{ ++ int ram_page = 0, not_rampage = 0; ++ unsigned long page_nr; ++ ++ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); ++ ++page_nr) { ++ if (page_is_ram(mfn_to_local_pfn(page_nr))) ++ ram_page = 1; ++ else ++ not_rampage = 1; ++ ++ if (ram_page == not_rampage) ++ return -1; ++ } ++ ++ return ram_page; ++} ++ + /* + * Fix up the linear direct mapping of the kernel to avoid cache attribute + * conflicts. +@@ -327,6 +399,12 @@ static void __iomem *__ioremap_caller(re + return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr); + + /* ++ * Check if the request spans more than any BAR in the iomem resource ++ * tree. 
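pagerange_is_ram() above compresses three answers into one int: 1 for all-RAM, 0 for no-RAM, and -1 for a mixed range, with the mixed case detected the moment both flags are set (they then compare equal). A self-contained model; page_is_ram_stub() and its backing map are hypothetical:

    #include <stdio.h>

    /* Hypothetical memory map: pages 0-3 are RAM, 4-7 are not. */
    static int page_is_ram_stub(unsigned long pfn)
    {
            return pfn < 4;
    }

    /* 1 = all RAM, 0 = none, -1 = mixed, as in pagerange_is_ram() above. */
    static int pfnrange_is_ram(unsigned long start_pfn, unsigned long end_pfn)
    {
            int ram = 0, not_ram = 0;
            unsigned long pfn;

            for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                    if (page_is_ram_stub(pfn))
                            ram = 1;
                    else
                            not_ram = 1;
                    if (ram == not_ram)     /* both set: mixed, bail early */
                            return -1;
            }
            return ram;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   pfnrange_is_ram(0, 4),   /*  1: all RAM */
                   pfnrange_is_ram(4, 8),   /*  0: no RAM  */
                   pfnrange_is_ram(2, 6));  /* -1: mixed   */
            return 0;
    }

The equality test only fires once both flags are 1, since after the first iteration exactly one of them is set; PAT uses the -1 result to refuse a reservation that straddles RAM and non-RAM.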
++ */ ++ WARN_ON(iomem_map_sanity_check(phys_addr, size)); ++ ++ /* + * Don't allow anybody to remap normal RAM that we're using.. + */ + for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) { +@@ -381,16 +459,16 @@ static void __iomem *__ioremap_caller(re + switch (prot_val) { + case _PAGE_CACHE_UC: + default: +- prot = PAGE_KERNEL_NOCACHE; ++ prot = PAGE_KERNEL_IO_NOCACHE; + break; + case _PAGE_CACHE_UC_MINUS: +- prot = PAGE_KERNEL_UC_MINUS; ++ prot = PAGE_KERNEL_IO_UC_MINUS; + break; + case _PAGE_CACHE_WC: +- prot = PAGE_KERNEL_WC; ++ prot = PAGE_KERNEL_IO_WC; + break; + case _PAGE_CACHE_WB: +- prot = PAGE_KERNEL; ++ prot = PAGE_KERNEL_IO; + break; + } + +@@ -490,7 +568,7 @@ static void __iomem *ioremap_default(res + unsigned long size) + { + unsigned long flags; +- void *ret; ++ void __iomem *ret; + int err; + + /* +@@ -502,11 +580,11 @@ static void __iomem *ioremap_default(res + if (err < 0) + return NULL; + +- ret = (void *) __ioremap_caller(phys_addr, size, flags, +- __builtin_return_address(0)); ++ ret = __ioremap_caller(phys_addr, size, flags, ++ __builtin_return_address(0)); + + free_memtype(phys_addr, phys_addr + size); +- return (void __iomem *)ret; ++ return ret; + } + #endif + +@@ -602,7 +680,7 @@ void unxlate_dev_mem_ptr(unsigned long p + } + #endif + +-int __initdata early_ioremap_debug; ++static int __initdata early_ioremap_debug; + + static int __init early_ioremap_debug_setup(char *str) + { +@@ -721,12 +799,12 @@ static void __init __early_set_fixmap(en + } + + static inline void __init early_set_fixmap(enum fixed_addresses idx, +- unsigned long phys) ++ unsigned long phys, pgprot_t prot) + { + if (after_paging_init) +- set_fixmap(idx, phys); ++ __set_fixmap(idx, phys, prot); + else +- __early_set_fixmap(idx, phys, PAGE_KERNEL); ++ __early_set_fixmap(idx, phys, prot); + } + + static inline void __init early_clear_fixmap(enum fixed_addresses idx) +@@ -737,16 +815,22 @@ static inline void __init early_clear_fi + __early_set_fixmap(idx, 0, __pgprot(0)); + } + +- +-int __initdata early_ioremap_nested; +- ++static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; ++static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; + static int __init check_early_ioremap_leak(void) + { +- if (!early_ioremap_nested) ++ int count = 0; ++ int i; ++ ++ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) ++ if (prev_map[i]) ++ count++; ++ ++ if (!count) + return 0; + WARN(1, KERN_WARNING + "Debug warning: early ioremap leak of %d areas detected.\n", +- early_ioremap_nested); ++ count); + printk(KERN_WARNING + "please boot with early_ioremap_debug and report the dmesg.\n"); + +@@ -754,18 +838,33 @@ static int __init check_early_ioremap_le + } + late_initcall(check_early_ioremap_leak); + +-void __init *early_ioremap(unsigned long phys_addr, unsigned long size) ++static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) + { + unsigned long offset, last_addr; +- unsigned int nrpages, nesting; ++ unsigned int nrpages; + enum fixed_addresses idx0, idx; ++ int i, slot; + + WARN_ON(system_state != SYSTEM_BOOTING); + +- nesting = early_ioremap_nested; ++ slot = -1; ++ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { ++ if (!prev_map[i]) { ++ slot = i; ++ break; ++ } ++ } ++ ++ if (slot < 0) { ++ printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", ++ phys_addr, size); ++ WARN_ON(1); ++ return NULL; ++ } ++ + if (early_ioremap_debug) { + printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", +- phys_addr, size, nesting); ++ phys_addr, size, slot); 
+ dump_stack(); + } + +@@ -776,17 +875,13 @@ void __init *early_ioremap(unsigned long + return NULL; + } + +- if (nesting >= FIX_BTMAPS_NESTING) { +- WARN_ON(1); +- return NULL; +- } +- early_ioremap_nested++; ++ prev_size[slot] = size; + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; +- size = PAGE_ALIGN(last_addr) - phys_addr; ++ size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. +@@ -800,10 +895,10 @@ void __init *early_ioremap(unsigned long + /* + * Ok, go for it.. + */ +- idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; ++ idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + idx = idx0; + while (nrpages > 0) { +- early_set_fixmap(idx, phys_addr); ++ early_set_fixmap(idx, phys_addr, prot); + phys_addr += PAGE_SIZE; + --idx; + --nrpages; +@@ -811,24 +906,55 @@ void __init *early_ioremap(unsigned long + if (early_ioremap_debug) + printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); + +- return (void *) (offset + fix_to_virt(idx0)); ++ prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); ++ return prev_map[slot]; + } + +-void __init early_iounmap(void *addr, unsigned long size) ++/* Remap an IO device */ ++void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) ++{ ++ return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); ++} ++ ++/* Remap memory */ ++void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) ++{ ++ return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL); ++} ++ ++void __init early_iounmap(void __iomem *addr, unsigned long size) + { + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; +- int nesting; ++ int i, slot; + +- nesting = --early_ioremap_nested; +- if (WARN_ON(nesting < 0)) ++ slot = -1; ++ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { ++ if (prev_map[i] == addr) { ++ slot = i; ++ break; ++ } ++ } ++ ++ if (slot < 0) { ++ printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", ++ addr, size); ++ WARN_ON(1); ++ return; ++ } ++ ++ if (prev_size[slot] != size) { ++ printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", ++ addr, size, slot, prev_size[slot]); ++ WARN_ON(1); + return; ++ } + + if (early_ioremap_debug) { + printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, +- size, nesting); ++ size, slot); + dump_stack(); + } + +@@ -840,12 +966,13 @@ void __init early_iounmap(void *addr, un + offset = virt_addr & ~PAGE_MASK; + nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; + +- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; ++ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + early_clear_fixmap(idx); + --idx; + --nrpages; + } ++ prev_map[slot] = NULL; + } + + void __this_fixmap_does_not_exist(void) +--- head-2010-01-18.orig/arch/x86/mm/pageattr-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pageattr-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -25,15 +25,27 @@ + * The current flushing context - we pass it instead of 5 arguments: + */ + struct cpa_data { +- unsigned long vaddr; ++ unsigned long *vaddr; + pgprot_t mask_set; + pgprot_t mask_clr; + int numpages; +- int flushtlb; ++ int flags; + unsigned long pfn; + unsigned force_split : 1; ++ int curpage; + }; + ++/* ++ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings) ++ * using cpa_lock. 
So that we don't allow any other cpu, with stale large tlb ++ * entries change the page attribute in parallel to some other cpu ++ * splitting a large page entry along with changing the attribute. ++ */ ++static DEFINE_SPINLOCK(cpa_lock); ++ ++#define CPA_FLUSHTLB 1 ++#define CPA_ARRAY 2 ++ + #ifdef CONFIG_PROC_FS + static unsigned long direct_pages_count[PG_LEVEL_NUM]; + +@@ -53,23 +65,22 @@ static void split_page_count(int level) + direct_pages_count[level - 1] += PTRS_PER_PTE; + } + +-int arch_report_meminfo(char *page) ++void arch_report_meminfo(struct seq_file *m) + { +- int n = sprintf(page, "DirectMap4k: %8lu kB\n", ++ seq_printf(m, "DirectMap4k: %8lu kB\n", + direct_pages_count[PG_LEVEL_4K] << 2); + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +- n += sprintf(page + n, "DirectMap2M: %8lu kB\n", ++ seq_printf(m, "DirectMap2M: %8lu kB\n", + direct_pages_count[PG_LEVEL_2M] << 11); + #else +- n += sprintf(page + n, "DirectMap4M: %8lu kB\n", ++ seq_printf(m, "DirectMap4M: %8lu kB\n", + direct_pages_count[PG_LEVEL_2M] << 12); + #endif + #ifdef CONFIG_X86_64 + if (direct_gbpages) +- n += sprintf(page + n, "DirectMap1G: %8lu kB\n", ++ seq_printf(m, "DirectMap1G: %8lu kB\n", + direct_pages_count[PG_LEVEL_1G] << 20); + #endif +- return n; + } + #else + static inline void split_page_count(int level) { } +@@ -84,7 +95,7 @@ static inline unsigned long highmap_star + + static inline unsigned long highmap_end_pfn(void) + { +- return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; ++ return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + } + + #endif +@@ -190,6 +201,41 @@ static void cpa_flush_range(unsigned lon + } + } + ++static void cpa_flush_array(unsigned long *start, int numpages, int cache) ++{ ++ unsigned int i, level; ++ unsigned long *addr; ++ ++ BUG_ON(irqs_disabled()); ++ ++ on_each_cpu(__cpa_flush_range, NULL, 1); ++ ++ if (!cache) ++ return; ++ ++ /* 4M threshold */ ++ if (numpages >= 1024) { ++ if (boot_cpu_data.x86_model >= 4) ++ wbinvd(); ++ return; ++ } ++ /* ++ * We only need to flush on one CPU, ++ * clflush is a MESI-coherent instruction that ++ * will cause all other CPUs to flush the same ++ * cachelines: ++ */ ++ for (i = 0, addr = start; i < numpages; i++, addr++) { ++ pte_t *pte = lookup_address(*addr, &level); ++ ++ /* ++ * Only flush present addresses: ++ */ ++ if (pte && (__pte_val(*pte) & _PAGE_PRESENT)) ++ clflush_cache_range((void *) *addr, PAGE_SIZE); ++ } ++} ++ + /* + * Certain areas of memory on x86 require very specific protection flags, + * for example the BIOS area or kernel text. 
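cpa_flush_array() above embodies a simple cost trade-off: clflush one 64-byte line at a time while the batch is small, but past the 4M threshold a single wbinvd beats tens of thousands of line flushes. A back-of-the-envelope model; the operation count is an invented cost unit, only the page, line and threshold sizes come from the hunk above:

    #include <stdio.h>

    #define PAGE_SIZE    4096UL
    #define CACHELINE      64UL
    #define WBINVD_PAGES 1024       /* the 4M threshold used above */

    /* Rough operation count for flushing 'numpages' worth of mappings. */
    static unsigned long flush_ops(int numpages)
    {
            if (numpages >= WBINVD_PAGES)
                    return 1;       /* one wbinvd dumps the whole cache */
            return (unsigned long)numpages * (PAGE_SIZE / CACHELINE);
    }

    int main(void)
    {
            printf("16 pages   -> %lu clflush ops\n", flush_ops(16));   /* 1024 */
            printf("2048 pages -> %lu op (wbinvd)\n", flush_ops(2048)); /* 1 */
            return 0;
    }

The per-line path has the extra virtue noted in the comment above: clflush is MESI-coherent, so one CPU can flush on behalf of all of them, while wbinvd is a per-CPU sledgehammer.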
Callers don't always get this +@@ -414,7 +460,7 @@ try_preserve_large_page(pte_t *kpte, uns + */ + new_pte = pfn_pte_ma(__pte_mfn(old_pte), canon_pgprot(new_prot)); + __set_pmd_pte(kpte, address, level, new_pte); +- cpa->flushtlb = 1; ++ cpa->flags |= CPA_FLUSHTLB; + do_split = 0; + } + +@@ -424,84 +470,6 @@ out_unlock: + return do_split; + } + +-static LIST_HEAD(page_pool); +-static unsigned long pool_size, pool_pages, pool_low; +-static unsigned long pool_used, pool_failed; +- +-static void cpa_fill_pool(struct page **ret) +-{ +- gfp_t gfp = GFP_KERNEL; +- unsigned long flags; +- struct page *p; +- +- /* +- * Avoid recursion (on debug-pagealloc) and also signal +- * our priority to get to these pagetables: +- */ +- if (current->flags & PF_MEMALLOC) +- return; +- current->flags |= PF_MEMALLOC; +- +- /* +- * Allocate atomically from atomic contexts: +- */ +- if (in_atomic() || irqs_disabled() || debug_pagealloc) +- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; +- +- while (pool_pages < pool_size || (ret && !*ret)) { +- p = alloc_pages(gfp, 0); +- if (!p) { +- pool_failed++; +- break; +- } +- /* +- * If the call site needs a page right now, provide it: +- */ +- if (ret && !*ret) { +- *ret = p; +- continue; +- } +- spin_lock_irqsave(&pgd_lock, flags); +- list_add(&p->lru, &page_pool); +- pool_pages++; +- spin_unlock_irqrestore(&pgd_lock, flags); +- } +- +- current->flags &= ~PF_MEMALLOC; +-} +- +-#define SHIFT_MB (20 - PAGE_SHIFT) +-#define ROUND_MB_GB ((1 << 10) - 1) +-#define SHIFT_MB_GB 10 +-#define POOL_PAGES_PER_GB 16 +- +-void __init cpa_init(void) +-{ +- struct sysinfo si; +- unsigned long gb; +- +- si_meminfo(&si); +- /* +- * Calculate the number of pool pages: +- * +- * Convert totalram (nr of pages) to MiB and round to the next +- * GiB. Shift MiB to Gib and multiply the result by +- * POOL_PAGES_PER_GB: +- */ +- if (debug_pagealloc) { +- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; +- pool_size = POOL_PAGES_PER_GB * gb; +- } else { +- pool_size = 1; +- } +- pool_low = pool_size; +- +- cpa_fill_pool(NULL); +- printk(KERN_DEBUG +- "CPA: page pool initialized %lu of %lu pages preallocated\n", +- pool_pages, pool_size); +-} +- + static int split_large_page(pte_t *kpte, unsigned long address) + { + unsigned long flags, mfn, mfninc = 1; +@@ -510,28 +478,15 @@ static int split_large_page(pte_t *kpte, + pgprot_t ref_prot; + struct page *base; + +- /* +- * Get a page from the pool. 
The pool list is protected by the +- * pgd_lock, which we have to take anyway for the split +- * operation: +- */ +- spin_lock_irqsave(&pgd_lock, flags); +- if (list_empty(&page_pool)) { +- spin_unlock_irqrestore(&pgd_lock, flags); +- base = NULL; +- cpa_fill_pool(&base); +- if (!base) +- return -ENOMEM; +- spin_lock_irqsave(&pgd_lock, flags); +- } else { +- base = list_first_entry(&page_pool, struct page, lru); +- list_del(&base->lru); +- pool_pages--; +- +- if (pool_pages < pool_low) +- pool_low = pool_pages; +- } ++ if (!debug_pagealloc) ++ spin_unlock(&cpa_lock); ++ base = alloc_pages(GFP_KERNEL, 0); ++ if (!debug_pagealloc) ++ spin_lock(&cpa_lock); ++ if (!base) ++ return -ENOMEM; + ++ spin_lock_irqsave(&pgd_lock, flags); + /* + * Check for races, another CPU might have split this page + * up for us already: +@@ -592,11 +547,8 @@ out_unlock: + * If we dropped out via the lookup_address check under + * pgd_lock then stick the page back into the pool: + */ +- if (base) { +- list_add(&base->lru, &page_pool); +- pool_pages++; +- } else +- pool_used++; ++ if (base) ++ __free_page(base); + spin_unlock_irqrestore(&pgd_lock, flags); + + return 0; +@@ -604,11 +556,16 @@ out_unlock: + + static int __change_page_attr(struct cpa_data *cpa, int primary) + { +- unsigned long address = cpa->vaddr; ++ unsigned long address; + int do_split, err; + unsigned int level; + pte_t *kpte, old_pte; + ++ if (cpa->flags & CPA_ARRAY) ++ address = cpa->vaddr[cpa->curpage]; ++ else ++ address = *cpa->vaddr; ++ + repeat: + kpte = lookup_address(address, &level); + if (!kpte) +@@ -620,7 +577,7 @@ repeat: + return 0; + WARN(1, KERN_WARNING "CPA: called for zero pte. " + "vaddr = %lx cpa->vaddr = %lx\n", address, +- cpa->vaddr); ++ *cpa->vaddr); + return -EINVAL; + } + +@@ -647,7 +604,7 @@ repeat: + */ + if (__pte_val(old_pte) != __pte_val(new_pte)) { + set_pte_atomic(kpte, new_pte); +- cpa->flushtlb = 1; ++ cpa->flags |= CPA_FLUSHTLB; + } + cpa->numpages = 1; + return 0; +@@ -671,7 +628,25 @@ repeat: + */ + err = split_large_page(kpte, address); + if (!err) { +- cpa->flushtlb = 1; ++ /* ++ * Do a global tlb flush after splitting the large page ++ * and before we do the actual change page attribute in the PTE. ++ * ++ * Without this, we violate the TLB application note, which says ++ * "The TLBs may contain both ordinary and large-page ++ * translations for a 4-KByte range of linear addresses. This ++ * may occur if software modifies the paging structures so that ++ * the page size used for the address range changes. If the two ++ * translations differ with respect to page frame or attributes ++ * (e.g., permissions), processor behavior is undefined and may ++ * be implementation-specific." ++ * ++ * We do this global tlb flush inside the cpa_lock, so that we ++ * don't allow any other cpu, with stale tlb entries, to change ++ * the page attribute in parallel for an address that falls into ++ * the just split large page entry. 
++ */ ++ flush_tlb_all(); + goto repeat; + } + +@@ -684,6 +659,7 @@ static int cpa_process_alias(struct cpa_ + { + struct cpa_data alias_cpa; + int ret = 0; ++ unsigned long temp_cpa_vaddr, vaddr; + + if (cpa->pfn >= max_pfn_mapped) + return 0; +@@ -696,16 +672,24 @@ static int cpa_process_alias(struct cpa_ + * No need to redo, when the primary call touched the direct + * mapping already: + */ +- if (!(within(cpa->vaddr, PAGE_OFFSET, ++ if (cpa->flags & CPA_ARRAY) ++ vaddr = cpa->vaddr[cpa->curpage]; ++ else ++ vaddr = *cpa->vaddr; ++ ++ if (!(within(vaddr, PAGE_OFFSET, + PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) + #ifdef CONFIG_X86_64 +- || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32), ++ || within(vaddr, PAGE_OFFSET + (1UL<<32), + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) + #endif + )) { + + alias_cpa = *cpa; +- alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); ++ temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); ++ alias_cpa.vaddr = &temp_cpa_vaddr; ++ alias_cpa.flags &= ~CPA_ARRAY; ++ + + ret = __change_page_attr_set_clr(&alias_cpa, 0); + } +@@ -717,7 +701,7 @@ static int cpa_process_alias(struct cpa_ + * No need to redo, when the primary call touched the high + * mapping already: + */ +- if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end)) ++ if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) + return 0; + + /* +@@ -728,8 +712,9 @@ static int cpa_process_alias(struct cpa_ + return 0; + + alias_cpa = *cpa; +- alias_cpa.vaddr = +- (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map; ++ temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map; ++ alias_cpa.vaddr = &temp_cpa_vaddr; ++ alias_cpa.flags &= ~CPA_ARRAY; + + /* + * The high mapping range is imprecise, so ignore the return value. +@@ -749,8 +734,15 @@ static int __change_page_attr_set_clr(st + * preservation check. 
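With CPA_ARRAY in play, the retry loop that __change_page_attr_set_clr() drives below has two ways to advance: bump curpage for an array request, or slide *vaddr forward for a linear one. A trimmed-down model of that dual bookkeeping; struct cpa here is a cut-down stand-in for struct cpa_data:

    #include <stdio.h>

    #define CPA_ARRAY 2
    #define PAGE_SIZE 4096UL

    struct cpa {                    /* cut-down stand-in for struct cpa_data */
            unsigned long *vaddr;
            int curpage;
            int flags;
    };

    /* Hands back the address __change_page_attr() would see, then advances. */
    static unsigned long cpa_next(struct cpa *c)
    {
            unsigned long a = (c->flags & CPA_ARRAY) ? c->vaddr[c->curpage]
                                                     : *c->vaddr;

            /* one page per step here; the kernel advances by cpa->numpages */
            if (c->flags & CPA_ARRAY)
                    c->curpage++;
            else
                    *c->vaddr += PAGE_SIZE;
            return a;
    }

    int main(void)
    {
            unsigned long one = 0x1000;
            unsigned long many[] = { 0x1000, 0x9000, 0x4000 };
            struct cpa lin = { &one, 0, 0 };
            struct cpa arr = { many, 0, CPA_ARRAY };
            int i;

            for (i = 0; i < 2; i++)
                    printf("linear: %#lx\n", cpa_next(&lin)); /* 0x1000, 0x2000 */
            for (i = 0; i < 3; i++)
                    printf("array:  %#lx\n", cpa_next(&arr)); /* as listed */
            return 0;
    }

This is also why the patch forces cpa->numpages to 1 for array changes: non-contiguous addresses cannot share a large-page fast path.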
+ */ + cpa->numpages = numpages; ++ /* for array changes, we can't use large page */ ++ if (cpa->flags & CPA_ARRAY) ++ cpa->numpages = 1; + ++ if (!debug_pagealloc) ++ spin_lock(&cpa_lock); + ret = __change_page_attr(cpa, checkalias); ++ if (!debug_pagealloc) ++ spin_unlock(&cpa_lock); + if (ret) + return ret; + +@@ -767,7 +759,11 @@ static int __change_page_attr_set_clr(st + */ + BUG_ON(cpa->numpages > numpages); + numpages -= cpa->numpages; +- cpa->vaddr += cpa->numpages * PAGE_SIZE; ++ if (cpa->flags & CPA_ARRAY) ++ cpa->curpage++; ++ else ++ *cpa->vaddr += cpa->numpages * PAGE_SIZE; ++ + } + return 0; + } +@@ -778,9 +774,9 @@ static inline int cache_attr(pgprot_t at + (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); + } + +-static int change_page_attr_set_clr(unsigned long addr, int numpages, ++static int change_page_attr_set_clr(unsigned long *addr, int numpages, + pgprot_t mask_set, pgprot_t mask_clr, +- int force_split) ++ int force_split, int array) + { + struct cpa_data cpa; + int ret, cache, checkalias; +@@ -795,21 +791,40 @@ static int change_page_attr_set_clr(unsi + return 0; + + /* Ensure we are PAGE_SIZE aligned */ +- if (addr & ~PAGE_MASK) { +- addr &= PAGE_MASK; +- /* +- * People should not be passing in unaligned addresses: +- */ +- WARN_ON_ONCE(1); ++ if (!array) { ++ if (*addr & ~PAGE_MASK) { ++ *addr &= PAGE_MASK; ++ /* ++ * People should not be passing in unaligned addresses: ++ */ ++ WARN_ON_ONCE(1); ++ } ++ } else { ++ int i; ++ for (i = 0; i < numpages; i++) { ++ if (addr[i] & ~PAGE_MASK) { ++ addr[i] &= PAGE_MASK; ++ WARN_ON_ONCE(1); ++ } ++ } + } + ++ /* Must avoid aliasing mappings in the highmem code */ ++ kmap_flush_unused(); ++ ++ vm_unmap_aliases(); ++ + cpa.vaddr = addr; + cpa.numpages = numpages; + cpa.mask_set = mask_set; + cpa.mask_clr = mask_clr; +- cpa.flushtlb = 0; ++ cpa.flags = 0; ++ cpa.curpage = 0; + cpa.force_split = force_split; + ++ if (array) ++ cpa.flags |= CPA_ARRAY; ++ + /* No alias checking for _NX bit modifications */ + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; + +@@ -818,7 +833,7 @@ static int change_page_attr_set_clr(unsi + /* + * Check whether we really changed something: + */ +- if (!cpa.flushtlb) ++ if (!(cpa.flags & CPA_FLUSHTLB)) + goto out; + + /* +@@ -833,27 +848,30 @@ static int change_page_attr_set_clr(unsi + * error case we fall back to cpa_flush_all (which uses + * wbindv): + */ +- if (!ret && cpu_has_clflush) +- cpa_flush_range(addr, numpages, cache); +- else ++ if (!ret && cpu_has_clflush) { ++ if (cpa.flags & CPA_ARRAY) ++ cpa_flush_array(addr, numpages, cache); ++ else ++ cpa_flush_range(*addr, numpages, cache); ++ } else + cpa_flush_all(cache); + + out: +- cpa_fill_pool(NULL); +- + return ret; + } + +-static inline int change_page_attr_set(unsigned long addr, int numpages, +- pgprot_t mask) ++static inline int change_page_attr_set(unsigned long *addr, int numpages, ++ pgprot_t mask, int array) + { +- return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0); ++ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, ++ array); + } + +-static inline int change_page_attr_clear(unsigned long addr, int numpages, +- pgprot_t mask) ++static inline int change_page_attr_clear(unsigned long *addr, int numpages, ++ pgprot_t mask, int array) + { +- return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0); ++ return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, ++ array); + } + + #ifdef CONFIG_XEN +@@ -906,8 +924,8 @@ int 
_set_memory_uc(unsigned long addr, i + /* + * for now UC MINUS. see comments in ioremap_nocache() + */ +- return change_page_attr_set(addr, numpages, +- __pgprot(_PAGE_CACHE_UC_MINUS)); ++ return change_page_attr_set(&addr, numpages, ++ __pgprot(_PAGE_CACHE_UC_MINUS), 0); + } + + int set_memory_uc(unsigned long addr, int numpages) +@@ -923,10 +941,48 @@ int set_memory_uc(unsigned long addr, in + } + EXPORT_SYMBOL(set_memory_uc); + ++int set_memory_array_uc(unsigned long *addr, int addrinarray) ++{ ++ unsigned long start; ++ unsigned long end; ++ int i; ++ /* ++ * for now UC MINUS. see comments in ioremap_nocache() ++ */ ++ for (i = 0; i < addrinarray; i++) { ++ start = __pa(addr[i]); ++ for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { ++ if (end != __pa(addr[i + 1])) ++ break; ++ i++; ++ } ++ if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) ++ goto out; ++ } ++ ++ return change_page_attr_set(addr, addrinarray, ++ __pgprot(_PAGE_CACHE_UC_MINUS), 1); ++out: ++ for (i = 0; i < addrinarray; i++) { ++ unsigned long tmp = __pa(addr[i]); ++ ++ if (tmp == start) ++ break; ++ for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { ++ if (end != __pa(addr[i + 1])) ++ break; ++ i++; ++ } ++ free_memtype(tmp, end); ++ } ++ return -EINVAL; ++} ++EXPORT_SYMBOL(set_memory_array_uc); ++ + int _set_memory_wc(unsigned long addr, int numpages) + { +- return change_page_attr_set(addr, numpages, +- __pgprot(_PAGE_CACHE_WC)); ++ return change_page_attr_set(&addr, numpages, ++ __pgprot(_PAGE_CACHE_WC), 0); + } + + int set_memory_wc(unsigned long addr, int numpages) +@@ -944,8 +1000,8 @@ EXPORT_SYMBOL(set_memory_wc); + + int _set_memory_wb(unsigned long addr, int numpages) + { +- return change_page_attr_clear(addr, numpages, +- __pgprot(_PAGE_CACHE_MASK)); ++ return change_page_attr_clear(&addr, numpages, ++ __pgprot(_PAGE_CACHE_MASK), 0); + } + + int set_memory_wb(unsigned long addr, int numpages) +@@ -956,37 +1012,59 @@ int set_memory_wb(unsigned long addr, in + } + EXPORT_SYMBOL(set_memory_wb); + ++int set_memory_array_wb(unsigned long *addr, int addrinarray) ++{ ++ int i; ++ ++ for (i = 0; i < addrinarray; i++) { ++ unsigned long start = __pa(addr[i]); ++ unsigned long end; ++ ++ for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { ++ if (end != __pa(addr[i + 1])) ++ break; ++ i++; ++ } ++ free_memtype(start, end); ++ } ++ return change_page_attr_clear(addr, addrinarray, ++ __pgprot(_PAGE_CACHE_MASK), 1); ++} ++EXPORT_SYMBOL(set_memory_array_wb); ++ + int set_memory_x(unsigned long addr, int numpages) + { +- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX)); ++ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); + } + EXPORT_SYMBOL(set_memory_x); + + int set_memory_nx(unsigned long addr, int numpages) + { +- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX)); ++ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); + } + EXPORT_SYMBOL(set_memory_nx); + + int set_memory_ro(unsigned long addr, int numpages) + { +- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW)); ++ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); + } ++EXPORT_SYMBOL_GPL(set_memory_ro); + + int set_memory_rw(unsigned long addr, int numpages) + { +- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW)); ++ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); + } ++EXPORT_SYMBOL_GPL(set_memory_rw); + + int set_memory_np(unsigned long addr, int numpages) + { 
+- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); ++ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); + } + + int set_memory_4k(unsigned long addr, int numpages) + { +- return change_page_attr_set_clr(addr, numpages, __pgprot(0), +- __pgprot(0), 1); ++ return change_page_attr_set_clr(&addr, numpages, __pgprot(0), ++ __pgprot(0), 1, 0); + } + + int set_pages_uc(struct page *page, int numpages) +@@ -1039,22 +1117,38 @@ int set_pages_rw(struct page *page, int + + static int __set_pages_p(struct page *page, int numpages) + { +- struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), ++ unsigned long tempaddr = (unsigned long) page_address(page); ++ struct cpa_data cpa = { .vaddr = &tempaddr, + .numpages = numpages, + .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), +- .mask_clr = __pgprot(0)}; ++ .mask_clr = __pgprot(0), ++ .flags = 0}; + +- return __change_page_attr_set_clr(&cpa, 1); ++ /* ++ * No alias checking needed for setting present flag. otherwise, ++ * we may need to break large pages for 64-bit kernel text ++ * mappings (this adds to complexity if we want to do this from ++ * atomic context especially). Let's keep it simple! ++ */ ++ return __change_page_attr_set_clr(&cpa, 0); + } + + static int __set_pages_np(struct page *page, int numpages) + { +- struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), ++ unsigned long tempaddr = (unsigned long) page_address(page); ++ struct cpa_data cpa = { .vaddr = &tempaddr, + .numpages = numpages, + .mask_set = __pgprot(0), +- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; ++ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), ++ .flags = 0}; + +- return __change_page_attr_set_clr(&cpa, 1); ++ /* ++ * No alias checking needed for setting not present flag. otherwise, ++ * we may need to break large pages for 64-bit kernel text ++ * mappings (this adds to complexity if we want to do this from ++ * atomic context especially). Let's keep it simple! ++ */ ++ return __change_page_attr_set_clr(&cpa, 0); + } + + void kernel_map_pages(struct page *page, int numpages, int enable) +@@ -1074,11 +1168,8 @@ void kernel_map_pages(struct page *page, + + /* + * The return value is ignored as the calls cannot fail. +- * Large pages are kept enabled at boot time, and are +- * split up quickly with DEBUG_PAGEALLOC. If a splitup +- * fails here (due to temporary memory shortage) no damage +- * is done because we just keep the largepage intact up +- * to the next attempt when it will likely be split up: ++ * Large pages for identity mappings are not used at boot time ++ * and hence no memory allocations during large page split. + */ + if (enable) + __set_pages_p(page, numpages); +@@ -1090,53 +1181,8 @@ void kernel_map_pages(struct page *page, + * but that can deadlock->flush only current cpu: + */ + __flush_tlb_all(); +- +- /* +- * Try to refill the page pool here. We can do this only after +- * the tlb flush. 
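The inner loop of set_memory_array_uc() further up is easy to misread: it greedily extends end while the next array entry is physically adjacent, so a whole contiguous run costs one reserve_memtype() call instead of one per page. Modeled standalone with a toy virtual-to-physical function; pa() and the hole in it are invented:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Toy virt->phys: identity with a gap, so entries 2 and 3 don't chain. */
    static unsigned long pa(unsigned long va)
    {
            return va < 0x3000 ? va : va + 0x10000;
    }

    /* Coalesce physically contiguous entries into single ranges. */
    static void reserve_ranges(unsigned long *addr, int n)
    {
            int i;

            for (i = 0; i < n; i++) {
                    unsigned long start = pa(addr[i]);
                    unsigned long end = start + PAGE_SIZE;

                    while (i < n - 1 && pa(addr[i + 1]) == end) {
                            end += PAGE_SIZE;
                            i++;
                    }
                    printf("reserve [%#lx, %#lx)\n", start, end);
            }
    }

    int main(void)
    {
            unsigned long pages[] = { 0x1000, 0x2000, 0x3000, 0x4000 };

            reserve_ranges(pages, 4);   /* two ranges: the gap splits them */
            return 0;
    }

The error path in the real function walks the array again with the same coalescing to free exactly the ranges it managed to reserve, which is why the loop shape is duplicated there.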
+- */ +- cpa_fill_pool(NULL); +-} +- +-#ifdef CONFIG_DEBUG_FS +-static int dpa_show(struct seq_file *m, void *v) +-{ +- seq_puts(m, "DEBUG_PAGEALLOC\n"); +- seq_printf(m, "pool_size : %lu\n", pool_size); +- seq_printf(m, "pool_pages : %lu\n", pool_pages); +- seq_printf(m, "pool_low : %lu\n", pool_low); +- seq_printf(m, "pool_used : %lu\n", pool_used); +- seq_printf(m, "pool_failed : %lu\n", pool_failed); +- +- return 0; + } + +-static int dpa_open(struct inode *inode, struct file *filp) +-{ +- return single_open(filp, dpa_show, NULL); +-} +- +-static const struct file_operations dpa_fops = { +- .open = dpa_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = single_release, +-}; +- +-static int __init debug_pagealloc_proc_init(void) +-{ +- struct dentry *de; +- +- de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL, +- &dpa_fops); +- if (!de) +- return -ENOMEM; +- +- return 0; +-} +-__initcall(debug_pagealloc_proc_init); +-#endif +- + #ifdef CONFIG_HIBERNATION + + bool kernel_page_present(struct page *page) +--- head-2010-01-18.orig/arch/x86/mm/pat-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pat-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -7,24 +7,24 @@ + * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. + */ + +-#include ++#include ++#include ++#include + #include + #include ++#include + #include +-#include +-#include +-#include + +-#include +-#include ++#include + #include +-#include ++#include + #include +-#include +-#include +-#include + #include ++#include + #include ++#include ++#include ++#include + #include + + #ifdef CONFIG_X86_PAT +@@ -46,6 +46,7 @@ early_param("nopat", nopat); + + + static int debug_enable; ++ + static int __init pat_debug_setup(char *str) + { + debug_enable = 1; +@@ -157,14 +158,14 @@ static char *cattr_name(unsigned long fl + */ + + struct memtype { +- u64 start; +- u64 end; +- unsigned long type; +- struct list_head nd; ++ u64 start; ++ u64 end; ++ unsigned long type; ++ struct list_head nd; + }; + + static LIST_HEAD(memtype_list); +-static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ ++static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ + + /* + * Does intersection of PAT memory type and MTRR memory type and returns +@@ -192,8 +193,8 @@ static unsigned long pat_x_mtrr_type(u64 + return req_type; + } + +-static int chk_conflict(struct memtype *new, struct memtype *entry, +- unsigned long *type) ++static int ++chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) + { + if (new->type != entry->type) { + if (type) { +@@ -223,6 +224,66 @@ static struct memtype *cached_entry; + static u64 cached_start; + + /* ++ * For RAM pages, mark the pages as non WB memory type using ++ * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or ++ * set_memory_wc() on a RAM page at a time before marking it as WB again. ++ * This is ok, because only one driver will be owning the page and ++ * doing set_memory_*() calls. ++ * ++ * For now, we use PageNonWB to track that the RAM page is being mapped ++ * as non WB. In future, we will have to use one more flag ++ * (or some other mechanism in page_struct) to distinguish between ++ * UC and WC mapping. 
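reserve_ram_pages_type() below implements the tagging described above as an all-or-nothing pass: tag pages until a conflict appears, then walk back and clear every tag set so far. A standalone model of that mark-or-rollback loop; the non_wb[] and mapped[] arrays stand in for the PG_arch_1 page flag and page_mapped():

    #include <stdio.h>

    #define NPAGES 8

    static unsigned char non_wb[NPAGES];   /* stand-in for the PG_arch_1 bit */
    static unsigned char mapped[NPAGES];   /* stand-in for page_mapped() */

    /* Tag every page in [start, end) as non-WB, or undo all tags on conflict. */
    static int reserve_ram_pages(unsigned long start, unsigned long end)
    {
            unsigned long pfn, done;

            for (pfn = start; pfn < end; pfn++) {
                    if (mapped[pfn] || non_wb[pfn])
                            goto rollback;  /* in use, or already non-WB */
                    non_wb[pfn] = 1;
            }
            return 0;

    rollback:
            done = pfn;
            for (pfn = start; pfn < done; pfn++)
                    non_wb[pfn] = 0;
            return -1;
    }

    int main(void)
    {
            mapped[5] = 1;  /* page 5 is mapped somewhere: reservation fails */
            printf("reserve 0-4: %d\n", reserve_ram_pages(0, 4)); /*  0 */
            printf("reserve 4-8: %d\n", reserve_ram_pages(4, 8)); /* -1 */
            printf("page 4 tag after rollback: %d\n", non_wb[4]); /*  0 */
            return 0;
    }

free_ram_pages_type() below is the mirror image: it clears tags and rolls the clearing back if it finds a page that was never tagged, keeping the one-owner invariant the comment above relies on.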
++ */ ++static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, ++ unsigned long *new_type) ++{ ++ struct page *page; ++ u64 pfn, end_pfn; ++ ++ for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { ++ page = pfn_to_page(pfn); ++ if (page_mapped(page) || PageNonWB(page)) ++ goto out; ++ ++ SetPageNonWB(page); ++ } ++ return 0; ++ ++out: ++ end_pfn = pfn; ++ for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { ++ page = pfn_to_page(pfn); ++ ClearPageNonWB(page); ++ } ++ ++ return -EINVAL; ++} ++ ++static int free_ram_pages_type(u64 start, u64 end) ++{ ++ struct page *page; ++ u64 pfn, end_pfn; ++ ++ for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { ++ page = pfn_to_page(pfn); ++ if (page_mapped(page) || !PageNonWB(page)) ++ goto out; ++ ++ ClearPageNonWB(page); ++ } ++ return 0; ++ ++out: ++ end_pfn = pfn; ++ for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { ++ page = pfn_to_page(pfn); ++ SetPageNonWB(page); ++ } ++ return -EINVAL; ++} ++ ++/* + * req_type typically has one of the: + * - _PAGE_CACHE_WB + * - _PAGE_CACHE_WC +@@ -238,14 +299,15 @@ static u64 cached_start; + * it will return a negative return value. + */ + int reserve_memtype(u64 start, u64 end, unsigned long req_type, +- unsigned long *new_type) ++ unsigned long *new_type) + { + struct memtype *new, *entry; + unsigned long actual_type; + struct list_head *where; ++ int is_range_ram; + int err = 0; + +- BUG_ON(start >= end); /* end is exclusive */ ++ BUG_ON(start >= end); /* end is exclusive */ + + if (!pat_enabled) { + /* This is identical to page table setting without PAT */ +@@ -278,17 +340,24 @@ int reserve_memtype(u64 start, u64 end, + actual_type = _PAGE_CACHE_WB; + else + actual_type = _PAGE_CACHE_UC_MINUS; +- } else ++ } else { + actual_type = pat_x_mtrr_type(start, end, + req_type & _PAGE_CACHE_MASK); ++ } ++ ++ is_range_ram = pagerange_is_ram(start, end); ++ if (is_range_ram == 1) ++ return reserve_ram_pages_type(start, end, req_type, new_type); ++ else if (is_range_ram < 0) ++ return -EINVAL; + + new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + if (!new) + return -ENOMEM; + +- new->start = start; +- new->end = end; +- new->type = actual_type; ++ new->start = start; ++ new->end = end; ++ new->type = actual_type; + + if (new_type) + *new_type = actual_type; +@@ -347,6 +416,7 @@ int reserve_memtype(u64 start, u64 end, + start, end, cattr_name(new->type), cattr_name(req_type)); + kfree(new); + spin_unlock(&memtype_lock); ++ + return err; + } + +@@ -370,6 +440,7 @@ int free_memtype(u64 start, u64 end) + { + struct memtype *entry; + int err = -EINVAL; ++ int is_range_ram; + + if (!pat_enabled) + return 0; +@@ -378,6 +449,12 @@ int free_memtype(u64 start, u64 end) + if (is_ISA_range(start, end - 1)) + return 0; + ++ is_range_ram = pagerange_is_ram(start, end); ++ if (is_range_ram == 1) ++ return free_ram_pages_type(start, end); ++ else if (is_range_ram < 0) ++ return -EINVAL; ++ + spin_lock(&memtype_lock); + list_for_each_entry(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { +@@ -398,6 +475,7 @@ int free_memtype(u64 start, u64 end) + } + + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); ++ + return err; + } + +@@ -415,12 +493,16 @@ static inline int range_is_allowed(unsig + return 1; + } + #else ++/* This check is needed to avoid cache aliasing when PAT is enabled */ + static inline int range_is_allowed(unsigned long mfn, unsigned long size) + { + u64 from = ((u64)mfn) << PAGE_SHIFT; + u64 to = from + size; 
+ u64 cursor = from; + ++ if (!pat_enabled) ++ return 1; ++ + while (cursor < to) { + if (!devmem_is_allowed(mfn)) { + printk(KERN_INFO +@@ -504,9 +586,9 @@ int phys_mem_access_prot_allowed(struct + + void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot) + { ++ unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + u64 addr = (u64)mfn << PAGE_SHIFT; + unsigned long flags; +- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + + reserve_memtype(addr, addr + size, want_flags, &flags); + if (flags != want_flags) { +@@ -526,7 +608,7 @@ void unmap_devmem(unsigned long mfn, uns + free_memtype(addr, addr + size); + } + +-#if defined(CONFIG_DEBUG_FS) ++#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) + + /* get Nth element of the linked list */ + static struct memtype *memtype_get_idx(loff_t pos) +@@ -549,6 +631,7 @@ static struct memtype *memtype_get_idx(l + } + spin_unlock(&memtype_lock); + kfree(print_entry); ++ + return NULL; + } + +@@ -579,6 +662,7 @@ static int memtype_seq_show(struct seq_f + seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), + print_entry->start, print_entry->end); + kfree(print_entry); ++ + return 0; + } + +@@ -610,4 +694,4 @@ static int __init pat_memtype_list_init( + + late_initcall(pat_memtype_list_init); + +-#endif /* CONFIG_DEBUG_FS */ ++#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ +--- head-2010-01-18.orig/arch/x86/mm/pgtable-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pgtable-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -129,7 +129,7 @@ void __pud_free_tlb(struct mmu_gather *t + static void _pin_lock(struct mm_struct *mm, int lock) { + if (lock) + spin_lock(&mm->page_table_lock); +-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS ++#if USE_SPLIT_PTLOCKS + /* While mm->page_table_lock protects us against insertions and + * removals of higher level page table pages, it doesn't protect + * against updates of pte-s. Such updates, however, require the +@@ -408,10 +408,8 @@ static inline void pgd_list_del(pgd_t *p + #define UNSHARED_PTRS_PER_PGD \ + (SHARED_KERNEL_PMD ? 
KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) + +-static void pgd_ctor(void *p) ++static void pgd_ctor(pgd_t *pgd) + { +- pgd_t *pgd = p; +- + pgd_test_and_unpin(pgd); + + /* If the pgd points to a shared pagetable level (either the +@@ -440,7 +438,7 @@ static void pgd_ctor(void *p) + pgd_list_add(pgd); + } + +-static void pgd_dtor(void *pgd) ++static void pgd_dtor(pgd_t *pgd) + { + unsigned long flags; /* can be called from interrupt context */ + +--- head-2010-01-18.orig/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/mm/pgtable_32-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -122,7 +122,6 @@ void __init reserve_top_address(unsigned + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", + (int)-reserve); + __FIXADDR_TOP = -reserve - PAGE_SIZE; +- __VMALLOC_RESERVE += reserve; + } + + /* +@@ -135,7 +134,8 @@ static int __init parse_vmalloc(char *ar + if (!arg) + return -EINVAL; + +- __VMALLOC_RESERVE = memparse(arg, &arg); ++ /* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/ ++ __VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET; + return 0; + } + early_param("vmalloc", parse_vmalloc); +--- head-2010-01-18.orig/arch/x86/pci/irq-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/pci/irq-xen.c 2009-11-06 10:51:47.000000000 +0100 +@@ -499,7 +499,7 @@ static int pirq_amd756_get(struct pci_de + if (pirq <= 4) + irq = read_config_nybble(router, 0x56, pirq - 1); + dev_info(&dev->dev, +- "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", ++ "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n", + dev->vendor, dev->device, pirq, irq); + return irq; + } +@@ -507,7 +507,7 @@ static int pirq_amd756_get(struct pci_de + static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) + { + dev_info(&dev->dev, +- "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", ++ "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n", + dev->vendor, dev->device, pirq, irq); + if (pirq <= 4) + write_config_nybble(router, 0x56, pirq - 1, irq); +@@ -596,13 +596,20 @@ static __init int intel_router_probe(str + case PCI_DEVICE_ID_INTEL_ICH10_1: + case PCI_DEVICE_ID_INTEL_ICH10_2: + case PCI_DEVICE_ID_INTEL_ICH10_3: +- case PCI_DEVICE_ID_INTEL_PCH_0: +- case PCI_DEVICE_ID_INTEL_PCH_1: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } ++ ++ if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && ++ (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) { ++ r->name = "PIIX/ICH"; ++ r->get = pirq_piix_get; ++ r->set = pirq_piix_set; ++ return 1; ++ } ++ + return 0; + } + +@@ -829,7 +836,7 @@ static void __init pirq_find_router(stru + r->get = NULL; + r->set = NULL; + +- DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", ++ DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", + rt->rtr_vendor, rt->rtr_device); + + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); +@@ -849,7 +856,7 @@ static void __init pirq_find_router(stru + h->probe(r, pirq_router_dev, pirq_router_dev->device)) + break; + } +- dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", ++ dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n", + pirq_router.name, + pirq_router_dev->vendor, pirq_router_dev->device); + +@@ -1049,35 +1056,44 @@ static void __init pcibios_fixup_irqs(vo + if (io_apic_assign_pci_irqs) { + int irq; + +- if (pin) { +- /* +- * interrupt pins are numbered starting +- * from 1 +- */ +- pin--; +- irq = 
IO_APIC_get_PCI_irq_vector(dev->bus->number, +- PCI_SLOT(dev->devfn), pin); +- /* +- * Busses behind bridges are typically not listed in the MP-table. +- * In this case we have to look up the IRQ based on the parent bus, +- * parent slot, and pin number. The SMP code detects such bridged +- * busses itself so we should get into this branch reliably. +- */ +- if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ +- struct pci_dev *bridge = dev->bus->self; ++ if (!pin) ++ continue; ++ ++ /* ++ * interrupt pins are numbered starting from 1 ++ */ ++ pin--; ++ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, ++ PCI_SLOT(dev->devfn), pin); ++ /* ++ * Busses behind bridges are typically not listed in the ++ * MP-table. In this case we have to look up the IRQ ++ * based on the parent bus, parent slot, and pin number. ++ * The SMP code detects such bridged busses itself so we ++ * should get into this branch reliably. ++ */ ++ if (irq < 0 && dev->bus->parent) { ++ /* go back to the bridge */ ++ struct pci_dev *bridge = dev->bus->self; ++ int bus; + +- pin = (pin + PCI_SLOT(dev->devfn)) % 4; +- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, +- PCI_SLOT(bridge->devfn), pin); +- if (irq >= 0) +- dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", +- pci_name(bridge), +- 'A' + pin, irq); +- } +- if (irq >= 0) { +- dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); +- dev->irq = irq; +- } ++ pin = (pin + PCI_SLOT(dev->devfn)) % 4; ++ bus = bridge->bus->number; ++ irq = IO_APIC_get_PCI_irq_vector(bus, ++ PCI_SLOT(bridge->devfn), pin); ++ if (irq >= 0) ++ dev_warn(&dev->dev, ++ "using bridge %s INT %c to " ++ "get IRQ %d\n", ++ pci_name(bridge), ++ 'A' + pin, irq); ++ } ++ if (irq >= 0) { ++ dev_info(&dev->dev, ++ "PCI->APIC IRQ transform: INT %c " ++ "-> IRQ %d\n", ++ 'A' + pin, irq); ++ dev->irq = irq; + } + } + #endif +--- head-2010-01-18.orig/arch/x86/xen/Kconfig 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/arch/x86/xen/Kconfig 2009-11-06 10:51:47.000000000 +0100 +@@ -31,7 +31,7 @@ config XEN_SAVE_RESTORE + + config XEN_DEBUG_FS + bool "Enable Xen debug and tuning parameters in debugfs" +- depends on XEN && DEBUG_FS ++ depends on PARAVIRT_XEN && DEBUG_FS + default n + help + Enable statistics output and various tuning options in debugfs. 
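A note on the pcibios_fixup_irqs() rework in arch/x86/pci/irq-xen.c above: when a device's bus is missing from the MP-table, the lookup is retried on the parent bus after rotating the interrupt pin by the device's slot number. Below is a minimal standalone sketch of that conventional PCI INTx swizzle; the function and values are illustrative only and are not part of the patch.

#include <stdio.h>

/* Rotate a device's 0-based INTx pin (INTA# == 0) by its slot number,
 * modulo the four INTx lines, to find the pin seen on the parent bus. */
static int swizzle_pin(int slot, int pin)
{
	return (pin + slot) % 4;
}

int main(void)
{
	/* A device in slot 3 raising INTB# (pin 1) behind one bridge: */
	printf("parent-bus pin: INT%c#\n", 'A' + swizzle_pin(3, 1));
	return 0;
}

Applied once per bridge crossing, this is the same mapping the patched code computes with pin = (pin + PCI_SLOT(dev->devfn)) % 4 before calling IO_APIC_get_PCI_irq_vector() on the bridge's bus.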
+--- head-2010-01-18.orig/drivers/acpi/acpica/hwsleep.c 2009-11-06 10:45:37.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/acpica/hwsleep.c 2009-11-06 10:51:47.000000000 +0100 +@@ -396,8 +396,7 @@ acpi_status asmlinkage acpi_enter_sleep_ + err = acpi_notify_hypervisor_state(sleep_state, + PM1Acontrol, PM1Bcontrol); + if (err) { +- ACPI_DEBUG_PRINT((ACPI_DB_ERROR, +- "Hypervisor failure [%d]\n", err)); ++ printk(KERN_ERR "ACPI: Hypervisor failure [%d]\n", err); + return_ACPI_STATUS(AE_ERROR); + } + #endif +--- head-2010-01-18.orig/drivers/acpi/processor_extcntl.c 2009-11-06 10:46:41.000000000 +0100 ++++ head-2010-01-18/drivers/acpi/processor_extcntl.c 2009-11-06 10:51:47.000000000 +0100 +@@ -30,7 +30,6 @@ + + #include + +-#define ACPI_PROCESSOR_COMPONENT 0x01000000 + #define ACPI_PROCESSOR_CLASS "processor" + #define _COMPONENT ACPI_PROCESSOR_COMPONENT + ACPI_MODULE_NAME("processor_extcntl") +--- head-2010-01-18.orig/drivers/firmware/dmi_scan.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/drivers/firmware/dmi_scan.c 2009-11-06 10:51:47.000000000 +0100 +@@ -424,6 +424,11 @@ static bool dmi_matches(const struct dmi + { + int i; + ++#ifdef CONFIG_XEN ++ if (!is_initial_xendomain()) ++ return false; ++#endif ++ + WARN(!dmi_initialized, KERN_ERR "dmi check: not initialized yet.\n"); + + for (i = 0; i < ARRAY_SIZE(dmi->matches); i++) { +--- head-2010-01-18.orig/drivers/pci/msi-xen.c 2009-12-04 11:23:40.000000000 +0100 ++++ head-2010-01-18/drivers/pci/msi-xen.c 2009-12-04 11:28:36.000000000 +0100 +@@ -305,8 +305,16 @@ static int msi_map_vector(struct pci_dev + * dev->irq in dom0 will be 'Xen pirq' if this device belongs to + * to another domain, and will be 'Linux irq' if it belongs to dom0. + */ +- return ((domid != DOMID_SELF) ? +- map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq)); ++ if (domid == DOMID_SELF) { ++ rc = evtchn_map_pirq(-1, map_irq.pirq); ++ dev_printk(KERN_DEBUG, &dev->dev, ++ "irq %d (%d) for MSI/MSI-X\n", ++ rc, map_irq.pirq); ++ return rc; ++ } ++ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for dom%d MSI/MSI-X\n", ++ map_irq.pirq, domid); ++ return map_irq.pirq; + } + + static void pci_intx_for_msi(struct pci_dev *dev, int enable) +@@ -761,3 +769,24 @@ void pci_msi_init_pci_dev(struct pci_dev + INIT_LIST_HEAD(&dev->msi_list); + #endif + } ++ ++#ifdef CONFIG_ACPI ++#include ++#include ++static void __devinit msi_acpi_init(void) ++{ ++ if (acpi_pci_disabled) ++ return; ++ pci_osc_support_set(OSC_MSI_SUPPORT); ++ pcie_osc_support_set(OSC_MSI_SUPPORT); ++} ++#else ++static inline void msi_acpi_init(void) { } ++#endif /* CONFIG_ACPI */ ++ ++void __devinit msi_init(void) ++{ ++ if (!pci_msi_enable) ++ return; ++ msi_acpi_init(); ++} +--- head-2010-01-18.orig/drivers/xen/Makefile 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/Makefile 2009-11-06 10:51:47.000000000 +0100 +@@ -1,4 +1,5 @@ + obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o ++xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o + xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o + xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o + +@@ -10,6 +11,7 @@ obj-y += xenbus/ + obj-$(CONFIG_XEN) += char/ + + obj-$(CONFIG_XEN) += features.o util.o ++obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y) + obj-$(CONFIG_XEN_XENCOMM) += $(xen-xencomm-y) + obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y) + obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ +--- head-2010-01-18.orig/drivers/xen/blkback/vbd.c 2009-05-19 09:16:41.000000000 +0200 ++++ head-2010-01-18/drivers/xen/blkback/vbd.c 2009-11-06 
10:51:47.000000000 +0100 +@@ -94,7 +94,8 @@ int vbd_create(blkif_t *blkif, blkif_vde + void vbd_free(struct vbd *vbd) + { + if (vbd->bdev) +- blkdev_put(vbd->bdev); ++ blkdev_put(vbd->bdev, ++ vbd->readonly ? FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; + } + +--- head-2010-01-18.orig/drivers/xen/blkfront/blkfront.c 2009-11-06 10:51:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkfront/blkfront.c 2010-01-18 16:50:18.000000000 +0100 +@@ -484,9 +484,15 @@ static void blkif_restart_queue_callback + schedule_work(&info->work); + } + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + int blkif_open(struct inode *inode, struct file *filep) + { +- struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; ++ struct block_device *bd = inode->i_bdev; ++#else ++int blkif_open(struct block_device *bd, fmode_t mode) ++{ ++#endif ++ struct blkfront_info *info = bd->bd_disk->private_data; + + if(info->is_ready < 0) + return -ENODEV; +@@ -495,9 +501,16 @@ int blkif_open(struct inode *inode, stru + } + + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + int blkif_release(struct inode *inode, struct file *filep) + { +- struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; ++ struct gendisk *disk = inode->i_bdev->bd_disk; ++#else ++int blkif_release(struct gendisk *disk, fmode_t mode) ++{ ++#endif ++ struct blkfront_info *info = disk->private_data; ++ + info->users--; + if (info->users == 0) { + /* Check whether we have been instructed to close. We will +@@ -516,9 +529,16 @@ int blkif_release(struct inode *inode, s + } + + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + int blkif_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) + { ++ struct block_device *bd = inode->i_bdev; ++#else ++int blkif_ioctl(struct block_device *bd, fmode_t mode, ++ unsigned command, unsigned long argument) ++{ ++#endif + int i; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", +@@ -527,7 +547,6 @@ int blkif_ioctl(struct inode *inode, str + switch (command) { + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + case HDIO_GETGEO: { +- struct block_device *bd = inode->i_bdev; + struct hd_geometry geo; + int ret; + +@@ -554,8 +573,7 @@ int blkif_ioctl(struct inode *inode, str + return 0; + + case CDROM_GET_CAPABILITY: { +- struct blkfront_info *info = +- inode->i_bdev->bd_disk->private_data; ++ struct blkfront_info *info = bd->bd_disk->private_data; + struct gendisk *gd = info->gd; + if (gd->flags & GENHD_FL_CD) + return 0; +--- head-2010-01-18.orig/drivers/xen/blkfront/block.h 2010-01-18 16:30:27.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkfront/block.h 2010-01-18 16:49:13.000000000 +0100 +@@ -123,10 +123,17 @@ struct blkfront_info + + extern spinlock_t blkif_io_lock; + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + extern int blkif_open(struct inode *inode, struct file *filep); + extern int blkif_release(struct inode *inode, struct file *filep); + extern int blkif_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument); ++#else ++extern int blkif_open(struct block_device *bdev, fmode_t mode); ++extern int blkif_release(struct gendisk *disk, fmode_t mode); ++extern int blkif_ioctl(struct block_device *bdev, fmode_t mode, ++ unsigned command, unsigned long argument); ++#endif + extern int blkif_getgeo(struct block_device *, struct hd_geometry *); + extern int blkif_check(dev_t dev); + extern int blkif_revalidate(dev_t dev); +--- head-2010-01-18.orig/drivers/xen/blkfront/vbd.c 2010-01-18 
16:30:28.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blkfront/vbd.c 2010-01-18 16:49:11.000000000 +0100 +@@ -110,7 +110,11 @@ static struct block_device_operations xl + .owner = THIS_MODULE, + .open = blkif_open, + .release = blkif_release, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + .ioctl = blkif_ioctl, ++#else ++ .locked_ioctl = blkif_ioctl, ++#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) + .getgeo = blkif_getgeo + #endif +--- head-2010-01-18.orig/drivers/xen/blktap2/device.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/blktap2/device.c 2009-11-06 10:51:47.000000000 +0100 +@@ -36,10 +36,10 @@ dev_to_blktap(struct blktap_device *dev) + } + + static int +-blktap_device_open(struct inode *inode, struct file *filep) ++blktap_device_open(struct block_device *bd, fmode_t mode) + { + struct blktap *tap; +- struct blktap_device *dev = inode->i_bdev->bd_disk->private_data; ++ struct blktap_device *dev = bd->bd_disk->private_data; + + if (!dev) + return -ENOENT; +@@ -55,9 +55,9 @@ blktap_device_open(struct inode *inode, + } + + static int +-blktap_device_release(struct inode *inode, struct file *filep) ++blktap_device_release(struct gendisk *disk, fmode_t mode) + { +- struct blktap_device *dev = inode->i_bdev->bd_disk->private_data; ++ struct blktap_device *dev = disk->private_data; + struct blktap *tap = dev_to_blktap(dev); + + dev->users--; +@@ -85,18 +85,17 @@ blktap_device_getgeo(struct block_device + } + + static int +-blktap_device_ioctl(struct inode *inode, struct file *filep, ++blktap_device_ioctl(struct block_device *bd, fmode_t mode, + unsigned command, unsigned long argument) + { + int i; + +- DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", +- command, (long)argument, inode->i_rdev); ++ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx\n", ++ command, (long)argument); + + switch (command) { + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + case HDIO_GETGEO: { +- struct block_device *bd = inode->i_bdev; + struct hd_geometry geo; + int ret; + +--- head-2010-01-18.orig/drivers/xen/core/evtchn.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/evtchn.c 2009-11-06 10:51:47.000000000 +0100 +@@ -145,7 +145,7 @@ static void bind_evtchn_to_cpu(unsigned + BUG_ON(!test_bit(chn, s->evtchn_mask)); + + if (irq != -1) +- irq_desc[irq].affinity = cpumask_of_cpu(cpu); ++ irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu); + + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); +@@ -158,7 +158,7 @@ static void init_evtchn_cpu_bindings(voi + + /* By default all event channels notify CPU#0. */ + for (i = 0; i < NR_IRQS; i++) +- irq_desc[i].affinity = cpumask_of_cpu(0); ++ irq_to_desc(i)->affinity = cpumask_of_cpu(0); + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +@@ -725,7 +725,7 @@ static void end_dynirq(unsigned int irq) + { + int evtchn = evtchn_from_irq(irq); + +- if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED)) ++ if (VALID_EVTCHN(evtchn) && !(irq_to_desc(irq)->status & IRQ_DISABLED)) + unmask_evtchn(evtchn); + } + +@@ -816,7 +816,7 @@ static unsigned int startup_pirq(unsigne + bind_pirq.pirq = evtchn_get_xen_pirq(irq); + /* NB. We are happy to share unless we are probing. */ + bind_pirq.flags = test_and_clear_bit(irq - PIRQ_BASE, probing_pirq) +- || (irq_desc[irq].status & IRQ_AUTODETECT) ++ || (irq_to_desc(irq)->status & IRQ_AUTODETECT) + ? 
0 : BIND_PIRQ__WILL_SHARE; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + if (bind_pirq.flags) +@@ -882,7 +882,7 @@ static void end_pirq(unsigned int irq) + { + int evtchn = evtchn_from_irq(irq); + +- if ((irq_desc[irq].status & (IRQ_DISABLED|IRQ_PENDING)) == ++ if ((irq_to_desc(irq)->status & (IRQ_DISABLED|IRQ_PENDING)) == + (IRQ_DISABLED|IRQ_PENDING)) { + shutdown_pirq(irq); + } else if (VALID_EVTCHN(evtchn)) +@@ -1065,7 +1065,7 @@ static void restore_cpu_ipis(unsigned in + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ +- if (!(irq_desc[irq].status & IRQ_DISABLED)) ++ if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) + unmask_evtchn(evtchn); + } + } +@@ -1201,7 +1201,7 @@ void __init xen_init_IRQ(void) + for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) { + irq_bindcount[i] = 0; + +- irq_desc[i].status |= IRQ_NOPROBE; ++ irq_to_desc(i)->status |= IRQ_NOPROBE; + set_irq_chip_and_handler_name(i, &dynirq_chip, + handle_level_irq, "level"); + } +--- head-2010-01-18.orig/drivers/xen/core/smpboot.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/smpboot.c 2009-11-06 10:51:47.000000000 +0100 +@@ -25,10 +25,6 @@ + #include + #include + +-extern irqreturn_t smp_reschedule_interrupt(int, void *); +-extern irqreturn_t smp_call_function_interrupt(int, void *); +-extern irqreturn_t smp_call_function_single_interrupt(int, void *); +- + extern int local_setup_timer(unsigned int cpu); + extern void local_teardown_timer(unsigned int cpu); + +@@ -179,7 +175,7 @@ static void __cpuexit xen_smp_intr_exit( + } + #endif + +-void __cpuinit cpu_bringup(void) ++static void __cpuinit cpu_bringup(void) + { + cpu_init(); + identify_secondary_cpu(¤t_cpu_data); +@@ -432,6 +428,20 @@ int __cpuinit __cpu_up(unsigned int cpu) + return 0; + } + ++void __ref play_dead(void) ++{ ++ idle_task_exit(); ++ local_irq_disable(); ++ cpu_clear(smp_processor_id(), cpu_initialized); ++ preempt_enable_no_resched(); ++ VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); ++#ifdef CONFIG_HOTPLUG_CPU ++ cpu_bringup(); ++#else ++ BUG(); ++#endif ++} ++ + void __init smp_cpus_done(unsigned int max_cpus) + { + } +--- head-2010-01-18.orig/drivers/xen/core/spinlock.c 2010-01-18 16:45:21.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/spinlock.c 2009-11-17 15:19:07.000000000 +0100 +@@ -14,8 +14,6 @@ + + #ifdef TICKET_SHIFT + +-extern irqreturn_t smp_reschedule_interrupt(int, void *); +- + static DEFINE_PER_CPU(int, spinlock_irq) = -1; + static char spinlock_name[NR_CPUS][15]; + +--- head-2010-01-18.orig/drivers/xen/netfront/netfront.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/drivers/xen/netfront/netfront.c 2009-11-06 10:51:47.000000000 +0100 +@@ -956,7 +956,7 @@ static int network_start_xmit(struct sk_ + return 0; + } + +- frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; ++ frags += DIV_ROUND_UP(offset + len, PAGE_SIZE); + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", + frags); +--- head-2010-01-18.orig/drivers/xen/scsifront/scsifront.c 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/drivers/xen/scsifront/scsifront.c 2008-09-15 15:22:12.000000000 +0200 +@@ -348,7 +348,7 @@ static int scsifront_queuecommand(struct + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE); + + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction; +- ring_req->timeout_per_command = (sc->timeout_per_command / HZ); ++ ring_req->timeout_per_command = (sc->request->timeout / HZ); + + 
info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc; + info->shadow[rqid].sc_data_direction = sc->sc_data_direction; +@@ -418,7 +418,7 @@ static int scsifront_dev_reset_handler(s + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE); + + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction; +- ring_req->timeout_per_command = (sc->timeout_per_command / HZ); ++ ring_req->timeout_per_command = (sc->request->timeout / HZ); + ring_req->nr_segments = 0; + + scsifront_do_request(info); +--- head-2010-01-18.orig/drivers/xen/xenbus/xenbus_probe.h 2009-11-06 10:51:07.000000000 +0100 ++++ head-2010-01-18/drivers/xen/xenbus/xenbus_probe.h 2009-11-06 10:51:47.000000000 +0100 +@@ -40,6 +40,11 @@ + #define XEN_BUS_ID_SIZE BUS_ID_SIZE + #endif + ++#ifdef CONFIG_PARAVIRT_XEN ++#define is_running_on_xen() xen_domain() ++#define is_initial_xendomain() xen_initial_domain() ++#endif ++ + #if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE) + extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); + extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); +--- head-2010-01-18.orig/include/xen/cpu_hotplug.h 2007-08-16 18:07:01.000000000 +0200 ++++ head-2010-01-18/include/xen/cpu_hotplug.h 2009-11-06 10:51:47.000000000 +0100 +@@ -15,8 +15,6 @@ void init_xenbus_allowed_cpumask(void); + int smp_suspend(void); + void smp_resume(void); + +-void cpu_bringup(void); +- + #else /* !defined(CONFIG_HOTPLUG_CPU) */ + + #define cpu_up_check(cpu) (0) +--- head-2010-01-18.orig/lib/swiotlb-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ head-2010-01-18/lib/swiotlb-xen.c 2009-12-14 17:24:14.000000000 +0100 +@@ -49,7 +49,6 @@ int swiotlb; + + int swiotlb_force; + +-static char *iotlb_virt_start; + static unsigned long iotlb_nslabs; + + /* +@@ -57,16 +56,7 @@ static unsigned long iotlb_nslabs; + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +-static unsigned long iotlb_pfn_start, iotlb_pfn_end; +- +-/* Does the given dma address reside within the swiotlb aperture? */ +-static inline int in_swiotlb_aperture(dma_addr_t dev_addr) +-{ +- unsigned long pfn = mfn_to_local_pfn(dev_addr >> PAGE_SHIFT); +- return (pfn_valid(pfn) +- && (pfn >= iotlb_pfn_start) +- && (pfn < iotlb_pfn_end)); +-} ++static char *io_tlb_start, *io_tlb_end; + + /* + * When the IOMMU overflows we return a fallback buffer. This sets the size. 
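The swiotlb-xen.c hunks that follow drop the pfn-range test in_swiotlb_aperture() in favour of is_swiotlb_buffer(), which compares kernel virtual addresses against the [io_tlb_start, io_tlb_end) bounds of the bounce pool. The core of the new test reduces to a half-open interval check; here is a sketch under the assumption that the bus address has already been translated back to a local kernel virtual address (names are illustrative, not from the patch).

#include <stdbool.h>

/* Half-open interval test at the core of the new is_swiotlb_buffer():
 * an address belongs to the bounce pool iff its kernel virtual address
 * lies in [pool_start, pool_end). */
static bool in_bounce_pool(const char *va, const char *pool_start,
			   const char *pool_end)
{
	return va && va >= pool_start && va < pool_end;
}

The CONFIG_HIGHMEM escape in the real hunk exists because pfns at or above highstart_pfn have no direct kernel mapping, so no meaningful virtual address can be formed for the comparison.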
+@@ -151,15 +141,15 @@ swiotlb_init_with_default_size(size_t de + /* + * Get IO TLB memory from the low pages + */ +- iotlb_virt_start = alloc_bootmem_pages(bytes); +- if (!iotlb_virt_start) ++ io_tlb_start = alloc_bootmem_pages(bytes); ++ if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer!\n"); + + dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; + for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) { + do { + rc = xen_create_contiguous_region( +- (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), ++ (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT), + get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); +@@ -170,10 +160,10 @@ swiotlb_init_with_default_size(size_t de + "some DMA memory (e.g., dom0_mem=-128M).\n"); + iotlb_nslabs = i; + i <<= IO_TLB_SHIFT; +- free_bootmem(__pa(iotlb_virt_start + i), bytes - i); ++ free_bootmem(__pa(io_tlb_start + i), bytes - i); + bytes = i; + for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { +- unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i - 1)); ++ unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1)); + + if (bits > dma_bits) + dma_bits = bits; +@@ -181,6 +171,7 @@ swiotlb_init_with_default_size(size_t de + break; + } + } ++ io_tlb_end = io_tlb_start + bytes; + + /* + * Allocate and initialize the free list array. This array is used +@@ -209,15 +200,12 @@ swiotlb_init_with_default_size(size_t de + if (rc) + panic("No suitable physical memory available for SWIOTLB overflow buffer!\n"); + +- iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT; +- iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT); +- + printk(KERN_INFO "Software IO TLB enabled: \n" + " Aperture: %lu megabytes\n" + " Kernel range: %p - %p\n" + " Address size: %u bits\n", + bytes >> 20, +- iotlb_virt_start, iotlb_virt_start + bytes, ++ io_tlb_start, io_tlb_end, + dma_bits); + } + +@@ -245,6 +233,18 @@ swiotlb_init(void) + printk(KERN_INFO "Software IO TLB disabled\n"); + } + ++static int is_swiotlb_buffer(dma_addr_t addr) ++{ ++ unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr)); ++ char *va = pfn_valid(pfn) ? 
__va(pfn << PAGE_SHIFT) : NULL; ++ ++#ifdef CONFIG_HIGHMEM ++ if (pfn >= highstart_pfn) ++ return 0; ++#endif ++ return va >= io_tlb_start && va < io_tlb_end; ++} ++ + /* + * We use __copy_to_user_inatomic to transfer to the host buffer because the + * buffer may be mapped read-only (e.g, in blkback driver) but lower-level +@@ -354,7 +354,7 @@ map_single(struct device *hwdev, struct + io_tlb_list[i] = 0; + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; +- dma_addr = iotlb_virt_start + (index << IO_TLB_SHIFT); ++ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next +@@ -396,7 +396,7 @@ found: + + static struct phys_addr dma_addr_to_phys_addr(char *dma_addr) + { +- int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; ++ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + struct phys_addr buffer = io_tlb_orig_addr[index]; + buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1); + buffer.page += buffer.offset >> PAGE_SHIFT; +@@ -412,7 +412,7 @@ unmap_single(struct device *hwdev, char + { + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; +- int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; ++ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); + + /* +@@ -504,7 +504,7 @@ _swiotlb_map_single(struct device *hwdev + * buffering it. + */ + if (!range_straddles_page_boundary(paddr, size) && +- !address_needs_mapping(hwdev, dev_addr)) ++ !address_needs_mapping(hwdev, dev_addr, size)) + return dev_addr; + + /* +@@ -555,9 +555,11 @@ void + swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir, struct dma_attrs *attrs) + { ++ char *dma_addr = bus_to_virt(dev_addr); ++ + BUG_ON(dir == DMA_NONE); +- if (in_swiotlb_aperture(dev_addr)) +- unmap_single(hwdev, bus_to_virt(dev_addr), size, dir); ++ if (is_swiotlb_buffer(dev_addr)) ++ unmap_single(hwdev, dma_addr, size, dir); + else + gnttab_dma_unmap_page(dev_addr); + } +@@ -583,36 +585,44 @@ void + swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) + { ++ char *dma_addr = bus_to_virt(dev_addr); ++ + BUG_ON(dir == DMA_NONE); +- if (in_swiotlb_aperture(dev_addr)) +- sync_single(hwdev, bus_to_virt(dev_addr), size, dir); ++ if (is_swiotlb_buffer(dev_addr)) ++ sync_single(hwdev, dma_addr, size, dir); + } + + void + swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) + { ++ char *dma_addr = bus_to_virt(dev_addr); ++ + BUG_ON(dir == DMA_NONE); +- if (in_swiotlb_aperture(dev_addr)) +- sync_single(hwdev, bus_to_virt(dev_addr), size, dir); ++ if (is_swiotlb_buffer(dev_addr)) ++ sync_single(hwdev, dma_addr, size, dir); + } + + void + swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir) + { ++ char *dma_addr = bus_to_virt(dev_addr); ++ + BUG_ON(dir == DMA_NONE); +- if (in_swiotlb_aperture(dev_addr)) +- sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir); ++ if (is_swiotlb_buffer(dev_addr)) ++ sync_single(hwdev, dma_addr + offset, size, dir); + } + + void + swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir) + { ++ char *dma_addr = bus_to_virt(dev_addr); ++ + BUG_ON(dir == DMA_NONE); +- if (in_swiotlb_aperture(dev_addr)) +- 
sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir); ++ if (is_swiotlb_buffer(dev_addr)) ++ sync_single(hwdev, dma_addr + offset, size, dir); + } + + void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int, +@@ -650,7 +660,7 @@ swiotlb_map_sg_attrs(struct device *hwde + + if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg)) + + sg->offset, sg->length) +- || address_needs_mapping(hwdev, dev_addr)) { ++ || address_needs_mapping(hwdev, dev_addr, sg->length)) { + gnttab_dma_unmap_page(dev_addr); + buffer.page = sg_page(sg); + buffer.offset = sg->offset; +@@ -694,7 +704,7 @@ swiotlb_unmap_sg_attrs(struct device *hw + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- if (in_swiotlb_aperture(sg->dma_address)) ++ if (sg->dma_address != sg_phys(sg)) + unmap_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + else +@@ -727,7 +737,7 @@ swiotlb_sync_sg_for_cpu(struct device *h + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- if (in_swiotlb_aperture(sg->dma_address)) ++ if (sg->dma_address != sg_phys(sg)) + sync_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } +@@ -743,7 +753,7 @@ swiotlb_sync_sg_for_device(struct device + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- if (in_swiotlb_aperture(sg->dma_address)) ++ if (sg->dma_address != sg_phys(sg)) + sync_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } +--- head-2010-01-18.orig/mm/vmalloc.c 2010-01-18 15:20:21.000000000 +0100 ++++ head-2010-01-18/mm/vmalloc.c 2009-12-16 11:52:01.000000000 +0100 +@@ -482,6 +482,8 @@ static void vmap_debug_free_range(unsign + #ifdef CONFIG_DEBUG_PAGEALLOC + vunmap_page_range(start, end); + flush_tlb_kernel_range(start, end); ++#elif defined(CONFIG_XEN) && defined(CONFIG_X86) ++ vunmap_page_range(start, end); + #endif + } + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-auto-include-xen-interface.diff +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-auto-include-xen-interface.diff @@ -0,0 +1,6149 @@ +Subject: xen3 include-xen-interface +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 983:3358caa6b3a3) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/COPYING 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,38 @@ ++XEN NOTICE ++========== ++ ++This copyright applies to all files within this subdirectory and its ++subdirectories: ++ include/public/*.h ++ include/public/hvm/*.h ++ include/public/io/*.h ++ ++The intention is that these files can be freely copied into the source ++tree of an operating system when porting that OS to run on Xen. Doing ++so does *not* cause the OS to become subject to the terms of the GPL. ++ ++All other files in the Xen source distribution are covered by version ++2 of the GNU General Public License except where explicitly stated ++otherwise within individual source files. 
++ ++ -- Keir Fraser (on behalf of the Xen team) ++ ++===================================================================== ++ ++Permission is hereby granted, free of charge, to any person obtaining a copy ++of this software and associated documentation files (the "Software"), to ++deal in the Software without restriction, including without limitation the ++rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++sell copies of the Software, and to permit persons to whom the Software is ++furnished to do so, subject to the following conditions: ++ ++The above copyright notice and this permission notice shall be included in ++all copies or substantial portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++DEALINGS IN THE SOFTWARE. +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/arch-x86/cpuid.h 2008-01-21 11:15:27.000000000 +0100 +@@ -0,0 +1,68 @@ ++/****************************************************************************** ++ * arch-x86/cpuid.h ++ * ++ * CPUID interface to Xen. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2007 Citrix Systems, Inc. ++ * ++ * Authors: ++ * Keir Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ ++#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ ++ ++/* Xen identification leaves start at 0x40000000. */ ++#define XEN_CPUID_FIRST_LEAF 0x40000000 ++#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) ++ ++/* ++ * Leaf 1 (0x40000000) ++ * EAX: Largest Xen-information leaf. All leaves up to an including @EAX ++ * are supported by the Xen host. ++ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification ++ * of a Xen host. ++ */ ++#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ ++#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ ++#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ ++ ++/* ++ * Leaf 2 (0x40000001) ++ * EAX[31:16]: Xen major version. ++ * EAX[15: 0]: Xen minor version. ++ * EBX-EDX: Reserved (currently all zeroes). 
++ */ ++ ++/* ++ * Leaf 3 (0x40000002) ++ * EAX: Number of hypercall transfer pages. This register is always guaranteed ++ * to specify one hypercall page. ++ * EBX: Base address of Xen-specific MSRs. ++ * ECX: Features 1. Unused bits are set to zero. ++ * EDX: Features 2. Unused bits are set to zero. ++ */ ++ ++/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ ++#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 ++#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) ++ ++#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/arch-x86/hvm/save.h 2010-01-04 11:56:34.000000000 +0100 +@@ -0,0 +1,439 @@ ++/* ++ * Structure definitions for HVM state that is held by Xen and must ++ * be saved along with the domain's memory and device-model state. ++ * ++ * Copyright (c) 2007 XenSource Ltd. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ ++#define __XEN_PUBLIC_HVM_SAVE_X86_H__ ++ ++/* ++ * Save/restore header: general info about the save file. 
++ */ ++ ++#define HVM_FILE_MAGIC 0x54381286 ++#define HVM_FILE_VERSION 0x00000001 ++ ++struct hvm_save_header { ++ uint32_t magic; /* Must be HVM_FILE_MAGIC */ ++ uint32_t version; /* File format version */ ++ uint64_t changeset; /* Version of Xen that saved this file */ ++ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ ++ uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ ++}; ++ ++DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); ++ ++ ++/* ++ * Processor ++ */ ++ ++struct hvm_hw_cpu { ++ uint8_t fpu_regs[512]; ++ ++ uint64_t rax; ++ uint64_t rbx; ++ uint64_t rcx; ++ uint64_t rdx; ++ uint64_t rbp; ++ uint64_t rsi; ++ uint64_t rdi; ++ uint64_t rsp; ++ uint64_t r8; ++ uint64_t r9; ++ uint64_t r10; ++ uint64_t r11; ++ uint64_t r12; ++ uint64_t r13; ++ uint64_t r14; ++ uint64_t r15; ++ ++ uint64_t rip; ++ uint64_t rflags; ++ ++ uint64_t cr0; ++ uint64_t cr2; ++ uint64_t cr3; ++ uint64_t cr4; ++ ++ uint64_t dr0; ++ uint64_t dr1; ++ uint64_t dr2; ++ uint64_t dr3; ++ uint64_t dr6; ++ uint64_t dr7; ++ ++ uint32_t cs_sel; ++ uint32_t ds_sel; ++ uint32_t es_sel; ++ uint32_t fs_sel; ++ uint32_t gs_sel; ++ uint32_t ss_sel; ++ uint32_t tr_sel; ++ uint32_t ldtr_sel; ++ ++ uint32_t cs_limit; ++ uint32_t ds_limit; ++ uint32_t es_limit; ++ uint32_t fs_limit; ++ uint32_t gs_limit; ++ uint32_t ss_limit; ++ uint32_t tr_limit; ++ uint32_t ldtr_limit; ++ uint32_t idtr_limit; ++ uint32_t gdtr_limit; ++ ++ uint64_t cs_base; ++ uint64_t ds_base; ++ uint64_t es_base; ++ uint64_t fs_base; ++ uint64_t gs_base; ++ uint64_t ss_base; ++ uint64_t tr_base; ++ uint64_t ldtr_base; ++ uint64_t idtr_base; ++ uint64_t gdtr_base; ++ ++ uint32_t cs_arbytes; ++ uint32_t ds_arbytes; ++ uint32_t es_arbytes; ++ uint32_t fs_arbytes; ++ uint32_t gs_arbytes; ++ uint32_t ss_arbytes; ++ uint32_t tr_arbytes; ++ uint32_t ldtr_arbytes; ++ ++ uint64_t sysenter_cs; ++ uint64_t sysenter_esp; ++ uint64_t sysenter_eip; ++ ++ /* msr for em64t */ ++ uint64_t shadow_gs; ++ ++ /* msr content saved/restored. */ ++ uint64_t msr_flags; ++ uint64_t msr_lstar; ++ uint64_t msr_star; ++ uint64_t msr_cstar; ++ uint64_t msr_syscall_mask; ++ uint64_t msr_efer; ++ uint64_t msr_tsc_aux; ++ ++ /* guest's idea of what rdtsc() would return */ ++ uint64_t tsc; ++ ++ /* pending event, if any */ ++ union { ++ uint32_t pending_event; ++ struct { ++ uint8_t pending_vector:8; ++ uint8_t pending_type:3; ++ uint8_t pending_error_valid:1; ++ uint32_t pending_reserved:19; ++ uint8_t pending_valid:1; ++ }; ++ }; ++ /* error code for pending event */ ++ uint32_t error_code; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu); ++ ++ ++/* ++ * PIC ++ */ ++ ++struct hvm_hw_vpic { ++ /* IR line bitmasks. */ ++ uint8_t irr; ++ uint8_t imr; ++ uint8_t isr; ++ ++ /* Line IRx maps to IRQ irq_base+x */ ++ uint8_t irq_base; ++ ++ /* ++ * Where are we in ICW2-4 initialisation (0 means no init in progress)? ++ * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). ++ * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) ++ * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) ++ */ ++ uint8_t init_state:4; ++ ++ /* IR line with highest priority. */ ++ uint8_t priority_add:4; ++ ++ /* Reads from A=0 obtain ISR or IRR? */ ++ uint8_t readsel_isr:1; ++ ++ /* Reads perform a polling read? */ ++ uint8_t poll:1; ++ ++ /* Automatically clear IRQs from the ISR during INTA? */ ++ uint8_t auto_eoi:1; ++ ++ /* Automatically rotate IRQ priorities during AEOI? */ ++ uint8_t rotate_on_auto_eoi:1; ++ ++ /* Exclude slave inputs when considering in-service IRQs? 
*/ ++ uint8_t special_fully_nested_mode:1; ++ ++ /* Special mask mode excludes masked IRs from AEOI and priority checks. */ ++ uint8_t special_mask_mode:1; ++ ++ /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ ++ uint8_t is_master:1; ++ ++ /* Edge/trigger selection. */ ++ uint8_t elcr; ++ ++ /* Virtual INT output. */ ++ uint8_t int_output; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); ++ ++ ++/* ++ * IO-APIC ++ */ ++ ++#ifdef __ia64__ ++#define VIOAPIC_IS_IOSAPIC 1 ++#define VIOAPIC_NUM_PINS 24 ++#else ++#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ ++#endif ++ ++struct hvm_hw_vioapic { ++ uint64_t base_address; ++ uint32_t ioregsel; ++ uint32_t id; ++ union vioapic_redir_entry ++ { ++ uint64_t bits; ++ struct { ++ uint8_t vector; ++ uint8_t delivery_mode:3; ++ uint8_t dest_mode:1; ++ uint8_t delivery_status:1; ++ uint8_t polarity:1; ++ uint8_t remote_irr:1; ++ uint8_t trig_mode:1; ++ uint8_t mask:1; ++ uint8_t reserve:7; ++#if !VIOAPIC_IS_IOSAPIC ++ uint8_t reserved[4]; ++ uint8_t dest_id; ++#else ++ uint8_t reserved[3]; ++ uint16_t dest_id; ++#endif ++ } fields; ++ } redirtbl[VIOAPIC_NUM_PINS]; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); ++ ++ ++/* ++ * LAPIC ++ */ ++ ++struct hvm_hw_lapic { ++ uint64_t apic_base_msr; ++ uint32_t disabled; /* VLAPIC_xx_DISABLED */ ++ uint32_t timer_divisor; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); ++ ++struct hvm_hw_lapic_regs { ++ uint8_t data[1024]; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); ++ ++ ++/* ++ * IRQs ++ */ ++ ++struct hvm_hw_pci_irqs { ++ /* ++ * Virtual interrupt wires for a single PCI bus. ++ * Indexed by: device*4 + INTx#. ++ */ ++ union { ++ unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ ++ uint64_t pad[2]; ++ }; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); ++ ++struct hvm_hw_isa_irqs { ++ /* ++ * Virtual interrupt wires for ISA devices. ++ * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). ++ */ ++ union { ++ unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ ++ uint64_t pad[1]; ++ }; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); ++ ++struct hvm_hw_pci_link { ++ /* ++ * PCI-ISA interrupt router. ++ * Each PCI is 'wire-ORed' into one of four links using ++ * the traditional 'barber's pole' mapping ((device + INTx#) & 3). ++ * The router provides a programmable mapping from each link to a GSI. 
++ */ ++ uint8_t route[4]; ++ uint8_t pad0[4]; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); ++ ++/* ++ * PIT ++ */ ++ ++struct hvm_hw_pit { ++ struct hvm_hw_pit_channel { ++ uint32_t count; /* can be 65536 */ ++ uint16_t latched_count; ++ uint8_t count_latched; ++ uint8_t status_latched; ++ uint8_t status; ++ uint8_t read_state; ++ uint8_t write_state; ++ uint8_t write_latch; ++ uint8_t rw_mode; ++ uint8_t mode; ++ uint8_t bcd; /* not supported */ ++ uint8_t gate; /* timer start */ ++ } channels[3]; /* 3 x 16 bytes */ ++ uint32_t speaker_data_on; ++ uint32_t pad0; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); ++ ++ ++/* ++ * RTC ++ */ ++ ++#define RTC_CMOS_SIZE 14 ++struct hvm_hw_rtc { ++ /* CMOS bytes */ ++ uint8_t cmos_data[RTC_CMOS_SIZE]; ++ /* Index register for 2-part operations */ ++ uint8_t cmos_index; ++ uint8_t pad0; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); ++ ++ ++/* ++ * HPET ++ */ ++ ++#define HPET_TIMER_NUM 3 /* 3 timers supported now */ ++struct hvm_hw_hpet { ++ /* Memory-mapped, software visible registers */ ++ uint64_t capability; /* capabilities */ ++ uint64_t res0; /* reserved */ ++ uint64_t config; /* configuration */ ++ uint64_t res1; /* reserved */ ++ uint64_t isr; /* interrupt status reg */ ++ uint64_t res2[25]; /* reserved */ ++ uint64_t mc64; /* main counter */ ++ uint64_t res3; /* reserved */ ++ struct { /* timers */ ++ uint64_t config; /* configuration/cap */ ++ uint64_t cmp; /* comparator */ ++ uint64_t fsb; /* FSB route, not supported now */ ++ uint64_t res4; /* reserved */ ++ } timers[HPET_TIMER_NUM]; ++ uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ ++ ++ /* Hidden register state */ ++ uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ ++}; ++ ++DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); ++ ++ ++/* ++ * PM timer ++ */ ++ ++struct hvm_hw_pmtimer { ++ uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ ++ uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ ++ uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); ++ ++/* ++ * MTRR MSRs ++ */ ++ ++struct hvm_hw_mtrr { ++#define MTRR_VCNT 8 ++#define NUM_FIXED_MSR 11 ++ uint64_t msr_pat_cr; ++ /* mtrr physbase & physmask msr pair*/ ++ uint64_t msr_mtrr_var[MTRR_VCNT*2]; ++ uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; ++ uint64_t msr_mtrr_cap; ++ uint64_t msr_mtrr_def_type; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); ++ ++/* ++ * Viridian hypervisor context. ++ */ ++ ++struct hvm_viridian_context { ++ uint64_t hypercall_gpa; ++ uint64_t guest_os_id; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(VIRIDIAN, 15, struct hvm_viridian_context); ++ ++/* ++ * Largest type-code in use ++ */ ++#define HVM_SAVE_CODE_MAX 15 ++ ++#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/arch-x86/xen-mca.h 2009-05-19 09:16:41.000000000 +0200 +@@ -0,0 +1,422 @@ ++/****************************************************************************** ++ * arch-x86/mca.h ++ * ++ * Contributed by Advanced Micro Devices, Inc. ++ * Author: Christoph Egger ++ * ++ * Guest OS machine check interface to x86 Xen. 
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ */
++
++/* Full MCA functionality has the following Usecases from the guest side:
++ *
++ * Must have's:
++ * 1. Dom0 and DomU register machine check trap callback handlers
++ * (already done via "set_trap_table" hypercall)
++ * 2. Dom0 registers machine check event callback handler
++ * (doable via EVTCHNOP_bind_virq)
++ * 3. Dom0 and DomU fetch machine check data
++ * 4. Dom0 wants Xen to notify a DomU
++ * 5. Dom0 gets DomU ID from physical address
++ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
++ *
++ * Nice to have's:
++ * 7. Dom0 wants Xen to deactivate a physical CPU
++ * This is better done as a separate task, physical CPU hotplugging,
++ * and hypercall(s) should be sysctl's
++ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
++ * move a DomU (or Dom0 itself) away from a malicious page
++ * producing correctable errors.
++ * 9. Offlining physical page:
++ * Xen frees and never re-uses a certain physical page.
++ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
++ * and tell Xen to trigger a machine check
++ */
++
++#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
++#define __XEN_PUBLIC_ARCH_X86_MCA_H__
++
++/* Hypercall */
++#define __HYPERVISOR_mca __HYPERVISOR_arch_0
++
++/*
++ * The xen-unstable repo has interface version 0x03000001; our interface
++ * is incompatible with that and any future minor revisions, so we
++ * choose a different version number range that is numerically less
++ * than that used in xen-unstable.
++ */
++#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
++
++/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
++#define XEN_MC_NONURGENT 0x0001
++/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
++#define XEN_MC_URGENT 0x0002
++/* IN: Dom0 acknowledges previously-fetched telemetry */
++#define XEN_MC_ACK 0x0004
++
++/* OUT: All is ok */
++#define XEN_MC_OK 0x0
++/* OUT: Domain could not fetch data. */
++#define XEN_MC_FETCHFAILED 0x1
++/* OUT: There was no machine check data to fetch. */
++#define XEN_MC_NODATA 0x2
++/* OUT: Between notification time and this hypercall another
++ * (most likely) correctable error happened. The fetched data
++ * does not match the original machine check data. */
++#define XEN_MC_NOMATCH 0x4
++
++/* OUT: DomU did not register MC NMI handler. Try something else. */
++#define XEN_MC_CANNOTHANDLE 0x8
++/* OUT: Notifying DomU failed. 
Retry later or try something else. */ ++#define XEN_MC_NOTDELIVERED 0x10 ++/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */ ++ ++ ++#ifndef __ASSEMBLY__ ++ ++#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ ++ ++/* ++ * Machine Check Architecure: ++ * structs are read-only and used to report all kinds of ++ * correctable and uncorrectable errors detected by the HW. ++ * Dom0 and DomU: register a handler to get notified. ++ * Dom0 only: Correctable errors are reported via VIRQ_MCA ++ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers ++ */ ++#define MC_TYPE_GLOBAL 0 ++#define MC_TYPE_BANK 1 ++#define MC_TYPE_EXTENDED 2 ++#define MC_TYPE_RECOVERY 3 ++ ++struct mcinfo_common { ++ uint16_t type; /* structure type */ ++ uint16_t size; /* size of this struct in bytes */ ++}; ++ ++ ++#define MC_FLAG_CORRECTABLE (1 << 0) ++#define MC_FLAG_UNCORRECTABLE (1 << 1) ++#define MC_FLAG_RECOVERABLE (1 << 2) ++#define MC_FLAG_POLLED (1 << 3) ++#define MC_FLAG_RESET (1 << 4) ++#define MC_FLAG_CMCI (1 << 5) ++#define MC_FLAG_MCE (1 << 6) ++/* contains global x86 mc information */ ++struct mcinfo_global { ++ struct mcinfo_common common; ++ ++ /* running domain at the time in error (most likely the impacted one) */ ++ uint16_t mc_domid; ++ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ ++ uint32_t mc_socketid; /* physical socket of the physical core */ ++ uint16_t mc_coreid; /* physical impacted core */ ++ uint16_t mc_core_threadid; /* core thread of physical core */ ++ uint32_t mc_apicid; ++ uint32_t mc_flags; ++ uint64_t mc_gstatus; /* global status */ ++}; ++ ++/* contains bank local x86 mc information */ ++struct mcinfo_bank { ++ struct mcinfo_common common; ++ ++ uint16_t mc_bank; /* bank nr */ ++ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0 ++ * and if mc_addr is valid. Never valid on DomU. */ ++ uint64_t mc_status; /* bank status */ ++ uint64_t mc_addr; /* bank address, only valid ++ * if addr bit is set in mc_status */ ++ uint64_t mc_misc; ++ uint64_t mc_ctrl2; ++ uint64_t mc_tsc; ++}; ++ ++ ++struct mcinfo_msr { ++ uint64_t reg; /* MSR */ ++ uint64_t value; /* MSR value */ ++}; ++ ++/* contains mc information from other ++ * or additional mc MSRs */ ++struct mcinfo_extended { ++ struct mcinfo_common common; ++ ++ /* You can fill up to five registers. ++ * If you need more, then use this structure ++ * multiple times. */ ++ ++ uint32_t mc_msrs; /* Number of msr with valid values. */ ++ /* ++ * Currently Intel extended MSR (32/64) include all gp registers ++ * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be ++ * useful at present. So expand this array to 16/32 to leave room. ++ */ ++ struct mcinfo_msr mc_msr[sizeof(void *) * 4]; ++}; ++ ++/* Recovery Action flags. Giving recovery result information to DOM0 */ ++ ++/* Xen takes successful recovery action, the error is recovered */ ++#define REC_ACTION_RECOVERED (0x1 << 0) ++/* No action is performed by XEN */ ++#define REC_ACTION_NONE (0x1 << 1) ++/* It's possible DOM0 might take action ownership in some case */ ++#define REC_ACTION_NEED_RESET (0x1 << 2) ++ ++/* Different Recovery Action types, if the action is performed successfully, ++ * REC_ACTION_RECOVERED flag will be returned. 
++ */ ++ ++/* Page Offline Action */ ++#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) ++/* CPU offline Action */ ++#define MC_ACTION_CPU_OFFLINE (0x1 << 1) ++/* L3 cache disable Action */ ++#define MC_ACTION_CACHE_SHRINK (0x1 << 2) ++ ++/* Below interface used between XEN/DOM0 for passing XEN's recovery action ++ * information to DOM0. ++ * usage Senario: After offlining broken page, XEN might pass its page offline ++ * recovery action result to DOM0. DOM0 will save the information in ++ * non-volatile memory for further proactive actions, such as offlining the ++ * easy broken page earlier when doing next reboot. ++*/ ++struct page_offline_action ++{ ++ /* Params for passing the offlined page number to DOM0 */ ++ uint64_t mfn; ++ uint64_t status; ++}; ++ ++struct cpu_offline_action ++{ ++ /* Params for passing the identity of the offlined CPU to DOM0 */ ++ uint32_t mc_socketid; ++ uint16_t mc_coreid; ++ uint16_t mc_core_threadid; ++}; ++ ++#define MAX_UNION_SIZE 16 ++struct mcinfo_recovery ++{ ++ struct mcinfo_common common; ++ uint16_t mc_bank; /* bank nr */ ++ uint8_t action_flags; ++ uint8_t action_types; ++ union { ++ struct page_offline_action page_retire; ++ struct cpu_offline_action cpu_offline; ++ uint8_t pad[MAX_UNION_SIZE]; ++ } action_info; ++}; ++ ++ ++#define MCINFO_HYPERCALLSIZE 1024 ++#define MCINFO_MAXSIZE 768 ++ ++struct mc_info { ++ /* Number of mcinfo_* entries in mi_data */ ++ uint32_t mi_nentries; ++ uint32_t _pad0; ++ uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; ++}; ++typedef struct mc_info mc_info_t; ++DEFINE_XEN_GUEST_HANDLE(mc_info_t); ++ ++#define __MC_MSR_ARRAYSIZE 8 ++#define __MC_NMSRS 1 ++#define MC_NCAPS 7 /* 7 CPU feature flag words */ ++#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ ++#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ ++#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ ++#define MC_CAPS_LINUX 3 /* Linux-defined */ ++#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ ++#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ ++#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ ++ ++struct mcinfo_logical_cpu { ++ uint32_t mc_cpunr; ++ uint32_t mc_chipid; ++ uint16_t mc_coreid; ++ uint16_t mc_threadid; ++ uint32_t mc_apicid; ++ uint32_t mc_clusterid; ++ uint32_t mc_ncores; ++ uint32_t mc_ncores_active; ++ uint32_t mc_nthreads; ++ int32_t mc_cpuid_level; ++ uint32_t mc_family; ++ uint32_t mc_vendor; ++ uint32_t mc_model; ++ uint32_t mc_step; ++ char mc_vendorid[16]; ++ char mc_brandid[64]; ++ uint32_t mc_cpu_caps[MC_NCAPS]; ++ uint32_t mc_cache_size; ++ uint32_t mc_cache_alignment; ++ int32_t mc_nmsrvals; ++ struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; ++}; ++typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; ++DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); ++ ++ ++/* ++ * OS's should use these instead of writing their own lookup function ++ * each with its own bugs and drawbacks. ++ * We use macros instead of static inline functions to allow guests ++ * to include this header in assembly files (*.S). 
++ */ ++/* Prototype: ++ * uint32_t x86_mcinfo_nentries(struct mc_info *mi); ++ */ ++#define x86_mcinfo_nentries(_mi) \ ++ (_mi)->mi_nentries ++/* Prototype: ++ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); ++ */ ++#define x86_mcinfo_first(_mi) \ ++ ((struct mcinfo_common *)(_mi)->mi_data) ++/* Prototype: ++ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); ++ */ ++#define x86_mcinfo_next(_mic) \ ++ ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) ++ ++/* Prototype: ++ * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); ++ */ ++#define x86_mcinfo_lookup(_ret, _mi, _type) \ ++ do { \ ++ uint32_t found, i; \ ++ struct mcinfo_common *_mic; \ ++ \ ++ found = 0; \ ++ (_ret) = NULL; \ ++ if (_mi == NULL) break; \ ++ _mic = x86_mcinfo_first(_mi); \ ++ for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ ++ if (_mic->type == (_type)) { \ ++ found = 1; \ ++ break; \ ++ } \ ++ _mic = x86_mcinfo_next(_mic); \ ++ } \ ++ (_ret) = found ? _mic : NULL; \ ++ } while (0) ++ ++ ++/* Usecase 1 ++ * Register machine check trap callback handler ++ * (already done via "set_trap_table" hypercall) ++ */ ++ ++/* Usecase 2 ++ * Dom0 registers machine check event callback handler ++ * done by EVTCHNOP_bind_virq ++ */ ++ ++/* Usecase 3 ++ * Fetch machine check data from hypervisor. ++ * Note, this hypercall is special, because both Dom0 and DomU must use this. ++ */ ++#define XEN_MC_fetch 1 ++struct xen_mc_fetch { ++ /* IN/OUT variables. */ ++ uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, ++ XEN_MC_ACK if ack'ing an earlier fetch */ ++ /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, ++ XEN_MC_NODATA, XEN_MC_NOMATCH */ ++ uint32_t _pad0; ++ uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ ++ ++ /* OUT variables. */ ++ XEN_GUEST_HANDLE(mc_info_t) data; ++}; ++typedef struct xen_mc_fetch xen_mc_fetch_t; ++DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); ++ ++ ++/* Usecase 4 ++ * This tells the hypervisor to notify a DomU about the machine check error ++ */ ++#define XEN_MC_notifydomain 2 ++struct xen_mc_notifydomain { ++ /* IN variables. */ ++ uint16_t mc_domid; /* The unprivileged domain to notify. */ ++ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. ++ * Usually echo'd value from the fetch hypercall. */ ++ ++ /* IN/OUT variables. */ ++ uint32_t flags; ++ ++/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ ++/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ ++}; ++typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; ++DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); ++ ++#define XEN_MC_physcpuinfo 3 ++struct xen_mc_physcpuinfo { ++ /* IN/OUT */ ++ uint32_t ncpus; ++ uint32_t _pad0; ++ /* OUT */ ++ XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; ++}; ++ ++#define XEN_MC_msrinject 4 ++#define MC_MSRINJ_MAXMSRS 8 ++struct xen_mc_msrinject { ++ /* IN */ ++ uint32_t mcinj_cpunr; /* target processor id */ ++ uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ ++ uint32_t mcinj_count; /* 0 .. 
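[Editorial aside, not part of the patch: a minimal sketch of how a Dom0 kernel might drive the fetch/ack interface above. It assumes a conventional hypercall wrapper named HYPERVISOR_mca() for __HYPERVISOR_mca (no such wrapper is defined in this header) and elides error handling.]

    static struct mc_info mi;

    /* Illustrative only: fetch urgent telemetry, walk the records, then ack. */
    int example_mc_fetch(void)
    {
        struct xen_mc mc = {
            .cmd               = XEN_MC_fetch,
            .interface_version = XEN_MCA_INTERFACE_VERSION,
        };
        struct mcinfo_common *mic;
        uint64_t id;
        uint32_t i;
        int rc;

        mc.u.mc_fetch.flags = XEN_MC_URGENT;
        set_xen_guest_handle(mc.u.mc_fetch.data, &mi);
        rc = HYPERVISOR_mca(&mc);                 /* assumed wrapper */
        if (rc != 0 || (mc.u.mc_fetch.flags & XEN_MC_NODATA))
            return rc;
        id = mc.u.mc_fetch.fetch_id;              /* needed for the ack */

        /* Records are variable-sized; walk them with the accessor macros. */
        mic = x86_mcinfo_first(&mi);
        for (i = 0; i < x86_mcinfo_nentries(&mi); i++) {
            if (mic->type == MC_TYPE_BANK)
                /* inspect ((struct mcinfo_bank *)mic)->mc_status here */;
            mic = x86_mcinfo_next(mic);
        }

        /* Usecase 3 with XEN_MC_ACK: tell Xen we consumed this telemetry. */
        mc.u.mc_fetch.flags    = XEN_MC_URGENT | XEN_MC_ACK;
        mc.u.mc_fetch.fetch_id = id;
        return HYPERVISOR_mca(&mc);
    }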
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/arch-x86/xen-x86_32.h 2008-07-21 11:00:33.000000000 +0200
+@@ -0,0 +1,180 @@
++/******************************************************************************
++ * xen-x86_32.h
++ *
++ * Guest OS interface to x86 32-bit Xen.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2004-2007, K A Fraser
++ */
++
++#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
++#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
++
++/*
++ * Hypercall interface:
++ *  Input:  %ebx, %ecx, %edx, %esi, %edi (arguments 1-5)
++ *  Output: %eax
++ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
++ *  call hypercall_page + hypercall-number * 32
++ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
++ */
++
++#if __XEN_INTERFACE_VERSION__ < 0x00030203
++/*
++ * Legacy hypercall interface:
++ * As above, except the entry sequence to the hypervisor is:
++ *  mov $hypercall-number*32,%eax ; int $0x82
++ */
++#define TRAP_INSTR "int $0x82"
++#endif
++
++/*
++ * These flat segments are in the Xen-private section of every GDT. Since these
++ * are also present in the initial GDT, many OSes will be able to avoid
++ * installing their own GDT.
++ */
++#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
++#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
++#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
++#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
++#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
++#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
++
++#define FLAT_KERNEL_CS FLAT_RING1_CS
++#define FLAT_KERNEL_DS FLAT_RING1_DS
++#define FLAT_KERNEL_SS FLAT_RING1_SS
++#define FLAT_USER_CS FLAT_RING3_CS
++#define FLAT_USER_DS FLAT_RING3_DS
++#define FLAT_USER_SS FLAT_RING3_SS
++
++#define __HYPERVISOR_VIRT_START_PAE 0xF5800000
++#define __MACH2PHYS_VIRT_START_PAE 0xF5800000
++#define __MACH2PHYS_VIRT_END_PAE 0xF6800000
++#define HYPERVISOR_VIRT_START_PAE \
++    mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
++#define MACH2PHYS_VIRT_START_PAE \
++    mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
++#define MACH2PHYS_VIRT_END_PAE \
++    mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
++
++/* Non-PAE bounds are obsolete. */
++#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
++#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000
++#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000
++#define HYPERVISOR_VIRT_START_NONPAE \
++    mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
++#define MACH2PHYS_VIRT_START_NONPAE \
++    mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
++#define MACH2PHYS_VIRT_END_NONPAE \
++    mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
++
++#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
++#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE
++#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE
++
++#ifndef HYPERVISOR_VIRT_START
++#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
++#endif
++
++#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
++#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
++#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
++#ifndef machine_to_phys_mapping
++#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
++#endif
++
++/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
++#if defined(__XEN__) || defined(__XEN_TOOLS__)
++#undef ___DEFINE_XEN_GUEST_HANDLE
++#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
++    typedef struct { type *p; }                                 \
++        __guest_handle_ ## name;                                \
++    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
++        __guest_handle_64_ ## name
++#undef set_xen_guest_handle
++#define set_xen_guest_handle(hnd, val)                      \
++    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
++         (hnd).p = val;                                     \
++    } while ( 0 )
++#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
++#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
++#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
++#endif
++
++#ifndef __ASSEMBLY__
++
++struct cpu_user_regs {
++    uint32_t ebx;
++    uint32_t ecx;
++    uint32_t edx;
++    uint32_t esi;
++    uint32_t edi;
++    uint32_t ebp;
++    uint32_t eax;
++    uint16_t error_code;    /* private */
++    uint16_t entry_vector;  /* private */
++    uint32_t eip;
++    uint16_t cs;
++    uint8_t saved_upcall_mask;
++    uint8_t _pad0;
++    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
++    uint32_t esp;
++    uint16_t ss, _pad1;
++    uint16_t es, _pad2;
++    uint16_t ds, _pad3;
++    uint16_t fs, _pad4;
++    uint16_t gs, _pad5;
++};
++typedef struct cpu_user_regs cpu_user_regs_t;
++DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
++
++/*
++ * Page-directory addresses above 4GB do not fit into architectural %cr3.
++ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
++ * must use the following accessor macros to pack/unpack valid MFNs.
++ */
++#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
++#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
++
++struct arch_vcpu_info {
++    unsigned long cr2;
++    unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
++};
++typedef struct arch_vcpu_info arch_vcpu_info_t;
++
++struct xen_callback {
++    unsigned long cs;
++    unsigned long eip;
++};
++typedef struct xen_callback xen_callback_t;
++
++#endif /* !__ASSEMBLY__ */
++
++#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
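[Editorial aside, not part of the patch: the 32-bit cr3 accessors above rotate the MFN so that cr3 bits [31:12] carry MFN bits [19:0] and cr3 bits [11:0] carry MFN bits [31:20], letting a PAE page-directory frame above 4GB fit in a 32-bit field. A tiny standalone demo of the round-trip, using the same macro bodies:]

    #include <stdio.h>

    /* Same packing as in xen-x86_32.h, repeated here for a standalone demo. */
    #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
    #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))

    int main(void)
    {
        unsigned mfn = 0x123456;            /* frame number above the 4GB line */
        unsigned cr3 = xen_pfn_to_cr3(mfn); /* 0x23456001: high bits rotated low */
        printf("mfn %#x -> cr3 %#x -> mfn %#x\n",
               mfn, cr3, xen_cr3_to_pfn(cr3));
        return 0;                           /* round-trips for any 32-bit MFN */
    }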
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/arch-x86/xen-x86_64.h 2008-04-02 12:34:02.000000000 +0200
+@@ -0,0 +1,212 @@
++/******************************************************************************
++ * xen-x86_64.h
++ *
++ * Guest OS interface to x86 64-bit Xen.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2004-2006, K A Fraser
++ */
++
++#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
++#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
++
++/*
++ * Hypercall interface:
++ *  Input:  %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5)
++ *  Output: %rax
++ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
++ *  call hypercall_page + hypercall-number * 32
++ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
++ */
++
++#if __XEN_INTERFACE_VERSION__ < 0x00030203
++/*
++ * Legacy hypercall interface:
++ * As above, except the entry sequence to the hypervisor is:
++ *  mov $hypercall-number*32,%eax ; syscall
++ * Clobbered: %rcx, %r11, argument registers (as above)
++ */
++#define TRAP_INSTR "syscall"
++#endif
++
++/*
++ * 64-bit segment selectors
++ * These flat segments are in the Xen-private section of every GDT. Since these
++ * are also present in the initial GDT, many OSes will be able to avoid
++ * installing their own GDT.
++ */
++
++#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
++#define FLAT_RING3_CS64 0xe033  /* GDT index 261 */
++#define FLAT_RING3_DS32 0xe02b  /* GDT index 262 */
++#define FLAT_RING3_DS64 0x0000  /* NULL selector */
++#define FLAT_RING3_SS32 0xe02b  /* GDT index 262 */
++#define FLAT_RING3_SS64 0xe02b  /* GDT index 262 */
++
++#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
++#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
++#define FLAT_KERNEL_DS FLAT_KERNEL_DS64
++#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
++#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
++#define FLAT_KERNEL_CS FLAT_KERNEL_CS64
++#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
++#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
++#define FLAT_KERNEL_SS FLAT_KERNEL_SS64
++
++#define FLAT_USER_DS64 FLAT_RING3_DS64
++#define FLAT_USER_DS32 FLAT_RING3_DS32
++#define FLAT_USER_DS FLAT_USER_DS64
++#define FLAT_USER_CS64 FLAT_RING3_CS64
++#define FLAT_USER_CS32 FLAT_RING3_CS32
++#define FLAT_USER_CS FLAT_USER_CS64
++#define FLAT_USER_SS64 FLAT_RING3_SS64
++#define FLAT_USER_SS32 FLAT_RING3_SS32
++#define FLAT_USER_SS FLAT_USER_SS64
++
++#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
++#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
++#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
++#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
++
++#ifndef HYPERVISOR_VIRT_START
++#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
++#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
++#endif
++
++#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
++#define MACH2PHYS_VIRT_END   mk_unsigned_long(__MACH2PHYS_VIRT_END)
++#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
++#ifndef machine_to_phys_mapping
++#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
++#endif
++
++/*
++ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
++ *  @which == SEGBASE_*  ;  @base == 64-bit base address
++ * Returns 0 on success.
++ */
++#define SEGBASE_FS          0
++#define SEGBASE_GS_USER     1
++#define SEGBASE_GS_KERNEL   2
++#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
++
++/*
++ * int HYPERVISOR_iret(void)
++ * All arguments are on the kernel stack, in the following format.
++ * Never returns if successful. Current kernel context is lost.
++ * The saved CS is mapped as follows:
++ *   RING0 -> RING3 kernel mode.
++ *   RING1 -> RING3 kernel mode.
++ *   RING2 -> RING3 kernel mode.
++ *   RING3 -> RING3 user mode.
++ * However RING0 indicates that the guest kernel should return to itself
++ * directly with
++ *      orb   $3,1*8(%rsp)
++ *      iretq
++ * If flags contains VGCF_in_syscall:
++ *   Restore RAX, RIP, RFLAGS, RSP.
++ *   Discard R11, RCX, CS, SS.
++ * Otherwise:
++ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
++ * All other registers are saved on hypercall entry and restored to user.
++ */
++/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
++#define _VGCF_in_syscall 8
++#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
++#define VGCF_IN_SYSCALL  VGCF_in_syscall
++
++#ifndef __ASSEMBLY__
++
++struct iret_context {
++    /* Top of stack (%rsp at point of hypercall). */
++    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
++    /* Bottom of iret stack frame. */
++};
++
++#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
++/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
++#define __DECL_REG(name) union { \
++    uint64_t r ## name, e ## name; \
++    uint32_t _e ## name; \
++}
++#else
++/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
++#define __DECL_REG(name) uint64_t r ## name
++#endif
++
++struct cpu_user_regs {
++    uint64_t r15;
++    uint64_t r14;
++    uint64_t r13;
++    uint64_t r12;
++    __DECL_REG(bp);
++    __DECL_REG(bx);
++    uint64_t r11;
++    uint64_t r10;
++    uint64_t r9;
++    uint64_t r8;
++    __DECL_REG(ax);
++    __DECL_REG(cx);
++    __DECL_REG(dx);
++    __DECL_REG(si);
++    __DECL_REG(di);
++    uint32_t error_code;    /* private */
++    uint32_t entry_vector;  /* private */
++    __DECL_REG(ip);
++    uint16_t cs, _pad0[1];
++    uint8_t  saved_upcall_mask;
++    uint8_t  _pad1[3];
++    __DECL_REG(flags);      /* rflags.IF == !saved_upcall_mask */
++    __DECL_REG(sp);
++    uint16_t ss, _pad2[3];
++    uint16_t es, _pad3[3];
++    uint16_t ds, _pad4[3];
++    uint16_t fs, _pad5[3];  /* Non-zero => takes precedence over fs_base.     */
++    uint16_t gs, _pad6[3];  /* Non-zero => takes precedence over gs_base_usr. */
++};
++typedef struct cpu_user_regs cpu_user_regs_t;
++DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
++
++#undef __DECL_REG
++
++#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
++#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
++
++struct arch_vcpu_info {
++    unsigned long cr2;
++    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
++};
++typedef struct arch_vcpu_info arch_vcpu_info_t;
++
++typedef unsigned long xen_callback_t;
++
++#endif /* !__ASSEMBLY__ */
++
++#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
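[Editorial aside, not part of the patch: note that in the gcc branch of __DECL_REG above, both the r-name and the e-name are uint64_t aliases of the same slot; only the underscore-prefixed name is a 32-bit overlay. A self-contained demo (builds with gcc, which accepts the anonymous union member):]

    #include <stdint.h>
    #include <stdio.h>

    #define __DECL_REG(name) union { \
        uint64_t r ## name, e ## name; \
        uint32_t _e ## name; \
    }

    struct regs_demo { __DECL_REG(ax); };

    int main(void)
    {
        struct regs_demo d;
        d.rax = 0x1122334455667788ULL;
        /* rax and eax name the same 64-bit storage; _eax overlays the
         * low 32 bits on little-endian x86. */
        printf("rax=%#llx eax=%#llx _eax=%#x\n",
               (unsigned long long)d.rax,
               (unsigned long long)d.eax, d._eax);
        return 0;
    }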
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/arch-x86/xen.h 2010-01-04 11:56:34.000000000 +0100
+@@ -0,0 +1,200 @@
++/******************************************************************************
++ * arch-x86/xen.h
++ *
++ * Guest OS interface to x86 Xen.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2004-2006, K A Fraser
++ */
++
++#include "../xen.h"
++
++#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
++#define __XEN_PUBLIC_ARCH_X86_XEN_H__
++
++/* Structural guest handles introduced in 0x00030201. */
++#if __XEN_INTERFACE_VERSION__ >= 0x00030201
++#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
++    typedef struct { type *p; } __guest_handle_ ## name
++#else
++#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
++    typedef type * __guest_handle_ ## name
++#endif
++
++#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
++    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
++    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
++#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
++#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name
++#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name)
++#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
++#ifdef __XEN_TOOLS__
++#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
++#endif
++
++#if defined(__i386__)
++#include "xen-x86_32.h"
++#elif defined(__x86_64__)
++#include "xen-x86_64.h"
++#endif
++
++#ifndef __ASSEMBLY__
++typedef unsigned long xen_pfn_t;
++#define PRI_xen_pfn "lx"
++#endif
++
++/*
++ * SEGMENT DESCRIPTOR TABLES
++ */
++/*
++ * A number of GDT entries are reserved by Xen. These are not situated at the
++ * start of the GDT because some stupid OSes export hard-coded selector values
++ * in their ABI. These hard-coded values are always near the start of the GDT,
++ * so Xen places itself out of the way, at the far end of the GDT.
++ */
++#define FIRST_RESERVED_GDT_PAGE  14
++#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
++#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
++
++/* Maximum number of virtual CPUs in legacy multi-processor guests. */
++#define XEN_LEGACY_MAX_VCPUS 32
++
++#ifndef __ASSEMBLY__
++
++typedef unsigned long xen_ulong_t;
++
++/*
++ * Send an array of these to HYPERVISOR_set_trap_table().
++ * The privilege level specifies which modes may enter a trap via a software
++ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
++ * privilege levels as follows:
++ *  Level == 0: No one may enter
++ *  Level == 1: Kernel may enter
++ *  Level == 2: Kernel may enter
++ *  Level == 3: Everyone may enter
++ */
++#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
++#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
++#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
++#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
++struct trap_info {
++    uint8_t       vector;  /* exception vector */
++    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable? */
++    uint16_t      cs;      /* code selector */
++    unsigned long address; /* code offset */
++};
++typedef struct trap_info trap_info_t;
++DEFINE_XEN_GUEST_HANDLE(trap_info_t);
++
++typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
++
++/*
++ * The following is all CPU context. Note that the fpu_ctxt block is filled
++ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
++ */
++struct vcpu_guest_context {
++    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
++    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers */
++#define VGCF_I387_VALID                (1<<0)
++#define VGCF_IN_KERNEL                 (1<<2)
++#define _VGCF_i387_valid               0
++#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
++#define _VGCF_in_kernel                2
++#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
++#define _VGCF_failsafe_disables_events 3
++#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
++#define _VGCF_syscall_disables_events  4
++#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
++#define _VGCF_online                   5
++#define VGCF_online                    (1<<_VGCF_online)
++    unsigned long flags;                    /* VGCF_* flags */
++    struct cpu_user_regs user_regs;         /* User-level CPU registers */
++    struct trap_info trap_ctxt[256];        /* Virtual IDT */
++    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
++    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
++    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1) */
++    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
++    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers) */
++    unsigned long debugreg[8];              /* DB0-DB7 (debug registers) */
++#ifdef __i386__
++    unsigned long event_callback_cs;        /* CS:EIP of event callback */
++    unsigned long event_callback_eip;
++    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback */
++    unsigned long failsafe_callback_eip;
++#else
++    unsigned long event_callback_eip;
++    unsigned long failsafe_callback_eip;
++#ifdef __XEN__
++    union {
++        unsigned long syscall_callback_eip;
++        struct {
++            unsigned int event_callback_cs;    /* compat CS of event cb */
++            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
++        };
++    };
++#else
++    unsigned long syscall_callback_eip;
++#endif
++#endif
++    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
++#ifdef __x86_64__
++    /* Segment base addresses. */
++    uint64_t fs_base;
++    uint64_t gs_base_kernel;
++    uint64_t gs_base_user;
++#endif
++};
++typedef struct vcpu_guest_context vcpu_guest_context_t;
++DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
++
++struct arch_shared_info {
++    unsigned long max_pfn;                  /* max pfn that appears in table */
++    /* Frame containing list of mfns containing list of mfns containing p2m. */
++    xen_pfn_t pfn_to_mfn_frame_list_list;
++    unsigned long nmi_reason;
++    uint64_t pad[32];
++};
++typedef struct arch_shared_info arch_shared_info_t;
++
++#endif /* !__ASSEMBLY__ */
++
++/*
++ * Prefix forces emulation of some non-trapping instructions.
++ * Currently only CPUID.
++ */
++#ifdef __ASSEMBLY__
++#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
++#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
++#else
++#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
++#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
++#endif
++
++#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
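[Editorial aside, not part of the patch: a hedged sketch of filling one trap_info entry with the TI_* macros above, as a PV guest kernel might before registering its virtual IDT. The handler symbol is hypothetical, and the HYPERVISOR_set_trap_table() wrapper is assumed rather than defined in this header.]

    extern void page_fault_handler(void);  /* hypothetical guest handler */

    void example_fill_trap_entry(void)
    {
        struct trap_info ti = { 0 };

        ti.vector  = 14;                   /* #PF */
        ti.cs      = FLAT_KERNEL_CS;
        ti.address = (unsigned long)page_fault_handler;
        TI_SET_DPL(&ti, 1);                /* only the kernel may enter */
        TI_SET_IF(&ti, 1);                 /* clear event-enable on entry */
        /* ... append to an array terminated by a zeroed entry and pass it
           to HYPERVISOR_set_trap_table() (assumed wrapper). */
        (void)ti;
    }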
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/arch-x86_32.h 2007-06-12 13:14:19.000000000 +0200
+@@ -0,0 +1,27 @@
++/******************************************************************************
++ * arch-x86_32.h
++ *
++ * Guest OS interface to x86 32-bit Xen.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2004-2006, K A Fraser
++ */
++
++#include "arch-x86/xen.h"
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/arch-x86_64.h 2007-06-12 13:14:19.000000000 +0200
+@@ -0,0 +1,27 @@
++/******************************************************************************
++ * arch-x86_64.h
++ *
++ * Guest OS interface to x86 64-bit Xen.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2004-2006, K A Fraser
++ */
++
++#include "arch-x86/xen.h"
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/dom0_ops.h 2007-06-12 13:14:19.000000000 +0200
+@@ -0,0 +1,120 @@
++/******************************************************************************
++ * dom0_ops.h
++ *
++ * Process command requests from domain-0 guest OS.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2002-2003, B Dragovic
++ * Copyright (c) 2002-2006, K Fraser
++ */
++
++#ifndef __XEN_PUBLIC_DOM0_OPS_H__
++#define __XEN_PUBLIC_DOM0_OPS_H__
++
++#include "xen.h"
++#include "platform.h"
++
++#if __XEN_INTERFACE_VERSION__ >= 0x00030204
++#error "dom0_ops.h is a compatibility interface only"
++#endif
++
++#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION
++
++#define DOM0_SETTIME XENPF_settime
++#define dom0_settime xenpf_settime
++#define dom0_settime_t xenpf_settime_t
++
++#define DOM0_ADD_MEMTYPE XENPF_add_memtype
++#define dom0_add_memtype xenpf_add_memtype
++#define dom0_add_memtype_t xenpf_add_memtype_t
++
++#define DOM0_DEL_MEMTYPE XENPF_del_memtype
++#define dom0_del_memtype xenpf_del_memtype
++#define dom0_del_memtype_t xenpf_del_memtype_t
++
++#define DOM0_READ_MEMTYPE XENPF_read_memtype
++#define dom0_read_memtype xenpf_read_memtype
++#define dom0_read_memtype_t xenpf_read_memtype_t
++
++#define DOM0_MICROCODE XENPF_microcode_update
++#define dom0_microcode xenpf_microcode_update
++#define dom0_microcode_t xenpf_microcode_update_t
++
++#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk
++#define dom0_platform_quirk xenpf_platform_quirk
++#define dom0_platform_quirk_t xenpf_platform_quirk_t
++
++typedef uint64_t cpumap_t;
++
++/* Unsupported legacy operation -- defined for API compatibility. */
++#define DOM0_MSR 15
++struct dom0_msr {
++    /* IN variables. */
++    uint32_t write;
++    cpumap_t cpu_mask;
++    uint32_t msr;
++    uint32_t in1;
++    uint32_t in2;
++    /* OUT variables. */
++    uint32_t out1;
++    uint32_t out2;
++};
++typedef struct dom0_msr dom0_msr_t;
++DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
++
++/* Unsupported legacy operation -- defined for API compatibility. */
++#define DOM0_PHYSICAL_MEMORY_MAP 40
++struct dom0_memory_map_entry {
++    uint64_t start, end;
++    uint32_t flags; /* reserved */
++    uint8_t is_ram;
++};
++typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
++DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
++
++struct dom0_op {
++    uint32_t cmd;
++    uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
++    union {
++        struct dom0_msr               msr;
++        struct dom0_settime           settime;
++        struct dom0_add_memtype       add_memtype;
++        struct dom0_del_memtype       del_memtype;
++        struct dom0_read_memtype      read_memtype;
++        struct dom0_microcode         microcode;
++        struct dom0_platform_quirk    platform_quirk;
++        struct dom0_memory_map_entry  physical_memory_map;
++        uint8_t                       pad[128];
++    } u;
++};
++typedef struct dom0_op dom0_op_t;
++DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
++
++#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/domctl.h 2010-01-07 09:38:29.000000000 +0100
+@@ -0,0 +1,904 @@
++/******************************************************************************
++ * domctl.h
++ *
++ * Domain management operations. For use by node control stack.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2002-2003, B Dragovic
++ * Copyright (c) 2002-2006, K Fraser
++ */
++
++#ifndef __XEN_PUBLIC_DOMCTL_H__
++#define __XEN_PUBLIC_DOMCTL_H__
++
++#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
++#error "domctl operations are intended for use by node control tools only"
++#endif
++
++#include "xen.h"
++#include "grant_table.h"
++
++#define XEN_DOMCTL_INTERFACE_VERSION 0x00000006
++
++struct xenctl_cpumap {
++    XEN_GUEST_HANDLE_64(uint8) bitmap;
++    uint32_t nr_cpus;
++};
++
++/*
++ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
++ * If it is specified as zero, an id is auto-allocated and returned.
++ */
++/* XEN_DOMCTL_createdomain */
++struct xen_domctl_createdomain {
++    /* IN parameters */
++    uint32_t ssidref;
++    xen_domain_handle_t handle;
++ /* Is this an HVM guest (as opposed to a PV guest)? */
++#define _XEN_DOMCTL_CDF_hvm_guest 0
++#define XEN_DOMCTL_CDF_hvm_guest  (1U<<_XEN_DOMCTL_CDF_hvm_guest)
++ /* Use hardware-assisted paging if available? */
++#define _XEN_DOMCTL_CDF_hap 1
++#define XEN_DOMCTL_CDF_hap        (1U<<_XEN_DOMCTL_CDF_hap)
++ /* Should domain memory integrity be verified by tboot during Sx? */
++#define _XEN_DOMCTL_CDF_s3_integrity 2
++#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
++    uint32_t flags;
++ /* Disable out-of-sync shadow page tables? */
++#define _XEN_DOMCTL_CDF_oos_off 3
++#define XEN_DOMCTL_CDF_oos_off  (1U<<_XEN_DOMCTL_CDF_oos_off)
++};
++typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
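[Editorial aside, not part of the patch: a hedged sketch of how a toolstack might fill a createdomain request. Per the NB above, setting .domain to zero asks Xen to pick a free domid and echo it back; the do_domctl() issue path is assumed (e.g., Dom0's privcmd ioctl) and is not part of this header.]

    void example_createdomain(void)
    {
        struct xen_domctl domctl = {
            .cmd               = XEN_DOMCTL_createdomain,
            .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
            .domain            = 0,   /* 0 => Xen auto-allocates a domid */
        };

        domctl.u.createdomain.ssidref = 0;
        domctl.u.createdomain.flags   =
            XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap;
        /* do_domctl(&domctl);  -- assumed privileged issue path;
           on success, domctl.domain holds the new domain id */
    }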
++
++/* XEN_DOMCTL_getdomaininfo */
++struct xen_domctl_getdomaininfo {
++    /* OUT variables. */
++    domid_t domain;               /* Also echoed in domctl.domain */
++ /* Domain is scheduled to die. */
++#define _XEN_DOMINF_dying     0
++#define XEN_DOMINF_dying      (1U<<_XEN_DOMINF_dying)
++ /* Domain is an HVM guest (as opposed to a PV guest). */
++#define _XEN_DOMINF_hvm_guest 1
++#define XEN_DOMINF_hvm_guest  (1U<<_XEN_DOMINF_hvm_guest)
++ /* The guest OS has shut down. */
++#define _XEN_DOMINF_shutdown  2
++#define XEN_DOMINF_shutdown   (1U<<_XEN_DOMINF_shutdown)
++ /* Currently paused by control software. */
++#define _XEN_DOMINF_paused    3
++#define XEN_DOMINF_paused     (1U<<_XEN_DOMINF_paused)
++ /* Currently blocked pending an event. */
++#define _XEN_DOMINF_blocked   4
++#define XEN_DOMINF_blocked    (1U<<_XEN_DOMINF_blocked)
++ /* Domain is currently running. */
++#define _XEN_DOMINF_running   5
++#define XEN_DOMINF_running    (1U<<_XEN_DOMINF_running)
++ /* Being debugged. */
++#define _XEN_DOMINF_debugged  6
++#define XEN_DOMINF_debugged   (1U<<_XEN_DOMINF_debugged)
++ /* XEN_DOMINF_shutdown guest-supplied code. */
++#define XEN_DOMINF_shutdownmask 255
++#define XEN_DOMINF_shutdownshift 16
++    uint32_t flags;               /* XEN_DOMINF_* */
++    uint64_aligned_t tot_pages;
++    uint64_aligned_t max_pages;
++    uint64_aligned_t shr_pages;
++    uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
++    uint64_aligned_t cpu_time;
++    uint32_t nr_online_vcpus;     /* Number of VCPUs currently online. */
++    uint32_t max_vcpu_id;         /* Maximum VCPUID in use by this domain. */
++    uint32_t ssidref;
++    xen_domain_handle_t handle;
++};
++typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
++
++
++/* XEN_DOMCTL_getmemlist */
++struct xen_domctl_getmemlist {
++    /* IN variables. */
++    /* Max entries to write to output buffer. */
++    uint64_aligned_t max_pfns;
++    /* Start index in guest's page list. */
++    uint64_aligned_t start_pfn;
++    XEN_GUEST_HANDLE_64(uint64) buffer;
++    /* OUT variables. */
++    uint64_aligned_t num_pfns;
++};
++typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
++
++
++/* XEN_DOMCTL_getpageframeinfo */
++
++#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
++#define XEN_DOMCTL_PFINFO_NOTAB   (0x0U<<28)
++#define XEN_DOMCTL_PFINFO_L1TAB   (0x1U<<28)
++#define XEN_DOMCTL_PFINFO_L2TAB   (0x2U<<28)
++#define XEN_DOMCTL_PFINFO_L3TAB   (0x3U<<28)
++#define XEN_DOMCTL_PFINFO_L4TAB   (0x4U<<28)
++#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
++#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
++#define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
++#define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
++#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
++
++struct xen_domctl_getpageframeinfo {
++    /* IN variables. */
++    uint64_aligned_t gmfn; /* GMFN to query */
++    /* OUT variables. */
++    /* Is the page PINNED to a type? */
++    uint32_t type;         /* see above type defs */
++};
++typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
++
++
++/* XEN_DOMCTL_getpageframeinfo2 */
++struct xen_domctl_getpageframeinfo2 {
++    /* IN variables. */
++    uint64_aligned_t num;
++    /* IN/OUT variables. */
++    XEN_GUEST_HANDLE_64(uint32) array;
++};
++typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
++
++
++/*
++ * Control shadow pagetables operation
++ */
++/* XEN_DOMCTL_shadow_op */
++
++/* Disable shadow mode. */
++#define XEN_DOMCTL_SHADOW_OP_OFF         0
++
++/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */
++#define XEN_DOMCTL_SHADOW_OP_ENABLE      32
++
++/* Log-dirty bitmap operations. */
++ /* Return the bitmap and clean internal copy for next round. */
++#define XEN_DOMCTL_SHADOW_OP_CLEAN       11
++ /* Return the bitmap but do not modify internal copy. */
++#define XEN_DOMCTL_SHADOW_OP_PEEK        12
++
++/* Memory allocation accessors. */
++#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION   30
++#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION   31
++
++/* Legacy enable operations. */
++ /* Equiv. to ENABLE with no mode flags. */
++#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST       1
++ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
++#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY   2
++ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
++#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE  3
++
++/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */
++ /*
++  * Shadow pagetables are refcounted: guest does not use explicit mmu
++  * operations nor write-protect its pagetables.
++  */
++#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  (1 << 1)
++ /*
++  * Log pages in a bitmap as they are dirtied.
++  * Used for live relocation to determine which pages must be re-sent.
++  */
++#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2)
++ /*
++  * Automatically translate GPFNs into MFNs.
++  */
++#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3)
++ /*
++  * Xen does not steal virtual address space from the guest.
++  * Requires HVM support.
++  */
++#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL  (1 << 4)
++
++struct xen_domctl_shadow_op_stats {
++    uint32_t fault_count;
++    uint32_t dirty_count;
++};
++typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
++
++struct xen_domctl_shadow_op {
++    /* IN variables. */
++    uint32_t op;       /* XEN_DOMCTL_SHADOW_OP_* */
++
++    /* OP_ENABLE */
++    uint32_t mode;     /* XEN_DOMCTL_SHADOW_ENABLE_* */
++
++    /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
++    uint32_t mb;       /* Shadow memory allocation in MB */
++
++    /* OP_PEEK / OP_CLEAN */
++    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
++    uint64_aligned_t pages;  /* Size of buffer. Updated with actual size. */
++    struct xen_domctl_shadow_op_stats stats;
++};
++typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
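[Editorial aside, not part of the patch: a hedged sketch of one log-dirty round, as a live-migration tool might issue it. CLEAN returns the current dirty bitmap and resets Xen's internal copy, so repeated calls yield only pages dirtied since the previous round; the do_domctl() path is assumed.]

    void example_logdirty_round(domid_t domid, uint8_t *bitmap,
                                uint64_t bitmap_pages)
    {
        struct xen_domctl domctl = {
            .cmd               = XEN_DOMCTL_shadow_op,
            .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
            .domain            = domid,
        };

        domctl.u.shadow_op.op    = XEN_DOMCTL_SHADOW_OP_CLEAN;
        domctl.u.shadow_op.pages = bitmap_pages;   /* buffer capacity */
        set_xen_guest_handle(domctl.u.shadow_op.dirty_bitmap, bitmap);
        /* do_domctl(&domctl);  -- assumed; afterwards .pages holds the
           actual size and .stats.dirty_count the pages dirtied this round */
    }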
++
++
++/* XEN_DOMCTL_max_mem */
++struct xen_domctl_max_mem {
++    /* IN variables. */
++    uint64_aligned_t max_memkb;
++};
++typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
++
++
++/* XEN_DOMCTL_setvcpucontext */
++/* XEN_DOMCTL_getvcpucontext */
++struct xen_domctl_vcpucontext {
++    uint32_t vcpu;                                  /* IN */
++    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
++};
++typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
++
++
++/* XEN_DOMCTL_getvcpuinfo */
++struct xen_domctl_getvcpuinfo {
++    /* IN variables. */
++    uint32_t vcpu;
++    /* OUT variables. */
++    uint8_t online;               /* currently online (not hotplugged)? */
++    uint8_t blocked;              /* blocked waiting for an event? */
++    uint8_t running;              /* currently scheduled on its CPU? */
++    uint64_aligned_t cpu_time;    /* total cpu time consumed (ns) */
++    uint32_t cpu;                 /* current mapping */
++};
++typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
++
++
++/* Get/set which physical cpus a vcpu can execute on. */
++/* XEN_DOMCTL_setvcpuaffinity */
++/* XEN_DOMCTL_getvcpuaffinity */
++struct xen_domctl_vcpuaffinity {
++    uint32_t vcpu;               /* IN */
++    struct xenctl_cpumap cpumap; /* IN/OUT */
++};
++typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
++
++
++/* XEN_DOMCTL_max_vcpus */
++struct xen_domctl_max_vcpus {
++    uint32_t max; /* maximum number of vcpus */
++};
++typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
++
++
++/* XEN_DOMCTL_scheduler_op */
++/* Scheduler types. */
++#define XEN_SCHEDULER_SEDF   4
++#define XEN_SCHEDULER_CREDIT 5
++/* Set or get info? */
++#define XEN_DOMCTL_SCHEDOP_putinfo 0
++#define XEN_DOMCTL_SCHEDOP_getinfo 1
++struct xen_domctl_scheduler_op {
++    uint32_t sched_id; /* XEN_SCHEDULER_* */
++    uint32_t cmd;      /* XEN_DOMCTL_SCHEDOP_* */
++    union {
++        struct xen_domctl_sched_sedf {
++            uint64_aligned_t period;
++            uint64_aligned_t slice;
++            uint64_aligned_t latency;
++            uint32_t extratime;
++            uint32_t weight;
++        } sedf;
++        struct xen_domctl_sched_credit {
++            uint16_t weight;
++            uint16_t cap;
++        } credit;
++    } u;
++};
++typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
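[Editorial aside, not part of the patch: a hedged sketch of adjusting a domain's credit-scheduler parameters via the scheduler_op above. The credit scheduler's default weight is commonly 256 (an assumption, not stated in this header); cap 0 means uncapped; do_domctl() is the assumed issue path.]

    void example_double_credit_weight(domid_t domid)
    {
        struct xen_domctl domctl = {
            .cmd               = XEN_DOMCTL_scheduler_op,
            .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
            .domain            = domid,
        };

        domctl.u.scheduler_op.sched_id        = XEN_SCHEDULER_CREDIT;
        domctl.u.scheduler_op.cmd             = XEN_DOMCTL_SCHEDOP_putinfo;
        domctl.u.scheduler_op.u.credit.weight = 512; /* 2x assumed default */
        domctl.u.scheduler_op.u.credit.cap    = 0;   /* no CPU cap */
        /* do_domctl(&domctl);  -- assumed privileged issue path */
    }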
++
++
++/* XEN_DOMCTL_setdomainhandle */
++struct xen_domctl_setdomainhandle {
++    xen_domain_handle_t handle;
++};
++typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
++
++
++/* XEN_DOMCTL_setdebugging */
++struct xen_domctl_setdebugging {
++    uint8_t enable;
++};
++typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
++
++
++/* XEN_DOMCTL_irq_permission */
++struct xen_domctl_irq_permission {
++    uint8_t pirq;
++    uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
++};
++typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
++
++
++/* XEN_DOMCTL_iomem_permission */
++struct xen_domctl_iomem_permission {
++    uint64_aligned_t first_mfn;/* first page (physical page number) in range */
++    uint64_aligned_t nr_mfns;  /* number of pages in range (>0) */
++    uint8_t allow_access;      /* allow (!0) or deny (0) access to range? */
++};
++typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
++
++
++/* XEN_DOMCTL_ioport_permission */
++struct xen_domctl_ioport_permission {
++    uint32_t first_port;   /* first port in range */
++    uint32_t nr_ports;     /* size of port range */
++    uint8_t allow_access;  /* allow or deny access to range? */
++};
++typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
++
++
++/* XEN_DOMCTL_hypercall_init */
++struct xen_domctl_hypercall_init {
++    uint64_aligned_t gmfn; /* GMFN to be initialised */
++};
++typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
++
++
++/* XEN_DOMCTL_arch_setup */
++#define _XEN_DOMAINSETUP_hvm_guest 0
++#define XEN_DOMAINSETUP_hvm_guest  (1UL<<_XEN_DOMAINSETUP_hvm_guest)
++#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */
++#define XEN_DOMAINSETUP_query  (1UL<<_XEN_DOMAINSETUP_query)
++#define _XEN_DOMAINSETUP_sioemu_guest 2
++#define XEN_DOMAINSETUP_sioemu_guest  (1UL<<_XEN_DOMAINSETUP_sioemu_guest)
++typedef struct xen_domctl_arch_setup {
++    uint64_aligned_t flags;  /* XEN_DOMAINSETUP_* */
++#ifdef __ia64__
++    uint64_aligned_t bp;     /* mpaddr of boot param area */
++    uint64_aligned_t maxmem; /* Highest memory address for MDT. */
++    uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */
++    uint32_t hypercall_imm;  /* Break imm for Xen hypercalls. */
++    int8_t vhpt_size_log2;   /* Log2 of VHPT size. */
++#endif
++} xen_domctl_arch_setup_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
++
++
++/* XEN_DOMCTL_settimeoffset */
++struct xen_domctl_settimeoffset {
++    int32_t time_offset_seconds; /* applied to domain wallclock time */
++};
++typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
++
++/* XEN_DOMCTL_gethvmcontext */
++/* XEN_DOMCTL_sethvmcontext */
++typedef struct xen_domctl_hvmcontext {
++    uint32_t size; /* IN/OUT: size of buffer / bytes filled */
++    XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call
++                                        * gethvmcontext with NULL
++                                        * buffer to get size req'd */
++} xen_domctl_hvmcontext_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
++
++
++/* XEN_DOMCTL_set_address_size */
++/* XEN_DOMCTL_get_address_size */
++typedef struct xen_domctl_address_size {
++    uint32_t size;
++} xen_domctl_address_size_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
++
++
++/* XEN_DOMCTL_real_mode_area */
++struct xen_domctl_real_mode_area {
++    uint32_t log; /* log2 of Real Mode Area size */
++};
++typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
++
++
++/* XEN_DOMCTL_sendtrigger */
++#define XEN_DOMCTL_SENDTRIGGER_NMI   0
++#define XEN_DOMCTL_SENDTRIGGER_RESET 1
++#define XEN_DOMCTL_SENDTRIGGER_INIT  2
++#define XEN_DOMCTL_SENDTRIGGER_POWER 3
++struct xen_domctl_sendtrigger {
++    uint32_t trigger; /* IN */
++    uint32_t vcpu;    /* IN */
++};
++typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
++
++
++/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
++/* XEN_DOMCTL_assign_device */
++/* XEN_DOMCTL_test_assign_device */
++/* XEN_DOMCTL_deassign_device */
++struct xen_domctl_assign_device {
++    uint32_t machine_bdf; /* machine PCI ID of assigned device */
++};
++typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
++
++/* Retrieve sibling device information of machine_bdf */
++/* XEN_DOMCTL_get_device_group */
++struct xen_domctl_get_device_group {
++    uint32_t machine_bdf; /* IN */
++    uint32_t max_sdevs;   /* IN */
++    uint32_t num_sdevs;   /* OUT */
++    XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */
++};
++typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
++
++/* Pass-through interrupts: bind real irq -> hvm devfn. */
++/* XEN_DOMCTL_bind_pt_irq */
++/* XEN_DOMCTL_unbind_pt_irq */
++typedef enum pt_irq_type_e {
++    PT_IRQ_TYPE_PCI,
++    PT_IRQ_TYPE_ISA,
++    PT_IRQ_TYPE_MSI,
++    PT_IRQ_TYPE_MSI_TRANSLATE,
++} pt_irq_type_t;
++struct xen_domctl_bind_pt_irq {
++    uint32_t machine_irq;
++    pt_irq_type_t irq_type;
++    uint32_t hvm_domid;
++
++    union {
++        struct {
++            uint8_t isa_irq;
++        } isa;
++        struct {
++            uint8_t bus;
++            uint8_t device;
++            uint8_t intx;
++        } pci;
++        struct {
++            uint8_t gvec;
++            uint32_t gflags;
++            uint64_aligned_t gtable;
++        } msi;
++    } u;
++};
++typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
++
++
++/* Bind machine I/O address range -> HVM address range. */
++/* XEN_DOMCTL_memory_mapping */
++#define DPCI_ADD_MAPPING    1
++#define DPCI_REMOVE_MAPPING 0
++struct xen_domctl_memory_mapping {
++    uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */
++    uint64_aligned_t first_mfn; /* first page (machine page) in range */
++    uint64_aligned_t nr_mfns;   /* number of pages in range (>0) */
++    uint32_t add_mapping;       /* add or remove mapping */
++    uint32_t padding;           /* padding for 64-bit aligned structure */
++};
++typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
++
++
++/* Bind machine I/O port range -> HVM I/O port range. */
++/* XEN_DOMCTL_ioport_mapping */
++struct xen_domctl_ioport_mapping {
++    uint32_t first_gport; /* first guest IO port*/
++    uint32_t first_mport; /* first machine IO port */
++    uint32_t nr_ports;    /* size of port range */
++    uint32_t add_mapping; /* add or remove mapping */
++};
++typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
++
++
++/*
++ * Pin caching type of RAM space for x86 HVM domU.
++ */
++/* XEN_DOMCTL_pin_mem_cacheattr */
++/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */
++#define XEN_DOMCTL_MEM_CACHEATTR_UC  0
++#define XEN_DOMCTL_MEM_CACHEATTR_WC  1
++#define XEN_DOMCTL_MEM_CACHEATTR_WT  4
++#define XEN_DOMCTL_MEM_CACHEATTR_WP  5
++#define XEN_DOMCTL_MEM_CACHEATTR_WB  6
++#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7
++struct xen_domctl_pin_mem_cacheattr {
++    uint64_aligned_t start, end;
++    uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */
++};
++typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
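[Editorial aside, not part of the patch: a hedged sketch of mapping a device's MMIO window into an HVM guest with the memory_mapping domctl above. The union member name memory_mapping follows mainline Xen convention but is truncated out of this excerpt, so treat it as an assumption; frame numbers are made up; do_domctl() is the assumed issue path.]

    void example_map_mmio(domid_t domid)
    {
        struct xen_domctl domctl = {
            .cmd               = XEN_DOMCTL_memory_mapping,
            .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
            .domain            = domid,
        };

        domctl.u.memory_mapping.first_gfn   = 0xf0000; /* guest frame (example) */
        domctl.u.memory_mapping.first_mfn   = 0xabcd0; /* machine frame (example) */
        domctl.u.memory_mapping.nr_mfns     = 16;      /* 64KB window */
        domctl.u.memory_mapping.add_mapping = DPCI_ADD_MAPPING;
        /* do_domctl(&domctl);  -- assumed; DPCI_REMOVE_MAPPING undoes it */
    }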
++
++
++/* XEN_DOMCTL_set_ext_vcpucontext */
++/* XEN_DOMCTL_get_ext_vcpucontext */
++struct xen_domctl_ext_vcpucontext {
++    /* IN: VCPU that this call applies to. */
++    uint32_t vcpu;
++    /*
++     * SET: Size of struct (IN)
++     * GET: Size of struct (OUT)
++     */
++    uint32_t size;
++#if defined(__i386__) || defined(__x86_64__)
++    /* SYSCALL from 32-bit mode and SYSENTER callback information. */
++    /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
++    uint64_aligned_t syscall32_callback_eip;
++    uint64_aligned_t sysenter_callback_eip;
++    uint16_t syscall32_callback_cs;
++    uint16_t sysenter_callback_cs;
++    uint8_t syscall32_disables_events;
++    uint8_t sysenter_disables_events;
++#endif
++};
++typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
++
++/*
++ * Set optimization features for a domain
++ */
++/* XEN_DOMCTL_set_opt_feature */
++struct xen_domctl_set_opt_feature {
++#if defined(__ia64__)
++    struct xen_ia64_opt_feature optf;
++#else
++    /* Make struct non-empty: do not depend on this field name! */
++    uint64_t dummy;
++#endif
++};
++typedef struct xen_domctl_set_opt_feature xen_domctl_set_opt_feature_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_opt_feature_t);
++
++/*
++ * Set the target domain for a domain
++ */
++/* XEN_DOMCTL_set_target */
++struct xen_domctl_set_target {
++    domid_t target;
++};
++typedef struct xen_domctl_set_target xen_domctl_set_target_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
++
++#if defined(__i386__) || defined(__x86_64__)
++# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF
++/* XEN_DOMCTL_set_cpuid */
++struct xen_domctl_cpuid {
++    uint32_t input[2];
++    uint32_t eax;
++    uint32_t ebx;
++    uint32_t ecx;
++    uint32_t edx;
++};
++typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
++#endif
++
++/* XEN_DOMCTL_subscribe */
++struct xen_domctl_subscribe {
++    uint32_t port; /* IN */
++};
++typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
++DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
++
++/*
++ * Define the maximum machine address size which should be allocated
++ * to a guest.
++ */
++/* XEN_DOMCTL_set_machine_address_size */
++/* XEN_DOMCTL_get_machine_address_size */
++
++/*
++ * Do not inject spurious page faults into this domain.
++ */ ++/* XEN_DOMCTL_suppress_spurious_page_faults */ ++ ++/* XEN_DOMCTL_debug_op */ ++#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0 ++#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1 ++struct xen_domctl_debug_op { ++ uint32_t op; /* IN */ ++ uint32_t vcpu; /* IN */ ++}; ++typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); ++ ++/* ++ * Request a particular record from the HVM context ++ */ ++/* XEN_DOMCTL_gethvmcontext_partial */ ++typedef struct xen_domctl_hvmcontext_partial { ++ uint32_t type; /* IN: Type of record required */ ++ uint32_t instance; /* IN: Instance of that type */ ++ XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ ++} xen_domctl_hvmcontext_partial_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); ++ ++/* XEN_DOMCTL_disable_migrate */ ++typedef struct xen_domctl_disable_migrate { ++ uint32_t disable; /* IN: 1: disable migration and restore */ ++} xen_domctl_disable_migrate_t; ++ ++ ++/* XEN_DOMCTL_gettscinfo */ ++/* XEN_DOMCTL_settscinfo */ ++struct xen_guest_tsc_info { ++ uint32_t tsc_mode; ++ uint32_t gtsc_khz; ++ uint32_t incarnation; ++ uint32_t pad; ++ uint64_aligned_t elapsed_nsec; ++}; ++typedef struct xen_guest_tsc_info xen_guest_tsc_info_t; ++DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t); ++typedef struct xen_domctl_tsc_info { ++ XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */ ++ xen_guest_tsc_info_t info; /* IN */ ++} xen_domctl_tsc_info_t; ++ ++/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */ ++struct xen_domctl_gdbsx_memio { ++ /* IN */ ++ uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */ ++ uint64_aligned_t gva; /* guest virtual address */ ++ uint64_aligned_t uva; /* user buffer virtual address */ ++ uint32_t len; /* number of bytes to read/write */ ++ uint8_t gwr; /* 0 = read from guest. 1 = write to guest */ ++ /* OUT */ ++ uint32_t remain; /* bytes remaining to be copied */ ++}; ++ ++/* XEN_DOMCTL_gdbsx_pausevcpu */ ++/* XEN_DOMCTL_gdbsx_unpausevcpu */ ++struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */ ++ uint32_t vcpu; /* which vcpu */ ++}; ++ ++/* XEN_DOMCTL_gdbsx_domstatus */ ++struct xen_domctl_gdbsx_domstatus { ++ /* OUT */ ++ uint8_t paused; /* is the domain paused */ ++ uint32_t vcpu_id; /* any vcpu in an event? */ ++ uint32_t vcpu_ev; /* if yes, what event? */ ++}; ++ ++/* ++ * Memory event operations ++ */ ++ ++/* XEN_DOMCTL_mem_event_op */ ++ ++/* Add and remove memory handlers */ ++#define XEN_DOMCTL_MEM_EVENT_OP_ENABLE 0 ++#define XEN_DOMCTL_MEM_EVENT_OP_DISABLE 1 ++ ++/* ++ * Page memory in and out. 
++ */ ++#define XEN_DOMCTL_MEM_EVENT_OP_PAGING (1 << 0) ++ ++/* Domain memory paging */ ++#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE 0 ++#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_EVICT 1 ++#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP 2 ++#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME 3 ++ ++struct xen_domctl_mem_event_op { ++ uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_* */ ++ uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_ENABLE_* */ ++ ++ /* OP_ENABLE */ ++ uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */ ++ uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */ ++ ++ /* Other OPs */ ++ uint64_aligned_t gfn; /* IN: gfn of page being operated on */ ++}; ++typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); ++ ++/* ++ * Memory sharing operations ++ */ ++/* XEN_DOMCTL_mem_sharing_op */ ++ ++#define XEN_DOMCTL_MEM_SHARING_OP_CONTROL 0 ++#define XEN_DOMCTL_MEM_SHARING_OP_NOMINATE_GFN 1 ++#define XEN_DOMCTL_MEM_SHARING_OP_NOMINATE_GREF 2 ++#define XEN_DOMCTL_MEM_SHARING_OP_SHARE 3 ++#define XEN_DOMCTL_MEM_SHARING_OP_RESUME 4 ++#define XEN_DOMCTL_MEM_SHARING_OP_DEBUG_GFN 5 ++#define XEN_DOMCTL_MEM_SHARING_OP_DEBUG_MFN 6 ++#define XEN_DOMCTL_MEM_SHARING_OP_DEBUG_GREF 7 ++ ++#define XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID (-10) ++#define XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID (-9) ++ ++struct xen_domctl_mem_sharing_op { ++ uint8_t op; /* XEN_DOMCTL_MEM_EVENT_OP_* */ ++ ++ union { ++ uint8_t enable; /* OP_CONTROL */ ++ ++ struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ ++ union { ++ uint64_aligned_t gfn; /* IN: gfn to nominate */ ++ uint32_t grant_ref; /* IN: grant ref to nominate */ ++ } u; ++ uint64_aligned_t handle; /* OUT: the handle */ ++ } nominate; ++ struct mem_sharing_op_share { /* OP_SHARE */ ++ uint64_aligned_t source_handle; /* IN: handle to the source page */ ++ uint64_aligned_t client_handle; /* IN: handle to the client page */ ++ } share; ++ struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ ++ union { ++ uint64_aligned_t gfn; /* IN: gfn to debug */ ++ uint64_aligned_t mfn; /* IN: mfn to debug */ ++ grant_ref_t gref; /* IN: gref to debug */ ++ } u; ++ } debug; ++ } u; ++}; ++typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t); ++ ++ ++struct xen_domctl { ++ uint32_t cmd; ++#define XEN_DOMCTL_createdomain 1 ++#define XEN_DOMCTL_destroydomain 2 ++#define XEN_DOMCTL_pausedomain 3 ++#define XEN_DOMCTL_unpausedomain 4 ++#define XEN_DOMCTL_getdomaininfo 5 ++#define XEN_DOMCTL_getmemlist 6 ++#define XEN_DOMCTL_getpageframeinfo 7 ++#define XEN_DOMCTL_getpageframeinfo2 8 ++#define XEN_DOMCTL_setvcpuaffinity 9 ++#define XEN_DOMCTL_shadow_op 10 ++#define XEN_DOMCTL_max_mem 11 ++#define XEN_DOMCTL_setvcpucontext 12 ++#define XEN_DOMCTL_getvcpucontext 13 ++#define XEN_DOMCTL_getvcpuinfo 14 ++#define XEN_DOMCTL_max_vcpus 15 ++#define XEN_DOMCTL_scheduler_op 16 ++#define XEN_DOMCTL_setdomainhandle 17 ++#define XEN_DOMCTL_setdebugging 18 ++#define XEN_DOMCTL_irq_permission 19 ++#define XEN_DOMCTL_iomem_permission 20 ++#define XEN_DOMCTL_ioport_permission 21 ++#define XEN_DOMCTL_hypercall_init 22 ++#define XEN_DOMCTL_arch_setup 23 ++#define XEN_DOMCTL_settimeoffset 24 ++#define XEN_DOMCTL_getvcpuaffinity 25 ++#define XEN_DOMCTL_real_mode_area 26 ++#define XEN_DOMCTL_resumedomain 27 ++#define XEN_DOMCTL_sendtrigger 28 ++#define XEN_DOMCTL_subscribe 29 ++#define XEN_DOMCTL_gethvmcontext 33 ++#define XEN_DOMCTL_sethvmcontext 34 
++#define XEN_DOMCTL_set_address_size 35 ++#define XEN_DOMCTL_get_address_size 36 ++#define XEN_DOMCTL_assign_device 37 ++#define XEN_DOMCTL_bind_pt_irq 38 ++#define XEN_DOMCTL_memory_mapping 39 ++#define XEN_DOMCTL_ioport_mapping 40 ++#define XEN_DOMCTL_pin_mem_cacheattr 41 ++#define XEN_DOMCTL_set_ext_vcpucontext 42 ++#define XEN_DOMCTL_get_ext_vcpucontext 43 ++#define XEN_DOMCTL_set_opt_feature 44 ++#define XEN_DOMCTL_test_assign_device 45 ++#define XEN_DOMCTL_set_target 46 ++#define XEN_DOMCTL_deassign_device 47 ++#define XEN_DOMCTL_unbind_pt_irq 48 ++#define XEN_DOMCTL_set_cpuid 49 ++#define XEN_DOMCTL_get_device_group 50 ++#define XEN_DOMCTL_set_machine_address_size 51 ++#define XEN_DOMCTL_get_machine_address_size 52 ++#define XEN_DOMCTL_suppress_spurious_page_faults 53 ++#define XEN_DOMCTL_debug_op 54 ++#define XEN_DOMCTL_gethvmcontext_partial 55 ++#define XEN_DOMCTL_mem_event_op 56 ++#define XEN_DOMCTL_mem_sharing_op 57 ++#define XEN_DOMCTL_disable_migrate 58 ++#define XEN_DOMCTL_gettscinfo 59 ++#define XEN_DOMCTL_settscinfo 60 ++#define XEN_DOMCTL_gdbsx_guestmemio 1000 ++#define XEN_DOMCTL_gdbsx_pausevcpu 1001 ++#define XEN_DOMCTL_gdbsx_unpausevcpu 1002 ++#define XEN_DOMCTL_gdbsx_domstatus 1003 ++ uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ ++ domid_t domain; ++ union { ++ struct xen_domctl_createdomain createdomain; ++ struct xen_domctl_getdomaininfo getdomaininfo; ++ struct xen_domctl_getmemlist getmemlist; ++ struct xen_domctl_getpageframeinfo getpageframeinfo; ++ struct xen_domctl_getpageframeinfo2 getpageframeinfo2; ++ struct xen_domctl_vcpuaffinity vcpuaffinity; ++ struct xen_domctl_shadow_op shadow_op; ++ struct xen_domctl_max_mem max_mem; ++ struct xen_domctl_vcpucontext vcpucontext; ++ struct xen_domctl_getvcpuinfo getvcpuinfo; ++ struct xen_domctl_max_vcpus max_vcpus; ++ struct xen_domctl_scheduler_op scheduler_op; ++ struct xen_domctl_setdomainhandle setdomainhandle; ++ struct xen_domctl_setdebugging setdebugging; ++ struct xen_domctl_irq_permission irq_permission; ++ struct xen_domctl_iomem_permission iomem_permission; ++ struct xen_domctl_ioport_permission ioport_permission; ++ struct xen_domctl_hypercall_init hypercall_init; ++ struct xen_domctl_arch_setup arch_setup; ++ struct xen_domctl_settimeoffset settimeoffset; ++ struct xen_domctl_disable_migrate disable_migrate; ++ struct xen_domctl_tsc_info tsc_info; ++ struct xen_domctl_real_mode_area real_mode_area; ++ struct xen_domctl_hvmcontext hvmcontext; ++ struct xen_domctl_hvmcontext_partial hvmcontext_partial; ++ struct xen_domctl_address_size address_size; ++ struct xen_domctl_sendtrigger sendtrigger; ++ struct xen_domctl_get_device_group get_device_group; ++ struct xen_domctl_assign_device assign_device; ++ struct xen_domctl_bind_pt_irq bind_pt_irq; ++ struct xen_domctl_memory_mapping memory_mapping; ++ struct xen_domctl_ioport_mapping ioport_mapping; ++ struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; ++ struct xen_domctl_ext_vcpucontext ext_vcpucontext; ++ struct xen_domctl_set_opt_feature set_opt_feature; ++ struct xen_domctl_set_target set_target; ++ struct xen_domctl_subscribe subscribe; ++ struct xen_domctl_debug_op debug_op; ++ struct xen_domctl_mem_event_op mem_event_op; ++ struct xen_domctl_mem_sharing_op mem_sharing_op; ++#if defined(__i386__) || defined(__x86_64__) ++ struct xen_domctl_cpuid cpuid; ++#endif ++ struct xen_domctl_gdbsx_memio gdbsx_guest_memio; ++ struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; ++ struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; ++ 
uint8_t pad[128]; ++ } u; ++}; ++typedef struct xen_domctl xen_domctl_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); ++ ++#endif /* __XEN_PUBLIC_DOMCTL_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/hvm/e820.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,34 @@ ++ ++/* ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_E820_H__ ++#define __XEN_PUBLIC_HVM_E820_H__ ++ ++/* E820 location in HVM virtual address space. */ ++#define HVM_E820_PAGE 0x00090000 ++#define HVM_E820_NR_OFFSET 0x000001E8 ++#define HVM_E820_OFFSET 0x000002D0 ++ ++#define HVM_BELOW_4G_RAM_END 0xF0000000 ++#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END ++#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) ++ ++#endif /* __XEN_PUBLIC_HVM_E820_H__ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/hvm/hvm_info_table.h 2010-01-04 11:56:34.000000000 +0100 +@@ -0,0 +1,75 @@ ++/****************************************************************************** ++ * hvm/hvm_info_table.h ++ * ++ * HVM parameter and information table, written into guest memory map. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ ++#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ ++ ++#define HVM_INFO_PFN 0x09F ++#define HVM_INFO_OFFSET 0x800 ++#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) ++ ++/* Maximum we can support with current vLAPIC ID mapping. */ ++#define HVM_MAX_VCPUS 128 ++ ++struct hvm_info_table { ++ char signature[8]; /* "HVM INFO" */ ++ uint32_t length; ++ uint8_t checksum; ++ ++ /* Should firmware build ACPI tables? */ ++ uint8_t acpi_enabled; ++ ++ /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ ++ uint8_t apic_mode; ++ ++ /* How many CPUs does this domain have? */ ++ uint32_t nr_vcpus; ++ ++ /* ++ * MEMORY MAP provided by HVM domain builder. ++ * Notes: ++ * 1. page_to_phys(x) = x << 12 ++ * 2. If a field is zero, the corresponding range does not exist. ++ */ ++ /* ++ * 0x0 to page_to_phys(low_mem_pgend)-1: ++ * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) ++ */ ++ uint32_t low_mem_pgend; ++ /* ++ * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: ++ * Reserved for special memory mappings ++ */ ++ uint32_t reserved_mem_pgstart; ++ /* ++ * 0x100000000 to page_to_phys(high_mem_pgend)-1: ++ * RAM above 4GB ++ */ ++ uint32_t high_mem_pgend; ++ ++ /* Bitmap of which CPUs are online at boot time. */ ++ uint8_t vcpu_online[HVM_MAX_VCPUS/8]; ++}; ++ ++#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/hvm/hvm_op.h 2009-06-23 09:28:21.000000000 +0200 +@@ -0,0 +1,133 @@ ++/* ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ ++#define __XEN_PUBLIC_HVM_HVM_OP_H__ ++ ++#include "../xen.h" ++ ++/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ ++#define HVMOP_set_param 0 ++#define HVMOP_get_param 1 ++struct xen_hvm_param { ++ domid_t domid; /* IN */ ++ uint32_t index; /* IN */ ++ uint64_t value; /* IN/OUT */ ++}; ++typedef struct xen_hvm_param xen_hvm_param_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); ++ ++/* Set the logical level of one of a domain's PCI INTx wires. */ ++#define HVMOP_set_pci_intx_level 2 ++struct xen_hvm_set_pci_intx_level { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ ++ uint8_t domain, bus, device, intx; ++ /* Assertion level (0 = unasserted, 1 = asserted). 
*/ ++ uint8_t level; ++}; ++typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); ++ ++/* Set the logical level of one of a domain's ISA IRQ wires. */ ++#define HVMOP_set_isa_irq_level 3 ++struct xen_hvm_set_isa_irq_level { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* ISA device identification, by ISA IRQ (0-15). */ ++ uint8_t isa_irq; ++ /* Assertion level (0 = unasserted, 1 = asserted). */ ++ uint8_t level; ++}; ++typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); ++ ++#define HVMOP_set_pci_link_route 4 ++struct xen_hvm_set_pci_link_route { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* PCI link identifier (0-3). */ ++ uint8_t link; ++ /* ISA IRQ (1-15), or 0 (disable link). */ ++ uint8_t isa_irq; ++}; ++typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); ++ ++/* Flushes all VCPU TLBs: @arg must be NULL. */ ++#define HVMOP_flush_tlbs 5 ++ ++/* Following tools-only interfaces may change in future. */ ++#if defined(__XEN__) || defined(__XEN_TOOLS__) ++ ++/* Track dirty VRAM. */ ++#define HVMOP_track_dirty_vram 6 ++struct xen_hvm_track_dirty_vram { ++ /* Domain to be tracked. */ ++ domid_t domid; ++ /* First pfn to track. */ ++ uint64_aligned_t first_pfn; ++ /* Number of pages to track. */ ++ uint64_aligned_t nr; ++ /* OUT variable. */ ++ /* Dirty bitmap buffer. */ ++ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; ++}; ++typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); ++ ++/* Notify that some pages got modified by the Device Model. */ ++#define HVMOP_modified_memory 7 ++struct xen_hvm_modified_memory { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* First pfn. */ ++ uint64_aligned_t first_pfn; ++ /* Number of pages. */ ++ uint64_aligned_t nr; ++}; ++typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); ++ ++#define HVMOP_set_mem_type 8 ++typedef enum { ++ HVMMEM_ram_rw, /* Normal read/write guest RAM */ ++ HVMMEM_ram_ro, /* Read-only; writes are discarded */ ++ HVMMEM_mmio_dm, /* Reads and writes go to the device model */ ++} hvmmem_type_t; ++/* Notify that a region of memory is to be treated in a specific way. */ ++struct xen_hvm_set_mem_type { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* Memory type */ ++ hvmmem_type_t hvmmem_type; ++ /* First pfn. */ ++ uint64_aligned_t first_pfn; ++ /* Number of pages. */ ++ uint64_aligned_t nr; ++}; ++typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); ++ ++ ++#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ ++ ++#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
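For orientation, a sketch of driving the tools-only HVMOP_set_mem_type call above; do_hvm_op() is a hypothetical stand-in for whatever HVMOP hypercall wrapper the toolstack provides, and domid/pfn are assumed inputs.

    /* Sketch only: mark one guest page read-only so that writes are
     * discarded, per HVMMEM_ram_ro above. */
    struct xen_hvm_set_mem_type arg = {
        .domid       = domid,
        .hvmmem_type = HVMMEM_ram_ro,
        .first_pfn   = pfn,
        .nr          = 1,
    };
    rc = do_hvm_op(HVMOP_set_mem_type, &arg);  /* hypothetical wrapper */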
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/hvm/ioreq.h 2010-01-04 11:56:34.000000000 +0100 +@@ -0,0 +1,119 @@ ++/* ++ * ioreq.h: I/O request definitions for device models ++ * Copyright (c) 2004, Intel Corporation. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef _IOREQ_H_ ++#define _IOREQ_H_ ++ ++#define IOREQ_READ 1 ++#define IOREQ_WRITE 0 ++ ++#define STATE_IOREQ_NONE 0 ++#define STATE_IOREQ_READY 1 ++#define STATE_IOREQ_INPROCESS 2 ++#define STATE_IORESP_READY 3 ++ ++#define IOREQ_TYPE_PIO 0 /* pio */ ++#define IOREQ_TYPE_COPY 1 /* mmio ops */ ++#define IOREQ_TYPE_TIMEOFFSET 7 ++#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ ++ ++/* ++ * The VMExit dispatcher should cooperate with the instruction decoder to ++ * prepare this structure and notify the service OS and DM by sending a ++ * virq. ++ */ ++struct ioreq { ++ uint64_t addr; /* physical address */ ++ uint64_t data; /* data (or paddr of data) */ ++ uint32_t count; /* for rep prefixes */ ++ uint32_t size; /* size in bytes */ ++ uint32_t vp_eport; /* evtchn for notifications to/from device model */ ++ uint16_t _pad0; ++ uint8_t state:4; ++ uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr ++ * of the real data to use. */ ++ uint8_t dir:1; /* 1=read, 0=write */ ++ uint8_t df:1; ++ uint8_t _pad1:1; ++ uint8_t type; /* I/O type */ ++}; ++typedef struct ioreq ioreq_t; ++ ++struct shared_iopage { ++ struct ioreq vcpu_ioreq[1]; ++}; ++typedef struct shared_iopage shared_iopage_t; ++ ++struct buf_ioreq { ++ uint8_t type; /* I/O type */ ++ uint8_t pad:1; ++ uint8_t dir:1; /* 1=read, 0=write */ ++ uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ ++ uint32_t addr:20; /* physical address */ ++ uint32_t data; /* data */ ++}; ++typedef struct buf_ioreq buf_ioreq_t; ++ ++#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ ++struct buffered_iopage { ++ unsigned int read_pointer; ++ unsigned int write_pointer; ++ buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; ++}; /* NB. Size of this structure must be no greater than one page. 
*/ ++typedef struct buffered_iopage buffered_iopage_t; ++ ++#if defined(__ia64__) ++struct pio_buffer { ++ uint32_t page_offset; ++ uint32_t pointer; ++ uint32_t data_end; ++ uint32_t buf_size; ++ void *opaque; ++}; ++ ++#define PIO_BUFFER_IDE_PRIMARY 0 /* I/O port = 0x1F0 */ ++#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */ ++#define PIO_BUFFER_ENTRY_NUM 2 ++struct buffered_piopage { ++ struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM]; ++ uint8_t buffer[1]; ++}; ++#endif /* defined(__ia64__) */ ++ ++#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40 ++#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) ++#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08) ++#define ACPI_GPE0_BLK_ADDRESS (ACPI_PM_TMR_BLK_ADDRESS + 0x20) ++#define ACPI_GPE0_BLK_LEN 0x08 ++ ++#endif /* _IOREQ_H_ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/hvm/params.h 2009-04-07 13:58:49.000000000 +0200 +@@ -0,0 +1,111 @@ ++/* ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ ++#define __XEN_PUBLIC_HVM_PARAMS_H__ ++ ++#include "hvm_op.h" ++ ++/* ++ * Parameter space for HVMOP_{set,get}_param. ++ */ ++ ++/* ++ * How should CPU0 event-channel notifications be delivered? ++ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). ++ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: ++ * Domain = val[47:32], Bus = val[31:16], ++ * DevFn = val[15: 8], IntX = val[ 1: 0] ++ * If val == 0 then CPU0 event-channel notifications are not delivered. ++ */ ++#define HVM_PARAM_CALLBACK_IRQ 0 ++ ++/* ++ * These are not used by Xen. They are here for convenience of HVM-guest ++ * xenbus implementations. ++ */ ++#define HVM_PARAM_STORE_PFN 1 ++#define HVM_PARAM_STORE_EVTCHN 2 ++ ++#define HVM_PARAM_PAE_ENABLED 4 ++ ++#define HVM_PARAM_IOREQ_PFN 5 ++ ++#define HVM_PARAM_BUFIOREQ_PFN 6 ++ ++#ifdef __ia64__ ++ ++#define HVM_PARAM_NVRAM_FD 7 ++#define HVM_PARAM_VHPT_SIZE 8 ++#define HVM_PARAM_BUFPIOREQ_PFN 9 ++ ++#elif defined(__i386__) || defined(__x86_64__) ++ ++/* Expose Viridian interfaces to this HVM guest? 
*/ ++#define HVM_PARAM_VIRIDIAN 9 ++ ++#endif ++ ++/* ++ * Set mode for virtual timers (currently x86 only): ++ * delay_for_missed_ticks (default): ++ * Do not advance a vcpu's time beyond the correct delivery time for ++ * interrupts that have been missed due to preemption. Deliver missed ++ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual ++ * time stepwise for each one. ++ * no_delay_for_missed_ticks: ++ * As above, missed interrupts are delivered, but guest time always tracks ++ * wallclock (i.e., real) time while doing so. ++ * no_missed_ticks_pending: ++ * No missed interrupts are held pending. Instead, to ensure ticks are ++ * delivered at some non-zero rate, if we detect missed ticks then the ++ * internal tick alarm is not disabled if the VCPU is preempted during the ++ * next tick period. ++ * one_missed_tick_pending: ++ * Missed interrupts are collapsed together and delivered as one 'late tick'. ++ * Guest time always tracks wallclock (i.e., real) time. ++ */ ++#define HVM_PARAM_TIMER_MODE 10 ++#define HVMPTM_delay_for_missed_ticks 0 ++#define HVMPTM_no_delay_for_missed_ticks 1 ++#define HVMPTM_no_missed_ticks_pending 2 ++#define HVMPTM_one_missed_tick_pending 3 ++ ++/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ ++#define HVM_PARAM_HPET_ENABLED 11 ++ ++/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ ++#define HVM_PARAM_IDENT_PT 12 ++ ++/* Device Model domain, defaults to 0. */ ++#define HVM_PARAM_DM_DOMAIN 13 ++ ++/* ACPI S state: currently supports S0 and S3 on x86. */ ++#define HVM_PARAM_ACPI_S_STATE 14 ++ ++/* TSS used on Intel when CR0.PE=0. */ ++#define HVM_PARAM_VM86_TSS 15 ++ ++/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ ++#define HVM_PARAM_VPT_ALIGN 16 ++ ++#define HVM_NR_PARAMS 17 ++ ++#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
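To make the HVM_PARAM_CALLBACK_IRQ encoding documented at the top of this header concrete, here is a sketch of building the two value forms; the helper names are hypothetical.

    /* Sketch only: val[63:56] == 0 selects GSI delivery, val[63:56] == 1
     * selects PCI INTx delivery with the fields packed as documented. */
    static inline uint64_t callback_via_gsi(uint32_t gsi)
    {
        return gsi;                              /* val[55:0] = GSI */
    }

    static inline uint64_t callback_via_pci_intx(uint16_t seg, uint8_t bus,
                                                 uint8_t devfn, uint8_t intx)
    {
        return (1ULL << 56)                      /* PCI INTx form */
             | ((uint64_t)seg   << 32)           /* Domain = val[47:32] */
             | ((uint64_t)bus   << 16)           /* Bus    = val[31:16] */
             | ((uint64_t)devfn <<  8)           /* DevFn  = val[15:8]  */
             | (intx & 3);                       /* IntX   = val[1:0]   */
    }

Either value would then be installed with HVMOP_set_param (see hvm_op.h above) using index HVM_PARAM_CALLBACK_IRQ.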
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/hvm/save.h 2008-04-02 12:34:02.000000000 +0200 +@@ -0,0 +1,88 @@ ++/* ++ * hvm/save.h ++ * ++ * Structure definitions for HVM state that is held by Xen and must ++ * be saved along with the domain's memory and device-model state. ++ * ++ * Copyright (c) 2007 XenSource Ltd. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_SAVE_H__ ++#define __XEN_PUBLIC_HVM_SAVE_H__ ++ ++/* ++ * Structures in this header *must* have the same layout in 32bit ++ * and 64bit environments: this means that all fields must be explicitly ++ * sized types and aligned to their sizes, and the structs must be ++ * a multiple of eight bytes long. ++ * ++ * Only the state necessary for saving and restoring (i.e. fields ++ * that are analogous to actual hardware state) should go in this file. ++ * Internal mechanisms should be kept in Xen-private headers. ++ */ ++ ++#if !defined(__GNUC__) || defined(__STRICT_ANSI__) ++#error "Anonymous structs/unions are a GNU extension." ++#endif ++ ++/* ++ * Each entry is preceded by a descriptor giving its type and length ++ */ ++struct hvm_save_descriptor { ++ uint16_t typecode; /* Used to demux the various types below */ ++ uint16_t instance; /* Further demux within a type */ ++ uint32_t length; /* In bytes, *not* including this descriptor */ ++}; ++ ++ ++/* ++ * Each entry has a datatype associated with it: for example, the CPU state ++ * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), ++ * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). ++ * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system ++ * ugliness. ++ */ ++ ++#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ ++ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; } ++ ++#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) ++#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) ++#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) ++ ++ ++/* ++ * The series of save records is terminated by a zero-type, zero-length ++ * descriptor. ++ */ ++ ++struct hvm_save_end {}; ++DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); ++ ++#if defined(__i386__) || defined(__x86_64__) ++#include "../arch-x86/hvm/save.h" ++#elif defined(__ia64__) ++#include "../arch-ia64/hvm/save.h" ++#else ++#error "unsupported architecture" ++#endif ++ ++#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */
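The save-type machinery above is easiest to see with a worked instance. FOO and its typecode 42 below are hypothetical, purely to show how the macros resolve.

    struct hvm_foo { uint64_t bar; };            /* hypothetical record */
    DECLARE_HVM_SAVE_TYPE(FOO, 42, struct hvm_foo);
    /* Now:
     *   HVM_SAVE_CODE(FOO)   == 42  (size of the char[42] member)
     *   HVM_SAVE_LENGTH(FOO) == 8   (size of struct hvm_foo)
     * and a save-stream entry is a descriptor followed by the payload: */
    struct hvm_save_descriptor d = {
        .typecode = HVM_SAVE_CODE(FOO),          /* 42 */
        .instance = 0,
        .length   = HVM_SAVE_LENGTH(FOO),        /* 8, descriptor excluded */
    };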
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/io/fsif.h 2009-06-23 09:28:21.000000000 +0200 +@@ -0,0 +1,192 @@ ++/****************************************************************************** ++ * fsif.h ++ * ++ * Interface to FS level split device drivers. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2007, Grzegorz Milos. ++ */ ++ ++#ifndef __XEN_PUBLIC_IO_FSIF_H__ ++#define __XEN_PUBLIC_IO_FSIF_H__ ++ ++#include "ring.h" ++#include "../grant_table.h" ++ ++#define REQ_FILE_OPEN 1 ++#define REQ_FILE_CLOSE 2 ++#define REQ_FILE_READ 3 ++#define REQ_FILE_WRITE 4 ++#define REQ_STAT 5 ++#define REQ_FILE_TRUNCATE 6 ++#define REQ_REMOVE 7 ++#define REQ_RENAME 8 ++#define REQ_CREATE 9 ++#define REQ_DIR_LIST 10 ++#define REQ_CHMOD 11 ++#define REQ_FS_SPACE 12 ++#define REQ_FILE_SYNC 13 ++ ++struct fsif_open_request { ++ grant_ref_t gref; ++}; ++ ++struct fsif_close_request { ++ uint32_t fd; ++}; ++ ++struct fsif_read_request { ++ uint32_t fd; ++ int32_t pad; ++ uint64_t len; ++ uint64_t offset; ++ grant_ref_t grefs[1]; /* Variable length */ ++}; ++ ++struct fsif_write_request { ++ uint32_t fd; ++ int32_t pad; ++ uint64_t len; ++ uint64_t offset; ++ grant_ref_t grefs[1]; /* Variable length */ ++}; ++ ++struct fsif_stat_request { ++ uint32_t fd; ++}; ++ ++/* This structure is a copy of some fields from the stat structure, returned ++ * via the ring. */ ++struct fsif_stat_response { ++ int32_t stat_mode; ++ uint32_t stat_uid; ++ uint32_t stat_gid; ++ int32_t stat_ret; ++ int64_t stat_size; ++ int64_t stat_atime; ++ int64_t stat_mtime; ++ int64_t stat_ctime; ++}; ++ ++struct fsif_truncate_request { ++ uint32_t fd; ++ int32_t pad; ++ int64_t length; ++}; ++ ++struct fsif_remove_request { ++ grant_ref_t gref; ++}; ++ ++struct fsif_rename_request { ++ uint16_t old_name_offset; ++ uint16_t new_name_offset; ++ grant_ref_t gref; ++}; ++ ++struct fsif_create_request { ++ int8_t directory; ++ int8_t pad; ++ int16_t pad2; ++ int32_t mode; ++ grant_ref_t gref; ++}; ++ ++struct fsif_list_request { ++ uint32_t offset; ++ grant_ref_t gref; ++}; ++ ++#define NR_FILES_SHIFT 0 ++#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */ ++#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT) ++#define ERROR_SIZE 32 /* 32 bits for the error mask */ ++#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT) ++#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT) ++#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE) ++#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT) ++ ++struct fsif_chmod_request { ++ uint32_t fd; ++ int32_t mode; ++}; ++ ++struct fsif_space_request { ++ grant_ref_t gref; ++}; ++ ++struct fsif_sync_request { ++ uint32_t fd; ++}; ++ ++ ++/* FS operation request */ ++struct fsif_request { ++ uint8_t type; /* Type of the request */ ++ uint8_t pad; ++ uint16_t id; /* Request ID, copied to the response */ ++ uint32_t pad2; ++ union { ++ struct fsif_open_request fopen; ++ struct fsif_close_request fclose; ++ struct fsif_read_request fread; ++ struct fsif_write_request fwrite; ++ struct fsif_stat_request fstat; ++ struct fsif_truncate_request ftruncate; ++ struct fsif_remove_request fremove; ++ struct fsif_rename_request frename; ++ struct fsif_create_request fcreate; ++ struct fsif_list_request flist; ++ struct fsif_chmod_request fchmod; ++ struct fsif_space_request fspace; ++ struct fsif_sync_request fsync; ++ } u; ++}; ++typedef struct fsif_request fsif_request_t; ++ ++/* FS operation response */ ++struct fsif_response { ++ uint16_t id; ++ uint16_t pad1; ++ uint32_t pad2; ++ union { ++ uint64_t ret_val; ++ struct fsif_stat_response fstat; ++ } u; ++}; ++ ++typedef struct fsif_response fsif_response_t; ++ ++#define FSIF_RING_ENTRY_SIZE 64 ++ ++#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \ ++ sizeof(grant_ref_t) + 1) ++#define FSIF_NR_WRITE_GNTS 
((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \ ++ sizeof(grant_ref_t) + 1) ++ ++DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response); ++ ++#define STATE_INITIALISED "init" ++#define STATE_READY "ready" ++#define STATE_CLOSING "closing" ++#define STATE_CLOSED "closed" ++ ++ ++#endif
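Since a REQ_DIR_LIST response packs three fields into the 64-bit ret_val, a sketch of unpacking it with the masks defined above may help; the helper name is hypothetical.

    /* Sketch only: decode a REQ_DIR_LIST response's packed ret_val. */
    static void fsif_parse_list_ret(uint64_t ret_val, uint32_t *nr_files,
                                    int32_t *error, int *has_more)
    {
        *nr_files = (uint32_t)((ret_val & NR_FILES_MASK) >> NR_FILES_SHIFT);
        *error    = (int32_t)((ret_val & ERROR_MASK) >> ERROR_SHIFT);
        *has_more = !!(ret_val & HAS_MORE_FLAG);
    }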
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/io/pciif.h 2009-04-07 13:58:49.000000000 +0200 +@@ -0,0 +1,124 @@ ++/* ++ * PCI Backend/Frontend Common Data Structures & Macros ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Author: Ryan Wilson ++ */ ++#ifndef __XEN_PCI_COMMON_H__ ++#define __XEN_PCI_COMMON_H__ ++ ++/* Be sure to bump this number if you change this file */ ++#define XEN_PCI_MAGIC "7" ++ ++/* xen_pci_sharedinfo flags */ ++#define _XEN_PCIF_active (0) ++#define XEN_PCIF_active (1<<_XEN_PCIF_active) ++#define _XEN_PCIB_AERHANDLER (1) ++#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) ++#define _XEN_PCIB_active (2) ++#define XEN_PCIB_active (1<<_XEN_PCIB_active) ++ ++/* xen_pci_op commands */ ++#define XEN_PCI_OP_conf_read (0) ++#define XEN_PCI_OP_conf_write (1) ++#define XEN_PCI_OP_enable_msi (2) ++#define XEN_PCI_OP_disable_msi (3) ++#define XEN_PCI_OP_enable_msix (4) ++#define XEN_PCI_OP_disable_msix (5) ++#define XEN_PCI_OP_aer_detected (6) ++#define XEN_PCI_OP_aer_resume (7) ++#define XEN_PCI_OP_aer_mmio (8) ++#define XEN_PCI_OP_aer_slotreset (9) ++ ++/* xen_pci_op error numbers */ ++#define XEN_PCI_ERR_success (0) ++#define XEN_PCI_ERR_dev_not_found (-1) ++#define XEN_PCI_ERR_invalid_offset (-2) ++#define XEN_PCI_ERR_access_denied (-3) ++#define XEN_PCI_ERR_not_implemented (-4) ++/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ ++#define XEN_PCI_ERR_op_failed (-5) ++ ++/* ++ * it should be (PAGE_SIZE - sizeof(struct xen_pci_op)) / sizeof(struct msix_entry) ++ * Should not exceed 128 ++ */ ++#define SH_INFO_MAX_VEC 128 ++ ++struct xen_msix_entry { ++ uint16_t vector; ++ uint16_t entry; ++}; ++struct xen_pci_op { ++ /* IN: what action to perform: XEN_PCI_OP_* */ ++ uint32_t cmd; ++ ++ /* OUT: will contain an error number (if any) from errno.h */ ++ int32_t err; ++ ++ /* IN: which device to touch */ ++ uint32_t domain; /* PCI Domain/Segment */ ++ uint32_t bus; ++ uint32_t devfn; ++ ++ /* IN: which configuration registers to touch */ ++ int32_t offset; ++ int32_t size; ++ ++ /* IN/OUT: Contains the result after a READ or the value to WRITE */ ++ uint32_t value; ++ /* IN: Contains extra info for this operation */ ++ uint32_t info; ++ /* IN: param for MSI-X */ ++ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; ++}; ++ ++/* used for PCIe AER handling */ ++struct xen_pcie_aer_op ++{ ++ ++ /* IN: what action to perform: XEN_PCI_OP_* */ ++ uint32_t cmd; ++ /* IN/OUT: return aer_op result or carry error_detected state as input */ ++ int32_t err; ++ ++ /* IN: which device to touch */ ++ uint32_t domain; /* PCI Domain/Segment */ ++ uint32_t bus; ++ uint32_t devfn; ++}; ++struct xen_pci_sharedinfo { ++ /* flags - XEN_PCIF_* */ ++ uint32_t flags; ++ struct xen_pci_op op; ++ struct xen_pcie_aer_op aer_op; ++}; ++ ++#endif /* __XEN_PCI_COMMON_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/io/tpmif.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,77 @@ ++/****************************************************************************** ++ * tpmif.h ++ * ++ * TPM I/O interface for Xen guest OSes. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2005, IBM Corporation ++ * ++ * Author: Stefan Berger, stefanb@us.ibm.com ++ * Grant table support: Mahadevan Gomathisankaran ++ * ++ * This code has been derived from tools/libxc/xen/io/netif.h ++ * ++ * Copyright (c) 2003-2004, Keir Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_IO_TPMIF_H__ ++#define __XEN_PUBLIC_IO_TPMIF_H__ ++ ++#include "../grant_table.h" ++ ++struct tpmif_tx_request { ++ unsigned long addr; /* Machine address of packet. */ ++ grant_ref_t ref; /* grant table access reference */ ++ uint16_t unused; ++ uint16_t size; /* Packet size in bytes. */ ++}; ++typedef struct tpmif_tx_request tpmif_tx_request_t; ++ ++/* ++ * The TPMIF_TX_RING_SIZE defines the number of pages the ++ * front-end and backend can exchange (= size of array). ++ */ ++typedef uint32_t TPMIF_RING_IDX; ++ ++#define TPMIF_TX_RING_SIZE 1 ++ ++/* This structure must fit in a memory page. 
*/ ++ ++struct tpmif_ring { ++ struct tpmif_tx_request req; ++}; ++typedef struct tpmif_ring tpmif_ring_t; ++ ++struct tpmif_tx_interface { ++ struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; ++}; ++typedef struct tpmif_tx_interface tpmif_tx_interface_t; ++ ++#endif ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/io/usbif.h 2009-10-15 11:45:41.000000000 +0200 +@@ -0,0 +1,151 @@ ++/* ++ * usbif.h ++ * ++ * USB I/O interface for Xen guest OSes. ++ * ++ * Copyright (C) 2009, FUJITSU LABORATORIES LTD. ++ * Author: Noboru Iwamatsu ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_IO_USBIF_H__ ++#define __XEN_PUBLIC_IO_USBIF_H__ ++ ++#include "ring.h" ++#include "../grant_table.h" ++ ++enum usb_spec_version { ++ USB_VER_UNKNOWN = 0, ++ USB_VER_USB11, ++ USB_VER_USB20, ++ USB_VER_USB30, /* not supported yet */ ++}; ++ ++/* ++ * USB pipe in usbif_request ++ * ++ * bits 0-5 are specific bits for virtual USB driver. ++ * bits 7-31 are standard urb pipe. ++ * ++ * - port number(NEW): bits 0-4 ++ * (USB_MAXCHILDREN is 31) ++ * ++ * - operation flag(NEW): bit 5 ++ * (0 = submit urb, ++ * 1 = unlink urb) ++ * ++ * - direction: bit 7 ++ * (0 = Host-to-Device [Out] ++ * 1 = Device-to-Host [In]) ++ * ++ * - device address: bits 8-14 ++ * ++ * - endpoint: bits 15-18 ++ * ++ * - pipe type: bits 30-31 ++ * (00 = isochronous, 01 = interrupt, ++ * 10 = control, 11 = bulk) ++ */ ++#define usbif_pipeportnum(pipe) ((pipe) & 0x1f) ++#define usbif_setportnum_pipe(pipe, portnum) \ ++ ((pipe)|(portnum)) ++ ++#define usbif_pipeunlink(pipe) ((pipe) & 0x20) ++#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) ++#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20)) ++ ++#define USBIF_BACK_MAX_PENDING_REQS (128) ++#define USBIF_MAX_SEGMENTS_PER_REQUEST (16) ++ ++/* ++ * RING for transferring urbs. 
++ */ ++struct usbif_request_segment { ++ grant_ref_t gref; ++ uint16_t offset; ++ uint16_t length; ++}; ++ ++struct usbif_urb_request { ++ uint16_t id; /* request id */ ++ uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ ++ ++ /* basic urb parameter */ ++ uint32_t pipe; ++ uint16_t transfer_flags; ++ uint16_t buffer_length; ++ union { ++ uint8_t ctrl[8]; /* setup_packet (Ctrl) */ ++ ++ struct { ++ uint16_t interval; /* maximum (1024*8) in usb core */ ++ uint16_t start_frame; /* start frame */ ++ uint16_t number_of_packets; /* number of ISO packet */ ++ uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ ++ } isoc; ++ ++ struct { ++ uint16_t interval; /* maximum (1024*8) in usb core */ ++ uint16_t pad[3]; ++ } intr; ++ ++ struct { ++ uint16_t unlink_id; /* unlink request id */ ++ uint16_t pad[3]; ++ } unlink; ++ ++ } u; ++ ++ /* urb data segments */ ++ struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; ++}; ++typedef struct usbif_urb_request usbif_urb_request_t; ++ ++struct usbif_urb_response { ++ uint16_t id; /* request id */ ++ uint16_t start_frame; /* start frame (ISO) */ ++ int32_t status; /* status (non-ISO) */ ++ int32_t actual_length; /* actual transfer length */ ++ int32_t error_count; /* number of ISO errors */ ++}; ++typedef struct usbif_urb_response usbif_urb_response_t; ++ ++DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); ++#define USB_URB_RING_SIZE __RING_SIZE((struct usbif_urb_sring *)0, PAGE_SIZE) ++ ++/* ++ * RING for notifying connect/disconnect events to frontend ++ */ ++struct usbif_conn_request { ++ uint16_t id; ++}; ++typedef struct usbif_conn_request usbif_conn_request_t; ++ ++struct usbif_conn_response { ++ uint16_t id; /* request id */ ++ uint8_t portnum; /* port number */ ++ uint8_t speed; /* usb_device_speed */ ++}; ++typedef struct usbif_conn_response usbif_conn_response_t; ++ ++DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); ++#define USB_CONN_RING_SIZE __RING_SIZE((struct usbif_conn_sring *)0, PAGE_SIZE) ++ ++#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/io/vscsiif.h 2008-07-21 11:00:33.000000000 +0200 +@@ -0,0 +1,105 @@ ++/****************************************************************************** ++ * vscsiif.h ++ * ++ * Based on the blkif.h code. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ * ++ * Copyright(c) FUJITSU Limited 2008. ++ */ ++ ++#ifndef __XEN__PUBLIC_IO_SCSI_H__ ++#define __XEN__PUBLIC_IO_SCSI_H__ ++ ++#include "ring.h" ++#include "../grant_table.h" ++ ++/* command between backend and frontend */ ++#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */ ++#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort */ ++#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset */ ++ ++ ++#define VSCSIIF_BACK_MAX_PENDING_REQS 128 ++ ++/* ++ * Maximum scatter/gather segments per request. ++ * ++ * Considering balance between allocating at least 16 "vscsiif_request" ++ * structures on one page (4096 bytes) and the number of scatter/gather ++ * segments needed, we decided to use 26 as a magic number. ++ */ ++#define VSCSIIF_SG_TABLESIZE 26 ++ ++/* ++ * based on Linux kernel 2.6.18 ++ */ ++#define VSCSIIF_MAX_COMMAND_SIZE 16 ++#define VSCSIIF_SENSE_BUFFERSIZE 96 ++ ++ ++struct vscsiif_request { ++ uint16_t rqid; /* private guest value, echoed in resp */ ++ uint8_t act; /* command between backend and frontend */ ++ uint8_t cmd_len; ++ ++ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; ++ uint16_t timeout_per_command; /* The command is issued by twice ++ the value in Backend. */ ++ uint16_t channel, id, lun; ++ uint16_t padding; ++ uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) ++ DMA_FROM_DEVICE(2) ++ DMA_NONE(3) requests */ ++ uint8_t nr_segments; /* Number of pieces of scatter-gather */ ++ ++ struct scsiif_request_segment { ++ grant_ref_t gref; ++ uint16_t offset; ++ uint16_t length; ++ } seg[VSCSIIF_SG_TABLESIZE]; ++ uint32_t reserved[3]; ++}; ++typedef struct vscsiif_request vscsiif_request_t; ++ ++struct vscsiif_response { ++ uint16_t rqid; ++ uint8_t padding; ++ uint8_t sense_len; ++ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; ++ int32_t rslt; ++ uint32_t residual_len; /* request bufflen - ++ return the value from physical device */ ++ uint32_t reserved[36]; ++}; ++typedef struct vscsiif_response vscsiif_response_t; ++ ++DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response); ++ ++ ++#endif /*__XEN__PUBLIC_IO_SCSI_H__*/ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/kexec.h 2008-11-25 12:22:34.000000000 +0100 +@@ -0,0 +1,168 @@ ++/****************************************************************************** ++ * kexec.h - Public portion ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Xen port written by: ++ * - Simon 'Horms' Horman ++ * - Magnus Damm ++ */ ++ ++#ifndef _XEN_PUBLIC_KEXEC_H ++#define _XEN_PUBLIC_KEXEC_H ++ ++ ++/* This file describes the Kexec / Kdump hypercall interface for Xen. ++ * ++ * Kexec under vanilla Linux allows a user to reboot the physical machine ++ * into a new user-specified kernel. The Xen port extends this idea ++ * to allow rebooting of the machine from dom0. When kexec for dom0 ++ * is used to reboot, both the hypervisor and the domains get replaced ++ * with some other kernel. It is possible to kexec between vanilla ++ * Linux and Xen and back again. Xen to Xen works well too. ++ * ++ * The hypercall interface for kexec can be divided into three main ++ * types of hypercall operations: ++ * ++ * 1) Range information: ++ * This is used by the dom0 kernel to ask the hypervisor about various ++ * address information. This information is needed to allow kexec-tools ++ * to fill in the ELF headers for /proc/vmcore properly. ++ * ++ * 2) Load and unload of images: ++ * There are no big surprises here: the kexec binary from kexec-tools ++ * runs in userspace in dom0. The tool loads/unloads data into the ++ * dom0 kernel, such as the new kernel, initramfs and hypervisor. When ++ * loaded, the dom0 kernel performs a load hypercall operation, and ++ * before releasing all page references the dom0 kernel calls unload. ++ * ++ * 3) Kexec operation: ++ * This is used to start a previously loaded kernel. ++ */ ++ ++#include "xen.h" ++ ++#if defined(__i386__) || defined(__x86_64__) ++#define KEXEC_XEN_NO_PAGES 17 ++#endif ++ ++/* ++ * Prototype for this hypercall is: ++ * int kexec_op(int cmd, void *args) ++ * @cmd == KEXEC_CMD_... ++ * KEXEC operation to perform ++ * @args == Operation-specific extra arguments (NULL if none). ++ */ ++ ++/* ++ * Kexec supports two types of operation: ++ * - kexec into a regular kernel, very similar to a standard reboot ++ * - KEXEC_TYPE_DEFAULT is used to specify this type ++ * - kexec into a special "crash kernel", aka kexec-on-panic ++ * - KEXEC_TYPE_CRASH is used to specify this type ++ * - parts of our system may be broken at kexec-on-panic time ++ * - the code should be kept as simple and self-contained as possible ++ */ ++ ++#define KEXEC_TYPE_DEFAULT 0 ++#define KEXEC_TYPE_CRASH 1 ++ ++ ++/* The kexec implementation for Xen allows the user to load two ++ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. ++ * All data needed for a kexec reboot is kept in one xen_kexec_image_t ++ * per "instance". The data mainly consists of machine address lists to pages ++ * together with destination addresses. The data in xen_kexec_image_t ++ * is passed to the "code page" which is one page of code that performs ++ * the final relocations before jumping to the new kernel. ++ */ ++ ++typedef struct xen_kexec_image { ++#if defined(__i386__) || defined(__x86_64__) ++ unsigned long page_list[KEXEC_XEN_NO_PAGES]; ++#endif ++#if defined(__ia64__) ++ unsigned long reboot_code_buffer; ++#endif ++ unsigned long indirection_page; ++ unsigned long start_address; ++} xen_kexec_image_t; ++
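For orientation, a sketch of how dom0 might drive this interface once an image has been prepared; the commands and structures referenced are defined just below, and do_kexec_op() is a hypothetical stand-in for the kernel's kexec_op hypercall wrapper.

    /* Sketch only: load a crash kernel, then fire it from a panic path. */
    xen_kexec_load_t load = {
        .type  = KEXEC_TYPE_CRASH,
        .image = image,              /* page_list etc. filled by the loader */
    };
    if (do_kexec_op(KEXEC_CMD_kexec_load, &load) == 0) {
        xen_kexec_exec_t exec = { .type = KEXEC_TYPE_CRASH };
        (void)do_kexec_op(KEXEC_CMD_kexec, &exec);  /* does not return */
    }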
++/* ++ * Perform kexec having previously loaded a kexec or kdump kernel ++ * as appropriate. ++ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] ++ */ ++#define KEXEC_CMD_kexec 0 ++typedef struct xen_kexec_exec { ++ int type; ++} xen_kexec_exec_t; ++ ++/* ++ * Load/Unload kernel image for kexec or kdump. ++ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] ++ * image == relocation information for kexec (ignored for unload) [in] ++ */ ++#define KEXEC_CMD_kexec_load 1 ++#define KEXEC_CMD_kexec_unload 2 ++typedef struct xen_kexec_load { ++ int type; ++ xen_kexec_image_t image; ++} xen_kexec_load_t; ++ ++#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ ++#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ ++#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ ++#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap ++ * Note that although this is adjacent ++ * to Xen it exists in a separate EFI ++ * region on ia64, and thus needs to be ++ * inserted into iomem_machine separately */ ++#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of ++ * the ia64_boot_param */ ++#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size ++ * of the EFI Memory Map */ ++#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */ ++ ++/* ++ * Find the address and size of certain memory areas ++ * range == KEXEC_RANGE_... [in] ++ * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] ++ * size == number of bytes reserved in window [out] ++ * start == address of the first byte in the window [out] ++ */ ++#define KEXEC_CMD_kexec_get_range 3 ++typedef struct xen_kexec_range { ++ int range; ++ int nr; ++ unsigned long size; ++ unsigned long start; ++} xen_kexec_range_t; ++ ++#endif /* _XEN_PUBLIC_KEXEC_H */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/mem_event.h 2010-01-04 11:56:34.000000000 +0100 +@@ -0,0 +1,69 @@ ++/****************************************************************************** ++ * mem_event.h ++ * ++ * Memory event common structures. ++ * ++ * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp) ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#ifndef _XEN_PUBLIC_MEM_EVENT_H ++#define _XEN_PUBLIC_MEM_EVENT_H ++ ++ ++#include "xen.h" ++#include "io/ring.h" ++ ++ ++/* Memory event notification modes */ ++#define MEM_EVENT_MODE_ASYNC 0 ++#define MEM_EVENT_MODE_SYNC (1 << 0) ++#define MEM_EVENT_MODE_SYNC_ALL (1 << 1) ++ ++/* Memory event flags */ ++#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0) ++#define MEM_EVENT_FLAG_DOM_PAUSED (1 << 1) ++#define MEM_EVENT_FLAG_OUT_OF_MEM (1 << 2) ++ ++ ++typedef struct mem_event_shared_page { ++ int port; ++} mem_event_shared_page_t; ++ ++typedef struct mem_event_st { ++ unsigned long gfn; ++ unsigned long offset; ++ unsigned long p2mt; ++ int vcpu_id; ++ uint64_t flags; ++} mem_event_request_t, mem_event_response_t; ++ ++ ++DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t); ++ ++ ++#endif ++ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/nmi.h 2009-06-23 09:28:21.000000000 +0200 +@@ -0,0 +1,80 @@ ++/****************************************************************************** ++ * nmi.h ++ * ++ * NMI callback registration and reason codes. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2005, Keir Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_NMI_H__ ++#define __XEN_PUBLIC_NMI_H__ ++ ++#include "xen.h" ++ ++/* ++ * NMI reason codes: ++ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. ++ */ ++ /* I/O-check error reported via ISA port 0x61, bit 6. */ ++#define _XEN_NMIREASON_io_error 0 ++#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) ++ /* Parity error reported via ISA port 0x61, bit 7. */ ++#define _XEN_NMIREASON_parity_error 1 ++#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) ++ /* Unknown hardware-generated NMI. */ ++#define _XEN_NMIREASON_unknown 2 ++#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) ++ ++/* ++ * long nmi_op(unsigned int cmd, void *arg) ++ * NB. All ops return zero on success, else a negative error code. ++ */ ++ ++/* ++ * Register NMI callback for this (calling) VCPU. 
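++ * For illustration (a sketch; the wrapper name HYPERVISOR_nmi_op
++ * follows the Linux Xen port, and nmi_entry stands for a hypothetical
++ * low-level NMI entry point):
++ *     struct xennmi_callback cb = {
++ *         .handler_address = (unsigned long)nmi_entry,
++ *         .pad = 0
++ *     };
++ *     int rc = HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);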
++ * Currently this only makes
++ * sense for domain 0, vcpu 0. All other callers will be returned EINVAL.
++ * arg == pointer to xennmi_callback structure.
++ */
++#define XENNMI_register_callback 0
++struct xennmi_callback {
++ unsigned long handler_address;
++ unsigned long pad;
++};
++typedef struct xennmi_callback xennmi_callback_t;
++DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
++
++/*
++ * Deregister NMI callback for this (calling) VCPU.
++ * arg == NULL.
++ */
++#define XENNMI_unregister_callback 1
++
++#endif /* __XEN_PUBLIC_NMI_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/platform.h 2010-01-04 11:56:34.000000000 +0100
+@@ -0,0 +1,393 @@
++/******************************************************************************
++ * platform.h
++ *
++ * Hardware platform operations. Intended for use by domain-0 kernel.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2002-2006, K Fraser
++ */
++
++#ifndef __XEN_PUBLIC_PLATFORM_H__
++#define __XEN_PUBLIC_PLATFORM_H__
++
++#include "xen.h"
++
++#define XENPF_INTERFACE_VERSION 0x03000001
++
++/*
++ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
++ * 1 January, 1970 if the current system time was <system_time>.
++ */
++#define XENPF_settime 17
++struct xenpf_settime {
++ /* IN variables. */
++ uint32_t secs;
++ uint32_t nsecs;
++ uint64_t system_time;
++};
++typedef struct xenpf_settime xenpf_settime_t;
++DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t);
++
++/*
++ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
++ * On x86, @type is an architecture-defined MTRR memory type.
++ * On success, returns the MTRR that was used (@reg) and a handle that can
++ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
++ * (x86-specific).
++ */
++#define XENPF_add_memtype 31
++struct xenpf_add_memtype {
++ /* IN variables. */
++ xen_pfn_t mfn;
++ uint64_t nr_mfns;
++ uint32_t type;
++ /* OUT variables. */
++ uint32_t handle;
++ uint32_t reg;
++};
++typedef struct xenpf_add_memtype xenpf_add_memtype_t;
++DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t);
++
++/*
++ * Tear down an existing memory-range type. If @handle is remembered then it
++ * should be passed in to accurately tear down the correct setting (in case
++ * of overlapping memory regions with differing types).
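++ * For example (a sketch; the wrapper name HYPERVISOR_platform_op
++ * follows the Linux Xen port): if XENPF_add_memtype returned handle H
++ * and register R, the range can be torn down again with
++ *     op.cmd = XENPF_del_memtype;
++ *     op.interface_version = XENPF_INTERFACE_VERSION;
++ *     op.u.del_memtype.handle = H;
++ *     op.u.del_memtype.reg = R;
++ *     HYPERVISOR_platform_op(&op);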
If it is not known ++ * then @handle should be set to zero. In all cases @reg must be set. ++ * (x86-specific). ++ */ ++#define XENPF_del_memtype 32 ++struct xenpf_del_memtype { ++ /* IN variables. */ ++ uint32_t handle; ++ uint32_t reg; ++}; ++typedef struct xenpf_del_memtype xenpf_del_memtype_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); ++ ++/* Read current type of an MTRR (x86-specific). */ ++#define XENPF_read_memtype 33 ++struct xenpf_read_memtype { ++ /* IN variables. */ ++ uint32_t reg; ++ /* OUT variables. */ ++ xen_pfn_t mfn; ++ uint64_t nr_mfns; ++ uint32_t type; ++}; ++typedef struct xenpf_read_memtype xenpf_read_memtype_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); ++ ++#define XENPF_microcode_update 35 ++struct xenpf_microcode_update { ++ /* IN variables. */ ++ XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ ++ uint32_t length; /* Length of microcode data. */ ++}; ++typedef struct xenpf_microcode_update xenpf_microcode_update_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); ++ ++#define XENPF_platform_quirk 39 ++#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ ++#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ ++#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ ++struct xenpf_platform_quirk { ++ /* IN variables. */ ++ uint32_t quirk_id; ++}; ++typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); ++ ++#define XENPF_firmware_info 50 ++#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ ++#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ ++#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ ++struct xenpf_firmware_info { ++ /* IN variables. */ ++ uint32_t type; ++ uint32_t index; ++ /* OUT variables. */ ++ union { ++ struct { ++ /* Int13, Fn48: Check Extensions Present. */ ++ uint8_t device; /* %dl: bios device number */ ++ uint8_t version; /* %ah: major version */ ++ uint16_t interface_support; /* %cx: support bitmap */ ++ /* Int13, Fn08: Legacy Get Device Parameters. */ ++ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ ++ uint8_t legacy_max_head; /* %dh: max head # */ ++ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ ++ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ ++ /* NB. First uint16_t of buffer must be set to buffer size. */ ++ XEN_GUEST_HANDLE(void) edd_params; ++ } disk_info; /* XEN_FW_DISK_INFO */ ++ struct { ++ uint8_t device; /* bios device number */ ++ uint32_t mbr_signature; /* offset 0x1b8 in mbr */ ++ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ ++ struct { ++ /* Int10, AX=4F15: Get EDID info. */ ++ uint8_t capabilities; ++ uint8_t edid_transfer_time; ++ /* must refer to 128-byte buffer */ ++ XEN_GUEST_HANDLE(uint8) edid; ++ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ ++ } u; ++}; ++typedef struct xenpf_firmware_info xenpf_firmware_info_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); ++ ++#define XENPF_enter_acpi_sleep 51 ++struct xenpf_enter_acpi_sleep { ++ /* IN variables */ ++ uint16_t pm1a_cnt_val; /* PM1a control value. */ ++ uint16_t pm1b_cnt_val; /* PM1b control value. */ ++ uint32_t sleep_state; /* Which state to enter (Sn). */ ++ uint32_t flags; /* Must be zero. */ ++}; ++typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); ++ ++#define XENPF_change_freq 52 ++struct xenpf_change_freq { ++ /* IN variables */ ++ uint32_t flags; /* Must be zero. 
*/ ++ uint32_t cpu; /* Physical cpu. */ ++ uint64_t freq; /* New frequency (Hz). */ ++}; ++typedef struct xenpf_change_freq xenpf_change_freq_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); ++ ++/* ++ * Get idle times (nanoseconds since boot) for physical CPUs specified in the ++ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is ++ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap ++ * bit set are written to. On return, @cpumap_bitmap is modified so that any ++ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry ++ * cleared. ++ */ ++#define XENPF_getidletime 53 ++struct xenpf_getidletime { ++ /* IN/OUT variables */ ++ /* IN: CPUs to interrogate; OUT: subset of IN which are present */ ++ XEN_GUEST_HANDLE(uint8) cpumap_bitmap; ++ /* IN variables */ ++ /* Size of cpumap bitmap. */ ++ uint32_t cpumap_nr_cpus; ++ /* Must be indexable for every cpu in cpumap_bitmap. */ ++ XEN_GUEST_HANDLE(uint64) idletime; ++ /* OUT variables */ ++ /* System time when the idletime snapshots were taken. */ ++ uint64_t now; ++}; ++typedef struct xenpf_getidletime xenpf_getidletime_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); ++ ++#define XENPF_set_processor_pminfo 54 ++ ++/* ability bits */ ++#define XEN_PROCESSOR_PM_CX 1 ++#define XEN_PROCESSOR_PM_PX 2 ++#define XEN_PROCESSOR_PM_TX 4 ++ ++/* cmd type */ ++#define XEN_PM_CX 0 ++#define XEN_PM_PX 1 ++#define XEN_PM_TX 2 ++ ++/* Px sub info type */ ++#define XEN_PX_PCT 1 ++#define XEN_PX_PSS 2 ++#define XEN_PX_PPC 4 ++#define XEN_PX_PSD 8 ++ ++struct xen_power_register { ++ uint32_t space_id; ++ uint32_t bit_width; ++ uint32_t bit_offset; ++ uint32_t access_size; ++ uint64_t address; ++}; ++ ++struct xen_processor_csd { ++ uint32_t domain; /* domain number of one dependent group */ ++ uint32_t coord_type; /* coordination type */ ++ uint32_t num; /* number of processors in same domain */ ++}; ++typedef struct xen_processor_csd xen_processor_csd_t; ++DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); ++ ++struct xen_processor_cx { ++ struct xen_power_register reg; /* GAS for Cx trigger register */ ++ uint8_t type; /* cstate value, c0: 0, c1: 1, ... 
*/
++ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */
++ uint32_t power; /* average power consumption(mW) */
++ uint32_t dpcnt; /* number of dependency entries */
++ XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
++};
++typedef struct xen_processor_cx xen_processor_cx_t;
++DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
++
++struct xen_processor_flags {
++ uint32_t bm_control:1;
++ uint32_t bm_check:1;
++ uint32_t has_cst:1;
++ uint32_t power_setup_done:1;
++ uint32_t bm_rld_set:1;
++};
++
++struct xen_processor_power {
++ uint32_t count; /* number of C state entries in array below */
++ struct xen_processor_flags flags; /* global flags of this processor */
++ XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
++};
++
++struct xen_pct_register {
++ uint8_t descriptor;
++ uint16_t length;
++ uint8_t space_id;
++ uint8_t bit_width;
++ uint8_t bit_offset;
++ uint8_t reserved;
++ uint64_t address;
++};
++
++struct xen_processor_px {
++ uint64_t core_frequency; /* megahertz */
++ uint64_t power; /* milliWatts */
++ uint64_t transition_latency; /* microseconds */
++ uint64_t bus_master_latency; /* microseconds */
++ uint64_t control; /* control value */
++ uint64_t status; /* success indicator */
++};
++typedef struct xen_processor_px xen_processor_px_t;
++DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
++
++struct xen_psd_package {
++ uint64_t num_entries;
++ uint64_t revision;
++ uint64_t domain;
++ uint64_t coord_type;
++ uint64_t num_processors;
++};
++
++struct xen_processor_performance {
++ uint32_t flags; /* flag for Px sub info type */
++ uint32_t platform_limit; /* Platform limitation on freq usage */
++ struct xen_pct_register control_register;
++ struct xen_pct_register status_register;
++ uint32_t state_count; /* total available performance states */
++ XEN_GUEST_HANDLE(xen_processor_px_t) states;
++ struct xen_psd_package domain_info;
++ uint32_t shared_type; /* coordination type of this processor */
++};
++typedef struct xen_processor_performance xen_processor_performance_t;
++DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
++
++struct xenpf_set_processor_pminfo {
++ /* IN variables */
++ uint32_t id; /* ACPI CPU ID */
++ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */
++ union {
++ struct xen_processor_power power;/* Cx: _CST/_CSD */
++ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
++ } u;
++};
++typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
++DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
++
++#define XENPF_get_cpuinfo 55
++struct xenpf_pcpuinfo {
++ /* IN */
++ uint32_t xen_cpuid;
++ /* OUT */
++ /* The maximum cpu_id that is present */
++ uint32_t max_present;
++#define XEN_PCPU_FLAGS_ONLINE 1
++ /* Corresponding xen_cpuid is not present */
++#define XEN_PCPU_FLAGS_INVALID 2
++ uint32_t flags;
++ uint32_t apic_id;
++ uint32_t acpi_id;
++};
++typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
++DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t);
++
++#define XENPF_cpu_online 56
++#define XENPF_cpu_offline 57
++struct xenpf_cpu_ol
++{
++ uint32_t cpuid;
++};
++typedef struct xenpf_cpu_ol xenpf_cpu_ol_t;
++DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t);
++
++#define XENPF_cpu_hotadd 58
++struct xenpf_cpu_hotadd
++{
++ uint32_t apic_id;
++ uint32_t acpi_id;
++ uint32_t pxm;
++};
++
++#define XENPF_mem_hotadd 59
++struct xenpf_mem_hotadd
++{
++ uint64_t spfn;
++ uint64_t epfn;
++ uint32_t pxm;
++ uint32_t flags;
++};
++
++struct xen_platform_op {
++ uint32_t cmd;
++ uint32_t
interface_version; /* XENPF_INTERFACE_VERSION */ ++ union { ++ struct xenpf_settime settime; ++ struct xenpf_add_memtype add_memtype; ++ struct xenpf_del_memtype del_memtype; ++ struct xenpf_read_memtype read_memtype; ++ struct xenpf_microcode_update microcode; ++ struct xenpf_platform_quirk platform_quirk; ++ struct xenpf_firmware_info firmware_info; ++ struct xenpf_enter_acpi_sleep enter_acpi_sleep; ++ struct xenpf_change_freq change_freq; ++ struct xenpf_getidletime getidletime; ++ struct xenpf_set_processor_pminfo set_pminfo; ++ struct xenpf_pcpuinfo pcpu_info; ++ struct xenpf_cpu_ol cpu_ol; ++ struct xenpf_cpu_hotadd cpu_add; ++ struct xenpf_mem_hotadd mem_add; ++ uint8_t pad[128]; ++ } u; ++}; ++typedef struct xen_platform_op xen_platform_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); ++ ++#endif /* __XEN_PUBLIC_PLATFORM_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/sysctl.h 2010-01-18 15:23:12.000000000 +0100 +@@ -0,0 +1,523 @@ ++/****************************************************************************** ++ * sysctl.h ++ * ++ * System management operations. For use by node control stack. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2002-2006, K Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_SYSCTL_H__ ++#define __XEN_PUBLIC_SYSCTL_H__ ++ ++#if !defined(__XEN__) && !defined(__XEN_TOOLS__) ++#error "sysctl operations are intended for use by node control tools only" ++#endif ++ ++#include "xen.h" ++#include "domctl.h" ++ ++#define XEN_SYSCTL_INTERFACE_VERSION 0x00000007 ++ ++/* ++ * Read console content from Xen buffer ring. ++ */ ++#define XEN_SYSCTL_readconsole 1 ++struct xen_sysctl_readconsole { ++ /* IN: Non-zero -> clear after reading. */ ++ uint8_t clear; ++ /* IN: Non-zero -> start index specified by @index field. */ ++ uint8_t incremental; ++ uint8_t pad0, pad1; ++ /* ++ * IN: Start index for consuming from ring buffer (if @incremental); ++ * OUT: End index after consuming from ring buffer. ++ */ ++ uint32_t index; ++ /* IN: Virtual address to write console data. */ ++ XEN_GUEST_HANDLE_64(char) buffer; ++ /* IN: Size of buffer; OUT: Bytes written to buffer. 
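++ * A typical consumer sizes @buffer, sets @count to that size, and then
++ * loops: set @incremental, start with @index = 0, and feed the returned
++ * @index back in on each subsequent call (a usage sketch of the fields
++ * above, not an additional requirement).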
*/ ++ uint32_t count; ++}; ++typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); ++ ++/* Get trace buffers machine base address */ ++#define XEN_SYSCTL_tbuf_op 2 ++struct xen_sysctl_tbuf_op { ++ /* IN variables */ ++#define XEN_SYSCTL_TBUFOP_get_info 0 ++#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 ++#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 ++#define XEN_SYSCTL_TBUFOP_set_size 3 ++#define XEN_SYSCTL_TBUFOP_enable 4 ++#define XEN_SYSCTL_TBUFOP_disable 5 ++ uint32_t cmd; ++ /* IN/OUT variables */ ++ struct xenctl_cpumap cpu_mask; ++ uint32_t evt_mask; ++ /* OUT variables */ ++ uint64_aligned_t buffer_mfn; ++ uint32_t size; ++}; ++typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); ++ ++/* ++ * Get physical information about the host machine ++ */ ++#define XEN_SYSCTL_physinfo 3 ++ /* (x86) The platform supports HVM guests. */ ++#define _XEN_SYSCTL_PHYSCAP_hvm 0 ++#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) ++ /* (x86) The platform supports HVM-guest direct access to I/O devices. */ ++#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 ++#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) ++struct xen_sysctl_physinfo { ++ uint32_t threads_per_core; ++ uint32_t cores_per_socket; ++ uint32_t nr_cpus; ++ uint32_t max_node_id; ++ uint32_t cpu_khz; ++ uint64_aligned_t total_pages; ++ uint64_aligned_t free_pages; ++ uint64_aligned_t scrub_pages; ++ uint32_t hw_cap[8]; ++ ++ /* ++ * IN: maximum addressable entry in the caller-provided cpu_to_node array. ++ * OUT: largest cpu identifier in the system. ++ * If OUT is greater than IN then the cpu_to_node array is truncated! ++ */ ++ uint32_t max_cpu_id; ++ /* ++ * If not NULL, this array is filled with node identifier for each cpu. ++ * If a cpu has no node information (e.g., cpu not present) then the ++ * sentinel value ~0u is written. ++ * The size of this array is specified by the caller in @max_cpu_id. ++ * If the actual @max_cpu_id is smaller than the array then the trailing ++ * elements of the array will not be written by the sysctl. ++ */ ++ XEN_GUEST_HANDLE_64(uint32) cpu_to_node; ++ ++ /* XEN_SYSCTL_PHYSCAP_??? */ ++ uint32_t capabilities; ++}; ++typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); ++ ++/* ++ * Get the ID of the current scheduler. ++ */ ++#define XEN_SYSCTL_sched_id 4 ++struct xen_sysctl_sched_id { ++ /* OUT variable */ ++ uint32_t sched_id; ++}; ++typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); ++ ++/* Interface for controlling Xen software performance counters. */ ++#define XEN_SYSCTL_perfc_op 5 ++/* Sub-operations: */ ++#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ ++#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ ++struct xen_sysctl_perfc_desc { ++ char name[80]; /* name of perf counter */ ++ uint32_t nr_vals; /* number of values for this counter */ ++}; ++typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); ++typedef uint32_t xen_sysctl_perfc_val_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); ++ ++struct xen_sysctl_perfc_op { ++ /* IN variables. */ ++ uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ ++ /* OUT variables. 
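++ * (A caller would typically query twice: once with NULL @desc and @val
++ * to learn nr_counters and nr_vals, then again with suitably sized
++ * buffers - a usage sketch suggested by the "or NULL" handles below.)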
*/ ++ uint32_t nr_counters; /* number of counters description */ ++ uint32_t nr_vals; /* number of values */ ++ /* counter information (or NULL) */ ++ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; ++ /* counter values (or NULL) */ ++ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; ++}; ++typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); ++ ++#define XEN_SYSCTL_getdomaininfolist 6 ++struct xen_sysctl_getdomaininfolist { ++ /* IN variables. */ ++ domid_t first_domain; ++ uint32_t max_domains; ++ XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; ++ /* OUT variables. */ ++ uint32_t num_domains; ++}; ++typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); ++ ++/* Inject debug keys into Xen. */ ++#define XEN_SYSCTL_debug_keys 7 ++struct xen_sysctl_debug_keys { ++ /* IN variables. */ ++ XEN_GUEST_HANDLE_64(char) keys; ++ uint32_t nr_keys; ++}; ++typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); ++ ++/* Get physical CPU information. */ ++#define XEN_SYSCTL_getcpuinfo 8 ++struct xen_sysctl_cpuinfo { ++ uint64_aligned_t idletime; ++}; ++typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); ++struct xen_sysctl_getcpuinfo { ++ /* IN variables. */ ++ uint32_t max_cpus; ++ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; ++ /* OUT variables. */ ++ uint32_t nr_cpus; ++}; ++typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); ++ ++#define XEN_SYSCTL_availheap 9 ++struct xen_sysctl_availheap { ++ /* IN variables. */ ++ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ ++ uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ ++ int32_t node; /* NUMA node of interest (-1 for all nodes). */ ++ /* OUT variables. */ ++ uint64_aligned_t avail_bytes;/* Bytes available in the specified region. 
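++ * For example, min_bitwidth = 0, max_bitwidth = 32 and node = -1 ask
++ * how many bytes of heap addressable within 32 bits remain available
++ * across all NUMA nodes (an illustrative combination of the fields
++ * above).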
*/ ++}; ++typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); ++ ++#define XEN_SYSCTL_get_pmstat 10 ++struct pm_px_val { ++ uint64_aligned_t freq; /* Px core frequency */ ++ uint64_aligned_t residency; /* Px residency time */ ++ uint64_aligned_t count; /* Px transition count */ ++}; ++typedef struct pm_px_val pm_px_val_t; ++DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); ++ ++struct pm_px_stat { ++ uint8_t total; /* total Px states */ ++ uint8_t usable; /* usable Px states */ ++ uint8_t last; /* last Px state */ ++ uint8_t cur; /* current Px state */ ++ XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ ++ XEN_GUEST_HANDLE_64(pm_px_val_t) pt; ++}; ++typedef struct pm_px_stat pm_px_stat_t; ++DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); ++ ++struct pm_cx_stat { ++ uint32_t nr; /* entry nr in triggers & residencies, including C0 */ ++ uint32_t last; /* last Cx state */ ++ uint64_aligned_t idle_time; /* idle time from boot */ ++ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ ++ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ ++}; ++ ++struct xen_sysctl_get_pmstat { ++#define PMSTAT_CATEGORY_MASK 0xf0 ++#define PMSTAT_PX 0x10 ++#define PMSTAT_CX 0x20 ++#define PMSTAT_get_max_px (PMSTAT_PX | 0x1) ++#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) ++#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) ++#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) ++#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) ++#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) ++ uint32_t type; ++ uint32_t cpuid; ++ union { ++ struct pm_px_stat getpx; ++ struct pm_cx_stat getcx; ++ /* other struct for tx, etc */ ++ } u; ++}; ++typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); ++ ++/* ++ * Status codes. Must be greater than 0 to avoid confusing ++ * sysctl callers that see 0 as a plain successful return. ++ */ ++#define XEN_CPU_HOTPLUG_STATUS_OFFLINE 1 ++#define XEN_CPU_HOTPLUG_STATUS_ONLINE 2 ++#define XEN_CPU_HOTPLUG_STATUS_NEW 3 ++ ++#define XEN_SYSCTL_cpu_hotplug 11 ++struct xen_sysctl_cpu_hotplug { ++ /* IN variables */ ++ uint32_t cpu; /* Physical cpu. */ ++#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 ++#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 ++#define XEN_SYSCTL_CPU_HOTPLUG_STATUS 2 ++ uint32_t op; /* hotplug opcode */ ++}; ++typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); ++ ++/* ++ * Get/set xen power management, include ++ * 1. 
cpufreq governors and related parameters
++ */
++#define XEN_SYSCTL_pm_op 12
++struct xen_userspace {
++ uint32_t scaling_setspeed;
++};
++typedef struct xen_userspace xen_userspace_t;
++
++struct xen_ondemand {
++ uint32_t sampling_rate_max;
++ uint32_t sampling_rate_min;
++
++ uint32_t sampling_rate;
++ uint32_t up_threshold;
++};
++typedef struct xen_ondemand xen_ondemand_t;
++
++/*
++ * The cpufreq parameter names in this structure match the
++ * corresponding sysfs file names used by native Linux.
++ */
++#define CPUFREQ_NAME_LEN 16
++struct xen_get_cpufreq_para {
++ /* IN/OUT variable */
++ uint32_t cpu_num;
++ uint32_t freq_num;
++ uint32_t gov_num;
++
++ /* for all governors */
++ /* OUT variable */
++ XEN_GUEST_HANDLE_64(uint32) affected_cpus;
++ XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies;
++ XEN_GUEST_HANDLE_64(char) scaling_available_governors;
++ char scaling_driver[CPUFREQ_NAME_LEN];
++
++ uint32_t cpuinfo_cur_freq;
++ uint32_t cpuinfo_max_freq;
++ uint32_t cpuinfo_min_freq;
++ uint32_t scaling_cur_freq;
++
++ char scaling_governor[CPUFREQ_NAME_LEN];
++ uint32_t scaling_max_freq;
++ uint32_t scaling_min_freq;
++
++ /* for specific governor */
++ union {
++ struct xen_userspace userspace;
++ struct xen_ondemand ondemand;
++ } u;
++};
++
++struct xen_set_cpufreq_gov {
++ char scaling_governor[CPUFREQ_NAME_LEN];
++};
++
++struct xen_set_cpufreq_para {
++ #define SCALING_MAX_FREQ 1
++ #define SCALING_MIN_FREQ 2
++ #define SCALING_SETSPEED 3
++ #define SAMPLING_RATE 4
++ #define UP_THRESHOLD 5
++
++ uint32_t ctrl_type;
++ uint32_t ctrl_value;
++};
++
++/* Get physical CPU topology information. */
++#define INVALID_TOPOLOGY_ID (~0U)
++struct xen_get_cputopo {
++ /* IN: maximum addressable entry in
++ * the caller-provided cpu_to_core/socket.
++ */
++ uint32_t max_cpus;
++ XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
++ XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
++
++ /* OUT: number of cpus returned
++ * If OUT is greater than IN then the cpu_to_core/socket is truncated!
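++ * (e.g. with max_cpus = 8 on a 16-CPU host, only the first 8 entries
++ * of each array are written while nr_cpus reports 16, so the caller
++ * can enlarge the buffers and retry - an illustrative reading, not
++ * new behaviour.)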
++ */ ++ uint32_t nr_cpus; ++}; ++ ++struct xen_sysctl_pm_op { ++ #define PM_PARA_CATEGORY_MASK 0xf0 ++ #define CPUFREQ_PARA 0x10 ++ ++ /* cpufreq command type */ ++ #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) ++ #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) ++ #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) ++ #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) ++ ++ /* get CPU topology */ ++ #define XEN_SYSCTL_pm_op_get_cputopo 0x20 ++ ++ /* set/reset scheduler power saving option */ ++ #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 ++ ++ /* cpuidle max_cstate access command */ ++ #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 ++ #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 ++ ++ /* set scheduler migration cost value */ ++ #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 ++ #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 ++ ++ uint32_t cmd; ++ uint32_t cpuid; ++ union { ++ struct xen_get_cpufreq_para get_para; ++ struct xen_set_cpufreq_gov set_gov; ++ struct xen_set_cpufreq_para set_para; ++ uint64_aligned_t get_avgfreq; ++ struct xen_get_cputopo get_topo; ++ uint32_t set_sched_opt_smt; ++ uint32_t get_max_cstate; ++ uint32_t set_max_cstate; ++ uint32_t get_vcpu_migration_delay; ++ uint32_t set_vcpu_migration_delay; ++ } u; ++}; ++ ++#define XEN_SYSCTL_page_offline_op 14 ++struct xen_sysctl_page_offline_op { ++ /* IN: range of page to be offlined */ ++#define sysctl_page_offline 1 ++#define sysctl_page_online 2 ++#define sysctl_query_page_offline 3 ++ uint32_t cmd; ++ uint32_t start; ++ uint32_t end; ++ /* OUT: result of page offline request */ ++ /* ++ * bit 0~15: result flags ++ * bit 16~31: owner ++ */ ++ XEN_GUEST_HANDLE(uint32) status; ++}; ++ ++#define PG_OFFLINE_STATUS_MASK (0xFFUL) ++ ++/* The result is invalid, i.e. HV does not handle it */ ++#define PG_OFFLINE_INVALID (0x1UL << 0) ++ ++#define PG_OFFLINE_OFFLINED (0x1UL << 1) ++#define PG_OFFLINE_PENDING (0x1UL << 2) ++#define PG_OFFLINE_FAILED (0x1UL << 3) ++ ++#define PG_ONLINE_FAILED PG_OFFLINE_FAILED ++#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED ++ ++#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) ++#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) ++#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) ++#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) ++ ++#define PG_OFFLINE_MISC_MASK (0xFFUL << 4) ++ ++/* only valid when PG_OFFLINE_FAILED */ ++#define PG_OFFLINE_XENPAGE (0x1UL << 8) ++#define PG_OFFLINE_DOM0PAGE (0x1UL << 9) ++#define PG_OFFLINE_ANONYMOUS (0x1UL << 10) ++#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) ++#define PG_OFFLINE_OWNED (0x1UL << 12) ++ ++#define PG_OFFLINE_BROKEN (0x1UL << 13) ++#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN ++ ++#define PG_OFFLINE_OWNER_SHIFT 16 ++ ++#define XEN_SYSCTL_lockprof_op 15 ++/* Sub-operations: */ ++#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ ++#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ ++/* Record-type: */ ++#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ ++#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ ++#define LOCKPROF_TYPE_N 2 /* number of types */ ++struct xen_sysctl_lockprof_data { ++ char name[40]; /* lock name (may include up to 2 %d specifiers) */ ++ int32_t type; /* LOCKPROF_TYPE_??? */ ++ int32_t idx; /* index (e.g. 
domain id) */ ++ uint64_aligned_t lock_cnt; /* # of locking succeeded */ ++ uint64_aligned_t block_cnt; /* # of wait for lock */ ++ uint64_aligned_t lock_time; /* nsecs lock held */ ++ uint64_aligned_t block_time; /* nsecs waited for lock */ ++}; ++typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); ++struct xen_sysctl_lockprof_op { ++ /* IN variables. */ ++ uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */ ++ uint32_t max_elem; /* size of output buffer */ ++ /* OUT variables (query only). */ ++ uint32_t nr_elem; /* number of elements available */ ++ uint64_aligned_t time; /* nsecs of profile measurement */ ++ /* profile information (or NULL) */ ++ XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; ++}; ++typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); ++ ++struct xen_sysctl { ++ uint32_t cmd; ++ uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ ++ union { ++ struct xen_sysctl_readconsole readconsole; ++ struct xen_sysctl_tbuf_op tbuf_op; ++ struct xen_sysctl_physinfo physinfo; ++ struct xen_sysctl_sched_id sched_id; ++ struct xen_sysctl_perfc_op perfc_op; ++ struct xen_sysctl_getdomaininfolist getdomaininfolist; ++ struct xen_sysctl_debug_keys debug_keys; ++ struct xen_sysctl_getcpuinfo getcpuinfo; ++ struct xen_sysctl_availheap availheap; ++ struct xen_sysctl_get_pmstat get_pmstat; ++ struct xen_sysctl_cpu_hotplug cpu_hotplug; ++ struct xen_sysctl_pm_op pm_op; ++ struct xen_sysctl_page_offline_op page_offline; ++ struct xen_sysctl_lockprof_op lockprof_op; ++ uint8_t pad[128]; ++ } u; ++}; ++typedef struct xen_sysctl xen_sysctl_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); ++ ++#endif /* __XEN_PUBLIC_SYSCTL_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/tmem.h 2010-01-04 11:56:34.000000000 +0100 +@@ -0,0 +1,144 @@ ++/****************************************************************************** ++ * tmem.h ++ * ++ * Guest OS interface to Xen Transcendent Memory. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ * ++ * Copyright (c) 2004, K A Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_TMEM_H__ ++#define __XEN_PUBLIC_TMEM_H__ ++ ++#include "xen.h" ++ ++/* Commands to HYPERVISOR_tmem_op() */ ++#define TMEM_CONTROL 0 ++#define TMEM_NEW_POOL 1 ++#define TMEM_DESTROY_POOL 2 ++#define TMEM_NEW_PAGE 3 ++#define TMEM_PUT_PAGE 4 ++#define TMEM_GET_PAGE 5 ++#define TMEM_FLUSH_PAGE 6 ++#define TMEM_FLUSH_OBJECT 7 ++#define TMEM_READ 8 ++#define TMEM_WRITE 9 ++#define TMEM_XCHG 10 ++ ++/* Privileged commands to HYPERVISOR_tmem_op() */ ++#define TMEM_AUTH 101 ++#define TMEM_RESTORE_NEW 102 ++ ++/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */ ++#define TMEMC_THAW 0 ++#define TMEMC_FREEZE 1 ++#define TMEMC_FLUSH 2 ++#define TMEMC_DESTROY 3 ++#define TMEMC_LIST 4 ++#define TMEMC_SET_WEIGHT 5 ++#define TMEMC_SET_CAP 6 ++#define TMEMC_SET_COMPRESS 7 ++#define TMEMC_QUERY_FREEABLE_MB 8 ++#define TMEMC_SAVE_BEGIN 10 ++#define TMEMC_SAVE_GET_VERSION 11 ++#define TMEMC_SAVE_GET_MAXPOOLS 12 ++#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13 ++#define TMEMC_SAVE_GET_CLIENT_CAP 14 ++#define TMEMC_SAVE_GET_CLIENT_FLAGS 15 ++#define TMEMC_SAVE_GET_POOL_FLAGS 16 ++#define TMEMC_SAVE_GET_POOL_NPAGES 17 ++#define TMEMC_SAVE_GET_POOL_UUID 18 ++#define TMEMC_SAVE_GET_NEXT_PAGE 19 ++#define TMEMC_SAVE_GET_NEXT_INV 20 ++#define TMEMC_SAVE_END 21 ++#define TMEMC_RESTORE_BEGIN 30 ++#define TMEMC_RESTORE_PUT_PAGE 32 ++#define TMEMC_RESTORE_FLUSH_PAGE 33 ++ ++/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ ++#define TMEM_POOL_PERSIST 1 ++#define TMEM_POOL_SHARED 2 ++#define TMEM_POOL_PAGESIZE_SHIFT 4 ++#define TMEM_POOL_PAGESIZE_MASK 0xf ++#define TMEM_POOL_VERSION_SHIFT 24 ++#define TMEM_POOL_VERSION_MASK 0xff ++ ++/* Bits for client flags (save/restore) */ ++#define TMEM_CLIENT_COMPRESS 1 ++#define TMEM_CLIENT_FROZEN 2 ++ ++/* Special errno values */ ++#define EFROZEN 1000 ++#define EEMPTY 1001 ++ ++ ++#ifndef __ASSEMBLY__ ++typedef xen_pfn_t tmem_cli_mfn_t; ++typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t; ++struct tmem_op { ++ uint32_t cmd; ++ int32_t pool_id; ++ union { ++ struct { ++ uint64_t uuid[2]; ++ uint32_t flags; ++ uint32_t arg1; ++ } new; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */ ++ struct { ++ uint32_t subop; ++ uint32_t cli_id; ++ uint32_t arg1; ++ uint32_t arg2; ++ uint64_t arg3; ++ tmem_cli_va_t buf; ++ } ctrl; /* for cmd == TMEM_CONTROL */ ++ struct { ++ ++ uint64_t object; ++ uint32_t index; ++ uint32_t tmem_offset; ++ uint32_t pfn_offset; ++ uint32_t len; ++ tmem_cli_mfn_t cmfn; /* client machine page frame */ ++ } gen; /* for all other cmd ("generic") */ ++ } u; ++}; ++typedef struct tmem_op tmem_op_t; ++DEFINE_XEN_GUEST_HANDLE(tmem_op_t); ++ ++struct tmem_handle { ++ uint32_t pool_id; ++ uint32_t index; ++ uint64_t oid; ++}; ++ ++#endif ++ ++#endif /* __XEN_PUBLIC_TMEM_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/trace.h 2009-06-23 09:28:21.000000000 +0200 +@@ -0,0 +1,208 @@ ++/****************************************************************************** ++ * include/public/trace.h ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, 
and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Mark Williamson, (C) 2004 Intel Research Cambridge ++ * Copyright (C) 2005 Bin Ren ++ */ ++ ++#ifndef __XEN_PUBLIC_TRACE_H__ ++#define __XEN_PUBLIC_TRACE_H__ ++ ++#define TRACE_EXTRA_MAX 7 ++#define TRACE_EXTRA_SHIFT 28 ++ ++/* Trace classes */ ++#define TRC_CLS_SHIFT 16 ++#define TRC_GEN 0x0001f000 /* General trace */ ++#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ ++#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ ++#define TRC_HVM 0x0008f000 /* Xen HVM trace */ ++#define TRC_MEM 0x0010f000 /* Xen memory trace */ ++#define TRC_PV 0x0020f000 /* Xen PV traces */ ++#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ ++#define TRC_PM 0x0080f000 /* Xen power management trace */ ++#define TRC_ALL 0x0ffff000 ++#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) ++#define TRC_HD_CYCLE_FLAG (1UL<<31) ++#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) ++#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) ++ ++/* Trace subclasses */ ++#define TRC_SUBCLS_SHIFT 12 ++ ++/* trace subclasses for SVM */ ++#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ ++#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ ++ ++#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ ++#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ ++ ++/* Trace events per class */ ++#define TRC_LOST_RECORDS (TRC_GEN + 1) ++#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) ++#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) ++#define TRC_TRACE_IRQ (TRC_GEN + 4) ++ ++#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) ++#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2) ++#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) ++#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) ++#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) ++#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4) ++#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5) ++#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6) ++#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7) ++#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8) ++#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9) ++#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10) ++#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11) ++#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12) ++#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) ++#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) ++#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) ++ ++#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) ++#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) ++#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) ++ ++#define TRC_PV_HYPERCALL (TRC_PV + 1) ++#define TRC_PV_TRAP (TRC_PV + 3) ++#define TRC_PV_PAGE_FAULT (TRC_PV + 4) ++#define TRC_PV_FORCED_INVALID_OP 
(TRC_PV + 5)
++#define TRC_PV_EMULATE_PRIVOP (TRC_PV + 6)
++#define TRC_PV_EMULATE_4GB (TRC_PV + 7)
++#define TRC_PV_MATH_STATE_RESTORE (TRC_PV + 8)
++#define TRC_PV_PAGING_FIXUP (TRC_PV + 9)
++#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10)
++#define TRC_PV_PTWR_EMULATION (TRC_PV + 11)
++#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV + 12)
++ /* Indicates that addresses in trace record are 64 bits */
++#define TRC_64_FLAG (0x100)
++
++#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1)
++#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2)
++#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3)
++#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4)
++#define TRC_SHADOW_MMIO (TRC_SHADOW + 5)
++#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6)
++#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7)
++#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8)
++#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9)
++#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10)
++#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
++#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12)
++#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13)
++#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14)
++#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15)
++
++/* trace events per subclass */
++#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01)
++#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02)
++#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
++#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01)
++#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
++#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02)
++#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
++#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03)
++#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04)
++#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05)
++#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06)
++#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07)
++#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08)
++#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
++#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09)
++#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
++#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A)
++#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B)
++#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C)
++#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D)
++#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E)
++#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F)
++#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10)
++#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11)
++#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12)
++#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13)
++#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14)
++#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
++#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
++#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16)
++#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17)
++#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18)
++#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19)
++#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
++#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20)
++#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216)
++#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217)
++
++/* trace subclasses for power management */
++#define TRC_PM_FREQ 0x00801000 /* xen cpu freq events */
++#define TRC_PM_IDLE 0x00802000 /* xen cpu idle events */
++
++/* trace events per class */
++#define TRC_PM_FREQ_CHANGE (TRC_PM_FREQ + 0x01)
++#define TRC_PM_IDLE_ENTRY (TRC_PM_IDLE
+ 0x01)
++#define TRC_PM_IDLE_EXIT (TRC_PM_IDLE + 0x02)
++
++/* This structure represents a single trace buffer record. */
++struct t_rec {
++ uint32_t event:28;
++ uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */
++ uint32_t cycles_included:1; /* u.cycles or u.nocycles? */
++ union {
++ struct {
++ uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
++ uint32_t extra_u32[7]; /* event data items */
++ } cycles;
++ struct {
++ uint32_t extra_u32[7]; /* event data items */
++ } nocycles;
++ } u;
++};
++
++/*
++ * This structure contains the metadata for a single trace buffer. The head
++ * field indexes into an array of struct t_rec's.
++ */
++struct t_buf {
++ /* Assume the data buffer size is X. X is generally not a power of 2.
++ * CONS and PROD are incremented modulo (2*X):
++ * 0 <= cons < 2*X
++ * 0 <= prod < 2*X
++ * This is done because addition modulo X breaks at 2^32 when X is not a
++ * power of 2:
++ * (((2^32 - 1) % X) + 1) % X != (2^32) % X
++ */
++ uint32_t cons; /* Offset of next item to be consumed by control tools. */
++ uint32_t prod; /* Offset of next item to be produced by Xen. */
++ /* Records follow immediately after the meta-data header. */
++};
++
++#endif /* __XEN_PUBLIC_TRACE_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/xen-compat.h 2010-01-04 11:56:34.000000000 +0100
+@@ -0,0 +1,44 @@
++/******************************************************************************
++ * xen-compat.h
++ *
++ * Guest OS interface to Xen. Compatibility layer.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Copyright (c) 2006, Christian Limpach
++ */
++
++#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
++#define __XEN_PUBLIC_XEN_COMPAT_H__
++
++#define __XEN_LATEST_INTERFACE_VERSION__ 0x0003020a
++
++#if defined(__XEN__) || defined(__XEN_TOOLS__)
++/* Xen is built with matching headers and implements the latest interface. */
++#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
++#elif !defined(__XEN_INTERFACE_VERSION__)
++/* Guests which do not specify a version get the legacy interface. */
++#define __XEN_INTERFACE_VERSION__ 0x00000000
++#endif
++
++#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
++#error "These header files do not support the requested interface version."
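++/*
++ * For example, a guest built for the 3.2.5 interface would define
++ * __XEN_INTERFACE_VERSION__ as 0x00030205 (typically on the compiler
++ * command line) before including these headers; requesting anything
++ * newer than 0x0003020a trips the #error above. (An illustrative use
++ * of the versioning scheme.)
++ */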
++#endif ++ ++#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/xenoprof.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,138 @@ ++/****************************************************************************** ++ * xenoprof.h ++ * ++ * Interface for enabling system wide profiling based on hardware performance ++ * counters ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * Written by Aravind Menon & Jose Renato Santos ++ */ ++ ++#ifndef __XEN_PUBLIC_XENOPROF_H__ ++#define __XEN_PUBLIC_XENOPROF_H__ ++ ++#include "xen.h" ++ ++/* ++ * Commands to HYPERVISOR_xenoprof_op(). ++ */ ++#define XENOPROF_init 0 ++#define XENOPROF_reset_active_list 1 ++#define XENOPROF_reset_passive_list 2 ++#define XENOPROF_set_active 3 ++#define XENOPROF_set_passive 4 ++#define XENOPROF_reserve_counters 5 ++#define XENOPROF_counter 6 ++#define XENOPROF_setup_events 7 ++#define XENOPROF_enable_virq 8 ++#define XENOPROF_start 9 ++#define XENOPROF_stop 10 ++#define XENOPROF_disable_virq 11 ++#define XENOPROF_release_counters 12 ++#define XENOPROF_shutdown 13 ++#define XENOPROF_get_buffer 14 ++#define XENOPROF_set_backtrace 15 ++#define XENOPROF_last_op 15 ++ ++#define MAX_OPROF_EVENTS 32 ++#define MAX_OPROF_DOMAINS 25 ++#define XENOPROF_CPU_TYPE_SIZE 64 ++ ++/* Xenoprof performance events (not Xen events) */ ++struct event_log { ++ uint64_t eip; ++ uint8_t mode; ++ uint8_t event; ++}; ++ ++/* PC value that indicates a special code */ ++#define XENOPROF_ESCAPE_CODE ~0UL ++/* Transient events for the xenoprof->oprofile cpu buf */ ++#define XENOPROF_TRACE_BEGIN 1 ++ ++/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ ++struct xenoprof_buf { ++ uint32_t event_head; ++ uint32_t event_tail; ++ uint32_t event_size; ++ uint32_t vcpu_id; ++ uint64_t xen_samples; ++ uint64_t kernel_samples; ++ uint64_t user_samples; ++ uint64_t lost_samples; ++ struct event_log event_log[1]; ++}; ++#ifndef __XEN__ ++typedef struct xenoprof_buf xenoprof_buf_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); ++#endif ++ ++struct xenoprof_init { ++ int32_t num_events; ++ int32_t is_primary; ++ char cpu_type[XENOPROF_CPU_TYPE_SIZE]; ++}; ++typedef struct xenoprof_init xenoprof_init_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); ++ ++struct xenoprof_get_buffer { ++ int32_t max_samples; ++ int32_t nbuf; ++ int32_t bufsize; ++ uint64_t buf_gmaddr; ++}; ++typedef 
struct xenoprof_get_buffer xenoprof_get_buffer_t;
++DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t);
++
++struct xenoprof_counter {
++ uint32_t ind;
++ uint64_t count;
++ uint32_t enabled;
++ uint32_t event;
++ uint32_t hypervisor;
++ uint32_t kernel;
++ uint32_t user;
++ uint64_t unit_mask;
++};
++typedef struct xenoprof_counter xenoprof_counter_t;
++DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
++
++typedef struct xenoprof_passive {
++ uint16_t domain_id;
++ int32_t max_samples;
++ int32_t nbuf;
++ int32_t bufsize;
++ uint64_t buf_gmaddr;
++} xenoprof_passive_t;
++DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t);
++
++
++#endif /* __XEN_PUBLIC_XENOPROF_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-set-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2010-01-18/include/xen/interface/xsm/acm.h 2010-01-04 11:56:34.000000000 +0100
+@@ -0,0 +1,223 @@
++/*
++ * acm.h: Xen access control module interface definitions
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ *
++ * Reiner Sailer
++ * Copyright (c) 2005, International Business Machines Corporation.
++ */
++
++#ifndef _XEN_PUBLIC_ACM_H
++#define _XEN_PUBLIC_ACM_H
++
++#include "../xen.h"
++
++/* default ssid reference value if not supplied */
++#define ACM_DEFAULT_SSID 0x0
++#define ACM_DEFAULT_LOCAL_SSID 0x0
++
++/* Internal ACM ERROR types */
++#define ACM_OK 0
++#define ACM_UNDEF -1
++#define ACM_INIT_SSID_ERROR -2
++#define ACM_INIT_SOID_ERROR -3
++#define ACM_ERROR -4
++
++/* External ACCESS DECISIONS */
++#define ACM_ACCESS_PERMITTED 0
++#define ACM_ACCESS_DENIED -111
++#define ACM_NULL_POINTER_ERROR -200
++
++/*
++ Error codes reported when trying to test for a new policy.
++ These error codes are reported in an array of tuples where
++ each error code is followed by a parameter describing the error
++ more closely, such as a domain id.
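++ For instance, the pair (ACM_SSIDREF_IN_USE, <domid>) would indicate
++ that the ssidref in question is still in use by the given domain
++ (an illustrative reading of the tuple format described above).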
++*/ ++#define ACM_EVTCHN_SHARING_VIOLATION 0x100 ++#define ACM_GNTTAB_SHARING_VIOLATION 0x101 ++#define ACM_DOMAIN_LOOKUP 0x102 ++#define ACM_CHWALL_CONFLICT 0x103 ++#define ACM_SSIDREF_IN_USE 0x104 ++ ++ ++/* primary policy in lower 4 bits */ ++#define ACM_NULL_POLICY 0 ++#define ACM_CHINESE_WALL_POLICY 1 ++#define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2 ++#define ACM_POLICY_UNDEFINED 15 ++ ++/* combinations have the secondary policy component in the higher 4 bits */ ++#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \ ++ ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY) ++ ++/* policy: */ ++#define ACM_POLICY_NAME(X) \ ++ ((X) == (ACM_NULL_POLICY)) ? "NULL" : \ ++ ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL" : \ ++ ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT" : \ ++ ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT" : \ ++ "UNDEFINED" ++ ++/* the following policy versions must be increased ++ * whenever the interpretation of the related ++ * policy's data structure changes ++ */ ++#define ACM_POLICY_VERSION 4 ++#define ACM_CHWALL_VERSION 1 ++#define ACM_STE_VERSION 1 ++ ++/* defines an ssid reference used by xen */ ++typedef uint32_t ssidref_t; ++ ++/* hooks that are known to domains */ ++#define ACMHOOK_none 0 ++#define ACMHOOK_sharing 1 ++#define ACMHOOK_authorization 2 ++#define ACMHOOK_conflictset 3 ++ ++/* -------security policy relevant type definitions-------- */ ++ ++/* type identifier; compares to "equal" or "not equal" */ ++typedef uint16_t domaintype_t; ++ ++/* CHINESE WALL POLICY DATA STRUCTURES ++ * ++ * current accumulated conflict type set: ++ * When a domain is started and has a type that is in ++ * a conflict set, the conflicting types are incremented in ++ * the aggregate set. When a domain is destroyed, the ++ * types conflicting with its type are decremented. ++ * If a domain has multiple types, this procedure works over ++ * all those types. ++ * ++ * conflict_aggregate_set[i] holds the number of ++ * running domains that have a conflict with type i. ++ * ++ * running_types[i] holds the number of running domains ++ * that include type i in their ssidref-referenced type set ++ * ++ * conflict_sets[i][j] is "0" if type j has no conflict ++ * with type i and is "1" otherwise. ++ */ ++/* high-16 = version, low-16 = check magic */ ++#define ACM_MAGIC 0x0001debc ++ ++/* size of the SHA1 hash identifying the XML policy from which the ++ binary policy was created */ ++#define ACM_SHA1_HASH_SIZE 20 ++ ++/* each offset is in bytes from the start of the struct ++ * it is part of */ ++ ++/* V3 of the policy buffer added a version structure */ ++struct acm_policy_version ++{ ++ uint32_t major; ++ uint32_t minor; ++}; ++ ++ ++/* each buffer consists of all policy information for ++ * the respective policy given in the policy code ++ * ++ * acm_policy_buffer, acm_chwall_policy_buffer, ++ * and acm_ste_policy_buffer need to stay 32-bit aligned ++ * because we create binary policies also with external ++ * tools that assume packed representations (e.g. 
the Java tool) ++ */ ++struct acm_policy_buffer { ++ uint32_t magic; ++ uint32_t policy_version; /* ACM_POLICY_VERSION */ ++ uint32_t len; ++ uint32_t policy_reference_offset; ++ uint32_t primary_policy_code; ++ uint32_t primary_buffer_offset; ++ uint32_t secondary_policy_code; ++ uint32_t secondary_buffer_offset; ++ struct acm_policy_version xml_pol_version; /* added in V3 */ ++ uint8_t xml_policy_hash[ACM_SHA1_HASH_SIZE]; /* added in V4 */ ++}; ++ ++ ++struct acm_policy_reference_buffer { ++ uint32_t len; ++}; ++ ++struct acm_chwall_policy_buffer { ++ uint32_t policy_version; /* ACM_CHWALL_VERSION */ ++ uint32_t policy_code; ++ uint32_t chwall_max_types; ++ uint32_t chwall_max_ssidrefs; ++ uint32_t chwall_max_conflictsets; ++ uint32_t chwall_ssid_offset; ++ uint32_t chwall_conflict_sets_offset; ++ uint32_t chwall_running_types_offset; ++ uint32_t chwall_conflict_aggregate_offset; ++}; ++ ++struct acm_ste_policy_buffer { ++ uint32_t policy_version; /* ACM_STE_VERSION */ ++ uint32_t policy_code; ++ uint32_t ste_max_types; ++ uint32_t ste_max_ssidrefs; ++ uint32_t ste_ssid_offset; ++}; ++ ++struct acm_stats_buffer { ++ uint32_t magic; ++ uint32_t len; ++ uint32_t primary_policy_code; ++ uint32_t primary_stats_offset; ++ uint32_t secondary_policy_code; ++ uint32_t secondary_stats_offset; ++}; ++ ++struct acm_ste_stats_buffer { ++ uint32_t ec_eval_count; ++ uint32_t gt_eval_count; ++ uint32_t ec_denied_count; ++ uint32_t gt_denied_count; ++ uint32_t ec_cachehit_count; ++ uint32_t gt_cachehit_count; ++}; ++ ++struct acm_ssid_buffer { ++ uint32_t len; ++ ssidref_t ssidref; ++ uint32_t policy_reference_offset; ++ uint32_t primary_policy_code; ++ uint32_t primary_max_types; ++ uint32_t primary_types_offset; ++ uint32_t secondary_policy_code; ++ uint32_t secondary_max_types; ++ uint32_t secondary_types_offset; ++}; ++ ++#endif ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/xsm/acm_ops.h 2007-10-22 13:39:15.000000000 +0200 +@@ -0,0 +1,159 @@ ++/* ++ * acm_ops.h: Xen access control module hypervisor commands ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Reiner Sailer ++ * Copyright (c) 2005,2006 International Business Machines Corporation. 
++ */ ++ ++#ifndef __XEN_PUBLIC_ACM_OPS_H__ ++#define __XEN_PUBLIC_ACM_OPS_H__ ++ ++#include "../xen.h" ++#include "acm.h" ++ ++/* ++ * Make sure you increment the interface version whenever you modify this file! ++ * This makes sure that old versions of acm tools will stop working in a ++ * well-defined way (rather than crashing the machine, for instance). ++ */ ++#define ACM_INTERFACE_VERSION 0xAAAA000A ++ ++/************************************************************************/ ++ ++/* ++ * Prototype for this hypercall is: ++ * int acm_op(int cmd, void *args) ++ * @cmd == ACMOP_??? (access control module operation). ++ * @args == Operation-specific extra arguments (NULL if none). ++ */ ++ ++ ++#define ACMOP_setpolicy 1 ++struct acm_setpolicy { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) pushcache; ++ uint32_t pushcache_size; ++}; ++ ++ ++#define ACMOP_getpolicy 2 ++struct acm_getpolicy { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) pullcache; ++ uint32_t pullcache_size; ++}; ++ ++ ++#define ACMOP_dumpstats 3 ++struct acm_dumpstats { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) pullcache; ++ uint32_t pullcache_size; ++}; ++ ++ ++#define ACMOP_getssid 4 ++#define ACM_GETBY_ssidref 1 ++#define ACM_GETBY_domainid 2 ++struct acm_getssid { ++ /* IN */ ++ uint32_t get_ssid_by; /* ACM_GETBY_* */ ++ union { ++ domaintype_t domainid; ++ ssidref_t ssidref; ++ } id; ++ XEN_GUEST_HANDLE_64(void) ssidbuf; ++ uint32_t ssidbuf_size; ++}; ++ ++#define ACMOP_getdecision 5 ++struct acm_getdecision { ++ /* IN */ ++ uint32_t get_decision_by1; /* ACM_GETBY_* */ ++ uint32_t get_decision_by2; /* ACM_GETBY_* */ ++ union { ++ domaintype_t domainid; ++ ssidref_t ssidref; ++ } id1; ++ union { ++ domaintype_t domainid; ++ ssidref_t ssidref; ++ } id2; ++ uint32_t hook; ++ /* OUT */ ++ uint32_t acm_decision; ++}; ++ ++ ++#define ACMOP_chgpolicy 6 ++struct acm_change_policy { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) policy_pushcache; ++ uint32_t policy_pushcache_size; ++ XEN_GUEST_HANDLE_64(void) del_array; ++ uint32_t delarray_size; ++ XEN_GUEST_HANDLE_64(void) chg_array; ++ uint32_t chgarray_size; ++ /* OUT */ ++ /* array with error code */ ++ XEN_GUEST_HANDLE_64(void) err_array; ++ uint32_t errarray_size; ++}; ++ ++#define ACMOP_relabeldoms 7 ++struct acm_relabel_doms { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) relabel_map; ++ uint32_t relabel_map_size; ++ /* OUT */ ++ XEN_GUEST_HANDLE_64(void) err_array; ++ uint32_t errarray_size; ++}; ++ ++/* future interface to Xen */ ++struct xen_acmctl { ++ uint32_t cmd; ++ uint32_t interface_version; ++ union { ++ struct acm_setpolicy setpolicy; ++ struct acm_getpolicy getpolicy; ++ struct acm_dumpstats dumpstats; ++ struct acm_getssid getssid; ++ struct acm_getdecision getdecision; ++ struct acm_change_policy change_policy; ++ struct acm_relabel_doms relabel_doms; ++ } u; ++}; ++ ++typedef struct xen_acmctl xen_acmctl_t; ++DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t); ++ ++#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/include/xen/interface/xsm/flask_op.h 2010-01-04 11:56:34.000000000 +0100 +@@ -0,0 +1,47 @@ ++/* ++ * This file contains the flask_op hypercall commands and definitions. 
++ * ++ * Author: George Coker, ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, ++ * as published by the Free Software Foundation. ++ */ ++ ++#ifndef __FLASK_OP_H__ ++#define __FLASK_OP_H__ ++ ++#define FLASK_LOAD 1 ++#define FLASK_GETENFORCE 2 ++#define FLASK_SETENFORCE 3 ++#define FLASK_CONTEXT_TO_SID 4 ++#define FLASK_SID_TO_CONTEXT 5 ++#define FLASK_ACCESS 6 ++#define FLASK_CREATE 7 ++#define FLASK_RELABEL 8 ++#define FLASK_USER 9 ++#define FLASK_POLICYVERS 10 ++#define FLASK_GETBOOL 11 ++#define FLASK_SETBOOL 12 ++#define FLASK_COMMITBOOLS 13 ++#define FLASK_MLS 14 ++#define FLASK_DISABLE 15 ++#define FLASK_GETAVC_THRESHOLD 16 ++#define FLASK_SETAVC_THRESHOLD 17 ++#define FLASK_AVC_HASHSTATS 18 ++#define FLASK_AVC_CACHESTATS 19 ++#define FLASK_MEMBER 20 ++#define FLASK_ADD_OCONTEXT 21 ++#define FLASK_DEL_OCONTEXT 22 ++ ++#define FLASK_LAST FLASK_DEL_OCONTEXT ++ ++typedef struct flask_op { ++ uint32_t cmd; ++ uint32_t size; ++ char *buf; ++} flask_op_t; ++ ++DEFINE_XEN_GUEST_HANDLE(flask_op_t); ++ ++#endif --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-kconfig-compat +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-kconfig-compat @@ -0,0 +1,32 @@ +From: jbeulich@novell.com +Subject: add 3.2.0-compatibility configure option +Patch-mainline: obsolete + +--- head-2009-12-16.orig/drivers/xen/Kconfig 2009-12-18 12:30:34.000000000 +0100 ++++ head-2009-12-16/drivers/xen/Kconfig 2009-12-18 12:30:46.000000000 +0100 +@@ -319,6 +319,15 @@ choice + config XEN_COMPAT_030100_AND_LATER + bool "3.1.0 and later" + ++ config XEN_COMPAT_030200_AND_LATER ++ bool "3.2.0 and later" ++ ++ config XEN_COMPAT_030300_AND_LATER ++ bool "3.3.0 and later" ++ ++ config XEN_COMPAT_030400_AND_LATER ++ bool "3.4.0 and later" ++ + config XEN_COMPAT_LATEST_ONLY + bool "no compatibility code" + +@@ -327,6 +336,9 @@ endchoice + config XEN_COMPAT + hex + default 0xffffff if XEN_COMPAT_LATEST_ONLY ++ default 0x030400 if XEN_COMPAT_030400_AND_LATER ++ default 0x030300 if XEN_COMPAT_030300_AND_LATER ++ default 0x030200 if XEN_COMPAT_030200_AND_LATER + default 0x030100 if XEN_COMPAT_030100_AND_LATER + default 0x030004 if XEN_COMPAT_030004_AND_LATER + default 0x030002 if XEN_COMPAT_030002_AND_LATER --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-blktap-write-barriers +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-blktap-write-barriers @@ -0,0 +1,105 @@ +From: kwolf@suse.de +Subject: blktap: Write Barriers +Patch-mainline: obsolete + +--- head-2010-01-04.orig/drivers/xen/blktap/blktap.c 2010-01-04 12:41:47.000000000 +0100 ++++ head-2010-01-04/drivers/xen/blktap/blktap.c 2010-01-04 13:22:24.000000000 +0100 +@@ -1366,6 +1366,9 @@ static int do_block_io_op(blkif_t *blkif + dispatch_rw_block_io(blkif, &req, pending_req); + break; + ++ case BLKIF_OP_WRITE_BARRIER: ++ /* TODO Some counter? */ ++ /* Fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); +@@ -1397,7 +1400,7 @@ static void dispatch_rw_block_io(blkif_t + pending_req_t *pending_req) + { + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); +- int op, operation = (req->operation == BLKIF_OP_WRITE) ? 
WRITE : READ; ++ int op, operation; + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int nseg; + int ret, i, nr_sects = 0; +@@ -1409,6 +1412,21 @@ static void dispatch_rw_block_io(blkif_t + struct mm_struct *mm; + struct vm_area_struct *vma = NULL; + ++ switch (req->operation) { ++ case BLKIF_OP_READ: ++ operation = READ; ++ break; ++ case BLKIF_OP_WRITE: ++ operation = WRITE; ++ break; ++ case BLKIF_OP_WRITE_BARRIER: ++ operation = WRITE_BARRIER; ++ break; ++ default: ++ operation = 0; /* make gcc happy */ ++ BUG(); ++ } ++ + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV) + goto fail_response; + +@@ -1448,7 +1466,7 @@ static void dispatch_rw_block_io(blkif_t + + pending_req->blkif = blkif; + pending_req->id = req->id; +- pending_req->operation = operation; ++ pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + op = 0; +@@ -1465,7 +1483,7 @@ static void dispatch_rw_block_io(blkif_t + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + + flags = GNTMAP_host_map; +- if (operation == WRITE) ++ if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], kvaddr, flags, + req->seg[i].gref, blkif->domid); +@@ -1482,7 +1500,7 @@ static void dispatch_rw_block_io(blkif_t + + flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; +- if (operation == WRITE) ++ if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], ptep, flags, + req->seg[i].gref, blkif->domid); +--- head-2010-01-04.orig/drivers/xen/blktap/xenbus.c 2008-09-15 15:10:39.000000000 +0200 ++++ head-2010-01-04/drivers/xen/blktap/xenbus.c 2009-05-19 10:10:06.000000000 +0200 +@@ -395,7 +395,28 @@ static void connect(struct backend_info + int err; + + struct xenbus_device *dev = be->dev; ++ struct xenbus_transaction xbt; + ++ /* Write feature-barrier to xenstore */ ++again: ++ err = xenbus_transaction_start(&xbt); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "starting transaction"); ++ return; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-barrier", "1"); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "writing feature-barrier"); ++ xenbus_transaction_end(xbt, 1); ++ return; ++ } ++ ++ err = xenbus_transaction_end(xbt, 0); ++ if (err == -EAGAIN) ++ goto again; ++ ++ /* Switch state */ + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-sections +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-sections @@ -0,0 +1,127 @@ +From: jbeulich@novell.com +Subject: fix placement of some routines/data +Patch-mainline: obsolete + +--- head-2009-11-20.orig/arch/x86/kernel/time-xen.c 2009-11-23 10:43:52.000000000 +0100 ++++ head-2009-11-20/arch/x86/kernel/time-xen.c 2009-11-23 10:44:50.000000000 +0100 +@@ -665,7 +665,7 @@ int xen_update_persistent_clock(void) + /* Dynamically-mapped IRQ. 
*/ + DEFINE_PER_CPU(int, timer_irq); + +-static void setup_cpu0_timer_irq(void) ++static void __init setup_cpu0_timer_irq(void) + { + per_cpu(timer_irq, 0) = + bind_virq_to_irqhandler( +@@ -877,7 +877,7 @@ int __cpuinit local_setup_timer(unsigned + return 0; + } + +-void __cpuexit local_teardown_timer(unsigned int cpu) ++void __cpuinit local_teardown_timer(unsigned int cpu) + { + BUG_ON(cpu == 0); + unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL); +--- head-2009-11-20.orig/drivers/xen/core/cpu_hotplug.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2009-11-20/drivers/xen/core/cpu_hotplug.c 2009-11-06 11:09:19.000000000 +0100 +@@ -24,7 +24,7 @@ static int local_cpu_hotplug_request(voi + return (current->mm != NULL); + } + +-static void vcpu_hotplug(unsigned int cpu) ++static void __cpuinit vcpu_hotplug(unsigned int cpu) + { + int err; + char dir[32], state[32]; +@@ -51,7 +51,7 @@ static void vcpu_hotplug(unsigned int cp + } + } + +-static void handle_vcpu_hotplug_event( ++static void __cpuinit handle_vcpu_hotplug_event( + struct xenbus_watch *watch, const char **vec, unsigned int len) + { + unsigned int cpu; +@@ -80,12 +80,12 @@ static int smpboot_cpu_notify(struct not + return NOTIFY_OK; + } + +-static int setup_cpu_watcher(struct notifier_block *notifier, +- unsigned long event, void *data) ++static int __cpuinit setup_cpu_watcher(struct notifier_block *notifier, ++ unsigned long event, void *data) + { + unsigned int i; + +- static struct xenbus_watch cpu_watch = { ++ static struct xenbus_watch __cpuinitdata cpu_watch = { + .node = "cpu", + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; +@@ -105,7 +105,7 @@ static int __init setup_vcpu_hotplug_eve + { + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; +- static struct notifier_block xsn_cpu = { ++ static struct notifier_block __cpuinitdata xsn_cpu = { + .notifier_call = setup_cpu_watcher }; + + if (!is_running_on_xen()) +@@ -119,7 +119,7 @@ static int __init setup_vcpu_hotplug_eve + + arch_initcall(setup_vcpu_hotplug_event); + +-int smp_suspend(void) ++int __ref smp_suspend(void) + { + unsigned int cpu; + int err; +@@ -140,7 +140,7 @@ int smp_suspend(void) + return 0; + } + +-void smp_resume(void) ++void __ref smp_resume(void) + { + unsigned int cpu; + +--- head-2009-11-20.orig/drivers/xen/core/smpboot.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2009-11-20/drivers/xen/core/smpboot.c 2009-11-06 11:09:19.000000000 +0100 +@@ -173,7 +173,7 @@ static int __cpuinit xen_smp_intr_init(u + } + + #ifdef CONFIG_HOTPLUG_CPU +-static void __cpuexit xen_smp_intr_exit(unsigned int cpu) ++static void __cpuinit xen_smp_intr_exit(unsigned int cpu) + { + if (cpu != 0) + local_teardown_timer(cpu); +@@ -392,7 +392,7 @@ int __cpuexit __cpu_disable(void) + return 0; + } + +-void __cpuexit __cpu_die(unsigned int cpu) ++void __cpuinit __cpu_die(unsigned int cpu) + { + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + current->state = TASK_UNINTERRUPTIBLE; +--- head-2009-11-20.orig/drivers/xen/evtchn/evtchn.c 2009-03-18 10:39:31.000000000 +0100 ++++ head-2009-11-20/drivers/xen/evtchn/evtchn.c 2009-11-06 11:09:19.000000000 +0100 +@@ -549,14 +549,15 @@ static int __init evtchn_init(void) + + return 0; + } ++module_init(evtchn_init); + ++#ifdef CONFIG_MODULE + static void __exit evtchn_cleanup(void) + { + misc_deregister(&evtchn_miscdev); + unregister_cpu_notifier(&evtchn_cpu_nfb); + } +- +-module_init(evtchn_init); + module_exit(evtchn_cleanup); ++#endif + + MODULE_LICENSE("Dual BSD/GPL"); 
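The xen-sections patch above is almost entirely about section placement: boot- and hotplug-only setup is annotated __init/__cpuinit so its text can be discarded once it has run, and evtchn's teardown path is compiled out of built-in kernels, where a driver can never be unloaded. The following minimal sketch of that pattern is illustrative only and not part of the patch series; the demo_* names are invented, and it uses the conventional MODULE guard (defined by kbuild when a file is compiled as a module) rather than the CONFIG_MODULE symbol seen in the hunk above.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/miscdevice.h>

static struct miscdevice demo_miscdev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name  = "demo",
};

/* Runs once at boot (or module load); its section is freed afterwards. */
static int __init demo_init(void)
{
        return misc_register(&demo_miscdev);
}
module_init(demo_init);

#ifdef MODULE
/* Only reachable on module unload, so built-in kernels need not carry it. */
static void __exit demo_exit(void)
{
        misc_deregister(&demo_miscdev);
}
module_exit(demo_exit);
#endif

MODULE_LICENSE("GPL");

Keeping module_init() unconditional while guarding module_exit() this way gives the built-in case the same initcall ordering as before while shrinking the resident image, which is the design choice the evtchn hunk makes.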
--- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.21-22 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.21-22 @@ -0,0 +1,106 @@ +From: Jiri Slaby +Subject: Linux 2.6.32.22 +Patch-mainline: 2.6.32.22 + +Signed-off-by: Jiri Slaby + +Automatically created from "patches.kernel.org/patch-2.6.32.21-22" by xen-port-patches.py + +--- sle11sp1-2010-09-22.orig/arch/x86/ia32/ia32entry-xen.S 2010-01-20 10:28:42.000000000 +0100 ++++ sle11sp1-2010-09-22/arch/x86/ia32/ia32entry-xen.S 2010-09-22 11:35:36.000000000 +0200 +@@ -47,7 +47,12 @@ ia32_common: + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned +- * the value it wants us to use in the table lookup. ++ * the %rax value we should see. Instead, we just truncate that ++ * value to 32 bits again as we did on entry from user mode. ++ * If it's a new value set by user_regset during entry tracing, ++ * this matches the normal truncation of the user-mode value. ++ * If it's -1 to make us punt the syscall, then (u32)-1 is still ++ * an appropriately invalid value. + */ + .macro LOAD_ARGS32 offset, _r9=0 + .if \_r9 +@@ -57,6 +62,7 @@ ia32_common: + movl \offset+48(%rsp),%edx + movl \offset+56(%rsp),%esi + movl \offset+64(%rsp),%edi ++ movl %eax,%eax /* zero extension */ + .endm + + .macro CFI_STARTPROC32 simple +@@ -139,7 +145,7 @@ ENTRY(ia32_sysenter_target) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz sysenter_tracesys +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + jmp ia32_do_call + +@@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target) + movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ + call audit_syscall_entry + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + movl %ebx,%edi /* reload 1st syscall arg */ + movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ +@@ -180,7 +186,7 @@ sysenter_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + jmp ia32_do_call + CFI_ENDPROC +@@ -234,7 +240,7 @@ ENTRY(ia32_cstar_target) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz cstar_tracesys +- cmpl $IA32_NR_syscalls-1,%eax ++ cmpq $IA32_NR_syscalls-1,%rax + ja ia32_badsys + cstar_do_call: + IA32_ARG_FIXUP 1 +@@ -261,7 +267,7 @@ cstar_tracesys: + LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ + RESTORE_REST + xchgl %ebp,%r9d +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + jmp cstar_do_call + END(ia32_cstar_target) +@@ -318,7 +324,7 @@ ENTRY(ia32_syscall) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz ia32_tracesys +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + ia32_do_call: + IA32_ARG_FIXUP +@@ -337,7 +343,7 @@ ia32_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call + END(ia32_syscall) +--- 
sle11sp1-2010-09-22.orig/arch/x86/kernel/apic/io_apic-xen.c 2010-09-22 11:35:28.000000000 +0200 ++++ sle11sp1-2010-09-22/arch/x86/kernel/apic/io_apic-xen.c 2010-09-22 11:35:36.000000000 +0200 +@@ -3451,7 +3451,7 @@ static int set_msi_irq_affinity(unsigned + + cfg = desc->chip_data; + +- read_msi_msg_desc(desc, &msg); ++ get_cached_msi_msg_desc(desc, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-fixup-kconfig +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-fixup-kconfig @@ -0,0 +1,54 @@ +Subject: Fix xen configuration. +From: jbeulich@novell.com +Patch-mainline: obsolete + +--- head-2009-12-16.orig/arch/x86/Kconfig 2009-12-04 10:44:40.000000000 +0100 ++++ head-2009-12-16/arch/x86/Kconfig 2009-10-15 11:53:21.000000000 +0200 +@@ -158,6 +158,7 @@ config HAVE_CPUMASK_OF_CPU_MAP + + config ARCH_HIBERNATION_POSSIBLE + def_bool y ++ depends on !XEN + + config ARCH_SUSPEND_POSSIBLE + def_bool y +--- head-2009-12-16.orig/arch/x86/Kconfig.debug 2009-12-04 10:44:40.000000000 +0100 ++++ head-2009-12-16/arch/x86/Kconfig.debug 2009-10-15 11:53:21.000000000 +0200 +@@ -289,7 +289,7 @@ config OPTIMIZE_INLINING + + config KDB + bool "Built-in Kernel Debugger support" +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && !XEN + select KALLSYMS + select KALLSYMS_ALL + help +--- head-2009-12-16.orig/drivers/xen/Kconfig 2009-12-04 10:44:40.000000000 +0100 ++++ head-2009-12-16/drivers/xen/Kconfig 2009-12-18 12:08:28.000000000 +0100 +@@ -22,6 +22,7 @@ config XEN_PRIVILEGED_GUEST + + config XEN_UNPRIVILEGED_GUEST + def_bool !XEN_PRIVILEGED_GUEST ++ select PM + + config XEN_PRIVCMD + def_bool y +@@ -116,7 +117,7 @@ config XEN_NETDEV_LOOPBACK + + config XEN_PCIDEV_BACKEND + tristate "PCI-device backend driver" +- depends on PCI && XEN_BACKEND ++ depends on PCI && XEN_PRIVILEGED_GUEST && XEN_BACKEND + default XEN_BACKEND + help + The PCI device backend driver allows the kernel to export arbitrary +@@ -127,8 +128,8 @@ config XEN_PCIDEV_BACKEND + choice + prompt "PCI Backend Mode" + depends on XEN_PCIDEV_BACKEND +- default XEN_PCIDEV_BACKEND_VPCI if !IA64 + default XEN_PCIDEV_BACKEND_CONTROLLER if IA64 ++ default XEN_PCIDEV_BACKEND_VPCI + + config XEN_PCIDEV_BACKEND_VPCI + bool "Virtual PCI" --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.26-27 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.26-27 @@ -0,0 +1,32 @@ +From: Jiri Slaby +Subject: Linux 2.6.32.27 +Patch-mainline: 2.6.32.27 + +Signed-off-by: Jiri Slaby +Automatically created from "patches.kernel.org/patch-2.6.32.26-27" by xen-port-patches.py + +--- sle11sp1-2010-12-14.orig/arch/x86/kernel/quirks-xen.c 2010-02-09 17:12:56.000000000 +0100 ++++ sle11sp1-2010-12-14/arch/x86/kernel/quirks-xen.c 2010-12-13 13:36:09.000000000 +0100 +@@ -513,6 +513,7 @@ static void __init quirk_amd_nb_node(str + { + struct pci_dev *nb_ht; + unsigned int devfn; ++ u32 node; + u32 val; + + devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); +@@ -521,7 +522,13 @@ static void __init quirk_amd_nb_node(str + return; + + pci_read_config_dword(nb_ht, 0x60, &val); +- set_dev_node(&dev->dev, val & 7); ++ node = val & 7; ++ /* ++ * Some hardware may return an invalid node ID, ++ * so check it first: ++ */ ++ if (node_online(node)) ++ set_dev_node(&dev->dev, node); + pci_dev_put(nb_ht); + } + --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.29 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.29 @@ -0,0 +1,11693 @@ +From: Linux Kernel Mailing List 
+Subject: Linux: 2.6.29 +Patch-mainline: 2.6.29 + + This patch contains the differences between 2.6.28 and 2.6.29. + +Acked-by: Jeff Mahoney +Automatically created from "patches.kernel.org/patch-2.6.29" by xen-port-patches.py + +--- sle11sp1-2010-01-20.orig/arch/x86/Kconfig 2009-11-20 11:02:23.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/Kconfig 2009-11-20 11:02:51.000000000 +0100 +@@ -885,7 +885,7 @@ config X86_XEN_GENAPIC + config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n +- depends on X86_IO_APIC ++ depends on X86_IO_APIC && !XEN + ---help--- + This option enables a workaround that fixes a source of + spurious interrupts. This is recommended when threaded +--- sle11sp1-2010-01-20.orig/arch/x86/Makefile 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/Makefile 2009-11-06 10:51:55.000000000 +0100 +@@ -158,8 +158,8 @@ BOOT_TARGETS = bzlilo bzdisk fdimage fdi + PHONY += bzImage vmlinuz $(BOOT_TARGETS) + + ifdef CONFIG_XEN +-KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ +- -I$(srctree)/arch/x86/include/mach-xen $(KBUILD_CPPFLAGS) ++LINUXINCLUDE := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ ++ -I$(srctree)/arch/x86/include/mach-xen $(LINUXINCLUDE) + + ifdef CONFIG_X86_64 + LDFLAGS_vmlinux := -e startup_64 +--- sle11sp1-2010-01-20.orig/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:51:55.000000000 +0100 +@@ -363,9 +363,9 @@ ENTRY(ia32_syscall) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz ia32_tracesys +-ia32_do_syscall: + cmpl $(IA32_NR_syscalls-1),%eax +- ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ ++ ja ia32_badsys ++ia32_do_call: + IA32_ARG_FIXUP + call *ia32_sys_call_table(,%rax,8) # xxx: rip relative + ia32_sysret: +@@ -380,7 +380,9 @@ ia32_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- jmp ia32_do_syscall ++ cmpl $(IA32_NR_syscalls-1),%eax ++ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ ++ jmp ia32_do_call + END(ia32_syscall) + + ia32_badsys: +--- sle11sp1-2010-01-20.orig/arch/x86/include/asm/hw_irq.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/asm/hw_irq.h 2009-11-06 10:51:55.000000000 +0100 +@@ -124,7 +124,9 @@ extern irqreturn_t smp_call_function_sin + #endif + #endif + ++#ifndef CONFIG_XEN + extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); ++#endif + + typedef int vector_irq_t[NR_VECTORS]; + DECLARE_PER_CPU(vector_irq_t, vector_irq); +--- sle11sp1-2010-01-20.orig/arch/x86/include/asm/hypervisor.h 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/asm/hypervisor.h 2009-11-06 10:51:55.000000000 +0100 +@@ -24,3 +24,7 @@ extern void init_hypervisor(struct cpuin + extern void init_hypervisor_platform(void); + + #endif ++ ++#ifdef HAVE_XEN_PLATFORM_COMPAT_H ++#include_next ++#endif +--- sle11sp1-2010-01-20.orig/arch/x86/include/asm/kexec.h 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/asm/kexec.h 2009-11-06 10:51:55.000000000 +0100 +@@ -12,13 +12,10 @@ + /* + * The hypervisor interface implicitly requires that all entries (except + * for possibly the final one) are arranged in matching PA_/VA_ pairs. 
++# define VA_PGD 3 + */ +-# define PA_PMD_0 8 +-# define VA_PMD_0 9 +-# define PA_PMD_1 10 +-# define VA_PMD_1 11 +-# define PA_SWAP_PAGE 12 +-# define PAGES_NR 13 ++# define PA_SWAP_PAGE 4 ++# define PAGES_NR 5 + # endif /* CONFIG_XEN */ + #else + # define PA_CONTROL_PAGE 0 +--- sle11sp1-2010-01-20.orig/arch/x86/include/asm/xen/hypervisor.h 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/asm/xen/hypervisor.h 2009-11-06 10:51:55.000000000 +0100 +@@ -43,7 +43,7 @@ enum xen_domain_type { + XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ + }; + +-#ifdef CONFIG_XEN ++#ifdef CONFIG_PARAVIRT_XEN + extern enum xen_domain_type xen_domain_type; + #else + #define xen_domain_type XEN_NATIVE +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:51:55.000000000 +0100 +@@ -342,16 +342,14 @@ static inline void set_intr_gate(unsigne + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); + } + +-#define SYS_VECTOR_FREE 0 +-#define SYS_VECTOR_ALLOCED 1 +- + extern int first_system_vector; +-extern char system_vectors[]; ++/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ ++extern unsigned long used_vectors[]; + + static inline void alloc_system_vector(int vector) + { +- if (system_vectors[vector] == SYS_VECTOR_FREE) { +- system_vectors[vector] = SYS_VECTOR_ALLOCED; ++ if (!test_bit(vector, used_vectors)) { ++ set_bit(vector, used_vectors); + if (first_system_vector > vector) + first_system_vector = vector; + } else +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/fixmap_64.h 2009-11-06 10:51:55.000000000 +0100 +@@ -16,7 +16,6 @@ + #include + #include + #include +-#include + #include + + /* +@@ -52,11 +51,6 @@ enum fixed_addresses { + FIX_ISAMAP_END, + FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, + #endif +-#ifdef CONFIG_EFI +- FIX_EFI_IO_MAP_LAST_PAGE, +- FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE +- + MAX_EFI_IO_PAGES - 1, +-#endif + #ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, + #else +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/highmem.h 2009-11-06 10:51:55.000000000 +0100 +@@ -80,6 +80,7 @@ static inline void clear_user_highpage(s + clear_highpage(page); + } + #define __HAVE_ARCH_CLEAR_HIGHPAGE ++#define clear_user_highpage clear_user_highpage + #define __HAVE_ARCH_CLEAR_USER_HIGHPAGE + + void copy_highpage(struct page *to, struct page *from); +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:33:49.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:42:20.000000000 +0100 +@@ -69,6 +69,8 @@ extern start_info_t *xen_start_info; + #define is_initial_xendomain() 0 + #endif + ++#define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN)) ++ + struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu); + + /* arch/xen/kernel/evtchn.c */ +@@ -139,7 +141,7 @@ void scrub_pages(void *, unsigned int); + + DECLARE_PER_CPU(bool, xen_lazy_mmu); + +-int xen_multicall_flush(bool); ++void xen_multicall_flush(bool); + + int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t, + unsigned long flags); +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/io.h 
2009-12-11 15:20:59.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/io.h 2009-11-06 10:51:55.000000000 +0100 +@@ -4,6 +4,7 @@ + #define ARCH_HAS_IOREMAP_WC + + #include ++#include + + #define build_mmio_read(name, size, type, reg, barrier) \ + static inline type name(const volatile void __iomem *addr) \ +@@ -45,21 +46,39 @@ build_mmio_write(__writel, "l", unsigned + #define mmiowb() barrier() + + #ifdef CONFIG_X86_64 ++ + build_mmio_read(readq, "q", unsigned long, "=r", :"memory") +-build_mmio_read(__readq, "q", unsigned long, "=r", ) + build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +-build_mmio_write(__writeq, "q", unsigned long, "r", ) + +-#define readq_relaxed(a) __readq(a) +-#define __raw_readq __readq +-#define __raw_writeq writeq +- +-/* Let people know we have them */ +-#define readq readq +-#define writeq writeq ++#else ++ ++static inline __u64 readq(const volatile void __iomem *addr) ++{ ++ const volatile u32 __iomem *p = addr; ++ u32 low, high; ++ ++ low = readl(p); ++ high = readl(p + 1); ++ ++ return low + ((u64)high << 32); ++} ++ ++static inline void writeq(__u64 val, volatile void __iomem *addr) ++{ ++ writel(val, addr); ++ writel(val >> 32, addr+4); ++} ++ + #endif + +-extern int iommu_bio_merge; ++#define readq_relaxed(a) readq(a) ++ ++#define __raw_readq(a) readq(a) ++#define __raw_writeq(val, addr) writeq(val, addr) ++ ++/* Let people know that we have them */ ++#define readq readq ++#define writeq writeq + + #define native_io_delay xen_io_delay + +@@ -120,7 +139,6 @@ extern void __iomem *ioremap_wc(unsigned + * A boot-time mapping is currently limited to at most 16 pages. + */ + extern void early_ioremap_init(void); +-extern void early_ioremap_clear(void); + extern void early_ioremap_reset(void); + extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); + extern void __iomem *early_memremap(unsigned long offset, unsigned long size); +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/irq_vectors.h 2009-11-06 10:51:55.000000000 +0100 +@@ -24,6 +24,8 @@ + #define LAST_VM86_IRQ 15 + #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + ++#define NR_IRQS_LEGACY 16 ++ + /* + * The flat IRQ space is divided into two regions: + * 1. A one-to-one mapping of real physical IRQs. 
This space is only used +@@ -36,8 +38,10 @@ + + #define PIRQ_BASE 0 + #if defined(NR_CPUS) && defined(MAX_IO_APICS) +-# if NR_CPUS < MAX_IO_APICS ++# if !defined(CONFIG_SPARSE_IRQ) && NR_CPUS < MAX_IO_APICS + # define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS) ++# elif defined(CONFIG_SPARSE_IRQ) && 8 * NR_CPUS > 32 * MAX_IO_APICS ++# define NR_PIRQS (NR_VECTORS + 8 * NR_CPUS) + # else + # define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS) + # endif +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/mmu_context_32.h 2009-11-06 10:51:55.000000000 +0100 +@@ -3,10 +3,9 @@ + + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) + { +-#if 0 /* XEN: no lazy tlb */ +- unsigned cpu = smp_processor_id(); +- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) +- per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ ++ if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) ++ x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); + #endif + } + +@@ -38,9 +37,9 @@ static inline void switch_mm(struct mm_s + + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +-#if 0 /* XEN: no lazy tlb */ +- per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; +- per_cpu(cpu_tlbstate, cpu).active_mm = next; ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ ++ x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); ++ x86_write_percpu(cpu_tlbstate.active_mm, next); + #endif + cpu_set(cpu, next->cpu_vm_mask); + +@@ -62,10 +61,10 @@ static inline void switch_mm(struct mm_s + + BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); + } +-#if 0 /* XEN: no lazy tlb */ ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ + else { +- per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; +- BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); ++ x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); ++ BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:51:55.000000000 +0100 +@@ -22,6 +22,8 @@ struct pci_sysdata { + }; + + extern int pci_routeirq; ++extern int noioapicquirk; ++extern int noioapicreroute; + + /* scan a bus after allocating a pci_sysdata for it */ + extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, +@@ -88,6 +90,8 @@ static inline void pci_dma_burst_advice( + static inline void early_quirks(void) { } + #endif + ++extern void pci_iommu_alloc(void); ++ + #endif /* __KERNEL__ */ + + #ifdef CONFIG_X86_32 +@@ -104,9 +108,9 @@ static inline void early_quirks(void) { + + #ifdef CONFIG_NUMA + /* Returns the node based on pci bus */ +-static inline int __pcibus_to_node(struct pci_bus *bus) ++static inline int __pcibus_to_node(const struct pci_bus *bus) + { +- struct pci_sysdata *sd = bus->sysdata; ++ const struct pci_sysdata *sd = bus->sysdata; + + return sd->node; + } +@@ -115,6 +119,12 @@ static inline cpumask_t __pcibus_to_cpum + { + return node_to_cpumask(__pcibus_to_node(bus)); + } ++ ++static inline const struct cpumask * ++cpumask_of_pcibus(const struct pci_bus *bus) ++{ ++ return cpumask_of_node(__pcibus_to_node(bus)); ++} + #endif + + #endif /* 
_ASM_X86_PCI_H */ +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-06 10:51:55.000000000 +0100 +@@ -22,12 +22,10 @@ + #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + + /* If _PAGE_BIT_PRESENT is clear, we use these: */ +- +-/* set: nonlinear file mapping, saved PTE; unset:swap */ +-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY +- +-/* if the user mapped it with PROT_NONE; pte_present gives true */ ++/* - if the user mapped it with PROT_NONE; pte_present gives true */ + #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL ++/* - set: nonlinear file mapping, saved PTE; unset:swap */ ++#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY + + #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) + #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +@@ -176,8 +174,19 @@ extern unsigned int __kernel_page_user; + #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ + #endif + ++/* ++ * Macro to mark a page protection value as UC- ++ */ ++#define pgprot_noncached(prot) \ ++ ((boot_cpu_data.x86 > 3) \ ++ ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \ ++ : (prot)) ++ + #ifndef __ASSEMBLY__ + ++#define pgprot_writecombine pgprot_writecombine ++extern pgprot_t pgprot_writecombine(pgprot_t prot); ++ + /* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. +@@ -309,41 +318,43 @@ static inline pte_t pte_mkspecial(pte_t + + extern pteval_t __supported_pte_mask; + +-static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) ++/* ++ * Mask out unsupported bits in a present pgprot. Non-present pgprots ++ * can use those bits for other purposes, so leave them be. 
++ */ ++static inline pgprotval_t massage_pgprot(pgprot_t pgprot) + { +- pgprotval_t prot = pgprot_val(pgprot); ++ pgprotval_t protval = pgprot_val(pgprot); ++ ++ if (protval & _PAGE_PRESENT) ++ protval &= __supported_pte_mask; + +- if (prot & _PAGE_PRESENT) +- prot &= __supported_pte_mask; +- return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | prot); ++ return protval; + } + +-static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) ++static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) + { +- pgprotval_t prot = pgprot_val(pgprot); ++ return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | ++ massage_pgprot(pgprot)); ++} + +- if (prot & _PAGE_PRESENT) +- prot &= __supported_pte_mask; +- return __pte_ma(((phys_addr_t)page_nr << PAGE_SHIFT) | prot); ++static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) ++{ ++ return __pte_ma(((phys_addr_t)page_nr << PAGE_SHIFT) | ++ massage_pgprot(pgprot)); + } + + static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) + { +- pgprotval_t prot = pgprot_val(pgprot); +- +- if (prot & _PAGE_PRESENT) +- prot &= __supported_pte_mask; +- return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | prot); ++ return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | ++ massage_pgprot(pgprot)); + } + + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) + { +- pgprotval_t prot = pgprot_val(newprot); + pteval_t val = pte_val(pte) & _PAGE_CHG_MASK; + +- if (prot & _PAGE_PRESENT) +- prot &= __supported_pte_mask; +- val |= prot & ~_PAGE_CHG_MASK; ++ val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; + + return __pte(val); + } +@@ -359,11 +370,33 @@ static inline pgprot_t pgprot_modify(pgp + + #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) + +-#define canon_pgprot(p) __pgprot(pgprot_val(p) & _PAGE_PRESENT \ +- ? 
pgprot_val(p) & __supported_pte_mask \ +- : pgprot_val(p)) ++#define canon_pgprot(p) __pgprot(massage_pgprot(p)) ++ ++static inline int is_new_memtype_allowed(unsigned long flags, ++ unsigned long new_flags) ++{ ++ /* ++ * Certain new memtypes are not allowed with certain ++ * requested memtype: ++ * - request is uncached, return cannot be write-back ++ * - request is write-combine, return cannot be write-back ++ */ ++ if ((flags == _PAGE_CACHE_UC_MINUS && ++ new_flags == _PAGE_CACHE_WB) || ++ (flags == _PAGE_CACHE_WC && ++ new_flags == _PAGE_CACHE_WB)) { ++ return 0; ++ } ++ ++ return 1; ++} + + #ifndef __ASSEMBLY__ ++#ifndef CONFIG_XEN ++/* Indicate that x86 has its own track and untrack pfn vma functions */ ++#define __HAVE_PFNMAP_TRACKING ++#endif ++ + #define __HAVE_PHYS_MEM_ACCESS_PROT + struct file; + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/pgtable-3level.h 2009-11-06 10:51:55.000000000 +0100 +@@ -151,6 +151,7 @@ static inline int pte_none(pte_t pte) + #define PTE_FILE_MAX_BITS 32 + + /* Encode and de-code a swap entry */ ++#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) + #define __swp_type(x) (((x).val) & 0x1f) + #define __swp_offset(x) ((x).val >> 5) + #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-11-06 10:51:55.000000000 +0100 +@@ -107,15 +107,6 @@ extern unsigned long pg0[]; + #endif + + /* +- * Macro to mark a page protection value as "uncacheable". +- * On processors which do not support it, this is a no-op. +- */ +-#define pgprot_noncached(prot) \ +- ((boot_cpu_data.x86 > 3) \ +- ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \ +- : (prot)) +- +-/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 10:51:55.000000000 +0100 +@@ -149,8 +149,8 @@ static inline void xen_pgd_clear(pgd_t * + #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +- +-#define MAXMEM _AC(0x000004ffffffffff, UL) ++#define MAX_PHYSMEM_BITS 43 ++#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #define VMALLOC_START _AC(0xffffc20000000000, UL) + #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) + #define VMEMMAP_START _AC(0xffffe20000000000, UL) +@@ -183,12 +183,6 @@ static inline int pmd_bad(pmd_t pmd) + #define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT) + + /* +- * Macro to mark a page protection value as "uncacheable". +- */ +-#define pgprot_noncached(prot) \ +- (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT)) +- +-/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. 
+ */ +@@ -270,6 +264,8 @@ static inline int pud_large(pud_t pte) + #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) + #endif + ++#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) ++ + #define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ + & ((1U << SWP_TYPE_BITS) - 1)) + #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:51:55.000000000 +0100 +@@ -111,6 +111,7 @@ struct cpuinfo_x86 { + /* Index into per_cpu list: */ + u16 cpu_index; + #endif ++ unsigned int x86_hyper_vendor; + } __attribute__((__aligned__(SMP_CACHE_BYTES))); + + #define X86_VENDOR_INTEL 0 +@@ -124,6 +125,10 @@ struct cpuinfo_x86 { + + #define X86_VENDOR_UNKNOWN 0xff + ++#define X86_HYPER_VENDOR_NONE 0 ++#define X86_HYPER_VENDOR_VMWARE 1 ++#define X86_HYPER_VENDOR_XEN 'X' ++ + /* + * capabilities of CPUs + */ +@@ -354,7 +359,7 @@ struct i387_soft_struct { + u8 no_update; + u8 rm; + u8 alimit; +- struct info *info; ++ struct math_emu_info *info; + u32 entry_eip; + }; + +@@ -696,6 +701,19 @@ extern void switch_to_new_gdt(void); + extern void cpu_init(void); + extern void init_gdt(int cpu); + ++static inline unsigned long get_debugctlmsr(void) ++{ ++ unsigned long debugctlmsr = 0; ++ ++#ifndef CONFIG_X86_DEBUGCTLMSR ++ if (boot_cpu_data.x86 < 6) ++ return 0; ++#endif ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); ++ ++ return debugctlmsr; ++} ++ + static inline void update_debugctlmsr(unsigned long debugctlmsr) + { + #ifndef CONFIG_X86_DEBUGCTLMSR +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:14:43.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:14:58.000000000 +0100 +@@ -18,9 +18,26 @@ + #include + #include + ++#ifdef CONFIG_X86_64 ++ ++#define cpu_callin_mask cpu_possible_mask ++#define cpu_callout_mask cpu_possible_mask ++extern cpumask_var_t cpu_initialized_mask; ++extern cpumask_var_t cpu_sibling_setup_mask; ++ ++#else /* CONFIG_X86_32 */ ++ ++#define cpu_callin_map cpu_possible_map + #define cpu_callout_map cpu_possible_map + extern cpumask_t cpu_initialized; +-#define cpu_callin_map cpu_possible_map ++extern cpumask_t cpu_sibling_setup_map; ++ ++#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) ++#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) ++#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) ++#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) ++ ++#endif /* CONFIG_X86_32 */ + + extern void (*mtrr_hook)(void); + extern void zap_low_mappings(void); +@@ -29,7 +46,6 @@ extern int __cpuinit get_local_pda(int c + + extern int smp_num_siblings; + extern unsigned int num_processors; +-extern cpumask_t cpu_initialized; + + DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); + DECLARE_PER_CPU(cpumask_t, cpu_core_map); +@@ -38,6 +54,16 @@ DECLARE_PER_CPU(u16, cpu_llc_id); + DECLARE_PER_CPU(int, cpu_number); + #endif + ++static inline struct cpumask *cpu_sibling_mask(int cpu) ++{ ++ return &per_cpu(cpu_sibling_map, cpu); ++} ++ ++static inline struct cpumask *cpu_core_mask(int cpu) ++{ ++ return &per_cpu(cpu_core_map, cpu); ++} ++ + DECLARE_PER_CPU(u16, x86_cpu_to_apicid); + DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); + +@@ -64,7 +90,7 @@ struct smp_ops { + void (*cpu_die)(unsigned int cpu); + void (*play_dead)(void); + +- void (*send_call_func_ipi)(cpumask_t mask); ++ void 
(*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_single_ipi)(int cpu); + }; + +@@ -125,7 +151,7 @@ static inline void arch_send_call_functi + + static inline void arch_send_call_function_ipi(cpumask_t mask) + { +- smp_ops.send_call_func_ipi(mask); ++ smp_ops.send_call_func_ipi(&mask); + } + + void cpu_disable_common(void); +@@ -144,13 +170,13 @@ extern int __cpu_disable(void); + extern void __cpu_die(unsigned int cpu); + void xen_smp_send_stop(void); + void xen_smp_send_reschedule(int cpu); +-void xen_send_call_func_ipi(cpumask_t mask); ++void xen_send_call_func_ipi(const struct cpumask *mask); + void xen_send_call_func_single_ipi(int cpu); + + #define smp_send_stop xen_smp_send_stop + #define smp_send_reschedule xen_smp_send_reschedule + #define arch_send_call_function_single_ipi xen_send_call_func_single_ipi +-#define arch_send_call_function_ipi xen_send_call_func_ipi ++#define arch_send_call_function_ipi(m) xen_send_call_func_ipi(&(m)) + + void play_dead(void); + +@@ -164,7 +190,7 @@ void smp_store_cpu_info(int id); + /* We don't mark CPUs online until __cpu_up(), so we need another measure */ + static inline int num_booting_cpus(void) + { +- return cpus_weight(cpu_callout_map); ++ return cpumask_weight(cpu_callout_mask); + } + #else + static inline void prefill_possible_map(void) +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/spinlock.h 2009-11-17 15:30:35.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/spinlock.h 2009-11-17 15:30:42.000000000 +0100 +@@ -332,6 +332,7 @@ static inline int __raw_spin_is_contende + { + return __raw_spin(is_contended)(lock); + } ++#define __raw_spin_is_contended __raw_spin_is_contended + + static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) + { +--- sle11sp1-2010-01-20.orig/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:51:55.000000000 +0100 +@@ -18,12 +18,12 @@ + # define AT_VECTOR_SIZE_ARCH 1 + #endif + +-#ifdef CONFIG_X86_32 +- + struct task_struct; /* one of the stranger aspects of C forward declarations */ + struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); + ++#ifdef CONFIG_X86_32 ++ + /* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. +@@ -298,6 +298,8 @@ extern void free_init_pages(char *what, + + void xen_idle(void); + ++void stop_this_cpu(void *dummy); ++ + /* + * Force strict CPU ordering. 
+ * And yes, this is required on UP too when we're talking +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/acpi/sleep-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -163,6 +163,8 @@ static int __init acpi_sleep_setup(char + #ifdef CONFIG_HIBERNATION + if (strncmp(str, "s4_nohwsig", 10) == 0) + acpi_no_s4_hw_signature(); ++ if (strncmp(str, "s4_nonvs", 8) == 0) ++ acpi_s4_no_nvs(); + #endif + if (strncmp(str, "old_ordering", 12) == 0) + acpi_old_suspend_ordering(); +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/apic/apic-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/apic/apic-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -32,7 +32,7 @@ static int __init apic_set_verbosity(cha + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { +- printk(KERN_WARNING "APIC Verbosity level %s not recognised" ++ pr_warning("APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/cpu/Makefile 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/cpu/Makefile 2009-11-06 10:51:55.000000000 +0100 +@@ -35,6 +35,8 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ + + obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + ++disabled-obj-$(CONFIG_XEN) := hypervisor.o vmware.o ++ + quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ + +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -38,17 +38,41 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_XEN + #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_LOCAL_APIC) + #define phys_pkg_id(a,b) a + #endif +-#include + #include + #endif + + #include "cpu.h" + ++#ifdef CONFIG_X86_64 ++ ++/* all of these masks are initialized in setup_cpu_local_masks() */ ++#ifndef CONFIG_XEN ++cpumask_var_t cpu_callin_mask; ++cpumask_var_t cpu_callout_mask; ++#endif ++cpumask_var_t cpu_initialized_mask; ++ ++/* representing cpus for which sibling maps can be computed */ ++cpumask_var_t cpu_sibling_setup_mask; ++ ++#else /* CONFIG_X86_32 */ ++ ++#ifndef CONFIG_XEN ++cpumask_t cpu_callin_map; ++cpumask_t cpu_callout_map; ++#endif ++cpumask_t cpu_initialized; ++cpumask_t cpu_sibling_setup_map; ++ ++#endif /* CONFIG_X86_32 */ ++ ++ + static struct cpu_dev *this_cpu __cpuinitdata; + + #ifdef CONFIG_X86_64 +@@ -377,7 +401,7 @@ void __cpuinit detect_ht(struct cpuinfo_ + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + +- if (smp_num_siblings > NR_CPUS) { ++ if (smp_num_siblings > nr_cpu_ids) { + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; +@@ -728,6 +752,7 @@ static void __cpuinit identify_cpu(struc + detect_ht(c); + #endif + ++ init_hypervisor(c); + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are +@@ -879,8 +904,6 @@ static __init int setup_disablecpuid(cha + } + __setup("clearcpuid=", setup_disablecpuid); + +-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +- + #ifdef CONFIG_X86_64 + struct x8664_pda **_cpu_pda __read_mostly; + EXPORT_SYMBOL(_cpu_pda); +@@ -889,7 +912,7 @@ EXPORT_SYMBOL(_cpu_pda); + struct desc_ptr idt_descr = { 256 * 16 - 
1, (unsigned long) idt_table }; + #endif + +-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; ++static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + + static void __ref switch_pt(int cpu) + { +@@ -949,8 +972,8 @@ void __cpuinit pda_init(int cpu) + } + + #ifndef CONFIG_X86_NO_TSS +-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + +- DEBUG_STKSZ] __page_aligned_bss; ++static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + ++ DEBUG_STKSZ] __page_aligned_bss; + #endif + + extern asmlinkage void ignore_sysret(void); +@@ -1038,7 +1061,7 @@ void __cpuinit cpu_init(void) + + me = current; + +- if (cpu_test_and_set(cpu, cpu_initialized)) ++ if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); +@@ -1163,7 +1186,7 @@ void __cpuinit cpu_init(void) + #endif + struct thread_struct *thread = &curr->thread; + +- if (cpu_test_and_set(cpu, cpu_initialized)) { ++ if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-11-06 10:51:32.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -33,7 +33,7 @@ struct mtrr_ops generic_mtrr_ops = { + + struct mtrr_ops *mtrr_if = &generic_mtrr_ops; + unsigned int num_var_ranges; +-unsigned int mtrr_usage_table[MAX_VAR_RANGES]; ++unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; + + static u64 tom2; + +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/e820-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/e820-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -719,6 +719,27 @@ void __init e820_mark_nosave_regions(uns + } + } + #endif ++ ++#ifdef CONFIG_HIBERNATION ++/** ++ * Mark ACPI NVS memory region, so that we can save/restore it during ++ * hibernation and the subsequent resume. ++ */ ++static int __init e820_mark_nvs_memory(void) ++{ ++ int i; ++ ++ for (i = 0; i < e820.nr_map; i++) { ++ struct e820entry *ei = &e820.map[i]; ++ ++ if (ei->type == E820_NVS) ++ hibernate_nvs_register(ei->addr, ei->size); ++ } ++ ++ return 0; ++} ++core_initcall(e820_mark_nvs_memory); ++#endif + #endif + + /* +@@ -734,22 +755,6 @@ struct early_res { + static struct early_res early_res[MAX_EARLY_RES] __initdata = { + #ifndef CONFIG_XEN + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ +-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) +- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +-#endif +-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) +- /* +- * But first pinch a few for the stack/trampoline stuff +- * FIXME: Don't need the extra page at 4K, but need to fix +- * trampoline before removing it. (see the GDT stuff) +- */ +- { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, +- /* +- * Has to be in very low memory so we can execute +- * real-mode AP code. +- */ +- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, +-#endif + #endif + {} + }; +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -904,49 +904,6 @@ static struct console early_dbgp_console + }; + #endif + +-/* Console interface to a host file on AMD's SimNow! 
*/ +- +-static int simnow_fd; +- +-enum { +- MAGIC1 = 0xBACCD00A, +- MAGIC2 = 0xCA110000, +- XOPEN = 5, +- XWRITE = 4, +-}; +- +-static noinline long simnow(long cmd, long a, long b, long c) +-{ +- long ret; +- +- asm volatile("cpuid" : +- "=a" (ret) : +- "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); +- return ret; +-} +- +-static void __init simnow_init(char *str) +-{ +- char *fn = "klog"; +- +- if (*str == '=') +- fn = ++str; +- /* error ignored */ +- simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); +-} +- +-static void simnow_write(struct console *con, const char *s, unsigned n) +-{ +- simnow(XWRITE, simnow_fd, (unsigned long)s, n); +-} +- +-static struct console simnow_console = { +- .name = "simnow", +- .write = simnow_write, +- .flags = CON_PRINTBUFFER, +- .index = -1, +-}; +- + /* Direct interface for emergencies */ + static struct console *early_console = &early_vga_console; + static int __initdata early_console_initialized; +@@ -958,7 +915,7 @@ asmlinkage void early_printk(const char + va_list ap; + + va_start(ap, fmt); +- n = vscnprintf(buf, 512, fmt, ap); ++ n = vscnprintf(buf, sizeof(buf), fmt, ap); + early_console->write(early_console, buf, n); + va_end(ap); + } +@@ -991,10 +948,6 @@ static int __init setup_early_printk(cha + current_ypos = boot_params.screen_info.orig_y; + #endif + early_console = &early_vga_console; +- } else if (!strncmp(buf, "simnow", 6)) { +- simnow_init(buf + 6); +- early_console = &simnow_console; +- keep_early = 1; + #ifdef CONFIG_EARLY_PRINTK_DBGP + } else if (!strncmp(buf, "dbgp", 4)) { + if (early_dbgp_init(buf+4) < 0) +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/entry_32-xen.S 2009-11-06 10:51:55.000000000 +0100 +@@ -690,28 +690,37 @@ END(syscall_badsys) + 27:; + + /* +- * Build the entry stubs and pointer table with +- * some assembler magic. ++ * Build the entry stubs and pointer table with some assembler magic. ++ * We pack 7 stubs into a single 32-byte chunk, which will fit in a ++ * single cache line on all modern x86 implementations. 
+ */ +-.section .rodata,"a" ++.section .init.rodata,"a" + ENTRY(interrupt) + .text +- ++ .p2align 5 ++ .p2align CONFIG_X86_L1_CACHE_SHIFT + ENTRY(irq_entries_start) + RING0_INT_FRAME +-vector=0 +-.rept NR_VECTORS +- ALIGN +- .if vector ++vector=FIRST_EXTERNAL_VECTOR ++.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 ++ .balign 32 ++ .rept 7 ++ .if vector < NR_VECTORS ++ .if vector <> FIRST_EXTERNAL_VECTOR + CFI_ADJUST_CFA_OFFSET -4 +- .endif +-1: pushl $~(vector) ++ .endif ++1: pushl $(~vector+0x80) /* Note: always in signed byte range */ + CFI_ADJUST_CFA_OFFSET 4 +- jmp common_interrupt +- .previous ++ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 ++ jmp 2f ++ .endif ++ .previous + .long 1b +- .text ++ .text + vector=vector+1 ++ .endif ++ .endr ++2: jmp common_interrupt + .endr + END(irq_entries_start) + +@@ -723,8 +732,9 @@ END(interrupt) + * the CPU automatically disables interrupts when executing an IRQ vector, + * so IRQ-flags tracing has to follow that: + */ +- ALIGN ++ .p2align CONFIG_X86_L1_CACHE_SHIFT + common_interrupt: ++ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ + SAVE_ALL + TRACE_IRQS_OFF + movl %esp,%eax +@@ -751,68 +761,7 @@ ENDPROC(name) + + #else + #define UNWIND_ESPFIX_STACK +-#endif +- +-KPROBE_ENTRY(page_fault) +- RING0_EC_FRAME +- pushl $do_page_fault +- CFI_ADJUST_CFA_OFFSET 4 +- ALIGN +-error_code: +- /* the function address is in %fs's slot on the stack */ +- pushl %es +- CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET es, 0*/ +- pushl %ds +- CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET ds, 0*/ +- pushl %eax +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET eax, 0 +- pushl %ebp +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET ebp, 0 +- pushl %edi +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET edi, 0 +- pushl %esi +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET esi, 0 +- pushl %edx +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET edx, 0 +- pushl %ecx +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET ecx, 0 +- pushl %ebx +- CFI_ADJUST_CFA_OFFSET 4 +- CFI_REL_OFFSET ebx, 0 +- cld +- pushl %fs +- CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET fs, 0*/ +- movl $(__KERNEL_PERCPU), %ecx +- movl %ecx, %fs +- UNWIND_ESPFIX_STACK +- popl %ecx +- CFI_ADJUST_CFA_OFFSET -4 +- /*CFI_REGISTER es, ecx*/ +- movl PT_FS(%esp), %edi # get the function address +- movl PT_ORIG_EAX(%esp), %edx # get the error code +- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart +- mov %ecx, PT_FS(%esp) +- /*CFI_REL_OFFSET fs, ES*/ +- movl $(__USER_DS), %ecx +- movl %ecx, %ds +- movl %ecx, %es +- TRACE_IRQS_OFF +- movl %esp,%eax # pt_regs pointer +- call *%edi +- jmp ret_from_exception +- CFI_ENDPROC +-KPROBE_END(page_fault) + +-#ifdef CONFIG_XEN + # A note on the "critical region" in our callback handler. + # We want to avoid stacking callback handlers due to events occurring + # during handling of the last event. To do this, we keep events disabled +@@ -981,158 +930,6 @@ ENTRY(device_not_available) + CFI_ENDPROC + END(device_not_available) + +-#ifndef CONFIG_XEN +-/* +- * Debug traps and NMI can happen at the one SYSENTER instruction +- * that sets up the real kernel stack. Check here, since we can't +- * allow the wrong stack to be used. +- * +- * "SYSENTER_stack_sp0+12" is because the NMI/debug handler will have +- * already pushed 3 words if it hits on the sysenter instruction: +- * eflags, cs and eip. +- * +- * We just load the right stack, and push the three (known) values +- * by hand onto the new stack - while updating the return eip past +- * the instruction that would have done it for sysenter. 
+- */ +-#define FIX_STACK(offset, ok, label) \ +- cmpw $__KERNEL_CS,4(%esp); \ +- jne ok; \ +-label: \ +- movl SYSENTER_stack_sp0+offset(%esp),%esp; \ +- CFI_DEF_CFA esp, 0; \ +- CFI_UNDEFINED eip; \ +- pushfl; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl $__KERNEL_CS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl $sysenter_past_esp; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- CFI_REL_OFFSET eip, 0 +-#endif /* CONFIG_XEN */ +- +-KPROBE_ENTRY(debug) +- RING0_INT_FRAME +-#ifndef CONFIG_XEN +- cmpl $ia32_sysenter_target,(%esp) +- jne debug_stack_correct +- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) +-debug_stack_correct: +-#endif /* !CONFIG_XEN */ +- pushl $-1 # mark this as an int +- CFI_ADJUST_CFA_OFFSET 4 +- SAVE_ALL +- TRACE_IRQS_OFF +- xorl %edx,%edx # error code 0 +- movl %esp,%eax # pt_regs pointer +- call do_debug +- jmp ret_from_exception +- CFI_ENDPROC +-KPROBE_END(debug) +- +-#ifndef CONFIG_XEN +-/* +- * NMI is doubly nasty. It can happen _while_ we're handling +- * a debug fault, and the debug fault hasn't yet been able to +- * clear up the stack. So we first check whether we got an +- * NMI on the sysenter entry path, but after that we need to +- * check whether we got an NMI on the debug path where the debug +- * fault happened on the sysenter path. +- */ +-KPROBE_ENTRY(nmi) +- RING0_INT_FRAME +- pushl %eax +- CFI_ADJUST_CFA_OFFSET 4 +- movl %ss, %eax +- cmpw $__ESPFIX_SS, %ax +- popl %eax +- CFI_ADJUST_CFA_OFFSET -4 +- je nmi_espfix_stack +- cmpl $ia32_sysenter_target,(%esp) +- je nmi_stack_fixup +- pushl %eax +- CFI_ADJUST_CFA_OFFSET 4 +- movl %esp,%eax +- /* Do not access memory above the end of our stack page, +- * it might not exist. +- */ +- andl $(THREAD_SIZE-1),%eax +- cmpl $(THREAD_SIZE-20),%eax +- popl %eax +- CFI_ADJUST_CFA_OFFSET -4 +- jae nmi_stack_correct +- cmpl $ia32_sysenter_target,12(%esp) +- je nmi_debug_stack_check +-nmi_stack_correct: +- /* We have a RING0_INT_FRAME here */ +- pushl %eax +- CFI_ADJUST_CFA_OFFSET 4 +- SAVE_ALL +- TRACE_IRQS_OFF +- xorl %edx,%edx # zero error code +- movl %esp,%eax # pt_regs pointer +- call do_nmi +- jmp restore_nocheck_notrace +- CFI_ENDPROC +- +-nmi_stack_fixup: +- RING0_INT_FRAME +- FIX_STACK(12,nmi_stack_correct, 1) +- jmp nmi_stack_correct +- +-nmi_debug_stack_check: +- /* We have a RING0_INT_FRAME here */ +- cmpw $__KERNEL_CS,16(%esp) +- jne nmi_stack_correct +- cmpl $debug,(%esp) +- jb nmi_stack_correct +- cmpl $debug_esp_fix_insn,(%esp) +- ja nmi_stack_correct +- FIX_STACK(24,nmi_stack_correct, 1) +- jmp nmi_stack_correct +- +-nmi_espfix_stack: +- /* We have a RING0_INT_FRAME here. 
+- * +- * create the pointer to lss back +- */ +- pushl %ss +- CFI_ADJUST_CFA_OFFSET 4 +- pushl %esp +- CFI_ADJUST_CFA_OFFSET 4 +- addw $4, (%esp) +- /* copy the iret frame of 12 bytes */ +- .rept 3 +- pushl 16(%esp) +- CFI_ADJUST_CFA_OFFSET 4 +- .endr +- pushl %eax +- CFI_ADJUST_CFA_OFFSET 4 +- SAVE_ALL +- TRACE_IRQS_OFF +- FIXUP_ESPFIX_STACK # %eax == %esp +- xorl %edx,%edx # zero error code +- call do_nmi +- RESTORE_REGS +- lss 12+4(%esp), %esp # back to espfix stack +- CFI_ADJUST_CFA_OFFSET -24 +- jmp irq_return +- CFI_ENDPROC +-#else +-KPROBE_ENTRY(nmi) +- RING0_INT_FRAME +- pushl %eax +- CFI_ADJUST_CFA_OFFSET 4 +- SAVE_ALL +- xorl %edx,%edx # zero error code +- movl %esp,%eax # pt_regs pointer +- call do_nmi +- orl $NMI_MASK, PT_EFLAGS(%esp) +- jmp restore_all +- CFI_ENDPROC +-#endif +-KPROBE_END(nmi) +- + #ifdef CONFIG_PARAVIRT + ENTRY(native_iret) + iret +@@ -1148,19 +945,6 @@ ENTRY(native_irq_enable_sysexit) + END(native_irq_enable_sysexit) + #endif + +-KPROBE_ENTRY(int3) +- RING0_INT_FRAME +- pushl $-1 # mark this as an int +- CFI_ADJUST_CFA_OFFSET 4 +- SAVE_ALL +- TRACE_IRQS_OFF +- xorl %edx,%edx # zero error code +- movl %esp,%eax # pt_regs pointer +- call do_int3 +- jmp ret_from_exception +- CFI_ENDPROC +-KPROBE_END(int3) +- + ENTRY(overflow) + RING0_INT_FRAME + pushl $0 +@@ -1225,14 +1009,6 @@ ENTRY(stack_segment) + CFI_ENDPROC + END(stack_segment) + +-KPROBE_ENTRY(general_protection) +- RING0_EC_FRAME +- pushl $do_general_protection +- CFI_ADJUST_CFA_OFFSET 4 +- jmp error_code +- CFI_ENDPROC +-KPROBE_END(general_protection) +- + ENTRY(alignment_check) + RING0_EC_FRAME + pushl $do_alignment_check +@@ -1292,6 +1068,7 @@ ENTRY(kernel_thread_helper) + push %eax + CFI_ADJUST_CFA_OFFSET 4 + call do_exit ++ ud2 # padding for call trace + CFI_ENDPROC + ENDPROC(kernel_thread_helper) + +@@ -1303,6 +1080,9 @@ ENTRY(mcount) + END(mcount) + + ENTRY(ftrace_caller) ++ cmpl $0, function_trace_stop ++ jne ftrace_stub ++ + pushl %eax + pushl %ecx + pushl %edx +@@ -1317,6 +1097,11 @@ ftrace_call: + popl %edx + popl %ecx + popl %eax ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++.globl ftrace_graph_call ++ftrace_graph_call: ++ jmp ftrace_stub ++#endif + + .globl ftrace_stub + ftrace_stub: +@@ -1326,8 +1111,18 @@ END(ftrace_caller) + #else /* ! 
CONFIG_DYNAMIC_FTRACE */ + + ENTRY(mcount) ++ cmpl $0, function_trace_stop ++ jne ftrace_stub ++ + cmpl $ftrace_stub, ftrace_trace_function + jnz trace ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ cmpl $ftrace_stub, ftrace_graph_return ++ jnz ftrace_graph_caller ++ ++ cmpl $ftrace_graph_entry_stub, ftrace_graph_entry ++ jnz ftrace_graph_caller ++#endif + .globl ftrace_stub + ftrace_stub: + ret +@@ -1346,12 +1141,43 @@ trace: + popl %edx + popl %ecx + popl %eax +- + jmp ftrace_stub + END(mcount) + #endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* CONFIG_FUNCTION_TRACER */ + ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ENTRY(ftrace_graph_caller) ++ cmpl $0, function_trace_stop ++ jne ftrace_stub ++ ++ pushl %eax ++ pushl %ecx ++ pushl %edx ++ movl 0xc(%esp), %edx ++ lea 0x4(%ebp), %eax ++ subl $MCOUNT_INSN_SIZE, %edx ++ call prepare_ftrace_return ++ popl %edx ++ popl %ecx ++ popl %eax ++ ret ++END(ftrace_graph_caller) ++ ++.globl return_to_handler ++return_to_handler: ++ pushl $0 ++ pushl %eax ++ pushl %ecx ++ pushl %edx ++ call ftrace_return_to_handler ++ movl %eax, 0xc(%esp) ++ popl %edx ++ popl %ecx ++ popl %eax ++ ret ++#endif ++ + #include + + # pv syscall call handler stub +@@ -1485,3 +1311,238 @@ mask=0 + #undef sys_fork + #undef sys_clone + #undef sys_vfork ++ ++/* ++ * Some functions should be protected against kprobes ++ */ ++ .pushsection .kprobes.text, "ax" ++ ++ENTRY(page_fault) ++ RING0_EC_FRAME ++ pushl $do_page_fault ++ CFI_ADJUST_CFA_OFFSET 4 ++ ALIGN ++error_code: ++ /* the function address is in %fs's slot on the stack */ ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0*/ ++ pushl %ds ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ds, 0*/ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eax, 0 ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebp, 0 ++ pushl %edi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edi, 0 ++ pushl %esi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esi, 0 ++ pushl %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edx, 0 ++ pushl %ecx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ecx, 0 ++ pushl %ebx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebx, 0 ++ cld ++ pushl %fs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET fs, 0*/ ++ movl $(__KERNEL_PERCPU), %ecx ++ movl %ecx, %fs ++ UNWIND_ESPFIX_STACK ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_REGISTER es, ecx*/ ++ movl PT_FS(%esp), %edi # get the function address ++ movl PT_ORIG_EAX(%esp), %edx # get the error code ++ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart ++ mov %ecx, PT_FS(%esp) ++ /*CFI_REL_OFFSET fs, ES*/ ++ movl $(__USER_DS), %ecx ++ movl %ecx, %ds ++ movl %ecx, %es ++ TRACE_IRQS_OFF ++ movl %esp,%eax # pt_regs pointer ++ call *%edi ++ jmp ret_from_exception ++ CFI_ENDPROC ++END(page_fault) ++ ++#ifndef CONFIG_XEN ++/* ++ * Debug traps and NMI can happen at the one SYSENTER instruction ++ * that sets up the real kernel stack. Check here, since we can't ++ * allow the wrong stack to be used. ++ * ++ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have ++ * already pushed 3 words if it hits on the sysenter instruction: ++ * eflags, cs and eip. ++ * ++ * We just load the right stack, and push the three (known) values ++ * by hand onto the new stack - while updating the return eip past ++ * the instruction that would have done it for sysenter. 
++ */ ++#define FIX_STACK(offset, ok, label) \ ++ cmpw $__KERNEL_CS,4(%esp); \ ++ jne ok; \ ++label: \ ++ movl TSS_sysenter_sp0+offset(%esp),%esp; \ ++ CFI_DEF_CFA esp, 0; \ ++ CFI_UNDEFINED eip; \ ++ pushfl; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ pushl $__KERNEL_CS; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ pushl $sysenter_past_esp; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ CFI_REL_OFFSET eip, 0 ++#endif /* CONFIG_XEN */ ++ ++ENTRY(debug) ++ RING0_INT_FRAME ++#ifndef CONFIG_XEN ++ cmpl $ia32_sysenter_target,(%esp) ++ jne debug_stack_correct ++ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) ++debug_stack_correct: ++#endif /* !CONFIG_XEN */ ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ TRACE_IRQS_OFF ++ xorl %edx,%edx # error code 0 ++ movl %esp,%eax # pt_regs pointer ++ call do_debug ++ jmp ret_from_exception ++ CFI_ENDPROC ++END(debug) ++ ++/* ++ * NMI is doubly nasty. It can happen _while_ we're handling ++ * a debug fault, and the debug fault hasn't yet been able to ++ * clear up the stack. So we first check whether we got an ++ * NMI on the sysenter entry path, but after that we need to ++ * check whether we got an NMI on the debug path where the debug ++ * fault happened on the sysenter path. ++ */ ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++#ifndef CONFIG_XEN ++ movl %ss, %eax ++ cmpw $__ESPFIX_SS, %ax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ je nmi_espfix_stack ++ cmpl $ia32_sysenter_target,(%esp) ++ je nmi_stack_fixup ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %esp,%eax ++ /* Do not access memory above the end of our stack page, ++ * it might not exist. ++ */ ++ andl $(THREAD_SIZE-1),%eax ++ cmpl $(THREAD_SIZE-20),%eax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ jae nmi_stack_correct ++ cmpl $ia32_sysenter_target,12(%esp) ++ je nmi_debug_stack_check ++nmi_stack_correct: ++ /* We have a RING0_INT_FRAME here */ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ jmp restore_nocheck_notrace ++ CFI_ENDPROC ++ ++nmi_stack_fixup: ++ RING0_INT_FRAME ++ FIX_STACK(12,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++ ++nmi_debug_stack_check: ++ /* We have a RING0_INT_FRAME here */ ++ cmpw $__KERNEL_CS,16(%esp) ++ jne nmi_stack_correct ++ cmpl $debug,(%esp) ++ jb nmi_stack_correct ++ cmpl $debug_esp_fix_insn,(%esp) ++ ja nmi_stack_correct ++ FIX_STACK(24,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++ ++nmi_espfix_stack: ++ /* We have a RING0_INT_FRAME here. 
++ * ++ * create the pointer to lss back ++ */ ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ addw $4, (%esp) ++ /* copy the iret frame of 12 bytes */ ++ .rept 3 ++ pushl 16(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ .endr ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ FIXUP_ESPFIX_STACK # %eax == %esp ++ xorl %edx,%edx # zero error code ++ call do_nmi ++ RESTORE_REGS ++ lss 12+4(%esp), %esp # back to espfix stack ++ CFI_ADJUST_CFA_OFFSET -24 ++ jmp irq_return ++#else ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ orl $NMI_MASK, PT_EFLAGS(%esp) ++ jmp restore_all ++#endif ++ CFI_ENDPROC ++END(nmi) ++ ++ENTRY(int3) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ TRACE_IRQS_OFF ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_int3 ++ jmp ret_from_exception ++ CFI_ENDPROC ++END(int3) ++ ++ENTRY(general_protection) ++ RING0_EC_FRAME ++ pushl $do_general_protection ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++END(general_protection) ++ ++/* ++ * End of kprobes section ++ */ ++ .popsection +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:51:55.000000000 +0100 +@@ -14,15 +14,15 @@ + * + * NOTE: This code handles signal-recognition, which happens every time + * after an interrupt and after each system call. +- * +- * Normal syscalls and interrupts don't save a full stack frame, this is ++ * ++ * Normal syscalls and interrupts don't save a full stack frame, this is + * only done for syscall tracing, signals or fork/exec et.al. +- * +- * A note on terminology: +- * - top of stack: Architecture defined interrupt frame from SS to RIP +- * at the top of the kernel process stack. ++ * ++ * A note on terminology: ++ * - top of stack: Architecture defined interrupt frame from SS to RIP ++ * at the top of the kernel process stack. + * - partial stack frame: partially saved registers upto R11. +- * - full stack frame: Like partial stack frame, but all register saved. ++ * - full stack frame: Like partial stack frame, but all register saved. + * + * Some macro usage: + * - CFI macros are used to generate dwarf2 unwind information for better +@@ -65,7 +65,6 @@ + #define __AUDIT_ARCH_LE 0x40000000 + + .code64 +- + #ifdef CONFIG_FUNCTION_TRACER + #ifdef CONFIG_DYNAMIC_FTRACE + ENTRY(mcount) +@@ -73,16 +72,10 @@ ENTRY(mcount) + END(mcount) + + ENTRY(ftrace_caller) ++ cmpl $0, function_trace_stop ++ jne ftrace_stub + +- /* taken from glibc */ +- subq $0x38, %rsp +- movq %rax, (%rsp) +- movq %rcx, 8(%rsp) +- movq %rdx, 16(%rsp) +- movq %rsi, 24(%rsp) +- movq %rdi, 32(%rsp) +- movq %r8, 40(%rsp) +- movq %r9, 48(%rsp) ++ MCOUNT_SAVE_FRAME + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi +@@ -92,14 +85,13 @@ ENTRY(ftrace_caller) + ftrace_call: + call ftrace_stub + +- movq 48(%rsp), %r9 +- movq 40(%rsp), %r8 +- movq 32(%rsp), %rdi +- movq 24(%rsp), %rsi +- movq 16(%rsp), %rdx +- movq 8(%rsp), %rcx +- movq (%rsp), %rax +- addq $0x38, %rsp ++ MCOUNT_RESTORE_FRAME ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++.globl ftrace_graph_call ++ftrace_graph_call: ++ jmp ftrace_stub ++#endif + + .globl ftrace_stub + ftrace_stub: +@@ -108,15 +100,63 @@ END(ftrace_caller) + + #else /* ! 
CONFIG_DYNAMIC_FTRACE */ + ENTRY(mcount) ++ cmpl $0, function_trace_stop ++ jne ftrace_stub ++ + cmpq $ftrace_stub, ftrace_trace_function + jnz trace ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ cmpq $ftrace_stub, ftrace_graph_return ++ jnz ftrace_graph_caller ++ ++ cmpq $ftrace_graph_entry_stub, ftrace_graph_entry ++ jnz ftrace_graph_caller ++#endif ++ + .globl ftrace_stub + ftrace_stub: + retq + + trace: +- /* taken from glibc */ +- subq $0x38, %rsp ++ MCOUNT_SAVE_FRAME ++ ++ movq 0x38(%rsp), %rdi ++ movq 8(%rbp), %rsi ++ subq $MCOUNT_INSN_SIZE, %rdi ++ ++ call *ftrace_trace_function ++ ++ MCOUNT_RESTORE_FRAME ++ ++ jmp ftrace_stub ++END(mcount) ++#endif /* CONFIG_DYNAMIC_FTRACE */ ++#endif /* CONFIG_FUNCTION_TRACER */ ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ENTRY(ftrace_graph_caller) ++ cmpl $0, function_trace_stop ++ jne ftrace_stub ++ ++ MCOUNT_SAVE_FRAME ++ ++ leaq 8(%rbp), %rdi ++ movq 0x38(%rsp), %rsi ++ subq $MCOUNT_INSN_SIZE, %rsi ++ ++ call prepare_ftrace_return ++ ++ MCOUNT_RESTORE_FRAME ++ ++ retq ++END(ftrace_graph_caller) ++ ++ ++.globl return_to_handler ++return_to_handler: ++ subq $80, %rsp ++ + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) +@@ -124,13 +164,14 @@ trace: + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) ++ movq %r10, 56(%rsp) ++ movq %r11, 64(%rsp) + +- movq 0x38(%rsp), %rdi +- movq 8(%rbp), %rsi +- subq $MCOUNT_INSN_SIZE, %rdi +- +- call *ftrace_trace_function ++ call ftrace_return_to_handler + ++ movq %rax, 72(%rsp) ++ movq 64(%rsp), %r11 ++ movq 56(%rsp), %r10 + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi +@@ -138,16 +179,14 @@ trace: + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax +- addq $0x38, %rsp ++ addq $72, %rsp ++ retq ++#endif + +- jmp ftrace_stub +-END(mcount) +-#endif /* CONFIG_DYNAMIC_FTRACE */ +-#endif /* CONFIG_FUNCTION_TRACER */ + + #ifndef CONFIG_PREEMPT + #define retint_kernel retint_restore_args +-#endif ++#endif + + #ifdef CONFIG_PARAVIRT + ENTRY(native_usergs_sysret64) +@@ -168,20 +207,20 @@ ENTRY(native_usergs_sysret64) + NMI_MASK = 0x80000000 + + /* +- * C code is not supposed to know about undefined top of stack. Every time +- * a C function with an pt_regs argument is called from the SYSCALL based ++ * C code is not supposed to know about undefined top of stack. Every time ++ * a C function with an pt_regs argument is called from the SYSCALL based + * fast path FIXUP_TOP_OF_STACK is needed. + * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs + * manipulation. 
+- */ +- +- /* %rsp:at FRAMEEND */ +- .macro FIXUP_TOP_OF_STACK tmp +- movq $__USER_CS,CS(%rsp) +- movq $-1,RCX(%rsp) ++ */ ++ ++ /* %rsp:at FRAMEEND */ ++ .macro FIXUP_TOP_OF_STACK tmp offset=0 ++ movq $__USER_CS,CS+\offset(%rsp) ++ movq $-1,RCX+\offset(%rsp) + .endm + +- .macro RESTORE_TOP_OF_STACK tmp,offset=0 ++ .macro RESTORE_TOP_OF_STACK tmp offset=0 + .endm + + .macro FAKE_STACK_FRAME child_rip +@@ -193,7 +232,7 @@ NMI_MASK = 0x80000000 + pushq %rax /* rsp */ + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rsp,0 +- pushq $(1<<9) /* eflags - interrupts on */ ++ pushq $X86_EFLAGS_IF /* eflags - interrupts on */ + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET rflags,0*/ + pushq $__KERNEL_CS /* cs */ +@@ -211,36 +250,80 @@ NMI_MASK = 0x80000000 + CFI_ADJUST_CFA_OFFSET -(6*8) + .endm + +- .macro CFI_DEFAULT_STACK start=1,adj=0 ++/* ++ * initial frame state for interrupts (and exceptions without error code) ++ */ ++ .macro EMPTY_FRAME start=1 offset=0 + .if \start +- CFI_STARTPROC simple ++ CFI_STARTPROC simple + CFI_SIGNAL_FRAME +- CFI_DEF_CFA rsp,SS+8 - \adj*ARGOFFSET ++ CFI_DEF_CFA rsp,8+\offset + .else +- CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET ++ CFI_DEF_CFA_OFFSET 8+\offset + .endif +- .if \adj == 0 +- CFI_REL_OFFSET r15,R15 +- CFI_REL_OFFSET r14,R14 +- CFI_REL_OFFSET r13,R13 +- CFI_REL_OFFSET r12,R12 +- CFI_REL_OFFSET rbp,RBP +- CFI_REL_OFFSET rbx,RBX ++ .endm ++ ++/* ++ * initial frame state for syscall ++ */ ++ .macro BASIC_FRAME start=1 offset=0 ++ EMPTY_FRAME \start, SS+8+\offset-RIP ++ /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ ++ CFI_REL_OFFSET rsp, RSP+\offset-RIP ++ /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ ++ /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ ++ CFI_REL_OFFSET rip, RIP+\offset-RIP ++ .endm ++ ++/* ++ * initial frame state for interrupts (and exceptions without error code) ++ */ ++ .macro INTR_FRAME start=1 offset=0 ++ .if \start == 1 ++ BASIC_FRAME 1, \offset+2*8 ++ CFI_REL_OFFSET rcx, 0+\offset ++ CFI_REL_OFFSET r11, 8+\offset ++ .else ++ BASIC_FRAME \start, \offset + .endif +- CFI_REL_OFFSET r11,R11 - \adj*ARGOFFSET +- CFI_REL_OFFSET r10,R10 - \adj*ARGOFFSET +- CFI_REL_OFFSET r9,R9 - \adj*ARGOFFSET +- CFI_REL_OFFSET r8,R8 - \adj*ARGOFFSET +- CFI_REL_OFFSET rax,RAX - \adj*ARGOFFSET +- CFI_REL_OFFSET rcx,RCX - \adj*ARGOFFSET +- CFI_REL_OFFSET rdx,RDX - \adj*ARGOFFSET +- CFI_REL_OFFSET rsi,RSI - \adj*ARGOFFSET +- CFI_REL_OFFSET rdi,RDI - \adj*ARGOFFSET +- CFI_REL_OFFSET rip,RIP - \adj*ARGOFFSET +- /*CFI_REL_OFFSET cs,CS - \adj*ARGOFFSET*/ +- /*CFI_REL_OFFSET rflags,EFLAGS - \adj*ARGOFFSET*/ +- CFI_REL_OFFSET rsp,RSP - \adj*ARGOFFSET +- /*CFI_REL_OFFSET ss,SS - \adj*ARGOFFSET*/ ++ .endm ++ ++/* ++ * initial frame state for exceptions with error code (and interrupts ++ * with vector already pushed) ++ */ ++ .macro XCPT_FRAME start=1 offset=0 ++ INTR_FRAME \start, RIP+\offset-ORIG_RAX ++ /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ ++ .endm ++ ++/* ++ * frame that enables calling into C. ++ */ ++ .macro PARTIAL_FRAME start=1 offset=0 ++ XCPT_FRAME 2*\start, ORIG_RAX+\offset-ARGOFFSET ++ CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET ++ CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET ++ CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET ++ CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET ++ CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET ++ CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET ++ CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET ++ CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET ++ CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET ++ .endm ++ ++/* ++ * frame that enables passing a complete pt_regs to a C function. 
++ */ ++ .macro DEFAULT_FRAME start=1 offset=0 ++ PARTIAL_FRAME \start, R11+\offset-R15 ++ CFI_REL_OFFSET rbx, RBX+\offset ++ CFI_REL_OFFSET rbp, RBP+\offset ++ CFI_REL_OFFSET r12, R12+\offset ++ CFI_REL_OFFSET r13, R13+\offset ++ CFI_REL_OFFSET r14, R14+\offset ++ CFI_REL_OFFSET r15, R15+\offset + .endm + + /* +@@ -270,70 +353,149 @@ NMI_MASK = 0x80000000 + jmp hypercall_page + (__HYPERVISOR_iret * 32) + .endm + ++#ifndef CONFIG_XEN ++/* save partial stack frame */ ++ENTRY(save_args) ++ XCPT_FRAME ++ cld ++ movq_cfi rdi, RDI+16-ARGOFFSET ++ movq_cfi rsi, RSI+16-ARGOFFSET ++ movq_cfi rdx, RDX+16-ARGOFFSET ++ movq_cfi rcx, RCX+16-ARGOFFSET ++ movq_cfi rax, RAX+16-ARGOFFSET ++ movq_cfi r8, R8+16-ARGOFFSET ++ movq_cfi r9, R9+16-ARGOFFSET ++ movq_cfi r10, R10+16-ARGOFFSET ++ movq_cfi r11, R11+16-ARGOFFSET ++ ++ leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ ++ movq_cfi rbp, 8 /* push %rbp */ ++ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ ++ testl $3, CS(%rdi) ++ je 1f ++ SWAPGS ++ /* ++ * irqcount is used to check if a CPU is already on an interrupt stack ++ * or not. While this is essentially redundant with preempt_count it is ++ * a little cheaper to use a separate counter in the PDA (short of ++ * moving irq_enter into assembly, which would be too much work) ++ */ ++1: incl %gs:pda_irqcount ++ jne 2f ++ popq_cfi %rax /* move return address... */ ++ mov %gs:pda_irqstackptr,%rsp ++ EMPTY_FRAME 0 ++ pushq_cfi %rbp /* backlink for unwinder */ ++ pushq_cfi %rax /* ... to the new stack */ ++ /* ++ * We entered an interrupt context - irqs are off: ++ */ ++2: TRACE_IRQS_OFF ++ ret ++ CFI_ENDPROC ++END(save_args) ++#endif ++ ++ENTRY(save_rest) ++ PARTIAL_FRAME 1 REST_SKIP+8 ++ movq 5*8+16(%rsp), %r11 /* save return address */ ++ movq_cfi rbx, RBX+16 ++ movq_cfi rbp, RBP+16 ++ movq_cfi r12, R12+16 ++ movq_cfi r13, R13+16 ++ movq_cfi r14, R14+16 ++ movq_cfi r15, R15+16 ++ movq %r11, 8(%rsp) /* return address */ ++ FIXUP_TOP_OF_STACK %r11, 16 ++ ret ++ CFI_ENDPROC ++END(save_rest) ++ ++#ifndef CONFIG_XEN ++/* save complete stack frame */ ++ENTRY(save_paranoid) ++ XCPT_FRAME 1 RDI+8 ++ cld ++ movq_cfi rdi, RDI+8 ++ movq_cfi rsi, RSI+8 ++ movq_cfi rdx, RDX+8 ++ movq_cfi rcx, RCX+8 ++ movq_cfi rax, RAX+8 ++ movq_cfi r8, R8+8 ++ movq_cfi r9, R9+8 ++ movq_cfi r10, R10+8 ++ movq_cfi r11, R11+8 ++ movq_cfi rbx, RBX+8 ++ movq_cfi rbp, RBP+8 ++ movq_cfi r12, R12+8 ++ movq_cfi r13, R13+8 ++ movq_cfi r14, R14+8 ++ movq_cfi r15, R15+8 ++ movl $1,%ebx ++ movl $MSR_GS_BASE,%ecx ++ rdmsr ++ testl %edx,%edx ++ js 1f /* negative -> in kernel */ ++ SWAPGS ++ xorl %ebx,%ebx ++1: ret ++ CFI_ENDPROC ++END(save_paranoid) ++#endif ++ + /* +- * A newly forked process directly context switches into this. +- */ +-/* rdi: prev */ ++ * A newly forked process directly context switches into this address. ++ * ++ * rdi: prev task we switched from ++ */ + ENTRY(ret_from_fork) +- CFI_DEFAULT_STACK ++ DEFAULT_FRAME ++ + push kernel_eflags(%rip) + CFI_ADJUST_CFA_OFFSET 8 +- popf # reset kernel eflags ++ popf # reset kernel eflags + CFI_ADJUST_CFA_OFFSET -8 +- call schedule_tail ++ ++ call schedule_tail # rdi: 'prev' task parameter ++ + GET_THREAD_INFO(%rcx) +- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) +- jnz rff_trace +-rff_action: ++ ++ CFI_REMEMBER_STATE + RESTORE_REST +- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? ++ ++ testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 
+ je int_ret_from_sys_call +- testl $_TIF_IA32,TI_flags(%rcx) ++ ++ testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET + jnz int_ret_from_sys_call +- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET +- jmp ret_from_sys_call +-rff_trace: +- movq %rsp,%rdi +- call syscall_trace_leave +- GET_THREAD_INFO(%rcx) +- jmp rff_action ++ ++ RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET ++ jmp ret_from_sys_call # go to the SYSRET fastpath ++ ++ CFI_RESTORE_STATE + CFI_ENDPROC + END(ret_from_fork) + + /* +- * initial frame state for interrupts and exceptions +- */ +- .macro _frame ref +- CFI_STARTPROC simple +- CFI_SIGNAL_FRAME +- CFI_DEF_CFA rsp,SS+8-\ref +- /*CFI_REL_OFFSET ss,SS-\ref*/ +- CFI_REL_OFFSET rsp,RSP-\ref +- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ +- /*CFI_REL_OFFSET cs,CS-\ref*/ +- CFI_REL_OFFSET rip,RIP-\ref +- .endm +- +-/* + * System call entry. Upto 6 arguments in registers are supported. + * + * SYSCALL does not save anything on the stack and does not change the + * stack pointer. + */ +- ++ + /* +- * Register setup: ++ * Register setup: + * rax system call number + * rdi arg0 +- * rcx return address for syscall/sysret, C arg3 ++ * rcx return address for syscall/sysret, C arg3 + * rsi arg1 +- * rdx arg2 ++ * rdx arg2 + * r10 arg3 (--> moved to rcx for C) + * r8 arg4 + * r9 arg5 + * r11 eflags for syscall/sysret, temporary for C +- * r12-r15,rbp,rbx saved by C code, not touched. +- * ++ * r12-r15,rbp,rbx saved by C code, not touched. ++ * + * Interrupts are enabled on entry. + * Only called from user space. + * +@@ -343,10 +505,10 @@ END(ret_from_fork) + * When user can change the frames always force IRET. That is because + * it deals with uncanonical addresses better. SYSRET has trouble + * with them due to bugs in both AMD and Intel CPUs. +- */ ++ */ + + ENTRY(system_call) +- _frame (RIP-0x10) ++ INTR_FRAME start=2 offset=2*8 + SAVE_ARGS -8,0 + movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + GET_THREAD_INFO(%rcx) +@@ -360,19 +522,19 @@ system_call_fastpath: + movq %rax,RAX-ARGOFFSET(%rsp) + /* + * Syscall return path ending with SYSRET (fast path) +- * Has incomplete stack frame and undefined top of stack. +- */ ++ * Has incomplete stack frame and undefined top of stack. ++ */ + ret_from_sys_call: + movl $_TIF_ALLWORK_MASK,%edi + /* edi: flagmask */ +-sysret_check: ++sysret_check: + LOCKDEP_SYS_EXIT + GET_THREAD_INFO(%rcx) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + movl TI_flags(%rcx),%edx + andl %edi,%edx +- jnz sysret_careful ++ jnz sysret_careful + CFI_REMEMBER_STATE + /* + * sysretq will re-enable interrupts: +@@ -384,7 +546,7 @@ sysret_check: + + CFI_RESTORE_STATE + /* Handle reschedules */ +- /* edx: work, edi: workmask */ ++ /* edx: work, edi: workmask */ + sysret_careful: + bt $TIF_NEED_RESCHED,%edx + jnc sysret_signal +@@ -397,7 +559,7 @@ sysret_careful: + CFI_ADJUST_CFA_OFFSET -8 + jmp sysret_check + +- /* Handle a signal */ ++ /* Handle a signal */ + sysret_signal: + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) +@@ -406,17 +568,20 @@ sysret_signal: + jc sysret_audit + #endif + /* edx: work flags (arg3) */ +- leaq do_notify_resume(%rip),%rax + leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 + xorl %esi,%esi # oldset -> arg2 +- call ptregscall_common ++ SAVE_REST ++ FIXUP_TOP_OF_STACK %r11 ++ call do_notify_resume ++ RESTORE_TOP_OF_STACK %r11 ++ RESTORE_REST + movl $_TIF_WORK_MASK,%edi + /* Use IRET because user could have changed frame. This + works because ptregscall_common has called FIXUP_TOP_OF_STACK. 
*/ + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + jmp int_with_check +- ++ + badsys: + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) + jmp ret_from_sys_call +@@ -455,7 +620,7 @@ sysret_audit: + #endif /* CONFIG_AUDITSYSCALL */ + + /* Do syscall tracing */ +-tracesys: ++tracesys: + #ifdef CONFIG_AUDITSYSCALL + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) + jz auditsys +@@ -478,8 +643,8 @@ tracesys: + call *sys_call_table(,%rax,8) + movq %rax,RAX-ARGOFFSET(%rsp) + /* Use IRET because user could have changed frame */ +- +-/* ++ ++/* + * Syscall return path ending with IRET. + * Has correct top of stack, but partial stack frame. + */ +@@ -527,18 +692,18 @@ int_very_careful: + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) + SAVE_REST +- /* Check for syscall exit trace */ ++ /* Check for syscall exit trace */ + testl $_TIF_WORK_SYSCALL_EXIT,%edx + jz int_signal + pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 +- leaq 8(%rsp),%rdi # &ptregs -> arg1 ++ leaq 8(%rsp),%rdi # &ptregs -> arg1 + call syscall_trace_leave + popq %rdi + CFI_ADJUST_CFA_OFFSET -8 + andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi + jmp int_restore_rest +- ++ + int_signal: + testl $_TIF_DO_NOTIFY_MASK,%edx + jz 1f +@@ -553,22 +718,24 @@ int_restore_rest: + jmp int_with_check + CFI_ENDPROC + END(system_call) +- +-/* ++ ++/* + * Certain special system calls that need to save a complete full stack frame. +- */ +- ++ */ + .macro PTREGSCALL label,func,arg +- .globl \label +-\label: +- leaq \func(%rip),%rax +- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ +- jmp ptregscall_common ++ENTRY(\label) ++ PARTIAL_FRAME 1 8 /* offset 8: return address */ ++ subq $REST_SKIP, %rsp ++ CFI_ADJUST_CFA_OFFSET REST_SKIP ++ call save_rest ++ DEFAULT_FRAME 0 8 /* offset 8: return address */ ++ leaq 8(%rsp), \arg /* pt_regs pointer */ ++ call \func ++ jmp ptregscall_common ++ CFI_ENDPROC + END(\label) + .endm + +- CFI_STARTPROC +- + PTREGSCALL stub_clone, sys_clone, %r8 + PTREGSCALL stub_fork, sys_fork, %rdi + PTREGSCALL stub_vfork, sys_vfork, %rdi +@@ -576,25 +743,18 @@ END(\label) + PTREGSCALL stub_iopl, sys_iopl, %rsi + + ENTRY(ptregscall_common) +- popq %r11 +- CFI_ADJUST_CFA_OFFSET -8 +- CFI_REGISTER rip, r11 +- SAVE_REST +- movq %r11, %r15 +- CFI_REGISTER rip, r15 +- FIXUP_TOP_OF_STACK %r11 +- call *%rax +- RESTORE_TOP_OF_STACK %r11 +- movq %r15, %r11 +- CFI_REGISTER rip, r11 +- RESTORE_REST +- pushq %r11 +- CFI_ADJUST_CFA_OFFSET 8 +- CFI_REL_OFFSET rip, 0 +- ret ++ DEFAULT_FRAME 1 8 /* offset 8: return address */ ++ RESTORE_TOP_OF_STACK %r11, 8 ++ movq_cfi_restore R15+8, r15 ++ movq_cfi_restore R14+8, r14 ++ movq_cfi_restore R13+8, r13 ++ movq_cfi_restore R12+8, r12 ++ movq_cfi_restore RBP+8, rbp ++ movq_cfi_restore RBX+8, rbx ++ ret $REST_SKIP /* pop extended registers */ + CFI_ENDPROC + END(ptregscall_common) +- ++ + ENTRY(stub_execve) + CFI_STARTPROC + popq %r11 +@@ -610,11 +770,11 @@ ENTRY(stub_execve) + jmp int_ret_from_sys_call + CFI_ENDPROC + END(stub_execve) +- ++ + /* + * sigreturn is special because it needs to restore all registers on return. + * This cannot be done with SYSRET, so use the IRET return path instead. 
+- */ ++ */ + ENTRY(stub_rt_sigreturn) + CFI_STARTPROC + addq $8, %rsp +@@ -629,24 +789,12 @@ ENTRY(stub_rt_sigreturn) + CFI_ENDPROC + END(stub_rt_sigreturn) + +-/* initial frame state for interrupts (and exceptions without error code) */ +-#define INTR_FRAME _frame (RIP-0x10); \ +- CFI_REL_OFFSET rcx,0; \ +- CFI_REL_OFFSET r11,8 +- +-/* initial frame state for exceptions with error code (and interrupts with +- vector already pushed) */ +-#define XCPT_FRAME _frame (RIP-0x18); \ +- CFI_REL_OFFSET rcx,0; \ +- CFI_REL_OFFSET r11,8 +- +-/* ++/* + * Interrupt exit. +- * + */ + + retint_with_reschedule: +- CFI_DEFAULT_STACK adj=1 ++ PARTIAL_FRAME + movl $_TIF_WORK_MASK,%edi + retint_check: + LOCKDEP_SYS_EXIT_IRQ +@@ -675,20 +823,20 @@ retint_careful: + pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 + call schedule +- popq %rdi ++ popq %rdi + CFI_ADJUST_CFA_OFFSET -8 + GET_THREAD_INFO(%rcx) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + jmp retint_check +- ++ + retint_signal: + testl $_TIF_DO_NOTIFY_MASK,%edx + jz retint_restore_args + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) + SAVE_REST +- movq $-1,ORIG_RAX(%rsp) ++ movq $-1,ORIG_RAX(%rsp) + xorl %esi,%esi # oldset + movq %rsp,%rdi # &pt_regs + call do_notify_resume +@@ -710,324 +858,132 @@ ENTRY(retint_kernel) + jnc retint_restore_args + call preempt_schedule_irq + jmp retint_kernel /* check again */ +-#endif ++#endif + + CFI_ENDPROC + END(retint_check) +- ++ + #ifndef CONFIG_XEN + /* + * APIC interrupts. +- */ +- .macro apicinterrupt num,func ++ */ ++.macro apicinterrupt num sym do_sym ++ENTRY(\sym) + INTR_FRAME + pushq $~(\num) + CFI_ADJUST_CFA_OFFSET 8 +- interrupt \func ++ interrupt \do_sym + jmp error_entry + CFI_ENDPROC +- .endm ++END(\sym) ++.endm + +-ENTRY(thermal_interrupt) +- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt +-END(thermal_interrupt) +- +-ENTRY(threshold_interrupt) +- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt +-END(threshold_interrupt) +- +-#ifdef CONFIG_SMP +-ENTRY(reschedule_interrupt) +- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt +-END(reschedule_interrupt) +- +- .macro INVALIDATE_ENTRY num +-ENTRY(invalidate_interrupt\num) +- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt +-END(invalidate_interrupt\num) +- .endm ++#ifdef CONFIG_SMP ++apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ ++ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt ++#endif ++ ++apicinterrupt UV_BAU_MESSAGE \ ++ uv_bau_message_intr1 uv_bau_message_interrupt ++apicinterrupt LOCAL_TIMER_VECTOR \ ++ apic_timer_interrupt smp_apic_timer_interrupt ++ ++#ifdef CONFIG_SMP ++apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ ++ invalidate_interrupt0 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ ++ invalidate_interrupt1 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ ++ invalidate_interrupt2 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ ++ invalidate_interrupt3 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ ++ invalidate_interrupt4 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ ++ invalidate_interrupt5 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ ++ invalidate_interrupt6 smp_invalidate_interrupt ++apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ ++ invalidate_interrupt7 smp_invalidate_interrupt ++#endif + +- INVALIDATE_ENTRY 0 +- INVALIDATE_ENTRY 1 +- INVALIDATE_ENTRY 2 +- INVALIDATE_ENTRY 3 +- INVALIDATE_ENTRY 4 +- INVALIDATE_ENTRY 5 +- 
INVALIDATE_ENTRY 6 +- INVALIDATE_ENTRY 7 +- +-ENTRY(call_function_interrupt) +- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt +-END(call_function_interrupt) +-ENTRY(call_function_single_interrupt) +- apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt +-END(call_function_single_interrupt) +-ENTRY(irq_move_cleanup_interrupt) +- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt +-END(irq_move_cleanup_interrupt) ++apicinterrupt THRESHOLD_APIC_VECTOR \ ++ threshold_interrupt mce_threshold_interrupt ++apicinterrupt THERMAL_APIC_VECTOR \ ++ thermal_interrupt smp_thermal_interrupt ++ ++#ifdef CONFIG_SMP ++apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ ++ call_function_single_interrupt smp_call_function_single_interrupt ++apicinterrupt CALL_FUNCTION_VECTOR \ ++ call_function_interrupt smp_call_function_interrupt ++apicinterrupt RESCHEDULE_VECTOR \ ++ reschedule_interrupt smp_reschedule_interrupt + #endif + +-ENTRY(apic_timer_interrupt) +- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt +-END(apic_timer_interrupt) +- +-ENTRY(uv_bau_message_intr1) +- apicinterrupt 220,uv_bau_message_interrupt +-END(uv_bau_message_intr1) +- +-ENTRY(error_interrupt) +- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt +-END(error_interrupt) +- +-ENTRY(spurious_interrupt) +- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt +-END(spurious_interrupt) ++apicinterrupt ERROR_APIC_VECTOR \ ++ error_interrupt smp_error_interrupt ++apicinterrupt SPURIOUS_APIC_VECTOR \ ++ spurious_interrupt smp_spurious_interrupt + #endif /* !CONFIG_XEN */ +- ++ + /* + * Exception entry points. +- */ +- .macro zeroentry sym ++ */ ++.macro zeroentry sym do_sym ++ENTRY(\sym) + INTR_FRAME + movq (%rsp),%rcx + CFI_RESTORE rcx + movq 8(%rsp),%r11 + CFI_RESTORE r11 +- addq $0x10,%rsp /* skip rcx and r11 */ +- CFI_ADJUST_CFA_OFFSET -0x10 +- pushq $0 /* push error code/oldrax */ +- CFI_ADJUST_CFA_OFFSET 8 +- pushq %rax /* push real oldrax to the rdi slot */ +- CFI_ADJUST_CFA_OFFSET 8 +- CFI_REL_OFFSET rax,0 +- leaq \sym(%rip),%rax +- jmp error_entry ++ movq $-1,8(%rsp) /* ORIG_RAX: no syscall to restart */ ++ subq $(15-1)*8,%rsp ++ CFI_ADJUST_CFA_OFFSET (15-1)*8 ++ call error_entry ++ DEFAULT_FRAME 0 ++ movq %rsp,%rdi /* pt_regs pointer */ ++ xorl %esi,%esi /* no error code */ ++ call \do_sym ++ jmp error_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC +- .endm ++END(\sym) ++.endm ++ ++.macro paranoidzeroentry sym do_sym ++ zeroentry \sym \do_sym ++.endm ++ ++.macro paranoidzeroentry_ist sym do_sym ist ++ zeroentry \sym \do_sym ++.endm + +- .macro errorentry sym ++.macro errorentry sym do_sym ++ENTRY(\sym) + XCPT_FRAME + movq (%rsp),%rcx + CFI_RESTORE rcx + movq 8(%rsp),%r11 + CFI_RESTORE r11 +- addq $0x10,%rsp /* rsp points to the error code */ +- CFI_ADJUST_CFA_OFFSET -0x10 +- pushq %rax +- CFI_ADJUST_CFA_OFFSET 8 +- CFI_REL_OFFSET rax,0 +- leaq \sym(%rip),%rax +- jmp error_entry ++ subq $(15-2)*8,%rsp ++ CFI_ADJUST_CFA_OFFSET (15-2)*8 ++ call error_entry ++ DEFAULT_FRAME 0 ++ movq %rsp,%rdi /* pt_regs pointer */ ++ movq ORIG_RAX(%rsp),%rsi /* get error code */ ++ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ ++ call \do_sym ++ jmp error_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC +- .endm ++END(\sym) ++.endm + +-#if 0 /* not XEN */ + /* error code is on the stack already */ +- /* handle NMI like exceptions that can happen everywhere */ +- .macro paranoidentry sym, ist=0, irqtrace=1 +- movq (%rsp),%rcx +- movq 8(%rsp),%r11 +- addq $0x10,%rsp /* skip rcx and r11 */ +- 
SAVE_ALL +- cld +-#if 0 /* not XEN */ +- movl $1,%ebx +- movl $MSR_GS_BASE,%ecx +- rdmsr +- testl %edx,%edx +- js 1f +- SWAPGS +- xorl %ebx,%ebx +-1: +-#endif +- .if \ist +- movq %gs:pda_data_offset, %rbp +- .endif +- .if \irqtrace +- TRACE_IRQS_OFF +- .endif +- movq %rsp,%rdi +- movq ORIG_RAX(%rsp),%rsi +- movq $-1,ORIG_RAX(%rsp) +- .if \ist +- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) +- .endif +- call \sym +- .if \ist +- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) +- .endif +- DISABLE_INTERRUPTS(CLBR_NONE) +- .if \irqtrace +- TRACE_IRQS_OFF +- .endif +- .endm +- +- /* +- * "Paranoid" exit path from exception stack. +- * Paranoid because this is used by NMIs and cannot take +- * any kernel state for granted. +- * We don't do kernel preemption checks here, because only +- * NMI should be common and it does not enable IRQs and +- * cannot get reschedule ticks. +- * +- * "trace" is 0 for the NMI handler only, because irq-tracing +- * is fundamentally NMI-unsafe. (we cannot change the soft and +- * hard flags at once, atomically) +- */ +- .macro paranoidexit trace=1 +- /* ebx: no swapgs flag */ +-paranoid_exit\trace: +- testl %ebx,%ebx /* swapgs needed? */ +- jnz paranoid_restore\trace +- testl $3,CS(%rsp) +- jnz paranoid_userspace\trace +-paranoid_swapgs\trace: +- .if \trace +- TRACE_IRQS_IRETQ 0 +- .endif +- SWAPGS_UNSAFE_STACK +-paranoid_restore\trace: +- RESTORE_ALL 8 +- jmp irq_return +-paranoid_userspace\trace: +- GET_THREAD_INFO(%rcx) +- movl TI_flags(%rcx),%ebx +- andl $_TIF_WORK_MASK,%ebx +- jz paranoid_swapgs\trace +- movq %rsp,%rdi /* &pt_regs */ +- call sync_regs +- movq %rax,%rsp /* switch stack for scheduling */ +- testl $_TIF_NEED_RESCHED,%ebx +- jnz paranoid_schedule\trace +- movl %ebx,%edx /* arg3: thread flags */ +- .if \trace +- TRACE_IRQS_ON +- .endif +- ENABLE_INTERRUPTS(CLBR_NONE) +- xorl %esi,%esi /* arg2: oldset */ +- movq %rsp,%rdi /* arg1: &pt_regs */ +- call do_notify_resume +- DISABLE_INTERRUPTS(CLBR_NONE) +- .if \trace +- TRACE_IRQS_OFF +- .endif +- jmp paranoid_userspace\trace +-paranoid_schedule\trace: +- .if \trace +- TRACE_IRQS_ON +- .endif +- ENABLE_INTERRUPTS(CLBR_ANY) +- call schedule +- DISABLE_INTERRUPTS(CLBR_ANY) +- .if \trace +- TRACE_IRQS_OFF +- .endif +- jmp paranoid_userspace\trace +- CFI_ENDPROC +- .endm +-#endif ++.macro paranoiderrorentry sym do_sym ++ errorentry \sym \do_sym ++.endm + + /* +- * Exception entry point. This expects an error code/orig_rax on the stack +- * and the exception handler in %rax. 
+- */ +-KPROBE_ENTRY(error_entry) +- _frame RDI +- CFI_REL_OFFSET rax,0 +- /* rdi slot contains rax, oldrax contains error code */ +- cld +- subq $14*8,%rsp +- CFI_ADJUST_CFA_OFFSET (14*8) +- movq %rsi,13*8(%rsp) +- CFI_REL_OFFSET rsi,RSI +- movq 14*8(%rsp),%rsi /* load rax from rdi slot */ +- CFI_REGISTER rax,rsi +- movq %rdx,12*8(%rsp) +- CFI_REL_OFFSET rdx,RDX +- movq %rcx,11*8(%rsp) +- CFI_REL_OFFSET rcx,RCX +- movq %rsi,10*8(%rsp) /* store rax */ +- CFI_REL_OFFSET rax,RAX +- movq %r8, 9*8(%rsp) +- CFI_REL_OFFSET r8,R8 +- movq %r9, 8*8(%rsp) +- CFI_REL_OFFSET r9,R9 +- movq %r10,7*8(%rsp) +- CFI_REL_OFFSET r10,R10 +- movq %r11,6*8(%rsp) +- CFI_REL_OFFSET r11,R11 +- movq %rbx,5*8(%rsp) +- CFI_REL_OFFSET rbx,RBX +- movq %rbp,4*8(%rsp) +- CFI_REL_OFFSET rbp,RBP +- movq %r12,3*8(%rsp) +- CFI_REL_OFFSET r12,R12 +- movq %r13,2*8(%rsp) +- CFI_REL_OFFSET r13,R13 +- movq %r14,1*8(%rsp) +- CFI_REL_OFFSET r14,R14 +- movq %r15,(%rsp) +- CFI_REL_OFFSET r15,R15 +-#if 0 +- cmpl $__KERNEL_CS,CS(%rsp) +- CFI_REMEMBER_STATE +- je error_kernelspace +-#endif +-error_call_handler: +- movq %rdi, RDI(%rsp) +- CFI_REL_OFFSET rdi,RDI +- movq %rsp,%rdi +- movq ORIG_RAX(%rsp),%rsi # get error code +- movq $-1,ORIG_RAX(%rsp) +- call *%rax +-error_exit: +- RESTORE_REST +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- GET_THREAD_INFO(%rcx) +- testb $3,CS-ARGOFFSET(%rsp) +- jz retint_kernel +- LOCKDEP_SYS_EXIT_IRQ +- movl TI_flags(%rcx),%edx +- movl $_TIF_WORK_MASK,%edi +- andl %edi,%edx +- jnz retint_careful +- jmp retint_restore_args +- +-#if 0 +- /* +- * We need to re-write the logic here because we don't do iretq to +- * to return to user mode. It's still possible that we get trap/fault +- * in the kernel (when accessing buffers pointed to by system calls, +- * for example). +- * +- */ +- CFI_RESTORE_STATE +-error_kernelspace: +- incl %ebx +- /* There are two places in the kernel that can potentially fault with +- usergs. Handle them here. The exception handlers after +- iret run with kernel gs again, so don't set the user space flag. +- B stepping K8s sometimes report an truncated RIP for IRET +- exceptions returning to compat mode. Check for these here too. */ +- leaq irq_return(%rip),%rcx +- cmpq %rcx,RIP(%rsp) +- je error_swapgs +- movl %ecx,%ecx /* zero extend */ +- cmpq %rcx,RIP(%rsp) +- je error_swapgs +- cmpq $gs_change,RIP(%rsp) +- je error_swapgs +- jmp error_sti +-#endif +- CFI_ENDPROC +-KPROBE_END(error_entry) +- +-ENTRY(hypervisor_callback) +- zeroentry do_hypervisor_callback +-END(hypervisor_callback) +- +-/* + * Copied from arch/xen/i386/kernel/entry.S + */ + # A note on the "critical region" in our callback handler. +@@ -1047,7 +1003,7 @@ ENTRY(do_hypervisor_callback) # do_hyp + # see the correct pointer to the pt_regs + movq %rdi, %rsp # we don't return, adjust the stack frame + CFI_ENDPROC +- CFI_DEFAULT_STACK ++ DEFAULT_FRAME + 11: incl %gs:pda_irqcount + movq %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp +@@ -1063,7 +1019,7 @@ END(do_hypervisor_callback) + + ALIGN + restore_all_enable_events: +- CFI_DEFAULT_STACK adj=1 ++ PARTIAL_FRAME + TRACE_IRQS_ON + __ENABLE_INTERRUPTS + +@@ -1099,9 +1055,7 @@ ecrit: /**** END OF CRITICAL REGION *** + # We distinguish between categories by comparing each saved segment register + # with its current contents: any discrepancy means we in category 1. 
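The category test described in the comment above reduces to comparing each selector Xen saved on the failsafe stack frame against the live contents of the segment register. A minimal user-space C sketch of that predicate, assuming a simplified two-selector frame (the layout and the 0x2b selector values are illustrative, not the real Xen frame):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the selectors the hypervisor saved on
 * the failsafe frame; the real frame carries %ds/%es/%fs/%gs plus
 * the iret frame. */
struct failsafe_frame {
        uint16_t saved_ds;
        uint16_t saved_es;
};

/* Any discrepancy between a saved selector and the live register
 * means the fault hit while reloading segments (category 1), so the
 * saved frame can simply be retried; matching selectors mean the
 * iret frame itself is bad (category 2). */
static int failsafe_category(const struct failsafe_frame *f,
                             uint16_t live_ds, uint16_t live_es)
{
        return (f->saved_ds != live_ds || f->saved_es != live_es) ? 1 : 2;
}

int main(void)
{
        struct failsafe_frame f = { .saved_ds = 0x2b, .saved_es = 0x2b };

        printf("ds wiped  -> category %d\n", failsafe_category(&f, 0, 0x2b));
        printf("all match -> category %d\n", failsafe_category(&f, 0x2b, 0x2b));
        return 0;
}

This is why the handler below needs only a cmpw per selector and a branch before deciding whether to reload the saved frame or fix up the iret frame.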
+ ENTRY(failsafe_callback) +- _frame (RIP-0x30) +- CFI_REL_OFFSET rcx, 0 +- CFI_REL_OFFSET r11, 8 ++ INTR_FRAME offset=4*8 + movw %ds,%cx + cmpw %cx,0x10(%rsp) + CFI_REMEMBER_STATE +@@ -1137,20 +1091,19 @@ ENTRY(failsafe_callback) + SAVE_ALL + jmp error_exit + CFI_ENDPROC +-#if 0 +- .section __ex_table,"a" +- .align 8 +- .quad gs_change,bad_gs +- .previous +- .section .fixup,"ax" +- /* running with kernelgs */ +-bad_gs: +-/* swapgs */ /* switch back to user gs */ +- xorl %eax,%eax +- movl %eax,%gs +- jmp 2b +- .previous +-#endif ++ ++zeroentry divide_error do_divide_error ++zeroentry overflow do_overflow ++zeroentry bounds do_bounds ++zeroentry invalid_op do_invalid_op ++zeroentry device_not_available do_device_not_available ++zeroentry hypervisor_callback do_hypervisor_callback ++zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun ++errorentry invalid_TSS do_invalid_TSS ++errorentry segment_not_present do_segment_not_present ++zeroentry coprocessor_error do_coprocessor_error ++errorentry alignment_check do_alignment_check ++zeroentry simd_coprocessor_error do_simd_coprocessor_error + + /* + * Create a kernel thread. +@@ -1174,7 +1127,7 @@ ENTRY(kernel_thread) + + xorl %r8d,%r8d + xorl %r9d,%r9d +- ++ + # clone now + call do_fork + movq %rax,RAX(%rsp) +@@ -1185,15 +1138,15 @@ ENTRY(kernel_thread) + * so internally to the x86_64 port you can rely on kernel_thread() + * not to reschedule the child before returning, this avoids the need + * of hacks for example to fork off the per-CPU idle tasks. +- * [Hopefully no generic code relies on the reschedule -AK] ++ * [Hopefully no generic code relies on the reschedule -AK] + */ + RESTORE_ALL + UNFAKE_STACK_FRAME + ret + CFI_ENDPROC +-ENDPROC(kernel_thread) +- +-child_rip: ++END(kernel_thread) ++ ++ENTRY(child_rip) + pushq $0 # fake return address + CFI_STARTPROC + /* +@@ -1206,8 +1159,9 @@ child_rip: + # exit + mov %eax, %edi + call do_exit ++ ud2 # padding for call trace + CFI_ENDPROC +-ENDPROC(child_rip) ++END(child_rip) + + /* + * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 
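kernel_thread() above hands do_fork() a fake interrupt frame whose saved rip is child_rip, so the child wakes up in child_rip, calls the requested function, and feeds its return value to do_exit() (the newly added ud2 only keeps the call trace readable). A rough user-space analogue of that fn/arg/flags contract, sketched with clone(2); thread_fn and the 64 KiB stack size are arbitrary example choices:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

/* Plays the role of the function kernel_thread() is asked to run;
 * the return value becomes the exit status, just as child_rip moves
 * %eax into %edi and calls do_exit(). */
static int thread_fn(void *arg)
{
        printf("child running, arg=%s\n", (const char *)arg);
        return 42;
}

int main(void)
{
        const size_t stack_size = 64 * 1024;
        char *stack = malloc(stack_size);
        int pid, status;

        if (stack == NULL)
                return 1;
        /* clone() starts thread_fn on a fresh stack, much as the fake
         * frame built by kernel_thread() starts the child in child_rip. */
        pid = clone(thread_fn, stack + stack_size, CLONE_VM | SIGCHLD,
                    (void *)"hello");
        if (pid < 0)
                return 1;
        waitpid(pid, &status, 0);
        printf("child exit status: %d\n", WEXITSTATUS(status));
        free(stack);
        return 0;
}

The analogy is loose -- in the kernel the "stack" is the fake pt_regs frame and the status travels through do_exit() -- but the fn/arg/flags calling convention is the same.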
+@@ -1227,10 +1181,10 @@ ENDPROC(child_rip) + ENTRY(kernel_execve) + CFI_STARTPROC + FAKE_STACK_FRAME $0 +- SAVE_ALL ++ SAVE_ALL + movq %rsp,%rcx + call sys_execve +- movq %rax, RAX(%rsp) ++ movq %rax, RAX(%rsp) + RESTORE_REST + testq %rax,%rax + jne 1f +@@ -1239,132 +1193,7 @@ ENTRY(kernel_execve) + UNFAKE_STACK_FRAME + ret + CFI_ENDPROC +-ENDPROC(kernel_execve) +- +-KPROBE_ENTRY(page_fault) +- errorentry do_page_fault +-KPROBE_END(page_fault) +- +-ENTRY(coprocessor_error) +- zeroentry do_coprocessor_error +-END(coprocessor_error) +- +-ENTRY(simd_coprocessor_error) +- zeroentry do_simd_coprocessor_error +-END(simd_coprocessor_error) +- +-ENTRY(device_not_available) +- zeroentry do_device_not_available +-END(device_not_available) +- +- /* runs on exception stack */ +-KPROBE_ENTRY(debug) +-/* INTR_FRAME +- PARAVIRT_ADJUST_EXCEPTION_FRAME +- pushq $0 +- CFI_ADJUST_CFA_OFFSET 8 */ +- zeroentry do_debug +-/* paranoidexit +- CFI_ENDPROC */ +-KPROBE_END(debug) +- +-KPROBE_ENTRY(nmi) +- zeroentry do_nmi_callback +-KPROBE_END(nmi) +-do_nmi_callback: +- CFI_STARTPROC +- addq $8, %rsp +- CFI_ENDPROC +- CFI_DEFAULT_STACK +- call do_nmi +- orl $NMI_MASK,EFLAGS(%rsp) +- RESTORE_REST +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- GET_THREAD_INFO(%rcx) +- jmp retint_restore_args +- CFI_ENDPROC +-END(do_nmi_callback) +- +-KPROBE_ENTRY(int3) +-/* INTR_FRAME +- PARAVIRT_ADJUST_EXCEPTION_FRAME +- pushq $0 +- CFI_ADJUST_CFA_OFFSET 8 */ +- zeroentry do_int3 +-/* jmp paranoid_exit1 +- CFI_ENDPROC */ +-KPROBE_END(int3) +- +-ENTRY(overflow) +- zeroentry do_overflow +-END(overflow) +- +-ENTRY(bounds) +- zeroentry do_bounds +-END(bounds) +- +-ENTRY(invalid_op) +- zeroentry do_invalid_op +-END(invalid_op) +- +-ENTRY(coprocessor_segment_overrun) +- zeroentry do_coprocessor_segment_overrun +-END(coprocessor_segment_overrun) +- +-#if 0 +- /* runs on exception stack */ +-ENTRY(double_fault) +- XCPT_FRAME +- PARAVIRT_ADJUST_EXCEPTION_FRAME +- paranoidentry do_double_fault +- jmp paranoid_exit1 +- CFI_ENDPROC +-END(double_fault) +-#endif +- +-ENTRY(invalid_TSS) +- errorentry do_invalid_TSS +-END(invalid_TSS) +- +-ENTRY(segment_not_present) +- errorentry do_segment_not_present +-END(segment_not_present) +- +- /* runs on exception stack */ +-ENTRY(stack_segment) +-/* XCPT_FRAME +- PARAVIRT_ADJUST_EXCEPTION_FRAME +- paranoidentry do_stack_segment */ +- errorentry do_stack_segment +-/* jmp paranoid_exit1 +- CFI_ENDPROC */ +-END(stack_segment) +- +-KPROBE_ENTRY(general_protection) +- errorentry do_general_protection +-KPROBE_END(general_protection) +- +-ENTRY(alignment_check) +- errorentry do_alignment_check +-END(alignment_check) +- +-ENTRY(divide_error) +- zeroentry do_divide_error +-END(divide_error) +- +-#ifndef CONFIG_XEN +-ENTRY(spurious_interrupt_bug) +- zeroentry do_spurious_interrupt_bug +-END(spurious_interrupt_bug) +-#endif +- +-#ifdef CONFIG_X86_MCE +- /* runs on exception stack */ +-KPROBE_ENTRY(machine_check) +- zeroentry do_machine_check +-END(machine_check) +-#endif ++END(kernel_execve) + + /* Call softirq on interrupt stack. Interrupts are off. 
*/ + ENTRY(call_softirq) +@@ -1384,24 +1213,191 @@ ENTRY(call_softirq) + decl %gs:pda_irqcount + ret + CFI_ENDPROC +-ENDPROC(call_softirq) ++END(call_softirq) ++ ++/* ++ * Some functions should be protected against kprobes ++ */ ++ .pushsection .kprobes.text, "ax" ++ ++paranoidzeroentry_ist debug do_debug DEBUG_STACK ++zeroentry nmi do_nmi_callback ++paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ++paranoiderrorentry stack_segment do_stack_segment ++errorentry general_protection do_general_protection ++errorentry page_fault do_page_fault ++#ifdef CONFIG_X86_MCE ++paranoidzeroentry machine_check do_machine_check ++#endif ++ ++#ifndef CONFIG_XEN ++ /* ++ * "Paranoid" exit path from exception stack. ++ * Paranoid because this is used by NMIs and cannot take ++ * any kernel state for granted. ++ * We don't do kernel preemption checks here, because only ++ * NMI should be common and it does not enable IRQs and ++ * cannot get reschedule ticks. ++ * ++ * "trace" is 0 for the NMI handler only, because irq-tracing ++ * is fundamentally NMI-unsafe. (we cannot change the soft and ++ * hard flags at once, atomically) ++ */ ++ ++ /* ebx: no swapgs flag */ ++ENTRY(paranoid_exit) ++ INTR_FRAME ++ DISABLE_INTERRUPTS(CLBR_NONE) ++ TRACE_IRQS_OFF ++ testl %ebx,%ebx /* swapgs needed? */ ++ jnz paranoid_restore ++ testl $3,CS(%rsp) ++ jnz paranoid_userspace ++paranoid_swapgs: ++ TRACE_IRQS_IRETQ 0 ++ SWAPGS_UNSAFE_STACK ++paranoid_restore: ++ RESTORE_ALL 8 ++ jmp irq_return ++paranoid_userspace: ++ GET_THREAD_INFO(%rcx) ++ movl TI_flags(%rcx),%ebx ++ andl $_TIF_WORK_MASK,%ebx ++ jz paranoid_swapgs ++ movq %rsp,%rdi /* &pt_regs */ ++ call sync_regs ++ movq %rax,%rsp /* switch stack for scheduling */ ++ testl $_TIF_NEED_RESCHED,%ebx ++ jnz paranoid_schedule ++ movl %ebx,%edx /* arg3: thread flags */ ++ TRACE_IRQS_ON ++ ENABLE_INTERRUPTS(CLBR_NONE) ++ xorl %esi,%esi /* arg2: oldset */ ++ movq %rsp,%rdi /* arg1: &pt_regs */ ++ call do_notify_resume ++ DISABLE_INTERRUPTS(CLBR_NONE) ++ TRACE_IRQS_OFF ++ jmp paranoid_userspace ++paranoid_schedule: ++ TRACE_IRQS_ON ++ ENABLE_INTERRUPTS(CLBR_ANY) ++ call schedule ++ DISABLE_INTERRUPTS(CLBR_ANY) ++ TRACE_IRQS_OFF ++ jmp paranoid_userspace ++ CFI_ENDPROC ++END(paranoid_exit) ++#endif ++ ++/* ++ * Exception entry point. This expects an error code/orig_rax on the stack. ++ * returns in "no swapgs flag" in %ebx. ++ */ ++ENTRY(error_entry) ++ XCPT_FRAME 2 ++ CFI_ADJUST_CFA_OFFSET 15*8 ++ /* oldrax contains error code */ ++ cld ++ movq_cfi rdi, RDI+8 ++ movq_cfi rsi, RSI+8 ++ movq_cfi rdx, RDX+8 ++ movq_cfi rcx, RCX+8 ++ movq_cfi rax, RAX+8 ++ movq_cfi r8, R8+8 ++ movq_cfi r9, R9+8 ++ movq_cfi r10, R10+8 ++ movq_cfi r11, R11+8 ++ movq_cfi rbx, RBX+8 ++ movq_cfi rbp, RBP+8 ++ movq_cfi r12, R12+8 ++ movq_cfi r13, R13+8 ++ movq_cfi r14, R14+8 ++ movq_cfi r15, R15+8 ++#ifndef CONFIG_XEN ++ xorl %ebx,%ebx ++ testl $3,CS+8(%rsp) ++ je error_kernelspace ++error_swapgs: ++ SWAPGS ++error_sti: ++#endif ++ TRACE_IRQS_OFF ++ ret ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++/* ++ * There are two places in the kernel that can potentially fault with ++ * usergs. Handle them here. The exception handlers after iret run with ++ * kernel gs again, so don't set the user space flag. B stepping K8s ++ * sometimes report an truncated RIP for IRET exceptions returning to ++ * compat mode. Check for these here too. 
++ */ ++error_kernelspace: ++ incl %ebx ++ leaq irq_return(%rip),%rcx ++ cmpq %rcx,RIP+8(%rsp) ++ je error_swapgs ++ movl %ecx,%ecx /* zero extend */ ++ cmpq %rcx,RIP+8(%rsp) ++ je error_swapgs ++ cmpq $gs_change,RIP+8(%rsp) ++ je error_swapgs ++ jmp error_sti ++#endif ++END(error_entry) ++ ++ ++ENTRY(error_exit) ++ DEFAULT_FRAME ++ RESTORE_REST ++ DISABLE_INTERRUPTS(CLBR_NONE) ++ TRACE_IRQS_OFF ++ GET_THREAD_INFO(%rcx) ++ testb $3,CS-ARGOFFSET(%rsp) ++ jz retint_kernel ++ LOCKDEP_SYS_EXIT_IRQ ++ movl TI_flags(%rcx),%edx ++ movl $_TIF_WORK_MASK,%edi ++ andl %edi,%edx ++ jnz retint_careful ++ jmp retint_restore_args ++ CFI_ENDPROC ++END(error_exit) ++ ++ ++do_nmi_callback: ++ CFI_STARTPROC ++ addq $8, %rsp ++ CFI_ENDPROC ++ DEFAULT_FRAME ++ call do_nmi ++ orl $NMI_MASK,EFLAGS(%rsp) ++ RESTORE_REST ++ DISABLE_INTERRUPTS(CLBR_NONE) ++ TRACE_IRQS_OFF ++ GET_THREAD_INFO(%rcx) ++ jmp retint_restore_args ++ CFI_ENDPROC ++END(do_nmi_callback) ++ + + #ifndef CONFIG_IA32_EMULATION +-KPROBE_ENTRY(ignore_sysret) +- CFI_STARTPROC simple +- CFI_SIGNAL_FRAME +- CFI_DEF_CFA rsp,SS+8-RIP+16 +-/* CFI_REL_OFFSET ss,SS-RIP+16 */ +- CFI_REL_OFFSET rsp,RSP-RIP+16 +-/* CFI_REL_OFFSET rflags,EFLAGS-RIP+16 */ +-/* CFI_REL_OFFSET cs,CS-RIP+16 */ +- CFI_REL_OFFSET rip,RIP-RIP+16 ++ENTRY(ignore_sysret) ++ INTR_FRAME + popq %rcx + CFI_ADJUST_CFA_OFFSET -8 ++ CFI_RESTORE rcx + popq %r11 + CFI_ADJUST_CFA_OFFSET -8 ++ CFI_RESTORE r11 + mov $-ENOSYS,%eax + HYPERVISOR_IRET 0 + CFI_ENDPROC +-ENDPROC(ignore_sysret) ++END(ignore_sysret) + #endif ++ ++/* ++ * End of kprobes section ++ */ ++ .popsection +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/head-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/head-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -36,7 +36,6 @@ void __init reserve_ebda_region(void) + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); +- printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. 
*/ +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/head32-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/head32-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -12,9 +12,12 @@ + #include + #include + #include ++#include + + void __init i386_start_kernel(void) + { ++ reserve_trampoline_memory(); ++ + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + + #ifndef CONFIG_XEN +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/head64-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/head64-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -31,9 +31,10 @@ + #include + #include + #include ++#include + + /* boot cpu pda */ +-static struct x8664_pda _boot_cpu_pda __read_mostly; ++static struct x8664_pda _boot_cpu_pda; + + #ifdef CONFIG_SMP + /* +@@ -163,6 +164,8 @@ void __init x86_64_start_reservations(ch + { + copy_bootdata(__va(real_mode_data)); + ++ reserve_trampoline_memory(); ++ + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + + reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE), +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -122,102 +122,276 @@ static int __init parse_noapic(char *str + } + early_param("noapic", parse_noapic); + ++#ifndef CONFIG_XEN + struct irq_pin_list; ++ ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * the indexing order of this array favors 1:1 mappings ++ * between pins and IRQs. ++ */ ++ ++struct irq_pin_list { ++ int apic, pin; ++ struct irq_pin_list *next; ++}; ++ ++static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) ++{ ++ struct irq_pin_list *pin; ++ int node; ++ ++ node = cpu_to_node(cpu); ++ ++ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); ++ ++ return pin; ++} ++ + struct irq_cfg { +-#ifndef CONFIG_XEN +- unsigned int irq; + struct irq_pin_list *irq_2_pin; +- cpumask_t domain; +- cpumask_t old_domain; ++ cpumask_var_t domain; ++ cpumask_var_t old_domain; + unsigned move_cleanup_count; +-#endif + u8 vector; +-#ifndef CONFIG_XEN + u8 move_in_progress : 1; ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ u8 move_desc_pending : 1; + #endif + }; + + /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. 
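+ * With CONFIG_SPARSE_IRQ (introduced below) the static table only seeds
+ * the sixteen legacy entries; any further irq_cfg is kzalloc'd on the
+ * node of the cpu that first needs it (see get_one_free_irq_cfg).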
*/ ++#ifdef CONFIG_SPARSE_IRQ ++static struct irq_cfg irq_cfgx[] = { ++#else + static struct irq_cfg irq_cfgx[NR_IRQS] = { +- [0] = { .irq = 0 }, +- [1] = { .irq = 1 }, +- [2] = { .irq = 2 }, +- [3] = { .irq = 3 }, +- [4] = { .irq = 4 }, +- [5] = { .irq = 5 }, +- [6] = { .irq = 6 }, +- [7] = { .irq = 7 }, +- [8] = { .irq = 8 }, +- [9] = { .irq = 9 }, +- [10] = { .irq = 10 }, +- [11] = { .irq = 11 }, +- [12] = { .irq = 12 }, +- [13] = { .irq = 13 }, +- [14] = { .irq = 14 }, +- [15] = { .irq = 15 }, ++#endif ++ [0] = { .vector = IRQ0_VECTOR, }, ++ [1] = { .vector = IRQ1_VECTOR, }, ++ [2] = { .vector = IRQ2_VECTOR, }, ++ [3] = { .vector = IRQ3_VECTOR, }, ++ [4] = { .vector = IRQ4_VECTOR, }, ++ [5] = { .vector = IRQ5_VECTOR, }, ++ [6] = { .vector = IRQ6_VECTOR, }, ++ [7] = { .vector = IRQ7_VECTOR, }, ++ [8] = { .vector = IRQ8_VECTOR, }, ++ [9] = { .vector = IRQ9_VECTOR, }, ++ [10] = { .vector = IRQ10_VECTOR, }, ++ [11] = { .vector = IRQ11_VECTOR, }, ++ [12] = { .vector = IRQ12_VECTOR, }, ++ [13] = { .vector = IRQ13_VECTOR, }, ++ [14] = { .vector = IRQ14_VECTOR, }, ++ [15] = { .vector = IRQ15_VECTOR, }, + }; + +-#define for_each_irq_cfg(irq, cfg) \ +- for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) ++int __init arch_early_irq_init(void) ++{ ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ int count; ++ int i; ++ ++ cfg = irq_cfgx; ++ count = ARRAY_SIZE(irq_cfgx); + ++ for (i = 0; i < count; i++) { ++ desc = irq_to_desc(i); ++ desc->chip_data = &cfg[i]; ++ alloc_bootmem_cpumask_var(&cfg[i].domain); ++ alloc_bootmem_cpumask_var(&cfg[i].old_domain); ++ if (i < NR_IRQS_LEGACY) ++ cpumask_setall(cfg[i].domain); ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_SPARSE_IRQ + static struct irq_cfg *irq_cfg(unsigned int irq) + { +- return irq < nr_irqs ? irq_cfgx + irq : NULL; ++ struct irq_cfg *cfg = NULL; ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ if (desc) ++ cfg = desc->chip_data; ++ ++ return cfg; + } + +-static struct irq_cfg *irq_cfg_alloc(unsigned int irq) ++static struct irq_cfg *get_one_free_irq_cfg(int cpu) + { +- return irq_cfg(irq); ++ struct irq_cfg *cfg; ++ int node; ++ ++ node = cpu_to_node(cpu); ++ ++ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); ++ if (cfg) { ++ if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { ++ kfree(cfg); ++ cfg = NULL; ++ } else if (!alloc_cpumask_var_node(&cfg->old_domain, ++ GFP_ATOMIC, node)) { ++ free_cpumask_var(cfg->domain); ++ kfree(cfg); ++ cfg = NULL; ++ } else { ++ cpumask_clear(cfg->domain); ++ cpumask_clear(cfg->old_domain); ++ } ++ } ++ ++ return cfg; + } + +-#ifdef CONFIG_XEN +-#define irq_2_pin_init() +-#define add_pin_to_irq(irq, apic, pin) +-#else +-/* +- * Rough estimation of how many shared IRQs there are, can be changed +- * anytime. +- */ +-#define MAX_PLUS_SHARED_IRQS NR_IRQS +-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) ++int arch_init_chip_data(struct irq_desc *desc, int cpu) ++{ ++ struct irq_cfg *cfg; + +-/* +- * This is performance-critical, we want to do it O(1) +- * +- * the indexing order of this array favors 1:1 mappings +- * between pins and IRQs. 
+- */ ++ cfg = desc->chip_data; ++ if (!cfg) { ++ desc->chip_data = get_one_free_irq_cfg(cpu); ++ if (!desc->chip_data) { ++ printk(KERN_ERR "can not alloc irq_cfg\n"); ++ BUG_ON(1); ++ } ++ } + +-struct irq_pin_list { +- int apic, pin; +- struct irq_pin_list *next; +-}; ++ return 0; ++} + +-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; +-static struct irq_pin_list *irq_2_pin_ptr; ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + +-static void __init irq_2_pin_init(void) ++static void ++init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) + { +- struct irq_pin_list *pin = irq_2_pin_head; +- int i; ++ struct irq_pin_list *old_entry, *head, *tail, *entry; ++ ++ cfg->irq_2_pin = NULL; ++ old_entry = old_cfg->irq_2_pin; ++ if (!old_entry) ++ return; ++ ++ entry = get_one_free_irq_2_pin(cpu); ++ if (!entry) ++ return; ++ ++ entry->apic = old_entry->apic; ++ entry->pin = old_entry->pin; ++ head = entry; ++ tail = entry; ++ old_entry = old_entry->next; ++ while (old_entry) { ++ entry = get_one_free_irq_2_pin(cpu); ++ if (!entry) { ++ entry = head; ++ while (entry) { ++ head = entry->next; ++ kfree(entry); ++ entry = head; ++ } ++ /* still use the old one */ ++ return; ++ } ++ entry->apic = old_entry->apic; ++ entry->pin = old_entry->pin; ++ tail->next = entry; ++ tail = entry; ++ old_entry = old_entry->next; ++ } + +- for (i = 1; i < PIN_MAP_SIZE; i++) +- pin[i-1].next = &pin[i]; ++ tail->next = NULL; ++ cfg->irq_2_pin = head; ++} ++ ++static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) ++{ ++ struct irq_pin_list *entry, *next; ++ ++ if (old_cfg->irq_2_pin == cfg->irq_2_pin) ++ return; ++ ++ entry = old_cfg->irq_2_pin; + +- irq_2_pin_ptr = &pin[0]; ++ while (entry) { ++ next = entry->next; ++ kfree(entry); ++ entry = next; ++ } ++ old_cfg->irq_2_pin = NULL; + } + +-static struct irq_pin_list *get_one_free_irq_2_pin(void) ++void arch_init_copy_chip_data(struct irq_desc *old_desc, ++ struct irq_desc *desc, int cpu) + { +- struct irq_pin_list *pin = irq_2_pin_ptr; ++ struct irq_cfg *cfg; ++ struct irq_cfg *old_cfg; + +- if (!pin) +- panic("can not get more irq_2_pin\n"); ++ cfg = get_one_free_irq_cfg(cpu); + +- irq_2_pin_ptr = pin->next; +- pin->next = NULL; +- return pin; ++ if (!cfg) ++ return; ++ ++ desc->chip_data = cfg; ++ ++ old_cfg = old_desc->chip_data; ++ ++ memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); ++ ++ init_copy_irq_2_pin(old_cfg, cfg, cpu); ++} ++ ++static void free_irq_cfg(struct irq_cfg *old_cfg) ++{ ++ kfree(old_cfg); ++} ++ ++void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) ++{ ++ struct irq_cfg *old_cfg, *cfg; ++ ++ old_cfg = old_desc->chip_data; ++ cfg = desc->chip_data; ++ ++ if (old_cfg == cfg) ++ return; ++ ++ if (old_cfg) { ++ free_irq_2_pin(old_cfg, cfg); ++ free_irq_cfg(old_cfg); ++ old_desc->chip_data = NULL; ++ } ++} ++ ++static void ++set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ struct irq_cfg *cfg = desc->chip_data; ++ ++ if (!cfg->move_in_progress) { ++ /* it means that domain is not changed */ ++ if (!cpumask_intersects(&desc->affinity, mask)) ++ cfg->move_desc_pending = 1; ++ } ++} ++#endif ++ ++#else ++static struct irq_cfg *irq_cfg(unsigned int irq) ++{ ++ return irq < nr_irqs ? 
irq_cfgx + irq : NULL; ++} ++ ++#endif ++ ++#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC ++static inline void ++set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) ++{ + } ++#endif + + struct io_apic { + unsigned int index; +@@ -230,7 +404,7 @@ static __attribute_const__ struct io_api + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); + } +-#endif ++#endif /* CONFIG_XEN */ + + static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) + { +@@ -285,11 +459,10 @@ static inline void io_apic_modify(unsign + writel(value, &io_apic->data); + } + +-static bool io_apic_level_ack_pending(unsigned int irq) ++static bool io_apic_level_ack_pending(struct irq_cfg *cfg) + { + struct irq_pin_list *entry; + unsigned long flags; +- struct irq_cfg *cfg = irq_cfg(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + entry = cfg->irq_2_pin; +@@ -375,13 +548,32 @@ static void ioapic_mask_entry(int apic, + } + + #ifdef CONFIG_SMP +-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) ++static void send_cleanup_vector(struct irq_cfg *cfg) ++{ ++ cpumask_var_t cleanup_mask; ++ ++ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { ++ unsigned int i; ++ cfg->move_cleanup_count = 0; ++ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) ++ cfg->move_cleanup_count++; ++ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) ++ send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); ++ } else { ++ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); ++ cfg->move_cleanup_count = cpumask_weight(cleanup_mask); ++ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ free_cpumask_var(cleanup_mask); ++ } ++ cfg->move_in_progress = 0; ++} ++ ++static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) + { + int apic, pin; +- struct irq_cfg *cfg; + struct irq_pin_list *entry; ++ u8 vector = cfg->vector; + +- cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; +@@ -411,36 +603,61 @@ static void __target_IO_APIC_irq(unsigne + } + } + +-static int assign_irq_vector(int irq, cpumask_t mask); ++static int ++assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); ++ ++/* ++ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid ++ * of that, or returns BAD_APICID and leaves desc->affinity untouched. 
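++ *
++ * Typical caller pattern (as used by the set_*_affinity methods below):
++ *
++ *	dest = set_desc_affinity(desc, mask);
++ *	if (dest == BAD_APICID)
++ *		return;
++ *	... reprogram the vector/destination in hardware ...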
++ */ ++static unsigned int ++set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ struct irq_cfg *cfg; ++ unsigned int irq; ++ ++ if (!cpumask_intersects(mask, cpu_online_mask)) ++ return BAD_APICID; ++ ++ irq = desc->irq; ++ cfg = desc->chip_data; ++ if (assign_irq_vector(irq, cfg, mask)) ++ return BAD_APICID; ++ ++ cpumask_and(&desc->affinity, cfg->domain, mask); ++ set_extra_move_desc(desc, mask); ++ return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); ++} + +-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++static void ++set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) + { + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; +- cpumask_t tmp; +- struct irq_desc *desc; ++ unsigned int irq; + +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- return; ++ irq = desc->irq; ++ cfg = desc->chip_data; + +- cfg = irq_cfg(irq); +- if (assign_irq_vector(irq, mask)) +- return; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ dest = set_desc_affinity(desc, mask); ++ if (dest != BAD_APICID) { ++ /* Only the high 8 bits are valid. */ ++ dest = SET_APIC_LOGICAL_ID(dest); ++ __target_IO_APIC_irq(irq, dest, cfg); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} + +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); +- /* +- * Only the high 8 bits are valid. +- */ +- dest = SET_APIC_LOGICAL_ID(dest); ++static void ++set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) ++{ ++ struct irq_desc *desc; + + desc = irq_to_desc(irq); +- spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, cfg->vector); +- desc->affinity = mask; +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ set_ioapic_affinity_irq_desc(desc, mask); + } + #endif /* CONFIG_SMP */ + +@@ -449,16 +666,18 @@ static void set_ioapic_affinity_irq(unsi + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. + */ +-static void add_pin_to_irq(unsigned int irq, int apic, int pin) ++static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) + { +- struct irq_cfg *cfg; + struct irq_pin_list *entry; + +- /* first time to refer irq_cfg, so with new */ +- cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { +- entry = get_one_free_irq_2_pin(); ++ entry = get_one_free_irq_2_pin(cpu); ++ if (!entry) { ++ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", ++ apic, pin); ++ return; ++ } + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; +@@ -473,7 +692,7 @@ static void add_pin_to_irq(unsigned int + entry = entry->next; + } + +- entry->next = get_one_free_irq_2_pin(); ++ entry->next = get_one_free_irq_2_pin(cpu); + entry = entry->next; + entry->apic = apic; + entry->pin = pin; +@@ -482,11 +701,10 @@ static void add_pin_to_irq(unsigned int + /* + * Reroute an IRQ to a different pin. + */ +-static void __init replace_pin_at_irq(unsigned int irq, ++static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, + int oldapic, int oldpin, + int newapic, int newpin) + { +- struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; + +@@ -503,18 +721,16 @@ static void __init replace_pin_at_irq(un + + /* why? call replace before add? 
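+ * (If the old apic/pin pair was never in the list there is nothing to
+ * replace, and the code below simply adds the new pin instead.)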
*/ + if (!replaced) +- add_pin_to_irq(irq, newapic, newpin); ++ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + } + +-static inline void io_apic_modify_irq(unsigned int irq, ++static inline void io_apic_modify_irq(struct irq_cfg *cfg, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) + { + int pin; +- struct irq_cfg *cfg; + struct irq_pin_list *entry; + +- cfg = irq_cfg(irq); + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + unsigned int reg; + pin = entry->pin; +@@ -527,13 +743,13 @@ static inline void io_apic_modify_irq(un + } + } + +-static void __unmask_IO_APIC_irq(unsigned int irq) ++static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) + { +- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); ++ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); + } + + #ifdef CONFIG_X86_64 +-void io_apic_sync(struct irq_pin_list *entry) ++static void io_apic_sync(struct irq_pin_list *entry) + { + /* + * Synchronize the IO-APIC and the CPU by doing +@@ -544,47 +760,64 @@ void io_apic_sync(struct irq_pin_list *e + readl(&io_apic->data); + } + +-static void __mask_IO_APIC_irq(unsigned int irq) ++static void __mask_IO_APIC_irq(struct irq_cfg *cfg) + { +- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + } + #else /* CONFIG_X86_32 */ +-static void __mask_IO_APIC_irq(unsigned int irq) ++static void __mask_IO_APIC_irq(struct irq_cfg *cfg) + { +- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); ++ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); + } + +-static void __mask_and_edge_IO_APIC_irq(unsigned int irq) ++static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) + { +- io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, ++ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); + } + +-static void __unmask_and_level_IO_APIC_irq(unsigned int irq) ++static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) + { +- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, ++ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); + } + #endif /* CONFIG_X86_32 */ + +-static void mask_IO_APIC_irq (unsigned int irq) ++static void mask_IO_APIC_irq_desc(struct irq_desc *desc) + { ++ struct irq_cfg *cfg = desc->chip_data; + unsigned long flags; + ++ BUG_ON(!cfg); ++ + spin_lock_irqsave(&ioapic_lock, flags); +- __mask_IO_APIC_irq(irq); ++ __mask_IO_APIC_irq(cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +-static void unmask_IO_APIC_irq (unsigned int irq) ++static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) + { ++ struct irq_cfg *cfg = desc->chip_data; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); +- __unmask_IO_APIC_irq(irq); ++ __unmask_IO_APIC_irq(cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + ++static void mask_IO_APIC_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ mask_IO_APIC_irq_desc(desc); ++} ++static void unmask_IO_APIC_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ unmask_IO_APIC_irq_desc(desc); ++} ++ + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) + { + struct IO_APIC_route_entry entry; +@@ -624,6 +857,8 @@ void send_IPI_self(int vector) + apic_write(APIC_ICR, cfg); + } + #endif /* !CONFIG_SMP && CONFIG_X86_32*/ ++#else ++#define add_pin_to_irq_cpu(cfg, cpu, apic, pin) + #endif /* CONFIG_XEN */ + + #ifdef CONFIG_X86_32 +@@ -864,7 +1099,7 @@ 
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector + */ + static int EISA_ELCR(unsigned int irq) + { +- if (irq < 16) { ++ if (irq < NR_IRQS_LEGACY) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } +@@ -1089,52 +1324,118 @@ void unlock_vector_lock(void) + { + spin_unlock(&vector_lock); + } +-#endif + +-static int assign_irq_vector(int irq, cpumask_t mask) ++static int ++__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) + { +- struct physdev_irq irq_op; +- struct irq_cfg *cfg; +- +- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) +- return -EINVAL; ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. ;) ++ */ ++ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; ++ unsigned int old_vector; ++ int cpu, err; ++ cpumask_var_t tmp_mask; ++ ++ if ((cfg->move_in_progress) || cfg->move_cleanup_count) ++ return -EBUSY; ++ ++ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) ++ return -ENOMEM; ++ ++ old_vector = cfg->vector; ++ if (old_vector) { ++ cpumask_and(tmp_mask, mask, cpu_online_mask); ++ cpumask_and(tmp_mask, cfg->domain, tmp_mask); ++ if (!cpumask_empty(tmp_mask)) { ++ free_cpumask_var(tmp_mask); ++ return 0; ++ } ++ } + +- cfg = irq_cfg(irq); ++ /* Only try and allocate irqs on cpus that are present */ ++ err = -ENOSPC; ++ for_each_cpu_and(cpu, mask, cpu_online_mask) { ++ int new_cpu; ++ int vector, offset; ++ ++ vector_allocation_domain(cpu, tmp_mask); ++ ++ vector = current_vector; ++ offset = current_offset; ++next: ++ vector += 8; ++ if (vector >= first_system_vector) { ++ /* If out of vectors on large boxen, must share them. */ ++ offset = (offset + 1) % 8; ++ vector = FIRST_DEVICE_VECTOR + offset; ++ } ++ if (unlikely(current_vector == vector)) ++ continue; + +- if (cfg->vector) +- return 0; ++ if (test_bit(vector, used_vectors)) ++ goto next; + +- irq_op.irq = irq; +- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) +- return -ENOSPC; ++#ifdef CONFIG_KDB ++ if (vector == KDBENTER_VECTOR) ++ goto next; ++#endif /* CONFIG_KDB */ ++ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) ++ if (per_cpu(vector_irq, new_cpu)[vector] != -1) ++ goto next; ++ /* Found one! 
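++ * Record the slot as the next search hint, remember the old domain if
++ * a migration is now in flight, and publish the irq in vector_irq on
++ * every cpu of the new domain.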
*/ ++ current_vector = vector; ++ current_offset = offset; ++ if (old_vector) { ++ cfg->move_in_progress = 1; ++ cpumask_copy(cfg->old_domain, cfg->domain); ++ } ++ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) ++ per_cpu(vector_irq, new_cpu)[vector] = irq; ++ cfg->vector = vector; ++ cpumask_copy(cfg->domain, tmp_mask); ++ err = 0; ++ break; ++ } ++ free_cpumask_var(tmp_mask); ++ return err; ++} + +- cfg->vector = irq_op.vector; ++static int ++assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) ++{ ++ int err; ++ unsigned long flags; + +- return 0; ++ spin_lock_irqsave(&vector_lock, flags); ++ err = __assign_irq_vector(irq, cfg, mask); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return err; + } + +-#ifndef CONFIG_XEN +-static void __clear_irq_vector(int irq) ++static void __clear_irq_vector(int irq, struct irq_cfg *cfg) + { +- struct irq_cfg *cfg; +- cpumask_t mask; + int cpu, vector; + +- cfg = irq_cfg(irq); + BUG_ON(!cfg->vector); + + vector = cfg->vector; +- cpus_and(mask, cfg->domain, cpu_online_map); +- for_each_cpu_mask_nr(cpu, mask) ++ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + cfg->vector = 0; +- cpus_clear(cfg->domain); ++ cpumask_clear(cfg->domain); + + if (likely(!cfg->move_in_progress)) + return; +- cpus_and(mask, cfg->old_domain, cpu_online_map); +- for_each_cpu_mask_nr(cpu, mask) { ++ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != irq) +@@ -1152,10 +1453,12 @@ void __setup_vector_irq(int cpu) + /* This function must be called with vector_lock held */ + int irq, vector; + struct irq_cfg *cfg; ++ struct irq_desc *desc; + + /* Mark the inuse vectors */ +- for_each_irq_cfg(irq, cfg) { +- if (!cpu_isset(cpu, cfg->domain)) ++ for_each_irq_desc(irq, desc) { ++ cfg = desc->chip_data; ++ if (!cpumask_test_cpu(cpu, cfg->domain)) + continue; + vector = cfg->vector; + per_cpu(vector_irq, cpu)[vector] = irq; +@@ -1167,7 +1470,7 @@ void __setup_vector_irq(int cpu) + continue; + + cfg = irq_cfg(irq); +- if (!cpu_isset(cpu, cfg->domain)) ++ if (!cpumask_test_cpu(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } + } +@@ -1205,11 +1508,8 @@ static inline int IO_APIC_irq_trigger(in + } + #endif + +-static void ioapic_register_intr(int irq, unsigned long trigger) ++static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) + { +- struct irq_desc *desc; +- +- desc = irq_to_desc(irq); + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) +@@ -1240,8 +1540,8 @@ static void ioapic_register_intr(int irq + handle_edge_irq, "edge"); + } + #else /* !CONFIG_XEN */ +-#define __clear_irq_vector(irq) ((void)(irq)) +-#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq) ++#define __clear_irq_vector(irq, cfg) ((void)0) ++#define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq) + #endif + + static int setup_ioapic_entry(int apic, int irq, +@@ -1305,24 +1605,25 @@ static int setup_ioapic_entry(int apic, + return 0; + } + +-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, ++static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, + int trigger, int polarity) + { + struct irq_cfg *cfg; + struct IO_APIC_route_entry entry; +- cpumask_t mask; ++ unsigned int dest; + + if (!IO_APIC_IRQ(irq)) + return; + +- cfg = irq_cfg(irq); ++ cfg = 
desc->chip_data; + +- mask = TARGET_CPUS; +- if (assign_irq_vector(irq, mask)) ++ if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + return; + + #ifndef CONFIG_XEN +- cpus_and(mask, cfg->domain, mask); ++ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); ++#else ++ dest = cpu_mask_to_apicid(TARGET_CPUS); + #endif + + apic_printk(APIC_VERBOSE,KERN_DEBUG +@@ -1333,16 +1634,15 @@ static void setup_IO_APIC_irq(int apic, + + + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, +- cpu_mask_to_apicid(mask), trigger, polarity, +- cfg->vector)) { ++ dest, trigger, polarity, cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); +- __clear_irq_vector(irq); ++ __clear_irq_vector(irq, cfg); + return; + } + +- ioapic_register_intr(irq, trigger); +- if (irq < 16) ++ ioapic_register_intr(irq, desc, trigger); ++ if (irq < NR_IRQS_LEGACY) + disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +@@ -1352,6 +1652,9 @@ static void __init setup_IO_APIC_irqs(vo + { + int apic, pin, idx, irq; + int notcon = 0; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ int cpu = boot_cpu_id; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + +@@ -1386,9 +1689,15 @@ static void __init setup_IO_APIC_irqs(vo + if (multi_timer_check(apic, irq)) + continue; + #endif +- add_pin_to_irq(irq, apic, pin); ++ desc = irq_to_desc_alloc_cpu(irq, cpu); ++ if (!desc) { ++ printk(KERN_INFO "can not get irq_desc for %d\n", irq); ++ continue; ++ } ++ cfg = desc->chip_data; ++ add_pin_to_irq_cpu(cfg, cpu, apic, pin); + +- setup_IO_APIC_irq(apic, pin, irq, ++ setup_IO_APIC_irq(apic, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); + } + } +@@ -1448,6 +1757,7 @@ __apicdebuginit(void) print_IO_APIC(void + union IO_APIC_reg_03 reg_03; + unsigned long flags; + struct irq_cfg *cfg; ++ struct irq_desc *desc; + unsigned int irq; + + if (apic_verbosity == APIC_QUIET) +@@ -1537,8 +1847,11 @@ __apicdebuginit(void) print_IO_APIC(void + } + } + printk(KERN_DEBUG "IRQ to pin mappings:\n"); +- for_each_irq_cfg(irq, cfg) { +- struct irq_pin_list *entry = cfg->irq_2_pin; ++ for_each_irq_desc(irq, desc) { ++ struct irq_pin_list *entry; ++ ++ cfg = desc->chip_data; ++ entry = cfg->irq_2_pin; + if (!entry) + continue; + printk(KERN_DEBUG "IRQ%d ", irq); +@@ -2030,14 +2343,16 @@ static unsigned int startup_ioapic_irq(u + { + int was_pending = 0; + unsigned long flags; ++ struct irq_cfg *cfg; + + spin_lock_irqsave(&ioapic_lock, flags); +- if (irq < 16) { ++ if (irq < NR_IRQS_LEGACY) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; + } +- __unmask_IO_APIC_irq(irq); ++ cfg = irq_cfg(irq); ++ __unmask_IO_APIC_irq(cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +@@ -2051,7 +2366,7 @@ static int ioapic_retrigger_irq(unsigned + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); +- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); ++ send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +@@ -2100,35 +2415,35 @@ static DECLARE_DELAYED_WORK(ir_migration + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. 
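+ * (The RTE itself is only rewritten for level triggered irqs, under
+ * ioapic_lock; edge triggered ones only need the updated IRTE.)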
+ */ +-static void migrate_ioapic_irq(int irq, cpumask_t mask) ++static void ++migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) + { + struct irq_cfg *cfg; +- struct irq_desc *desc; +- cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte; + unsigned int dest; + unsigned long flags; ++ unsigned int irq; + +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) ++ if (!cpumask_intersects(mask, cpu_online_mask)) + return; + ++ irq = desc->irq; + if (get_irte(irq, &irte)) + return; + +- if (assign_irq_vector(irq, mask)) ++ cfg = desc->chip_data; ++ if (assign_irq_vector(irq, cfg, mask)) + return; + +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); ++ set_extra_move_desc(desc, mask); ++ ++ dest = cpu_mask_to_apicid_and(cfg->domain, mask); + +- desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, cfg->vector); ++ __target_IO_APIC_irq(irq, dest, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -2140,24 +2455,20 @@ static void migrate_ioapic_irq(int irq, + */ + modify_irte(irq, &irte); + +- if (cfg->move_in_progress) { +- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); +- cfg->move_cleanup_count = cpus_weight(cleanup_mask); +- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); +- cfg->move_in_progress = 0; +- } ++ if (cfg->move_in_progress) ++ send_cleanup_vector(cfg); + +- desc->affinity = mask; ++ cpumask_copy(&desc->affinity, mask); + } + +-static int migrate_irq_remapped_level(int irq) ++static int migrate_irq_remapped_level_desc(struct irq_desc *desc) + { + int ret = -1; +- struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg = desc->chip_data; + +- mask_IO_APIC_irq(irq); ++ mask_IO_APIC_irq_desc(desc); + +- if (io_apic_level_ack_pending(irq)) { ++ if (io_apic_level_ack_pending(cfg)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse +@@ -2169,14 +2480,15 @@ static int migrate_irq_remapped_level(in + } + + /* everthing is clear. we have right of way */ +- migrate_ioapic_irq(irq, desc->pending_mask); ++ migrate_ioapic_irq_desc(desc, &desc->pending_mask); + + ret = 0; + desc->status &= ~IRQ_MOVE_PENDING; +- cpus_clear(desc->pending_mask); ++ cpumask_clear(&desc->pending_mask); + + unmask: +- unmask_IO_APIC_irq(irq); ++ unmask_IO_APIC_irq_desc(desc); ++ + return ret; + } + +@@ -2197,7 +2509,7 @@ static void ir_irq_migration(struct work + continue; + } + +- desc->chip->set_affinity(irq, desc->pending_mask); ++ desc->chip->set_affinity(irq, &desc->pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +@@ -2206,28 +2518,33 @@ static void ir_irq_migration(struct work + /* + * Migrates the IRQ destination in the process context. 
+ */ +-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, ++ const struct cpumask *mask) + { +- struct irq_desc *desc = irq_to_desc(irq); +- + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; +- desc->pending_mask = mask; +- migrate_irq_remapped_level(irq); ++ cpumask_copy(&desc->pending_mask, mask); ++ migrate_irq_remapped_level_desc(desc); + return; + } + +- migrate_ioapic_irq(irq, mask); ++ migrate_ioapic_irq_desc(desc, mask); ++} ++static void set_ir_ioapic_affinity_irq(unsigned int irq, ++ const struct cpumask *mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ set_ir_ioapic_affinity_irq_desc(desc, mask); + } + #endif + + asmlinkage void smp_irq_move_cleanup_interrupt(void) + { + unsigned vector, me; ++ + ack_APIC_irq(); +-#ifdef CONFIG_X86_64 + exit_idle(); +-#endif + irq_enter(); + + me = smp_processor_id(); +@@ -2237,6 +2554,9 @@ asmlinkage void smp_irq_move_cleanup_int + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + ++ if (irq == -1) ++ continue; ++ + desc = irq_to_desc(irq); + if (!desc) + continue; +@@ -2246,7 +2566,7 @@ asmlinkage void smp_irq_move_cleanup_int + if (!cfg->move_cleanup_count) + goto unlock; + +- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) ++ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; +@@ -2258,28 +2578,45 @@ unlock: + irq_exit(); + } + +-static void irq_complete_move(unsigned int irq) ++static void irq_complete_move(struct irq_desc **descp) + { +- struct irq_cfg *cfg = irq_cfg(irq); ++ struct irq_desc *desc = *descp; ++ struct irq_cfg *cfg = desc->chip_data; + unsigned vector, me; + +- if (likely(!cfg->move_in_progress)) ++ if (likely(!cfg->move_in_progress)) { ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ if (likely(!cfg->move_desc_pending)) ++ return; ++ ++ /* domain has not changed, but affinity did */ ++ me = smp_processor_id(); ++ if (cpu_isset(me, desc->affinity)) { ++ *descp = desc = move_irq_desc(desc, me); ++ /* get the new one */ ++ cfg = desc->chip_data; ++ cfg->move_desc_pending = 0; ++ } ++#endif + return; ++ } + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); +- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { +- cpumask_t cleanup_mask; + +- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); +- cfg->move_cleanup_count = cpus_weight(cleanup_mask); +- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); +- cfg->move_in_progress = 0; ++ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ *descp = desc = move_irq_desc(desc, me); ++ /* get the new one */ ++ cfg = desc->chip_data; ++#endif ++ send_cleanup_vector(cfg); + } + } + #else +-static inline void irq_complete_move(unsigned int irq) {} ++static inline void irq_complete_move(struct irq_desc **descp) {} + #endif ++ + #ifdef CONFIG_INTR_REMAP + static void ack_x2apic_level(unsigned int irq) + { +@@ -2290,11 +2627,14 @@ static void ack_x2apic_edge(unsigned int + { + ack_x2APIC_irq(); + } ++ + #endif + + static void ack_apic_edge(unsigned int irq) + { +- irq_complete_move(irq); ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ irq_complete_move(&desc); + move_native_irq(irq); + ack_APIC_irq(); + } +@@ -2303,18 +2643,21 @@ atomic_t irq_mis_count; + + static void ack_apic_level(unsigned int irq) + { ++ struct irq_desc *desc = irq_to_desc(irq); ++ + #ifdef CONFIG_X86_32 + unsigned long v; + 
int i; + #endif ++ struct irq_cfg *cfg; + int do_unmask_irq = 0; + +- irq_complete_move(irq); ++ irq_complete_move(&desc); + #ifdef CONFIG_GENERIC_PENDING_IRQ + /* If we are moving the irq we need to mask it */ +- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { ++ if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; +- mask_IO_APIC_irq(irq); ++ mask_IO_APIC_irq_desc(desc); + } + #endif + +@@ -2338,7 +2681,8 @@ static void ack_apic_level(unsigned int + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ +- i = irq_cfg(irq)->vector; ++ cfg = desc->chip_data; ++ i = cfg->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + #endif +@@ -2377,17 +2721,18 @@ static void ack_apic_level(unsigned int + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ +- if (!io_apic_level_ack_pending(irq)) ++ cfg = desc->chip_data; ++ if (!io_apic_level_ack_pending(cfg)) + move_masked_irq(irq); +- unmask_IO_APIC_irq(irq); ++ unmask_IO_APIC_irq_desc(desc); + } + + #ifdef CONFIG_X86_32 + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); +- __mask_and_edge_IO_APIC_irq(irq); +- __unmask_and_level_IO_APIC_irq(irq); ++ __mask_and_edge_IO_APIC_irq(cfg); ++ __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } + #endif +@@ -2439,24 +2784,23 @@ static inline void init_IO_APIC_traps(vo + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ +- for_each_irq_cfg(irq, cfg) { ++ for_each_irq_desc(irq, desc) { + #ifdef CONFIG_XEN + if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) + continue; + #endif +- if (IO_APIC_IRQ(irq) && !cfg->vector) { ++ cfg = desc->chip_data; ++ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ +- if (irq < 16) ++ if (irq < NR_IRQS_LEGACY) + make_8259A_irq(irq); +- else { +- desc = irq_to_desc(irq); ++ else + /* Strange. Oh, well.. */ + desc->chip = &no_irq_chip; +- } + } + } + } +@@ -2482,7 +2826,7 @@ static void unmask_lapic_irq(unsigned in + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); + } + +-static void ack_lapic_irq (unsigned int irq) ++static void ack_lapic_irq(unsigned int irq) + { + ack_APIC_irq(); + } +@@ -2494,11 +2838,8 @@ static struct irq_chip lapic_chip __read + .ack = ack_lapic_irq, + }; + +-static void lapic_register_intr(int irq) ++static void lapic_register_intr(int irq, struct irq_desc *desc) + { +- struct irq_desc *desc; +- +- desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; + set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, + "edge"); +@@ -2602,7 +2943,9 @@ int timer_through_8259 __initdata; + */ + static inline void __init check_timer(void) + { +- struct irq_cfg *cfg = irq_cfg(0); ++ struct irq_desc *desc = irq_to_desc(0); ++ struct irq_cfg *cfg = desc->chip_data; ++ int cpu = boot_cpu_id; + int apic1, pin1, apic2, pin2; + unsigned long flags; + unsigned int ver; +@@ -2617,7 +2960,7 @@ static inline void __init check_timer(vo + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); +- assign_irq_vector(0, TARGET_CPUS); ++ assign_irq_vector(0, cfg, TARGET_CPUS); + + /* + * As IRQ0 is to be enabled in the 8259A, the virtual +@@ -2668,10 +3011,10 @@ static inline void __init check_timer(vo + * Ok, does IRQ0 through the IOAPIC work? 
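+ * (If not, check_timer retries through pin2 behind the 8259A and, as a
+ * last resort, as a local APIC virtual wire.)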
+ */ + if (no_pin1) { +- add_pin_to_irq(0, apic1, pin1); ++ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } +- unmask_IO_APIC_irq(0); ++ unmask_IO_APIC_irq_desc(desc); + if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); +@@ -2697,9 +3040,9 @@ static inline void __init check_timer(vo + /* + * legacy devices should be connected to IO APIC #0 + */ +- replace_pin_at_irq(0, apic1, pin1, apic2, pin2); ++ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); +- unmask_IO_APIC_irq(0); ++ unmask_IO_APIC_irq_desc(desc); + enable_8259A_irq(0); + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); +@@ -2731,7 +3074,7 @@ static inline void __init check_timer(vo + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as Virtual Wire IRQ...\n"); + +- lapic_register_intr(0); ++ lapic_register_intr(0, desc); + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + enable_8259A_irq(0); + +@@ -2930,22 +3273,26 @@ unsigned int create_irq_nr(unsigned int + unsigned int irq; + unsigned int new; + unsigned long flags; +- struct irq_cfg *cfg_new; +- +- irq_want = nr_irqs - 1; ++ struct irq_cfg *cfg_new = NULL; ++ int cpu = boot_cpu_id; ++ struct irq_desc *desc_new = NULL; + + irq = 0; + spin_lock_irqsave(&vector_lock, flags); +- for (new = irq_want; new > 0; new--) { ++ for (new = irq_want; new < NR_IRQS; new++) { + if (platform_legacy_irq(new)) + continue; +- cfg_new = irq_cfg(new); +- if (cfg_new && cfg_new->vector != 0) ++ ++ desc_new = irq_to_desc_alloc_cpu(new, cpu); ++ if (!desc_new) { ++ printk(KERN_INFO "can not get irq_desc for %d\n", new); ++ continue; ++ } ++ cfg_new = desc_new->chip_data; ++ ++ if (cfg_new->vector != 0) + continue; +- /* check if need to create one */ +- if (!cfg_new) +- cfg_new = irq_cfg_alloc(new); +- if (__assign_irq_vector(new, TARGET_CPUS) == 0) ++ if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) + irq = new; + break; + } +@@ -2953,15 +3300,21 @@ unsigned int create_irq_nr(unsigned int + + if (irq > 0) { + dynamic_irq_init(irq); ++ /* restore it, in case dynamic_irq_init clear it */ ++ if (desc_new) ++ desc_new->chip_data = cfg_new; + } + return irq; + } + ++static int nr_irqs_gsi = NR_IRQS_LEGACY; + int create_irq(void) + { ++ unsigned int irq_want; + int irq; + +- irq = create_irq_nr(nr_irqs - 1); ++ irq_want = nr_irqs_gsi; ++ irq = create_irq_nr(irq_want); + + if (irq == 0) + irq = -1; +@@ -2972,14 +3325,22 @@ int create_irq(void) + void destroy_irq(unsigned int irq) + { + unsigned long flags; ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; + ++ /* store it, in case dynamic_irq_cleanup clear it */ ++ desc = irq_to_desc(irq); ++ cfg = desc->chip_data; + dynamic_irq_cleanup(irq); ++ /* connect back irq_cfg */ ++ if (desc) ++ desc->chip_data = cfg; + + #ifdef CONFIG_INTR_REMAP + free_irte(irq); + #endif + spin_lock_irqsave(&vector_lock, flags); +- __clear_irq_vector(irq); ++ __clear_irq_vector(irq, cfg); + spin_unlock_irqrestore(&vector_lock, flags); + } + #endif /* CONFIG_XEN */ +@@ -2993,16 +3354,13 @@ static int msi_compose_msg(struct pci_de + struct irq_cfg *cfg; + int err; + unsigned dest; +- cpumask_t tmp; + +- tmp = TARGET_CPUS; +- err = assign_irq_vector(irq, tmp); ++ cfg = irq_cfg(irq); ++ err = assign_irq_vector(irq, cfg, TARGET_CPUS); + if (err) + return err; + +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, tmp); +- dest = cpu_mask_to_apicid(tmp); ++ dest = 
cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + + #ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { +@@ -3056,64 +3414,48 @@ static int msi_compose_msg(struct pci_de + } + + #ifdef CONFIG_SMP +-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ++static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) + { ++ struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; +- cpumask_t tmp; +- struct irq_desc *desc; + +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) + return; + +- if (assign_irq_vector(irq, mask)) +- return; +- +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); ++ cfg = desc->chip_data; + +- read_msi_msg(irq, &msg); ++ read_msi_msg_desc(desc, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + +- write_msi_msg(irq, &msg); +- desc = irq_to_desc(irq); +- desc->affinity = mask; ++ write_msi_msg_desc(desc, &msg); + } +- + #ifdef CONFIG_INTR_REMAP + /* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ++static void ++ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) + { +- struct irq_cfg *cfg; ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; +- cpumask_t tmp, cleanup_mask; + struct irte irte; +- struct irq_desc *desc; +- +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) +- return; + + if (get_irte(irq, &irte)) + return; + +- if (assign_irq_vector(irq, mask)) ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) + return; + +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); +- + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + +@@ -3127,16 +3469,10 @@ static void ir_set_msi_irq_affinity(unsi + * at the new destination. So, time to cleanup the previous + * vector allocation. 
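+ * (send_cleanup_vector() IPIs every online cpu of the old domain with
+ * IRQ_MOVE_CLEANUP_VECTOR so the stale vector_irq slots are released.)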
+ */ +- if (cfg->move_in_progress) { +- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); +- cfg->move_cleanup_count = cpus_weight(cleanup_mask); +- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); +- cfg->move_in_progress = 0; +- } +- +- desc = irq_to_desc(irq); +- desc->affinity = mask; ++ if (cfg->move_in_progress) ++ send_cleanup_vector(cfg); + } ++ + #endif + #endif /* CONFIG_SMP */ + +@@ -3195,7 +3531,7 @@ static int msi_alloc_irte(struct pci_dev + } + #endif + +-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) ++static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) + { + int ret; + struct msi_msg msg; +@@ -3204,7 +3540,7 @@ static int setup_msi_irq(struct pci_dev + if (ret < 0) + return ret; + +- set_irq_msi(irq, desc); ++ set_irq_msi(irq, msidesc); + write_msi_msg(irq, &msg); + + #ifdef CONFIG_INTR_REMAP +@@ -3224,26 +3560,13 @@ static int setup_msi_irq(struct pci_dev + return 0; + } + +-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +-{ +- unsigned int irq; +- +- irq = dev->bus->number; +- irq <<= 8; +- irq |= dev->devfn; +- irq <<= 12; +- +- return irq; +-} +- +-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) ++int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) + { + unsigned int irq; + int ret; + unsigned int irq_want; + +- irq_want = build_irq_for_pci_dev(dev) + 0x100; +- ++ irq_want = nr_irqs_gsi; + irq = create_irq_nr(irq_want); + if (irq == 0) + return -1; +@@ -3257,7 +3580,7 @@ int arch_setup_msi_irq(struct pci_dev *d + goto error; + no_ir: + #endif +- ret = setup_msi_irq(dev, desc, irq); ++ ret = setup_msi_irq(dev, msidesc, irq); + if (ret < 0) { + destroy_irq(irq); + return ret; +@@ -3275,7 +3598,7 @@ int arch_setup_msi_irqs(struct pci_dev * + { + unsigned int irq; + int ret, sub_handle; +- struct msi_desc *desc; ++ struct msi_desc *msidesc; + unsigned int irq_want; + + #ifdef CONFIG_INTR_REMAP +@@ -3283,10 +3606,11 @@ int arch_setup_msi_irqs(struct pci_dev * + int index = 0; + #endif + +- irq_want = build_irq_for_pci_dev(dev) + 0x100; ++ irq_want = nr_irqs_gsi; + sub_handle = 0; +- list_for_each_entry(desc, &dev->msi_list, list) { +- irq = create_irq_nr(irq_want--); ++ list_for_each_entry(msidesc, &dev->msi_list, list) { ++ irq = create_irq_nr(irq_want); ++ irq_want++; + if (irq == 0) + return -1; + #ifdef CONFIG_INTR_REMAP +@@ -3318,7 +3642,7 @@ int arch_setup_msi_irqs(struct pci_dev * + } + no_ir: + #endif +- ret = setup_msi_irq(dev, desc, irq); ++ ret = setup_msi_irq(dev, msidesc, irq); + if (ret < 0) + goto error; + sub_handle++; +@@ -3337,24 +3661,18 @@ void arch_teardown_msi_irq(unsigned int + + #ifdef CONFIG_DMAR + #ifdef CONFIG_SMP +-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) ++static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) + { ++ struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; +- cpumask_t tmp; +- struct irq_desc *desc; + +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) + return; + +- if (assign_irq_vector(irq, mask)) +- return; +- +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); ++ cfg = desc->chip_data; + + dmar_msi_read(irq, &msg); + +@@ -3364,9 +3682,8 @@ static void dmar_msi_set_affinity(unsign + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); +- desc = irq_to_desc(irq); 
+- desc->affinity = mask; + } ++ + #endif /* CONFIG_SMP */ + + struct irq_chip dmar_msi_type = { +@@ -3398,24 +3715,18 @@ int arch_setup_dmar_msi(unsigned int irq + #ifdef CONFIG_HPET_TIMER + + #ifdef CONFIG_SMP +-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) ++static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) + { ++ struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; +- struct irq_desc *desc; + struct msi_msg msg; + unsigned int dest; +- cpumask_t tmp; + +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) + return; + +- if (assign_irq_vector(irq, mask)) +- return; +- +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); ++ cfg = desc->chip_data; + + hpet_msi_read(irq, &msg); + +@@ -3425,9 +3736,8 @@ static void hpet_msi_set_affinity(unsign + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + hpet_msi_write(irq, &msg); +- desc = irq_to_desc(irq); +- desc->affinity = mask; + } ++ + #endif /* CONFIG_SMP */ + + struct irq_chip hpet_msi_type = { +@@ -3480,28 +3790,21 @@ static void target_ht_irq(unsigned int i + write_ht_irq_msg(irq, &msg); + } + +-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) ++static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) + { ++ struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + unsigned int dest; +- cpumask_t tmp; +- struct irq_desc *desc; + +- cpus_and(tmp, mask, cpu_online_map); +- if (cpus_empty(tmp)) ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) + return; + +- if (assign_irq_vector(irq, mask)) +- return; +- +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, mask); +- dest = cpu_mask_to_apicid(tmp); ++ cfg = desc->chip_data; + + target_ht_irq(irq, dest, cfg->vector); +- desc = irq_to_desc(irq); +- desc->affinity = mask; + } ++ + #endif + + static struct irq_chip ht_irq_chip = { +@@ -3519,17 +3822,14 @@ int arch_setup_ht_irq(unsigned int irq, + { + struct irq_cfg *cfg; + int err; +- cpumask_t tmp; + +- tmp = TARGET_CPUS; +- err = assign_irq_vector(irq, tmp); ++ cfg = irq_cfg(irq); ++ err = assign_irq_vector(irq, cfg, TARGET_CPUS); + if (!err) { + struct ht_irq_msg msg; + unsigned dest; + +- cfg = irq_cfg(irq); +- cpus_and(tmp, cfg->domain, tmp); +- dest = cpu_mask_to_apicid(tmp); ++ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + +@@ -3565,7 +3865,7 @@ int arch_setup_ht_irq(unsigned int irq, + int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset) + { +- const cpumask_t *eligible_cpu = get_cpu_mask(cpu); ++ const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; +@@ -3573,7 +3873,9 @@ int arch_enable_uv_irq(char *irq_name, u + unsigned long flags; + int err; + +- err = assign_irq_vector(irq, *eligible_cpu); ++ cfg = irq_cfg(irq); ++ ++ err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + +@@ -3582,8 +3884,6 @@ int arch_enable_uv_irq(char *irq_name, u + irq_name); + spin_unlock_irqrestore(&vector_lock, flags); + +- cfg = irq_cfg(irq); +- + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); +@@ -3594,7 +3894,7 @@ int arch_enable_uv_irq(char *irq_name, u + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 
0; +- entry->dest = cpu_mask_to_apicid(*eligible_cpu); ++ entry->dest = cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +@@ -3635,10 +3935,29 @@ int __init io_apic_get_redir_entries (in + return reg_01.bits.entries; + } + +-int __init probe_nr_irqs(void) ++#ifndef CONFIG_XEN ++void __init probe_nr_irqs_gsi(void) + { +- return NR_IRQS; ++ int nr = 0; ++ ++ nr = acpi_probe_gsi(); ++ if (nr > nr_irqs_gsi) { ++ nr_irqs_gsi = nr; ++ } else { ++ /* for acpi=off or acpi is not compiled in */ ++ int idx; ++ ++ nr = 0; ++ for (idx = 0; idx < nr_ioapics; idx++) ++ nr += io_apic_get_redir_entries(idx) + 1; ++ ++ if (nr > nr_irqs_gsi) ++ nr_irqs_gsi = nr; ++ } ++ ++ printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); + } ++#endif + + /* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration +@@ -3738,6 +4057,10 @@ int __init io_apic_get_version(int ioapi + + int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) + { ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ int cpu = boot_cpu_id; ++ + #ifdef CONFIG_XEN + if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n", +@@ -3752,13 +4075,21 @@ int io_apic_set_pci_routing (int ioapic, + return -EINVAL; + } + ++ desc = irq_to_desc_alloc_cpu(irq, cpu); ++ if (!desc) { ++ printk(KERN_INFO "can not get irq_desc %d\n", irq); ++ return 0; ++ } ++ + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ +- if (irq >= 16) +- add_pin_to_irq(irq, ioapic, pin); ++ if (irq >= NR_IRQS_LEGACY) { ++ cfg = desc->chip_data; ++ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); ++ } + +- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); ++ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); + + return 0; + } +@@ -3797,7 +4128,7 @@ void __init setup_ioapic_dest(void) + int pin, ioapic, irq, irq_entry; + struct irq_desc *desc; + struct irq_cfg *cfg; +- cpumask_t mask; ++ const struct cpumask *mask; + + if (skip_ioapic_setup == 1) + return; +@@ -3813,9 +4144,10 @@ void __init setup_ioapic_dest(void) + * when you have too many devices, because at that time only boot + * cpu is online. 
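+ * (Such irqs are set up here instead, now that every cpu is available
+ * to take a vector.)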
+ */ +- cfg = irq_cfg(irq); ++ desc = irq_to_desc(irq); ++ cfg = desc->chip_data; + if (!cfg->vector) { +- setup_IO_APIC_irq(ioapic, pin, irq, ++ setup_IO_APIC_irq(ioapic, pin, irq, desc, + irq_trigger(irq_entry), + irq_polarity(irq_entry)); + continue; +@@ -3825,19 +4157,18 @@ void __init setup_ioapic_dest(void) + /* + * Honour affinities which have been set in early boot + */ +- desc = irq_to_desc(irq); + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) +- mask = desc->affinity; ++ mask = &desc->affinity; + else + mask = TARGET_CPUS; + + #ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) +- set_ir_ioapic_affinity_irq(irq, mask); ++ set_ir_ioapic_affinity_irq_desc(desc, mask); + else + #endif +- set_ioapic_affinity_irq(irq, mask); ++ set_ioapic_affinity_irq_desc(desc, mask); + } + + } +@@ -3886,7 +4217,6 @@ void __init ioapic_init_mappings(void) + struct resource *ioapic_res; + int i; + +- irq_2_pin_init(); + ioapic_res = ioapic_setup_resources(); + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/ioport-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/ioport-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -36,7 +36,7 @@ static void set_bitmap(unsigned long *bi + */ + asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) + { +- struct thread_struct * t = ¤t->thread; ++ struct thread_struct *t = ¤t->thread; + struct physdev_set_iobitmap set_iobitmap; + + if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -150,31 +150,28 @@ static inline void __send_IPI_dest_field + /* + * This is only used on smaller machines. 
+ */ +-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) ++void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) + { + #ifndef CONFIG_XEN +- unsigned long mask = cpus_addr(cpumask)[0]; ++ unsigned long mask = cpumask_bits(cpumask)[0]; + #else +- cpumask_t mask; + unsigned int cpu; + #endif + unsigned long flags; + + local_irq_save(flags); + #ifndef CONFIG_XEN +- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); ++ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __send_IPI_dest_field(mask, vector); + #else +- cpus_andnot(mask, cpumask, cpu_online_map); +- WARN_ON(!cpus_empty(mask)); +- for_each_online_cpu(cpu) +- if (cpu_isset(cpu, cpumask)) +- __send_IPI_one(cpu, vector); ++ WARN_ON(!cpumask_subset(cpumask, cpu_online_mask)); ++ for_each_cpu_and(cpu, cpumask, cpu_online_mask) ++ __send_IPI_one(cpu, vector); + #endif + local_irq_restore(flags); + } + +-void send_IPI_mask_sequence(cpumask_t mask, int vector) ++void send_IPI_mask_sequence(const struct cpumask *mask, int vector) + { + #ifndef CONFIG_XEN + unsigned long flags; +@@ -187,18 +184,37 @@ void send_IPI_mask_sequence(cpumask_t ma + */ + + local_irq_save(flags); +- for_each_possible_cpu(query_cpu) { +- if (cpu_isset(query_cpu, mask)) { +- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), +- vector); +- } +- } ++ for_each_cpu(query_cpu, mask) ++ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + local_irq_restore(flags); + #else + send_IPI_mask_bitmask(mask, vector); + #endif + } + ++void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) ++{ ++ unsigned long flags; ++ unsigned int query_cpu; ++ unsigned int this_cpu = smp_processor_id(); ++ ++ /* See Hack comment above */ ++ ++ local_irq_save(flags); ++#ifndef CONFIG_XEN ++ for_each_cpu(query_cpu, mask) ++ if (query_cpu != this_cpu) ++ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), ++ vector); ++#else ++ WARN_ON(!cpumask_subset(mask, cpu_online_mask)); ++ for_each_cpu_and(query_cpu, mask, cpu_online_mask) ++ if (query_cpu != this_cpu) ++ __send_IPI_one(query_cpu, vector); ++#endif ++ local_irq_restore(flags); ++} ++ + #ifndef CONFIG_XEN + /* must come after the send_IPI functions above for inlining */ + static int convert_apicid_to_cpu(int apic_id) +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/irq-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/irq-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -5,10 +5,11 @@ + #include + #include + #include ++#include + + #include + #include +-#include ++#include + + atomic_t irq_err_count; + +@@ -43,57 +44,57 @@ void ack_bad_irq(unsigned int irq) + /* + * /proc/interrupts printing: + */ +-static int show_other_interrupts(struct seq_file *p) ++static int show_other_interrupts(struct seq_file *p, int prec) + { + int j; + +- seq_printf(p, "NMI: "); ++ seq_printf(p, "%*s: ", prec, "NMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); + #ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "LOC: "); ++ seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); + #endif + #ifdef CONFIG_SMP +- seq_printf(p, "RES: "); ++ seq_printf(p, "%*s: ", prec, "RES"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); +- seq_printf(p, "CAL: "); ++ seq_printf(p, "%*s: ", prec, "CAL"); + for_each_online_cpu(j) + 
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, " Function call interrupts\n"); + #ifndef CONFIG_XEN +- seq_printf(p, "TLB: "); ++ seq_printf(p, "%*s: ", prec, "TLB"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); + #endif + #endif + #ifdef CONFIG_X86_MCE +- seq_printf(p, "TRM: "); ++ seq_printf(p, "%*s: ", prec, "TRM"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + # ifdef CONFIG_X86_64 +- seq_printf(p, "THR: "); ++ seq_printf(p, "%*s: ", prec, "THR"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); + # endif + #endif + #ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "SPU: "); ++ seq_printf(p, "%*s: ", prec, "SPU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); + #endif +- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); ++ seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + #if defined(CONFIG_X86_IO_APIC) +- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); ++ seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); + #endif + return 0; + } +@@ -101,25 +102,31 @@ static int show_other_interrupts(struct + int show_interrupts(struct seq_file *p, void *v) + { + unsigned long flags, any_count = 0; +- int i = *(loff_t *) v, j; ++ int i = *(loff_t *) v, j, prec; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + ++ for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) ++ j *= 10; ++ + if (i == nr_irqs) +- return show_other_interrupts(p); ++ return show_other_interrupts(p, prec); + + /* print header */ + if (i == 0) { +- seq_printf(p, " "); ++ seq_printf(p, "%*s", prec + 8, ""); + for_each_online_cpu(j) + seq_printf(p, "CPU%-8d", j); + seq_putc(p, '\n'); + } + + desc = irq_to_desc(i); ++ if (!desc) ++ return 0; ++ + spin_lock_irqsave(&desc->lock, flags); + #ifndef CONFIG_SMP + any_count = kstat_irqs(i); +@@ -131,7 +138,7 @@ int show_interrupts(struct seq_file *p, + if (!action && !any_count) + goto out; + +- seq_printf(p, "%3d: ", i); ++ seq_printf(p, "%*d: ", prec, i); + #ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); + #else +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -15,9 +15,9 @@ + #include + #include + #include ++#include + + #include +-#include + + DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + EXPORT_PER_CPU_SYMBOL(irq_stat); +@@ -93,7 +93,7 @@ execute_on_irq_stack(int overflow, struc + return 0; + + /* build the stack frame on the IRQ stack */ +- isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); ++ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.previous_esp = current_stack_pointer; + +@@ -137,7 +137,7 @@ void __cpuinit irq_ctx_init(int cpu) + + hardirq_ctx[cpu] = irqctx; + +- irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; ++ irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; +@@ -147,7 +147,7 @@ void __cpuinit irq_ctx_init(int cpu) + softirq_ctx[cpu] = irqctx; + + printk(KERN_DEBUG "CPU %u irqstacks, hard=%p 
soft=%p\n", +- cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); ++ cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); + } + + void irq_ctx_exit(int cpu) +@@ -174,7 +174,7 @@ asmlinkage void do_softirq(void) + irqctx->tinfo.previous_esp = current_stack_pointer; + + /* build the stack frame on the softirq stack */ +- isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + + call_on_stack(__do_softirq, isp); + /* +@@ -229,25 +229,28 @@ unsigned int do_IRQ(struct pt_regs *regs + + #ifdef CONFIG_HOTPLUG_CPU + +-void fixup_irqs(cpumask_t map) ++/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ ++void fixup_irqs(void) + { + unsigned int irq; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { +- cpumask_t mask; ++ const struct cpumask *affinity; + ++ if (!desc) ++ continue; + if (irq == 2) + continue; + +- cpus_and(mask, desc->affinity, map); +- if (any_online_cpu(mask) == NR_CPUS) { ++ affinity = &desc->affinity; ++ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + /*printk("Breaking affinity for irq %i\n", irq);*/ +- mask = map; ++ affinity = cpu_all_mask; + } + if (desc->chip->set_affinity) +- desc->chip->set_affinity(irq, mask); ++ desc->chip->set_affinity(irq, affinity); + else if (desc->action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/irq_64-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -13,12 +13,12 @@ + #include + #include + #include +-#include ++#include ++#include ++#include + #include + #include +-#include + +-#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* + * Probabilistic stack overflow check: + * +@@ -28,26 +28,25 @@ + */ + static inline void stack_overflow_check(struct pt_regs *regs) + { ++#ifdef CONFIG_DEBUG_STACKOVERFLOW + u64 curbase = (u64)task_stack_page(current); +- static unsigned long warned = -60*HZ; + +- if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && +- regs->sp < curbase + sizeof(struct thread_info) + 128 && +- time_after(jiffies, warned + 60*HZ)) { +- printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", +- current->comm, curbase, regs->sp); +- show_stack(NULL,NULL); +- warned = jiffies; +- } +-} ++ WARN_ONCE(regs->sp >= curbase && ++ regs->sp <= curbase + THREAD_SIZE && ++ regs->sp < curbase + sizeof(struct thread_info) + ++ sizeof(struct pt_regs) + 128, ++ ++ "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", ++ current->comm, curbase, regs->sp); + #endif ++} + + /* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +-asmlinkage unsigned int do_IRQ(struct pt_regs *regs) ++asmlinkage unsigned int /*__irq_entry*/ do_IRQ(struct pt_regs *regs) + { + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; +@@ -58,9 +57,7 @@ asmlinkage unsigned int do_IRQ(struct pt + /*exit_idle();*/ + /*irq_enter();*/ + +-#ifdef CONFIG_DEBUG_STACKOVERFLOW + stack_overflow_check(regs); +-#endif + + desc = irq_to_desc(irq); + if (likely(desc)) +@@ -82,40 +79,43 @@ asmlinkage unsigned int do_IRQ(struct pt + } + + #ifdef CONFIG_HOTPLUG_CPU +-void fixup_irqs(cpumask_t map) ++/* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ ++void fixup_irqs(void) + { + unsigned int irq; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { +- cpumask_t mask; + int break_affinity = 0; + int set_affinity = 1; ++ const struct cpumask *affinity; + ++ if (!desc) ++ continue; + if (irq == 2) + continue; + + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + ++ affinity = &desc->affinity; + if (!irq_has_action(irq) || +- cpus_equal(desc->affinity, map)) { ++ cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; + } + +- cpus_and(mask, desc->affinity, map); +- if (cpus_empty(mask)) { ++ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + break_affinity = 1; +- mask = map; ++ affinity = cpu_all_mask; + } + + if (desc->chip->mask) + desc->chip->mask(irq); + + if (desc->chip->set_affinity) +- desc->chip->set_affinity(irq, mask); ++ desc->chip->set_affinity(irq, affinity); + else if (!(warned++)) + set_affinity = 0; + +@@ -141,18 +141,18 @@ extern void call_softirq(void); + + asmlinkage void do_softirq(void) + { +- __u32 pending; +- unsigned long flags; ++ __u32 pending; ++ unsigned long flags; + +- if (in_interrupt()) +- return; ++ if (in_interrupt()) ++ return; + +- local_irq_save(flags); +- pending = local_softirq_pending(); +- /* Switch to interrupt stack */ +- if (pending) { ++ local_irq_save(flags); ++ pending = local_softirq_pending(); ++ /* Switch to interrupt stack */ ++ if (pending) { + call_softirq(); + WARN_ON_ONCE(softirq_count()); + } +- local_irq_restore(flags); ++ local_irq_restore(flags); + } +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/ldt-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/ldt-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -12,8 +12,8 @@ + #include + #include + #include ++#include + +-#include + #include + #include + #include +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/machine_kexec_32.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/machine_kexec_32.c 2009-11-06 10:51:55.000000000 +0100 +@@ -122,13 +122,7 @@ void machine_kexec_setup_load_arg(xen_ke + memcpy(control_page, relocate_kernel, PAGE_SIZE); + + xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); +- xki->page_list[PA_PGD] = __ma(kexec_pgd); +-#ifdef CONFIG_X86_PAE +- xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); +- xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); +-#endif +- xki->page_list[PA_PTE_0] = __ma(kexec_pte0); +- xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++ xki->page_list[PA_PGD] = __ma(image->arch.pgd); + + if (image->type == KEXEC_TYPE_DEFAULT) + xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page); +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -2,7 +2,7 @@ + * Intel Multiprocessor Specification 1.1 and 1.4 + * compliant MP-table parsing routines. 
+ * +- * (c) 1995 Alan Cox, Building #3 ++ * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 2008 Alexey Starikovskiy + */ +@@ -16,18 +16,18 @@ + #include + #include + #include ++#include + +-#include + #include + #include + #include + #include + #include +-#include + #include + #include + #include + #include ++#include + + #include + #ifdef CONFIG_X86_32 +@@ -54,13 +54,13 @@ static int __init mpf_checksum(unsigned + return sum & 0xFF; + } + +-static void __init MP_processor_info(struct mpc_config_processor *m) ++static void __init MP_processor_info(struct mpc_cpu *m) + { + #ifndef CONFIG_XEN + int apicid; + char *bootup_cpu = ""; + +- if (!(m->mpc_cpuflag & CPU_ENABLED)) { ++ if (!(m->cpuflag & CPU_ENABLED)) { + disabled_cpus++; + return; + } +@@ -68,57 +68,57 @@ static void __init MP_processor_info(str + if (x86_quirks->mpc_apic_id) + apicid = x86_quirks->mpc_apic_id(m); + else +- apicid = m->mpc_apicid; ++ apicid = m->apicid; + +- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { ++ if (m->cpuflag & CPU_BOOTPROCESSOR) { + bootup_cpu = " (Bootup-CPU)"; +- boot_cpu_physical_apicid = m->mpc_apicid; ++ boot_cpu_physical_apicid = m->apicid; + } + +- printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); +- generic_processor_info(apicid, m->mpc_apicver); ++ printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu); ++ generic_processor_info(apicid, m->apicver); + #else /* CONFIG_XEN */ + num_processors++; + #endif + } + + #ifdef CONFIG_X86_IO_APIC +-static void __init MP_bus_info(struct mpc_config_bus *m) ++static void __init MP_bus_info(struct mpc_bus *m) + { + char str[7]; +- memcpy(str, m->mpc_bustype, 6); ++ memcpy(str, m->bustype, 6); + str[6] = 0; + + if (x86_quirks->mpc_oem_bus_info) + x86_quirks->mpc_oem_bus_info(m, str); + else +- apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); ++ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); + + #if MAX_MP_BUSSES < 256 +- if (m->mpc_busid >= MAX_MP_BUSSES) { ++ if (m->busid >= MAX_MP_BUSSES) { + printk(KERN_WARNING "MP table busid value (%d) for bustype %s " + " is too large, max. 
supported is %d\n", +- m->mpc_busid, str, MAX_MP_BUSSES - 1); ++ m->busid, str, MAX_MP_BUSSES - 1); + return; + } + #endif + + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { +- set_bit(m->mpc_busid, mp_bus_not_pci); +-#if defined(CONFIG_EISA) || defined (CONFIG_MCA) +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; ++ set_bit(m->busid, mp_bus_not_pci); ++#if defined(CONFIG_EISA) || defined(CONFIG_MCA) ++ mp_bus_id_to_type[m->busid] = MP_BUS_ISA; + #endif + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { + if (x86_quirks->mpc_oem_pci_bus) + x86_quirks->mpc_oem_pci_bus(m); + +- clear_bit(m->mpc_busid, mp_bus_not_pci); +-#if defined(CONFIG_EISA) || defined (CONFIG_MCA) +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; ++ clear_bit(m->busid, mp_bus_not_pci); ++#if defined(CONFIG_EISA) || defined(CONFIG_MCA) ++ mp_bus_id_to_type[m->busid] = MP_BUS_PCI; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; ++ mp_bus_id_to_type[m->busid] = MP_BUS_EISA; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; ++ mp_bus_id_to_type[m->busid] = MP_BUS_MCA; + #endif + } else + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); +@@ -142,32 +142,31 @@ static int bad_ioapic(unsigned long addr + return 0; + } + +-static void __init MP_ioapic_info(struct mpc_config_ioapic *m) ++static void __init MP_ioapic_info(struct mpc_ioapic *m) + { +- if (!(m->mpc_flags & MPC_APIC_USABLE)) ++ if (!(m->flags & MPC_APIC_USABLE)) + return; + + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", +- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); ++ m->apicid, m->apicver, m->apicaddr); + +- if (bad_ioapic(m->mpc_apicaddr)) ++ if (bad_ioapic(m->apicaddr)) + return; + +- mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; +- mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; +- mp_ioapics[nr_ioapics].mp_type = m->mpc_type; +- mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; +- mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; ++ mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; ++ mp_ioapics[nr_ioapics].mp_apicid = m->apicid; ++ mp_ioapics[nr_ioapics].mp_type = m->type; ++ mp_ioapics[nr_ioapics].mp_apicver = m->apicver; ++ mp_ioapics[nr_ioapics].mp_flags = m->flags; + nr_ioapics++; + } + +-static void print_MP_intsrc_info(struct mpc_config_intsrc *m) ++static void print_MP_intsrc_info(struct mpc_intsrc *m) + { + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", +- m->mpc_irqtype, m->mpc_irqflag & 3, +- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, +- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); ++ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, ++ m->srcbusirq, m->dstapic, m->dstirq); + } + + static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +@@ -179,52 +178,52 @@ static void __init print_mp_irq_info(str + mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); + } + +-static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, ++static void __init assign_to_mp_irq(struct mpc_intsrc *m, + struct mp_config_intsrc *mp_irq) + { +- mp_irq->mp_dstapic = m->mpc_dstapic; +- mp_irq->mp_type = m->mpc_type; +- mp_irq->mp_irqtype = m->mpc_irqtype; +- mp_irq->mp_irqflag = m->mpc_irqflag; +- mp_irq->mp_srcbus = m->mpc_srcbus; +- mp_irq->mp_srcbusirq = m->mpc_srcbusirq; +- mp_irq->mp_dstirq = m->mpc_dstirq; ++ mp_irq->mp_dstapic = m->dstapic; ++ 
mp_irq->mp_type = m->type; ++ mp_irq->mp_irqtype = m->irqtype; ++ mp_irq->mp_irqflag = m->irqflag; ++ mp_irq->mp_srcbus = m->srcbus; ++ mp_irq->mp_srcbusirq = m->srcbusirq; ++ mp_irq->mp_dstirq = m->dstirq; + } + + static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, +- struct mpc_config_intsrc *m) ++ struct mpc_intsrc *m) + { +- m->mpc_dstapic = mp_irq->mp_dstapic; +- m->mpc_type = mp_irq->mp_type; +- m->mpc_irqtype = mp_irq->mp_irqtype; +- m->mpc_irqflag = mp_irq->mp_irqflag; +- m->mpc_srcbus = mp_irq->mp_srcbus; +- m->mpc_srcbusirq = mp_irq->mp_srcbusirq; +- m->mpc_dstirq = mp_irq->mp_dstirq; ++ m->dstapic = mp_irq->mp_dstapic; ++ m->type = mp_irq->mp_type; ++ m->irqtype = mp_irq->mp_irqtype; ++ m->irqflag = mp_irq->mp_irqflag; ++ m->srcbus = mp_irq->mp_srcbus; ++ m->srcbusirq = mp_irq->mp_srcbusirq; ++ m->dstirq = mp_irq->mp_dstirq; + } + + static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, +- struct mpc_config_intsrc *m) ++ struct mpc_intsrc *m) + { +- if (mp_irq->mp_dstapic != m->mpc_dstapic) ++ if (mp_irq->mp_dstapic != m->dstapic) + return 1; +- if (mp_irq->mp_type != m->mpc_type) ++ if (mp_irq->mp_type != m->type) + return 2; +- if (mp_irq->mp_irqtype != m->mpc_irqtype) ++ if (mp_irq->mp_irqtype != m->irqtype) + return 3; +- if (mp_irq->mp_irqflag != m->mpc_irqflag) ++ if (mp_irq->mp_irqflag != m->irqflag) + return 4; +- if (mp_irq->mp_srcbus != m->mpc_srcbus) ++ if (mp_irq->mp_srcbus != m->srcbus) + return 5; +- if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) ++ if (mp_irq->mp_srcbusirq != m->srcbusirq) + return 6; +- if (mp_irq->mp_dstirq != m->mpc_dstirq) ++ if (mp_irq->mp_dstirq != m->dstirq) + return 7; + + return 0; + } + +-static void __init MP_intsrc_info(struct mpc_config_intsrc *m) ++static void __init MP_intsrc_info(struct mpc_intsrc *m) + { + int i; + +@@ -242,57 +241,55 @@ static void __init MP_intsrc_info(struct + + #endif + +-static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) ++static void __init MP_lintsrc_info(struct mpc_lintsrc *m) + { + apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", +- m->mpc_irqtype, m->mpc_irqflag & 3, +- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, +- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); ++ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid, ++ m->srcbusirq, m->destapic, m->destapiclint); + } + + /* + * Read/parse the MPC + */ + +-static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, +- char *str) ++static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) + { + +- if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { ++ if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) { + printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", +- mpc->mpc_signature[0], mpc->mpc_signature[1], +- mpc->mpc_signature[2], mpc->mpc_signature[3]); ++ mpc->signature[0], mpc->signature[1], ++ mpc->signature[2], mpc->signature[3]); + return 0; + } +- if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { ++ if (mpf_checksum((unsigned char *)mpc, mpc->length)) { + printk(KERN_ERR "MPTABLE: checksum error!\n"); + return 0; + } +- if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { ++ if (mpc->spec != 0x01 && mpc->spec != 0x04) { + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", +- mpc->mpc_spec); ++ mpc->spec); + return 0; + } +- if (!mpc->mpc_lapic) { ++ if (!mpc->lapic) { + printk(KERN_ERR "MPTABLE: null local APIC address!\n"); + return 0; + } +- memcpy(oem, mpc->mpc_oem, 
8); ++ memcpy(oem, mpc->oem, 8); + oem[8] = 0; + printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); + +- memcpy(str, mpc->mpc_productid, 12); ++ memcpy(str, mpc->productid, 12); + str[12] = 0; + + printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); + +- printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); ++ printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic); + + return 1; + } + +-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ++static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) + { + char str[16]; + char oem[10]; +@@ -317,14 +314,14 @@ static int __init smp_read_mpc(struct mp + #endif + /* save the local APIC address, it might be non-default */ + if (!acpi_lapic) +- mp_lapic_addr = mpc->mpc_lapic; ++ mp_lapic_addr = mpc->lapic; + + if (early) + return 1; + +- if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { +- struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; +- x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); ++ if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) { ++ struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr; ++ x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize); + } + + /* +@@ -333,12 +330,11 @@ static int __init smp_read_mpc(struct mp + if (x86_quirks->mpc_record) + *x86_quirks->mpc_record = 0; + +- while (count < mpc->mpc_length) { ++ while (count < mpc->length) { + switch (*mpt) { + case MP_PROCESSOR: + { +- struct mpc_config_processor *m = +- (struct mpc_config_processor *)mpt; ++ struct mpc_cpu *m = (struct mpc_cpu *)mpt; + /* ACPI may have already provided this data */ + if (!acpi_lapic) + MP_processor_info(m); +@@ -348,8 +344,7 @@ static int __init smp_read_mpc(struct mp + } + case MP_BUS: + { +- struct mpc_config_bus *m = +- (struct mpc_config_bus *)mpt; ++ struct mpc_bus *m = (struct mpc_bus *)mpt; + #ifdef CONFIG_X86_IO_APIC + MP_bus_info(m); + #endif +@@ -360,30 +355,28 @@ static int __init smp_read_mpc(struct mp + case MP_IOAPIC: + { + #ifdef CONFIG_X86_IO_APIC +- struct mpc_config_ioapic *m = +- (struct mpc_config_ioapic *)mpt; ++ struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; + MP_ioapic_info(m); + #endif +- mpt += sizeof(struct mpc_config_ioapic); +- count += sizeof(struct mpc_config_ioapic); ++ mpt += sizeof(struct mpc_ioapic); ++ count += sizeof(struct mpc_ioapic); + break; + } + case MP_INTSRC: + { + #ifdef CONFIG_X86_IO_APIC +- struct mpc_config_intsrc *m = +- (struct mpc_config_intsrc *)mpt; ++ struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; + + MP_intsrc_info(m); + #endif +- mpt += sizeof(struct mpc_config_intsrc); +- count += sizeof(struct mpc_config_intsrc); ++ mpt += sizeof(struct mpc_intsrc); ++ count += sizeof(struct mpc_intsrc); + break; + } + case MP_LINTSRC: + { +- struct mpc_config_lintsrc *m = +- (struct mpc_config_lintsrc *)mpt; ++ struct mpc_lintsrc *m = ++ (struct mpc_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt += sizeof(*m); + count += sizeof(*m); +@@ -394,8 +387,8 @@ static int __init smp_read_mpc(struct mp + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); + printk(KERN_ERR "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, +- 1, mpc, mpc->mpc_length, 1); +- count = mpc->mpc_length; ++ 1, mpc, mpc->length, 1); ++ count = mpc->length; + break; + } + if (x86_quirks->mpc_record) +@@ -426,16 +419,16 @@ static int __init ELCR_trigger(unsigned + + static void __init construct_default_ioirq_mptable(int mpc_default_type) + { +- struct mpc_config_intsrc intsrc; ++ struct 
mpc_intsrc intsrc; + int i; + int ELCR_fallback = 0; + +- intsrc.mpc_type = MP_INTSRC; +- intsrc.mpc_irqflag = 0; /* conforming */ +- intsrc.mpc_srcbus = 0; +- intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; ++ intsrc.type = MP_INTSRC; ++ intsrc.irqflag = 0; /* conforming */ ++ intsrc.srcbus = 0; ++ intsrc.dstapic = mp_ioapics[0].mp_apicid; + +- intsrc.mpc_irqtype = mp_INT; ++ intsrc.irqtype = mp_INT; + + /* + * If true, we have an ISA/PCI system with no IRQ entries +@@ -478,30 +471,30 @@ static void __init construct_default_ioi + * irqflag field (level sensitive, active high polarity). + */ + if (ELCR_trigger(i)) +- intsrc.mpc_irqflag = 13; ++ intsrc.irqflag = 13; + else +- intsrc.mpc_irqflag = 0; ++ intsrc.irqflag = 0; + } + +- intsrc.mpc_srcbusirq = i; +- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ ++ intsrc.srcbusirq = i; ++ intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + MP_intsrc_info(&intsrc); + } + +- intsrc.mpc_irqtype = mp_ExtINT; +- intsrc.mpc_srcbusirq = 0; +- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ ++ intsrc.irqtype = mp_ExtINT; ++ intsrc.srcbusirq = 0; ++ intsrc.dstirq = 0; /* 8259A to INTIN0 */ + MP_intsrc_info(&intsrc); + } + + + static void __init construct_ioapic_table(int mpc_default_type) + { +- struct mpc_config_ioapic ioapic; +- struct mpc_config_bus bus; ++ struct mpc_ioapic ioapic; ++ struct mpc_bus bus; + +- bus.mpc_type = MP_BUS; +- bus.mpc_busid = 0; ++ bus.type = MP_BUS; ++ bus.busid = 0; + switch (mpc_default_type) { + default: + printk(KERN_ERR "???\nUnknown standard configuration %d\n", +@@ -509,29 +502,29 @@ static void __init construct_ioapic_tabl + /* fall through */ + case 1: + case 5: +- memcpy(bus.mpc_bustype, "ISA ", 6); ++ memcpy(bus.bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: +- memcpy(bus.mpc_bustype, "EISA ", 6); ++ memcpy(bus.bustype, "EISA ", 6); + break; + case 4: + case 7: +- memcpy(bus.mpc_bustype, "MCA ", 6); ++ memcpy(bus.bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { +- bus.mpc_busid = 1; +- memcpy(bus.mpc_bustype, "PCI ", 6); ++ bus.busid = 1; ++ memcpy(bus.bustype, "PCI ", 6); + MP_bus_info(&bus); + } + +- ioapic.mpc_type = MP_IOAPIC; +- ioapic.mpc_apicid = 2; +- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; +- ioapic.mpc_flags = MPC_APIC_USABLE; +- ioapic.mpc_apicaddr = 0xFEC00000; ++ ioapic.type = MP_IOAPIC; ++ ioapic.apicid = 2; ++ ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ ioapic.flags = MPC_APIC_USABLE; ++ ioapic.apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); + + /* +@@ -545,8 +538,8 @@ static inline void __init construct_ioap + + static inline void __init construct_default_ISA_mptable(int mpc_default_type) + { +- struct mpc_config_processor processor; +- struct mpc_config_lintsrc lintsrc; ++ struct mpc_cpu processor; ++ struct mpc_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + +@@ -558,30 +551,30 @@ static inline void __init construct_defa + /* + * 2 CPUs, numbered 0 & 1. + */ +- processor.mpc_type = MP_PROCESSOR; ++ processor.type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ +- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; +- processor.mpc_cpuflag = CPU_ENABLED; +- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ processor.apicver = mpc_default_type > 4 ? 
0x10 : 0x01; ++ processor.cpuflag = CPU_ENABLED; ++ processor.cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; +- processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; +- processor.mpc_reserved[0] = 0; +- processor.mpc_reserved[1] = 0; ++ processor.featureflag = boot_cpu_data.x86_capability[0]; ++ processor.reserved[0] = 0; ++ processor.reserved[1] = 0; + for (i = 0; i < 2; i++) { +- processor.mpc_apicid = i; ++ processor.apicid = i; + MP_processor_info(&processor); + } + + construct_ioapic_table(mpc_default_type); + +- lintsrc.mpc_type = MP_LINTSRC; +- lintsrc.mpc_irqflag = 0; /* conforming */ +- lintsrc.mpc_srcbusid = 0; +- lintsrc.mpc_srcbusirq = 0; +- lintsrc.mpc_destapic = MP_APIC_ALL; ++ lintsrc.type = MP_LINTSRC; ++ lintsrc.irqflag = 0; /* conforming */ ++ lintsrc.srcbusid = 0; ++ lintsrc.srcbusirq = 0; ++ lintsrc.destapic = MP_APIC_ALL; + for (i = 0; i < 2; i++) { +- lintsrc.mpc_irqtype = linttypes[i]; +- lintsrc.mpc_destapiclint = i; ++ lintsrc.irqtype = linttypes[i]; ++ lintsrc.destapiclint = i; + MP_lintsrc_info(&lintsrc); + } + } +@@ -595,26 +588,23 @@ static void __init __get_smp_config(unsi + { + struct intel_mp_floating *mpf = mpf_found; + +- if (x86_quirks->mach_get_smp_config) { +- if (x86_quirks->mach_get_smp_config(early)) +- return; +- } ++ if (!mpf) ++ return; ++ + if (acpi_lapic && early) + return; ++ + /* +- * ACPI supports both logical (e.g. Hyper-Threading) and physical +- * processors, where MPS only supports physical. ++ * MPS doesn't support hyperthreading, aka only have ++ * thread 0 apic id in MPS table + */ +- if (acpi_lapic && acpi_ioapic) { +- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " +- "information\n"); ++ if (acpi_lapic && acpi_ioapic) + return; +- } else if (acpi_lapic) +- printk(KERN_INFO "Using ACPI for processor (LAPIC) " +- "configuration information\n"); + +- if (!mpf) +- return; ++ if (x86_quirks->mach_get_smp_config) { ++ if (x86_quirks->mach_get_smp_config(early)) ++ return; ++ } + + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", + mpf->mpf_specification); +@@ -669,15 +659,15 @@ static void __init __get_smp_config(unsi + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { +- struct mpc_config_bus bus; ++ struct mpc_bus bus; + + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " + "using default mptable. 
" + "(tell your hw vendor)\n"); + +- bus.mpc_type = MP_BUS; +- bus.mpc_busid = 0; +- memcpy(bus.mpc_bustype, "ISA ", 6); ++ bus.type = MP_BUS; ++ bus.busid = 0; ++ memcpy(bus.bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); +@@ -823,14 +813,14 @@ void __init find_smp_config(void) + #ifdef CONFIG_X86_IO_APIC + static u8 __initdata irq_used[MAX_IRQ_SOURCES]; + +-static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) ++static int __init get_MP_intsrc_index(struct mpc_intsrc *m) + { + int i; + +- if (m->mpc_irqtype != mp_INT) ++ if (m->irqtype != mp_INT) + return 0; + +- if (m->mpc_irqflag != 0x0f) ++ if (m->irqflag != 0x0f) + return 0; + + /* not legacy */ +@@ -842,9 +832,9 @@ static int __init get_MP_intsrc_index(s + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; + +- if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) ++ if (mp_irqs[i].mp_srcbus != m->srcbus) + continue; +- if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) ++ if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) + continue; + if (irq_used[i]) { + /* already claimed */ +@@ -860,10 +850,10 @@ static int __init get_MP_intsrc_index(s + + #define SPARE_SLOT_NUM 20 + +-static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; ++static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; + #endif + +-static int __init replace_intsrc_all(struct mp_config_table *mpc, ++static int __init replace_intsrc_all(struct mpc_table *mpc, + unsigned long mpc_new_phys, + unsigned long mpc_new_length) + { +@@ -875,36 +865,33 @@ static int __init replace_intsrc_all(st + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + +- printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); +- while (count < mpc->mpc_length) { ++ printk(KERN_INFO "mpc_length %x\n", mpc->length); ++ while (count < mpc->length) { + switch (*mpt) { + case MP_PROCESSOR: + { +- struct mpc_config_processor *m = +- (struct mpc_config_processor *)mpt; ++ struct mpc_cpu *m = (struct mpc_cpu *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { +- struct mpc_config_bus *m = +- (struct mpc_config_bus *)mpt; ++ struct mpc_bus *m = (struct mpc_bus *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { +- mpt += sizeof(struct mpc_config_ioapic); +- count += sizeof(struct mpc_config_ioapic); ++ mpt += sizeof(struct mpc_ioapic); ++ count += sizeof(struct mpc_ioapic); + break; + } + case MP_INTSRC: + { + #ifdef CONFIG_X86_IO_APIC +- struct mpc_config_intsrc *m = +- (struct mpc_config_intsrc *)mpt; ++ struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; + + apic_printk(APIC_VERBOSE, "OLD "); + print_MP_intsrc_info(m); +@@ -925,14 +912,14 @@ static int __init replace_intsrc_all(st + nr_m_spare++; + } + #endif +- mpt += sizeof(struct mpc_config_intsrc); +- count += sizeof(struct mpc_config_intsrc); ++ mpt += sizeof(struct mpc_intsrc); ++ count += sizeof(struct mpc_intsrc); + break; + } + case MP_LINTSRC: + { +- struct mpc_config_lintsrc *m = +- (struct mpc_config_lintsrc *)mpt; ++ struct mpc_lintsrc *m = ++ (struct mpc_lintsrc *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; +@@ -942,7 +929,7 @@ static int __init replace_intsrc_all(st + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); + printk(KERN_ERR "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, +- 1, mpc, mpc->mpc_length, 1); ++ 1, mpc, mpc->length, 1); + goto out; + } + } +@@ -964,9 +951,8 @@ static int __init replace_intsrc_all(st + 
assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); + m_spare[nr_m_spare] = NULL; + } else { +- struct mpc_config_intsrc *m = +- (struct mpc_config_intsrc *)mpt; +- count += sizeof(struct mpc_config_intsrc); ++ struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; ++ count += sizeof(struct mpc_intsrc); + if (!mpc_new_phys) { + printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); + } else { +@@ -978,17 +964,16 @@ static int __init replace_intsrc_all(st + } + } + assign_to_mpc_intsrc(&mp_irqs[i], m); +- mpc->mpc_length = count; +- mpt += sizeof(struct mpc_config_intsrc); ++ mpc->length = count; ++ mpt += sizeof(struct mpc_intsrc); + } + print_mp_irq_info(&mp_irqs[i]); + } + #endif + out: + /* update checksum */ +- mpc->mpc_checksum = 0; +- mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, +- mpc->mpc_length); ++ mpc->checksum = 0; ++ mpc->checksum -= mpf_checksum((unsigned char *)mpc, mpc->length); + + return 0; + } +@@ -1034,8 +1019,7 @@ static int __init update_mp_table(void) + char str[16]; + char oem[10]; + struct intel_mp_floating *mpf; +- struct mp_config_table *mpc; +- struct mp_config_table *mpc_new; ++ struct mpc_table *mpc, *mpc_new; + + if (!enable_update_mptable) + return 0; +@@ -1061,7 +1045,7 @@ static int __init update_mp_table(void) + printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf)); + printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + +- if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { ++ if (mpc_new_phys && mpc->length > mpc_new_length) { + mpc_new_phys = 0; + printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", + mpc_new_length); +@@ -1070,10 +1054,10 @@ static int __init update_mp_table(void) + if (!mpc_new_phys) { + unsigned char old, new; + /* check if we can change the postion */ +- mpc->mpc_checksum = 0; +- old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); +- mpc->mpc_checksum = 0xff; +- new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); ++ mpc->checksum = 0; ++ old = mpf_checksum((unsigned char *)mpc, mpc->length); ++ mpc->checksum = 0xff; ++ new = mpf_checksum((unsigned char *)mpc, mpc->length); + if (old == new) { + printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); + return 0; +@@ -1085,7 +1069,7 @@ static int __init update_mp_table(void) + mpc_new_bus = phys_to_machine(mpc_new_phys); + mpf->mpf_physptr = mpc_new_bus; + mpc_new = phys_to_virt(mpc_new_phys); +- memcpy(mpc_new, mpc, mpc->mpc_length); ++ memcpy(mpc_new, mpc, mpc->length); + mpc = mpc_new; + /* check if we can modify that */ + if (mpc_new_bus - mpf->mpf_physptr) { +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -30,11 +31,6 @@ int no_iommu __read_mostly; + /* Set this to 1 if there is a HW IOMMU in the system */ + int iommu_detected __read_mostly = 0; + +-/* This tells the BIO block layer to assume merging. Default to off +- because we cannot guarantee merging later. */ +-int iommu_bio_merge __read_mostly = 0; +-EXPORT_SYMBOL(iommu_bio_merge); +- + dma_addr_t bad_dma_address __read_mostly = 0; + EXPORT_SYMBOL(bad_dma_address); + +@@ -42,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address); + be probably a smaller DMA mask, but this is bug-to-bug compatible + to older i386. 
*/
+ struct device x86_dma_fallback_dev = {
+- .bus_id = "fallback device",
++ .init_name = "fallback device",
+ .coherent_dma_mask = DMA_32BIT_MASK,
+ .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
+ };
+@@ -105,8 +101,6 @@ static void __init dma32_free_bootmem(vo
+ dma32_bootmem_ptr = NULL;
+ dma32_bootmem_size = 0;
+ }
+-#else
+-#define dma32_free_bootmem() ((void)0)
+ #endif
+
+ static struct dma_mapping_ops swiotlb_dma_ops = {
+@@ -128,8 +122,11 @@ static struct dma_mapping_ops swiotlb_dm
+
+ void __init pci_iommu_alloc(void)
+ {
++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+ /* free the range so iommu could get some range less than 4G */
+ dma32_free_bootmem();
++#endif
++
+ /*
+ * The order of these functions is important for
+ * fall-back/fail-over reasons
+@@ -149,16 +146,6 @@ void __init pci_iommu_alloc(void)
+ }
+ }
+
+-#ifndef CONFIG_XEN
+-unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
+-{
+- unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+-
+- return size >> PAGE_SHIFT;
+-}
+-EXPORT_SYMBOL(iommu_nr_pages);
+-#endif
+-
+ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag)
+ {
+@@ -246,7 +233,6 @@ static __init int iommu_setup(char *p)
+ }
+
+ if (!strncmp(p, "biomerge", 8)) {
+- iommu_bio_merge = 4096;
+ iommu_merge = 1;
+ force_iommu = 1;
+ }
+@@ -385,8 +371,8 @@ fs_initcall(pci_iommu_init);
+ static __devinit void via_no_dac(struct pci_dev *dev)
+ {
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+- printk(KERN_INFO "PCI: VIA PCI bridge detected."
+- "Disabling DAC.\n");
++ printk(KERN_INFO
++ "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+ forbid_dac = 1;
+ }
+ }
+--- sle11sp1-2010-01-20.orig/arch/x86/kernel/process-xen.c 2009-11-06 10:51:47.000000000 +0100
++++ sle11sp1-2010-01-20/arch/x86/kernel/process-xen.c 2009-11-06 10:51:55.000000000 +0100
+@@ -1,13 +1,17 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
++#include
+ #include
++#include
++#include
+
+ unsigned long idle_halt;
+ EXPORT_SYMBOL(idle_halt);
+@@ -99,6 +103,9 @@ static inline int hlt_use_halt(void)
+ */
+ void xen_idle(void)
+ {
++ struct power_trace it;
++
++ trace_power_start(&it, POWER_CSTATE, 1);
+ current_thread_info()->status &= ~TS_POLLING;
+ /*
+ * TS_POLLING-cleared state must be visible before we
+@@ -111,11 +118,27 @@ void xen_idle(void)
+ else
+ local_irq_enable();
+ current_thread_info()->status |= TS_POLLING;
++ trace_power_end(&it);
+ }
+ #ifdef CONFIG_APM_MODULE
+ EXPORT_SYMBOL(default_idle);
+ #endif
+
++void stop_this_cpu(void *dummy)
++{
++ local_irq_disable();
++ /*
++ * Remove this CPU:
++ */
++ cpu_clear(smp_processor_id(), cpu_online_map);
++ disable_all_local_evtchn();
++
++ for (;;) {
++ if (hlt_works(smp_processor_id()))
++ halt();
++ }
++}
++
+ static void do_nothing(void *unused)
+ {
+ }
+@@ -149,24 +172,37 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
+ */
+ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+ {
++ struct power_trace it;
++
++ trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+ if (!need_resched()) {
++ if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
++ clflush((void *)&current_thread_info()->flags);
++
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
++ trace_power_end(&it);
+ }
+
+ /* Default MONITOR/MWAIT with no hints, used for default C1 state */
+ static void mwait_idle(void)
+ {
++ struct power_trace it;
+ if (!need_resched()) {
++ trace_power_start(&it, POWER_CSTATE, 1);
++ if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
++ clflush((void *)&current_thread_info()->flags);
++
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
++ trace_power_end(&it);
+ } else
+ local_irq_enable();
+ }
+@@ -179,9 +215,13 @@ static void mwait_idle(void)
+ */
+ static void poll_idle(void)
+ {
++ struct power_trace it;
++
++ trace_power_start(&it, POWER_CSTATE, 0);
+ local_irq_enable();
+ while (!need_resched())
+ cpu_relax();
++ trace_power_end(&it);
+ }
+
+ #ifndef CONFIG_XEN
+@@ -267,7 +307,7 @@ static void c1e_idle(void)
+ rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+ if (lo & K8_INTP_C1E_ACTIVE_MASK) {
+ c1e_detected = 1;
+- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
++ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ mark_tsc_unstable("TSC halt in AMD C1E");
+ printk(KERN_INFO "System has AMD C1E enabled\n");
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
+--- sle11sp1-2010-01-20.orig/arch/x86/kernel/process_32-xen.c 2009-11-06 10:51:47.000000000 +0100
++++ sle11sp1-2010-01-20/arch/x86/kernel/process_32-xen.c 2009-11-06 10:51:55.000000000 +0100
+@@ -38,11 +38,13 @@
+ #include
+ #include
+ #include
++#include
++#include
++#include
++#include
+
+-#include
+ #include
+ #include
+-#include
+ #include
+ #include
+ #include
+@@ -59,10 +61,9 @@
+
+ #include
+ #include
+-#include
+ #include
+ #include
+-#include
++#include
+
+ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+ asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
+@@ -108,9 +109,6 @@ void cpu_idle(void)
+ check_pgt_cache();
+ rmb();
+
+- if (rcu_pending(cpu))
+- rcu_check_callbacks(cpu, 0);
+-
+ if (cpu_is_offline(cpu))
+ play_dead();
+
+@@ -208,7 +206,7 @@ extern void kernel_thread_helper(void);
+ /*
+ * Create a kernel thread
+ */
+-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
++int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+ {
+ struct pt_regs regs;
+
+@@ -247,14 +245,8 @@ void exit_thread(void)
+ t->io_bitmap_ptr = NULL;
+ clear_thread_flag(TIF_IO_BITMAP);
+ }
+-#ifdef CONFIG_X86_DS
+- /* Free any DS contexts that have not been properly released. */
+- if (unlikely(current->thread.ds_ctx)) {
+- /* we clear debugctl to make sure DS is not used. */
+- update_debugctlmsr(0);
+- ds_free(current->thread.ds_ctx);
+- }
+-#endif /* CONFIG_X86_DS */
++
++ ds_exit_thread(current);
+ }
+
+ void flush_thread(void)
+@@ -267,7 +259,7 @@ void flush_thread(void)
+ tsk->thread.debugreg3 = 0;
+ tsk->thread.debugreg6 = 0;
+ tsk->thread.debugreg7 = 0;
+- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+ clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ /*
+ * Forget coprocessor state..
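The process_32-xen.c hunk above and the copy_thread/__switch_to hunks that follow replace the open-coded Debug Store (DS) MSR handling in exit_thread(), copy_thread() and __switch_to_xtra() with the centralized ds_exit_thread()/ds_copy_thread()/ds_switch_to() helpers, keyed off TIF_DS_AREA_MSR. The sketch below is a minimal, self-contained mock of the new switch-path dispatch rule only; it is illustration, not part of the patch, and every *_mock name is invented for the example — the real helpers and flags live in the kernel's DS code.

/*
 * Standalone userspace mock of the __switch_to_xtra() dispatch the
 * hunks above introduce. All *_mock names are hypothetical stand-ins
 * for ds_switch_to(), update_debugctlmsr(), TIF_DS_AREA_MSR and
 * thread.debugctlmsr.
 */
#include <stdio.h>
#include <stdbool.h>

struct task_mock {
	bool tif_ds_area_msr;      /* stands in for TIF_DS_AREA_MSR    */
	unsigned long debugctlmsr; /* stands in for thread.debugctlmsr */
};

static void ds_switch_to_mock(struct task_mock *prev, struct task_mock *next)
{
	/* The real helper saves/restores the whole DS area and DEBUGCTL. */
	printf("DS context switch: %lu -> %lu\n",
	       prev->debugctlmsr, next->debugctlmsr);
}

static void update_debugctlmsr_mock(unsigned long val)
{
	printf("wrmsr(DEBUGCTL, %lu)\n", val);
}

/* Mirrors the post-patch logic: take the slow DS path only when either
 * task owns a DS area, otherwise touch the MSR only when its value
 * actually changes. */
static void switch_to_xtra_mock(struct task_mock *prev, struct task_mock *next)
{
	if (prev->tif_ds_area_msr || next->tif_ds_area_msr)
		ds_switch_to_mock(prev, next);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr_mock(next->debugctlmsr);
}

int main(void)
{
	struct task_mock a = { false, 0 }, b = { false, 1 }, c = { true, 2 };

	switch_to_xtra_mock(&a, &b); /* plain MSR write        */
	switch_to_xtra_mock(&b, &c); /* full DS context switch */
	switch_to_xtra_mock(&a, &a); /* no-op: nothing changed */
	return 0;
}

The design point the hunks encode: the expensive DS save/restore runs only when at least one of the two tasks actually owns a DS area, while the common context-switch path pays at most one conditional MSR write.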
+@@ -294,9 +286,9 @@ void prepare_to_copy(struct task_struct + + int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, + unsigned long unused, +- struct task_struct * p, struct pt_regs * regs) ++ struct task_struct *p, struct pt_regs *regs) + { +- struct pt_regs * childregs; ++ struct pt_regs *childregs; + struct task_struct *tsk; + int err; + +@@ -340,13 +332,19 @@ int copy_thread(int nr, unsigned long cl + kfree(p->thread.io_bitmap_ptr); + p->thread.io_bitmap_max = 0; + } ++ ++ ds_copy_thread(p, current); ++ ++ clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); ++ p->thread.debugctlmsr = 0; ++ + return err; + } + + void + start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) + { +- __asm__("movl %0, %%gs" :: "r"(0)); ++ __asm__("movl %0, %%gs" : : "r"(0)); + regs->fs = 0; + set_fs(USER_DS); + regs->ds = __USER_DS; +@@ -420,47 +418,18 @@ int set_tsc_mode(unsigned int val) + return 0; + } + +-#ifdef CONFIG_X86_DS +-static int update_debugctl(struct thread_struct *prev, +- struct thread_struct *next, unsigned long debugctl) +-{ +- unsigned long ds_prev = 0; +- unsigned long ds_next = 0; +- +- if (prev->ds_ctx) +- ds_prev = (unsigned long)prev->ds_ctx->ds; +- if (next->ds_ctx) +- ds_next = (unsigned long)next->ds_ctx->ds; +- +- if (ds_next != ds_prev) { +- /* we clear debugctl to make sure DS +- * is not in use when we change it */ +- debugctl = 0; +- update_debugctlmsr(0); +- wrmsr(MSR_IA32_DS_AREA, ds_next, 0); +- } +- return debugctl; +-} +-#else +-static int update_debugctl(struct thread_struct *prev, +- struct thread_struct *next, unsigned long debugctl) +-{ +- return debugctl; +-} +-#endif /* CONFIG_X86_DS */ +- + static noinline void + __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev, *next; +- unsigned long debugctl; + + prev = &prev_p->thread; + next = &next_p->thread; + +- debugctl = update_debugctl(prev, next, prev->debugctlmsr); +- +- if (next->debugctlmsr != debugctl) ++ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || ++ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) ++ ds_switch_to(prev_p, next_p); ++ else if (next->debugctlmsr != prev->debugctlmsr) + update_debugctlmsr(next->debugctlmsr); + + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { +@@ -481,14 +450,6 @@ __switch_to_xtra(struct task_struct *pre + else + hard_enable_TSC(); + } +- +-#ifdef CONFIG_X86_PTRACE_BTS +- if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) +- ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); +- +- if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) +- ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); +-#endif /* CONFIG_X86_PTRACE_BTS */ + } + + /* +@@ -518,7 +479,8 @@ __switch_to_xtra(struct task_struct *pre + * the task-switch, and shows up in ret_from_fork in entry.S, + * for example. 
+ */ +-struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ++__notrace_funcgraph struct task_struct * ++__switch_to(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; +@@ -698,7 +660,7 @@ asmlinkage int sys_vfork(struct pt_regs + asmlinkage int sys_execve(struct pt_regs regs) + { + int error; +- char * filename; ++ char *filename; + + filename = getname((char __user *) regs.bx); + error = PTR_ERR(filename); +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/process_64-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/process_64-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -42,6 +42,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -59,6 +61,7 @@ + #include + #include + #include ++#include + + #include + +@@ -158,14 +161,18 @@ void __show_regs(struct pt_regs *regs, i + unsigned long d0, d1, d2, d3, d6, d7; + unsigned int fsindex, gsindex; + unsigned int ds, cs, es; ++ const char *board; + + printk("\n"); + print_modules(); +- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", ++ board = dmi_get_system_info(DMI_PRODUCT_NAME); ++ if (!board) ++ board = ""; ++ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), +- init_utsname()->version); ++ init_utsname()->version, board); + printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk_address(regs->ip, 1); + printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, +@@ -256,14 +263,8 @@ void exit_thread(void) + #endif + t->io_bitmap_max = 0; + } +-#ifdef CONFIG_X86_DS +- /* Free any DS contexts that have not been properly released. */ +- if (unlikely(t->ds_ctx)) { +- /* we clear debugctl to make sure DS is not used. 
*/ +- update_debugctlmsr(0); +- ds_free(t->ds_ctx); +- } +-#endif /* CONFIG_X86_DS */ ++ ++ ds_exit_thread(current); + } + + void xen_load_gs_index(unsigned gs) +@@ -400,6 +401,11 @@ int copy_thread(int nr, unsigned long cl + } + p->thread.iopl = current->thread.iopl; + ++ ds_copy_thread(p, me); ++ ++ clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); ++ p->thread.debugctlmsr = 0; ++ + err = 0; + out: + if (err && p->thread.io_bitmap_ptr) { +@@ -497,35 +503,14 @@ static inline void __switch_to_xtra(stru + struct task_struct *next_p) + { + struct thread_struct *prev, *next; +- unsigned long debugctl; + + prev = &prev_p->thread, + next = &next_p->thread; + +- debugctl = prev->debugctlmsr; +- +-#ifdef CONFIG_X86_DS +- { +- unsigned long ds_prev = 0, ds_next = 0; +- +- if (prev->ds_ctx) +- ds_prev = (unsigned long)prev->ds_ctx->ds; +- if (next->ds_ctx) +- ds_next = (unsigned long)next->ds_ctx->ds; +- +- if (ds_next != ds_prev) { +- /* +- * We clear debugctl to make sure DS +- * is not in use when we change it: +- */ +- debugctl = 0; +- update_debugctlmsr(0); +- wrmsrl(MSR_IA32_DS_AREA, ds_next); +- } +- } +-#endif /* CONFIG_X86_DS */ +- +- if (next->debugctlmsr != debugctl) ++ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || ++ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) ++ ds_switch_to(prev_p, next_p); ++ else if (next->debugctlmsr != prev->debugctlmsr) + update_debugctlmsr(next->debugctlmsr); + + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { +@@ -546,14 +531,6 @@ static inline void __switch_to_xtra(stru + else + hard_enable_TSC(); + } +- +-#ifdef CONFIG_X86_PTRACE_BTS +- if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) +- ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); +- +- if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) +- ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); +-#endif /* CONFIG_X86_PTRACE_BTS */ + } + + /* +@@ -564,8 +541,9 @@ static inline void __switch_to_xtra(stru + * - could test fs/gs bitsliced + * + * Kprobes not supported here. Set the probe on schedule instead. ++ * Function graph tracer not supported too. + */ +-struct task_struct * ++__notrace_funcgraph struct task_struct * + __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev = &prev_p->thread; +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/quirks-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/quirks-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -169,6 +169,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I + ich_force_enable_hpet); + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, + ich_force_enable_hpet); ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, ++ ich_force_enable_hpet); + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, + ich_force_enable_hpet); + +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/setup-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/setup-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -93,11 +93,13 @@ + #include + #include + #include ++#include + #include + #include + + #include + #include ++#include + + #include + #include +@@ -508,6 +510,7 @@ static void __init reserve_early_setup_d + * @size: Size of the crashkernel memory to reserve. + * Returns the base address on success, and -1ULL on failure. 
+ */ ++static + unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) + { + const unsigned long long alignment = 16<<20; /* 16M */ +@@ -650,165 +653,32 @@ static int __init setup_elfcorehdr(char + early_param("elfcorehdr", setup_elfcorehdr); + #endif + +-static struct x86_quirks default_x86_quirks __initdata; +- +-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; +- +-/* +- * Some BIOSes seem to corrupt the low 64k of memory during events +- * like suspend/resume and unplugging an HDMI cable. Reserve all +- * remaining free memory in that area and fill it with a distinct +- * pattern. +- */ +-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +-#define MAX_SCAN_AREAS 8 +- +-static int __read_mostly memory_corruption_check = -1; +- +-static unsigned __read_mostly corruption_check_size = 64*1024; +-static unsigned __read_mostly corruption_check_period = 60; /* seconds */ +- +-static struct e820entry scan_areas[MAX_SCAN_AREAS]; +-static int num_scan_areas; +- +- +-static int set_corruption_check(char *arg) +-{ +- char *end; +- +- memory_corruption_check = simple_strtol(arg, &end, 10); +- +- return (*end == 0) ? 0 : -EINVAL; +-} +-early_param("memory_corruption_check", set_corruption_check); +- +-static int set_corruption_check_period(char *arg) +-{ +- char *end; +- +- corruption_check_period = simple_strtoul(arg, &end, 10); +- +- return (*end == 0) ? 0 : -EINVAL; +-} +-early_param("memory_corruption_check_period", set_corruption_check_period); +- +-static int set_corruption_check_size(char *arg) ++#ifndef CONFIG_XEN ++static int __init default_update_genapic(void) + { +- char *end; +- unsigned size; +- +- size = memparse(arg, &end); +- +- if (*end == '\0') +- corruption_check_size = size; ++#ifdef CONFIG_X86_SMP ++# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) ++ genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; ++# endif ++#endif + +- return (size == corruption_check_size) ? 
0 : -EINVAL; ++ return 0; + } +-early_param("memory_corruption_check_size", set_corruption_check_size); +- +- +-static void __init setup_bios_corruption_check(void) +-{ +- u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ +- +- if (memory_corruption_check == -1) { +- memory_corruption_check = +-#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK +- 1 + #else +- 0 ++#define default_update_genapic NULL + #endif +- ; +- } +- +- if (corruption_check_size == 0) +- memory_corruption_check = 0; +- +- if (!memory_corruption_check) +- return; +- +- corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); +- +- while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { +- u64 size; +- addr = find_e820_area_size(addr, &size, PAGE_SIZE); +- +- if (addr == 0) +- break; +- +- if ((addr + size) > corruption_check_size) +- size = corruption_check_size - addr; + +- if (size == 0) +- break; +- +- e820_update_range(addr, size, E820_RAM, E820_RESERVED); +- scan_areas[num_scan_areas].addr = addr; +- scan_areas[num_scan_areas].size = size; +- num_scan_areas++; +- +- /* Assume we've already mapped this early memory */ +- memset(__va(addr), 0, size); +- +- addr += size; +- } +- +- printk(KERN_INFO "Scanning %d areas for low memory corruption\n", +- num_scan_areas); +- update_e820(); +-} +- +-static struct timer_list periodic_check_timer; +- +-void check_for_bios_corruption(void) +-{ +- int i; +- int corruption = 0; +- +- if (!memory_corruption_check) +- return; +- +- for(i = 0; i < num_scan_areas; i++) { +- unsigned long *addr = __va(scan_areas[i].addr); +- unsigned long size = scan_areas[i].size; +- +- for(; size; addr++, size -= sizeof(unsigned long)) { +- if (!*addr) +- continue; +- printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", +- addr, __pa(addr), *addr); +- corruption = 1; +- *addr = 0; +- } +- } +- +- WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); +-} +- +-static void periodic_check_for_corruption(unsigned long data) +-{ +- check_for_bios_corruption(); +- mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); +-} +- +-void start_periodic_check_for_corruption(void) +-{ +- if (!memory_corruption_check || corruption_check_period == 0) +- return; +- +- printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", +- corruption_check_period); ++static struct x86_quirks default_x86_quirks __initdata = { ++ .update_genapic = default_update_genapic, ++}; + +- init_timer(&periodic_check_timer); +- periodic_check_timer.function = &periodic_check_for_corruption; +- periodic_check_for_corruption(0); +-} +-#endif ++struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + ++#ifdef CONFIG_X86_RESERVE_LOW_64K + static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) + { + printk(KERN_NOTICE +- "%s detected: BIOS may corrupt low RAM, working it around.\n", ++ "%s detected: BIOS may corrupt low RAM, working around it.\n", + d->ident); + + e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); +@@ -816,6 +686,7 @@ static int __init dmi_low_memory_corrupt + + return 0; + } ++#endif + + /* List of systems that have known low memory corruption BIOS problems */ + static struct dmi_system_id __initdata bad_bios_dmi_table[] = { +@@ -1023,15 +894,25 @@ void __init setup_arch(char **cmdline_p) + + finish_e820_parsing(); + ++ if (efi_enabled) ++ efi_init(); ++ + if (is_initial_xendomain()) { + dmi_scan_machine(); + + dmi_check_system(bad_bios_dmi_table); ++ } ++ ++ /* ++ * 
VMware detection requires dmi to be available, so this ++ * needs to be done after dmi_scan_machine, for the BP. ++ */ ++ init_hypervisor(&boot_cpu_data); + + #ifdef CONFIG_X86_32 ++ if (is_initial_xendomain()) + probe_roms(); + #endif +- } + + #ifndef CONFIG_XEN + /* after parse_early_param, so could debug it */ +@@ -1039,8 +920,6 @@ void __init setup_arch(char **cmdline_p) + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + +- if (efi_enabled) +- efi_init(); + + #ifdef CONFIG_X86_32 + if (ppro_with_ram_bug()) { +@@ -1295,7 +1174,7 @@ void __init setup_arch(char **cmdline_p) + ioapic_init_mappings(); + + /* need to wait for io_apic is mapped */ +- nr_irqs = probe_nr_irqs(); ++ probe_nr_irqs_gsi(); + + kvm_guest_init(); + +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/smp-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/smp-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -1,7 +1,7 @@ + /* + * Intel SMP support routines. + * +- * (c) 1995 Alan Cox, Building #3 ++ * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * +@@ -118,30 +118,17 @@ void xen_smp_send_reschedule(int cpu) + WARN_ON(1); + return; + } +- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); ++ send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + } + + void xen_send_call_func_single_ipi(int cpu) + { +- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_VECTOR); ++ send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_VECTOR); + } + +-void xen_send_call_func_ipi(cpumask_t mask) ++void xen_send_call_func_ipi(const struct cpumask *mask) + { +- send_IPI_mask(mask, CALL_FUNCTION_VECTOR); +-} +- +-static void stop_this_cpu(void *dummy) +-{ +- local_irq_disable(); +- /* +- * Remove this CPU: +- */ +- cpu_clear(smp_processor_id(), cpu_online_map); +- disable_all_local_evtchn(); +- if (hlt_works(smp_processor_id())) +- for (;;) halt(); +- for (;;); ++ send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR); + } + + /* +@@ -165,11 +152,7 @@ void xen_smp_send_stop(void) + */ + irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) + { +-#ifdef CONFIG_X86_32 +- __get_cpu_var(irq_stat).irq_resched_count++; +-#else +- add_pda(irq_resched_count, 1); +-#endif ++ inc_irq_stat(irq_resched_count); + return IRQ_HANDLED; + } + +@@ -177,11 +160,7 @@ irqreturn_t smp_call_function_interrupt( + { + irq_enter(); + generic_smp_call_function_interrupt(); +-#ifdef CONFIG_X86_32 +- __get_cpu_var(irq_stat).irq_call_count++; +-#else +- add_pda(irq_call_count, 1); +-#endif ++ inc_irq_stat(irq_call_count); + irq_exit(); + + return IRQ_HANDLED; +@@ -191,11 +170,7 @@ irqreturn_t smp_call_function_single_int + { + irq_enter(); + generic_smp_call_function_single_interrupt(); +-#ifdef CONFIG_X86_32 +- __get_cpu_var(irq_stat).irq_call_count++; +-#else +- add_pda(irq_call_count, 1); +-#endif ++ inc_irq_stat(irq_call_count); + irq_exit(); + + return IRQ_HANDLED; +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/time-xen.c 2009-11-23 10:41:33.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/time-xen.c 2009-11-23 10:41:53.000000000 +0100 +@@ -445,11 +445,7 @@ irqreturn_t timer_interrupt(int irq, voi + struct vcpu_runstate_info runstate; + + /* Keep nmi watchdog up to date */ +-#ifdef __i386__ +- x86_add_percpu(irq_stat.irq0_irqs, 1); +-#else +- add_pda(irq0_irqs, 1); +-#endif ++ inc_irq_stat(irq0_irqs); + + /* + * Here we are in the timer irq handler. 
We just have irqs locally
+@@ -509,7 +505,6 @@ irqreturn_t timer_interrupt(int irq, voi
+ 
+ /*
+ * Account stolen ticks.
+- * HACK: Passing NULL to account_steal_time()
+- * ensures that the ticks are accounted as stolen.
++ * account_steal_time() marks these ticks as stolen.
+ */
+ stolen = runstate.time[RUNSTATE_runnable]
+@@ -522,12 +517,11 @@ irqreturn_t timer_interrupt(int irq, voi
+ do_div(stolen, NS_PER_TICK);
+ per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
+ per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
+- account_steal_time(NULL, (cputime_t)stolen);
++ account_steal_time((cputime_t)stolen);
+ }
+ 
+ /*
+ * Account blocked ticks.
+- * HACK: Passing idle_task to account_steal_time()
+- * ensures that the ticks are accounted as idle/wait.
++ * account_idle_time() marks these ticks as idle/wait.
+ */
+ blocked = runstate.time[RUNSTATE_blocked]
+@@ -539,7 +533,7 @@ irqreturn_t timer_interrupt(int irq, voi
+ do_div(blocked, NS_PER_TICK);
+ per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
+ per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
+- account_steal_time(idle_task(cpu), (cputime_t)blocked);
++ account_idle_time((cputime_t)blocked);
+ }
+ 
+ /* Account user/system ticks. */
+@@ -547,10 +541,14 @@ irqreturn_t timer_interrupt(int irq, voi
+ do_div(delta_cpu, NS_PER_TICK);
+ per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
+ if (user_mode_vm(get_irq_regs()))
+- account_user_time(current, (cputime_t)delta_cpu);
+- else
++ account_user_time(current, (cputime_t)delta_cpu,
++ (cputime_t)delta_cpu);
++ else if (current != idle_task(cpu))
+ account_system_time(current, HARDIRQ_OFFSET,
++ (cputime_t)delta_cpu,
+ (cputime_t)delta_cpu);
++ else
++ account_idle_time((cputime_t)delta_cpu);
+ }
+ 
+ /* Offlined for more than a few seconds? Avoid lockup warnings. */
+@@ -779,7 +777,7 @@ static void stop_hz_timer(void)
+ unsigned long j;
+ int rc;
+ 
+- cpu_set(cpu, nohz_cpu_mask);
++ cpumask_set_cpu(cpu, nohz_cpu_mask);
+ 
+ /* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
+ /* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
+@@ -795,7 +793,7 @@ static void stop_hz_timer(void)
+ local_softirq_pending() ||
+ (j = get_next_timer_interrupt(jiffies),
+ time_before_eq(j, jiffies))) {
+- cpu_clear(cpu, nohz_cpu_mask);
++ cpumask_clear_cpu(cpu, nohz_cpu_mask);
+ j = jiffies + 1;
+ }
+ 
+@@ -813,7 +811,7 @@ static void stop_hz_timer(void)
+ 
+ static void start_hz_timer(void)
+ {
+- cpu_clear(smp_processor_id(), nohz_cpu_mask);
++ cpumask_clear_cpu(smp_processor_id(), nohz_cpu_mask);
+ }
+ 
+ void xen_safe_halt(void)
+--- sle11sp1-2010-01-20.orig/arch/x86/kernel/traps-xen.c 2009-11-06 10:51:47.000000000 +0100
++++ sle11sp1-2010-01-20/arch/x86/kernel/traps-xen.c 2009-11-06 10:51:55.000000000 +0100
+@@ -20,7 +20,6 @@
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
+ #include
+@@ -51,7 +50,6 @@
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
+ #include
+@@ -65,18 +63,10 @@
+ #else
+ #include
+ #include
+-#include
+-#include
+-#include
+ #include
+ 
+ #include "cpu/mcheck/mce.h"
+ 
+-#ifndef CONFIG_XEN
+-DECLARE_BITMAP(used_vectors, NR_VECTORS);
+-EXPORT_SYMBOL_GPL(used_vectors);
+-#endif
+-
+ asmlinkage int system_call(void);
+ 
+ /* Do we ignore FPU interrupts ? 
*/ +@@ -93,6 +83,11 @@ gate_desc idt_table[256] + #endif + #endif + ++#ifndef CONFIG_XEN ++DECLARE_BITMAP(used_vectors, NR_VECTORS); ++EXPORT_SYMBOL_GPL(used_vectors); ++#endif ++ + static int ignore_nmis; + + static inline void conditional_sti(struct pt_regs *regs) +@@ -108,6 +103,12 @@ static inline void preempt_conditional_s + local_irq_enable(); + } + ++static inline void conditional_cli(struct pt_regs *regs) ++{ ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_disable(); ++} ++ + static inline void preempt_conditional_cli(struct pt_regs *regs) + { + if (regs->flags & X86_EFLAGS_IF) +@@ -298,8 +299,10 @@ dotraplinkage void do_double_fault(struc + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 8; + +- /* This is always a kernel trap and never fixable (and thus must +- never return). */ ++ /* ++ * This is always a kernel trap and never fixable (and thus must ++ * never return). ++ */ + for (;;) + die(str, regs, error_code); + } +@@ -476,11 +479,7 @@ do_nmi(struct pt_regs *regs, long error_ + { + nmi_enter(); + +-#ifdef CONFIG_X86_32 +- { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } +-#else +- add_pda(__nmi_count, 1); +-#endif ++ inc_irq_stat(__nmi_count); + + if (!ignore_nmis) + default_do_nmi(regs); +@@ -519,9 +518,11 @@ dotraplinkage void __kprobes do_int3(str + } + + #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) +-/* Help handler running on IST stack to switch back to user stack +- for scheduling or signal handling. The actual stack switch is done in +- entry.S */ ++/* ++ * Help handler running on IST stack to switch back to user stack ++ * for scheduling or signal handling. The actual stack switch is done in ++ * entry.S ++ */ + asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) + { + struct pt_regs *regs = eregs; +@@ -531,8 +532,10 @@ asmlinkage __kprobes struct pt_regs *syn + /* Exception from user space */ + else if (user_mode(eregs)) + regs = task_pt_regs(current); +- /* Exception from kernel and interrupts are enabled. Move to +- kernel process stack. */ ++ /* ++ * Exception from kernel and interrupts are enabled. Move to ++ * kernel process stack. ++ */ + else if (eregs->flags & X86_EFLAGS_IF) + regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); + if (eregs != regs) +@@ -624,8 +627,10 @@ clear_dr7: + + #ifdef CONFIG_X86_32 + debug_vm86: ++ /* reenable preemption: handle_vm86_trap() might sleep */ ++ dec_preempt_count(); + handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); +- preempt_conditional_cli(regs); ++ conditional_cli(regs); + return; + #endif + +@@ -659,7 +664,7 @@ void math_error(void __user *ip) + { + struct task_struct *task; + siginfo_t info; +- unsigned short cwd, swd; ++ unsigned short cwd, swd, err; + + /* + * Save the info for the exception handler and clear the error. 
+@@ -670,7 +675,6 @@ void math_error(void __user *ip) + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; +- info.si_code = __SI_FAULT; + info.si_addr = ip; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked +@@ -684,34 +688,30 @@ void math_error(void __user *ip) + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); +- switch (swd & ~cwd & 0x3f) { +- case 0x000: /* No unmasked exception */ +-#ifdef CONFIG_X86_32 +- return; +-#endif +- default: /* Multiple exceptions */ +- break; +- case 0x001: /* Invalid Op */ ++ ++ err = swd & ~cwd; ++ ++ if (err & 0x001) { /* Invalid op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + info.si_code = FPE_FLTINV; +- break; +- case 0x002: /* Denormalize */ +- case 0x010: /* Underflow */ +- info.si_code = FPE_FLTUND; +- break; +- case 0x004: /* Zero Divide */ ++ } else if (err & 0x004) { /* Divide by Zero */ + info.si_code = FPE_FLTDIV; +- break; +- case 0x008: /* Overflow */ ++ } else if (err & 0x008) { /* Overflow */ + info.si_code = FPE_FLTOVF; +- break; +- case 0x020: /* Precision */ ++ } else if (err & 0x012) { /* Denormal, Underflow */ ++ info.si_code = FPE_FLTUND; ++ } else if (err & 0x020) { /* Precision */ + info.si_code = FPE_FLTRES; +- break; ++ } else { ++ /* ++ * If we're using IRQ 13, or supposedly even some trap 16 ++ * implementations, it's possible we get a spurious trap... ++ */ ++ return; /* Spurious trap, no error */ + } + force_sig_info(SIGFPE, &info, task); + } +@@ -901,7 +901,7 @@ asmlinkage void math_state_restore(void) + EXPORT_SYMBOL_GPL(math_state_restore); + + #ifndef CONFIG_MATH_EMULATION +-asmlinkage void math_emulate(long arg) ++void math_emulate(struct math_emu_info *info) + { + printk(KERN_EMERG + "math-emulation not enabled and no coprocessor found.\n"); +@@ -911,16 +911,19 @@ asmlinkage void math_emulate(long arg) + } + #endif /* CONFIG_MATH_EMULATION */ + +-dotraplinkage void __kprobes +-do_device_not_available(struct pt_regs *regs, long error) ++dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) + { + #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) + if (read_cr0() & X86_CR0_EM) { +- conditional_sti(regs); +- math_emulate(0); ++ struct math_emu_info info = { }; ++ ++ conditional_sti(®s); ++ ++ info.regs = ®s; ++ math_emulate(&info); + } else { + math_state_restore(); /* interrupts still off */ +- conditional_sti(regs); ++ conditional_sti(®s); + } + #else + math_state_restore(); +--- sle11sp1-2010-01-20.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/kernel/vsyscall_64-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -17,6 +17,9 @@ + * want per guest time just set the kernel.vsyscall64 sysctl to 0. 
+ */ + ++/* Disable profiling for userspace code: */ ++#define DISABLE_BRANCH_PROFILING ++ + #include + #include + #include +@@ -128,7 +131,16 @@ static __always_inline void do_vgettimeo + gettimeofday(tv,NULL); + return; + } ++ ++ /* ++ * Surround the RDTSC by barriers, to make sure it's not ++ * speculated to outside the seqlock critical section and ++ * does not cause time warps: ++ */ ++ rdtsc_barrier(); + now = vread(); ++ rdtsc_barrier(); ++ + base = __vsyscall_gtod_data.clock.cycle_last; + mask = __vsyscall_gtod_data.clock.mask; + mult = __vsyscall_gtod_data.clock.mult; +--- sle11sp1-2010-01-20.orig/arch/x86/mm/fault-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/fault-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -53,7 +53,7 @@ + + static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) + { +-#ifdef CONFIG_MMIOTRACE_HOOKS ++#ifdef CONFIG_MMIOTRACE + if (unlikely(is_kmmio_active())) + if (kmmio_handler(regs, addr) == 1) + return -1; +@@ -406,7 +406,7 @@ static void show_fault_oops(struct pt_re + if (pte && pte_present(*pte) && !pte_exec(*pte)) + printk(KERN_CRIT "kernel tried to execute " + "NX-protected page - exploit attempt? " +- "(uid: %d)\n", current->uid); ++ "(uid: %d)\n", current_uid()); + } + #endif + +@@ -426,6 +426,7 @@ static noinline void pgtable_bad(unsigne + unsigned long error_code) + { + unsigned long flags = oops_begin(); ++ int sig = SIGKILL; + struct task_struct *tsk; + + printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", +@@ -436,8 +437,8 @@ static noinline void pgtable_bad(unsigne + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + if (__die("Bad pagetable", regs, error_code)) +- regs = NULL; +- oops_end(flags, regs, SIGKILL); ++ sig = 0; ++ oops_end(flags, regs, sig); + } + #endif + +@@ -546,10 +547,7 @@ static int vmalloc_fault(unsigned long a + happen within a race in page table update. In the later + case just flush. */ + +- /* On Xen the line below does not always work. Needs investigating! */ +- /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ +- pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); +- pgd += pgd_index(address); ++ pgd = pgd_offset(current->active_mm, address); + pgd_ref = pgd_offset_k(address); + if (pgd_none(*pgd_ref)) + return -1; +@@ -606,6 +604,7 @@ void __kprobes do_page_fault(struct pt_r + int fault; + #ifdef CONFIG_X86_64 + unsigned long flags; ++ int sig; + #endif + + /* Set the "privileged fault" bit to something sane. */ +@@ -623,8 +622,6 @@ void __kprobes do_page_fault(struct pt_r + + si_code = SEGV_MAPERR; + +- if (notify_page_fault(regs)) +- return; + if (unlikely(kmmio_fault(regs, address))) + return; + +@@ -663,6 +660,9 @@ void __kprobes do_page_fault(struct pt_r + if (spurious_fault(address, error_code)) + return; + ++ /* kprobes don't want to hook the spurious faults. */ ++ if (notify_page_fault(regs)) ++ return; + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. +@@ -670,6 +670,9 @@ void __kprobes do_page_fault(struct pt_r + goto bad_area_nosemaphore; + } + ++ /* kprobes don't want to hook the spurious faults. */ ++ if (notify_page_fault(regs)) ++ return; + + /* + * It's safe to allow irq's after cr2 has been saved and the +@@ -696,7 +699,6 @@ void __kprobes do_page_fault(struct pt_r + if (unlikely(in_atomic() || !mm)) + goto bad_area_nosemaphore; + +-again: + /* + * When running in the kernel we expect faults to occur only to + * addresses in user space. 
All other faults represent errors in the
+@@ -880,32 +882,22 @@ no_context:
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+ #else
++ sig = SIGKILL;
+ if (__die("Oops", regs, error_code))
+- regs = NULL;
++ sig = 0;
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+- oops_end(flags, regs, SIGKILL);
++ oops_end(flags, regs, sig);
+ #endif
+ 
+-/*
+- * We ran out of memory, or some other thing happened to us that made
+- * us unable to handle the page fault gracefully.
+- */
+ out_of_memory:
++ /*
++ * We ran out of memory, call the OOM killer, and return to userspace
++ * (which will retry the fault, or kill us if we got oom-killed).
++ */
+ up_read(&mm->mmap_sem);
+- if (is_global_init(tsk)) {
+- yield();
+- /*
+- * Re-lookup the vma - in theory the vma tree might
+- * have changed:
+- */
+- goto again;
+- }
+-
+- printk("VM: killing process %s\n", tsk->comm);
+- if (error_code & PF_USER)
+- do_group_exit(SIGKILL);
+- goto no_context;
++ pagefault_out_of_memory();
++ return;
+ 
+ do_sigbus:
+ up_read(&mm->mmap_sem);
+--- sle11sp1-2010-01-20.orig/arch/x86/mm/hypervisor.c 2009-11-06 10:51:42.000000000 +0100
++++ sle11sp1-2010-01-20/arch/x86/mm/hypervisor.c 2009-11-06 10:51:55.000000000 +0100
+@@ -79,12 +79,12 @@ static void multicall_failed(const multi
+ BUG();
+ }
+ 
+-int xen_multicall_flush(bool ret_last) {
++static int _xen_multicall_flush(bool ret_last) {
+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
+ multicall_entry_t *mc = lazy->mc;
+ unsigned int count = lazy->nr_mc;
+ 
+- if (!count || !use_lazy_mmu_mode())
++ if (!count)
+ return 0;
+ 
+ lazy->nr_mc = 0;
+@@ -112,6 +112,11 @@ int xen_multicall_flush(bool ret_last) {
+ 
+ return 0;
+ }
++
++void xen_multicall_flush(bool force) {
++ if (force || use_lazy_mmu_mode())
++ _xen_multicall_flush(false);
++}
+ EXPORT_SYMBOL(xen_multicall_flush);
+ 
+ int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
+@@ -130,7 +135,7 @@ int xen_multi_update_va_mapping(unsigned
+ #endif
+ 
+ if (unlikely(lazy->nr_mc == NR_MC))
+- xen_multicall_flush(false);
++ _xen_multicall_flush(false);
+ 
+ mc = lazy->mc + lazy->nr_mc++;
+ mc->op = __HYPERVISOR_update_va_mapping;
+@@ -169,7 +174,7 @@ int xen_multi_mmu_update(mmu_update_t *s
+ merge = lazy->nr_mc && !commit
+ && mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
+ if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
+- xen_multicall_flush(false);
++ _xen_multicall_flush(false);
+ mc = lazy->mc;
+ commit = count > NR_MMU || success_count;
+ }
+@@ -207,7 +212,7 @@ int xen_multi_mmu_update(mmu_update_t *s
+ break;
+ }
+ 
+- return commit ? xen_multicall_flush(true) : 0;
++ return commit ? _xen_multicall_flush(true) : 0;
+ }
+ 
+ int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
+@@ -291,7 +296,7 @@ int xen_multi_mmuext_op(struct mmuext_op
+ merge = lazy->nr_mc && !commit
+ && mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
+ if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
+- xen_multicall_flush(false);
++ _xen_multicall_flush(false);
+ mc = lazy->mc;
+ commit = count > NR_MMUEXT || success_count;
+ }
+@@ -338,7 +343,7 @@ int xen_multi_mmuext_op(struct mmuext_op
+ break;
+ }
+ 
+- return commit ? 
_xen_multicall_flush(true) : 0; + } + + void xen_l1_entry_update(pte_t *ptr, pte_t val) +--- sle11sp1-2010-01-20.orig/arch/x86/mm/init_32-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/init_32-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -71,7 +71,7 @@ static unsigned long __initdata table_to + + static int __initdata after_init_bootmem; + +-static __init void *alloc_low_page(unsigned long *phys) ++static __init void *alloc_low_page(void) + { + unsigned long pfn = table_end++; + void *adr; +@@ -81,7 +81,6 @@ static __init void *alloc_low_page(unsig + + adr = __va(pfn * PAGE_SIZE); + memset(adr, 0, PAGE_SIZE); +- *phys = pfn * PAGE_SIZE; + return adr; + } + +@@ -96,17 +95,18 @@ static pmd_t * __init one_md_table_init( + pmd_t *pmd_table; + + #ifdef CONFIG_X86_PAE +- unsigned long phys; + if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) { + if (after_init_bootmem) + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + else +- pmd_table = (pmd_t *)alloc_low_page(&phys); ++ pmd_table = (pmd_t *)alloc_low_page(); + paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); + make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + BUG_ON(pmd_table != pmd_offset(pud, 0)); ++ ++ return pmd_table; + } + #endif + pud = pud_offset(pgd, 0); +@@ -135,10 +135,8 @@ static pte_t * __init one_page_table_ini + if (!page_table) + page_table = + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); +- } else { +- unsigned long phys; +- page_table = (pte_t *)alloc_low_page(&phys); +- } ++ } else ++ page_table = (pte_t *)alloc_low_page(); + + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); + make_lowmem_page_readonly(page_table, +@@ -150,6 +148,51 @@ static pte_t * __init one_page_table_ini + return pte_offset_kernel(pmd, 0); + } + ++static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, ++ unsigned long vaddr, pte_t *lastpte) ++{ ++#ifdef CONFIG_HIGHMEM ++ /* ++ * Something (early fixmap) may already have put a pte ++ * page here, which causes the page table allocation ++ * to become nonlinear. Attempt to fix it, and if it ++ * is still nonlinear then we have to bug. 
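++ * The fix copies the existing ptes into a freshly allocated
++ * low page and repoints the pmd at that copy.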
++ */ ++ int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; ++ int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; ++ ++ if (pmd_idx_kmap_begin != pmd_idx_kmap_end ++ && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin ++ && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end ++ && ((__pa(pte) >> PAGE_SHIFT) < table_start ++ || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { ++ pte_t *newpte; ++ int i; ++ ++ BUG_ON(after_init_bootmem); ++ newpte = alloc_low_page(); ++ for (i = 0; i < PTRS_PER_PTE; i++) ++ set_pte(newpte + i, pte[i]); ++ ++ paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); ++ make_lowmem_page_readonly(newpte, ++ XENFEAT_writable_page_tables); ++ set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); ++ BUG_ON(newpte != pte_offset_kernel(pmd, 0)); ++ __flush_tlb_all(); ++ ++ paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); ++ make_lowmem_page_writable(pte, ++ XENFEAT_writable_page_tables); ++ pte = newpte; ++ } ++ BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) ++ && vaddr > fix_to_virt(FIX_KMAP_END) ++ && lastpte && lastpte + PTRS_PER_PTE != pte); ++#endif ++ return pte; ++} ++ + /* + * This function initializes a certain range of kernel virtual memory + * with new bootmem page tables, everywhere page tables are missing in +@@ -166,6 +209,7 @@ page_table_range_init(unsigned long star + unsigned long vaddr; + pgd_t *pgd; + pmd_t *pmd; ++ pte_t *pte = NULL; + + vaddr = start; + pgd_idx = pgd_index(vaddr); +@@ -177,8 +221,10 @@ page_table_range_init(unsigned long star + pmd = pmd + pmd_index(vaddr); + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); + pmd++, pmd_idx++) { +- if (vaddr < hypervisor_virt_start) +- one_page_table_init(pmd); ++ if (vaddr >= hypervisor_virt_start) ++ break; ++ pte = page_table_kmap_check(one_page_table_init(pmd), ++ pmd, vaddr, pte); + + vaddr += PMD_SIZE; + } +@@ -361,6 +407,8 @@ int devmem_is_allowed(unsigned long page + { + if (pagenr <= 256) + return 1; ++ if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) ++ return 0; + if (mfn_to_local_pfn(pagenr) >= max_pfn) + return 1; + return 0; +@@ -476,8 +524,12 @@ static void __init set_highmem_pages_ini + #endif /* !CONFIG_NUMA */ + + #else +-# define permanent_kmaps_init(pgd_base) do { } while (0) +-# define set_highmem_pages_init() do { } while (0) ++static inline void permanent_kmaps_init(pgd_t *pgd_base) ++{ ++} ++static inline void set_highmem_pages_init(void) ++{ ++} + #endif /* CONFIG_HIGHMEM */ + + pgd_t *swapper_pg_dir; +@@ -509,7 +561,6 @@ static void __init early_ioremap_page_ta + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ +- early_ioremap_clear(); + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; + page_table_range_init(vaddr, end, pgd_base); +@@ -856,10 +907,7 @@ static void __init find_early_table_spac + tables += PAGE_ALIGN(ptes * sizeof(pte_t)); + + /* for fixmap */ +- tables += PAGE_SIZE +- * ((((FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK) +- - (__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK)) +- >> PMD_SHIFT); ++ tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); + + table_start = extend_init_mapping(tables); + +@@ -1023,8 +1071,6 @@ void __init mem_init(void) + + pci_iommu_alloc(); + +- start_periodic_check_for_corruption(); +- + #ifdef CONFIG_FLATMEM + BUG_ON(!mem_map); + #endif +@@ -1099,11 +1145,25 @@ void __init mem_init(void) + (unsigned long)&_text, (unsigned long)&_etext, + ((unsigned long)&_etext - (unsigned long)&_text) >> 
10); + ++ /* ++ * Check boundaries twice: Some fundamental inconsistencies can ++ * be detected at build time already. ++ */ ++#define __FIXADDR_TOP (-PAGE_SIZE) ++#ifdef CONFIG_HIGHMEM ++ BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); ++ BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE); ++#endif ++#define high_memory (-128UL << 20) ++ BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); ++#undef high_memory ++#undef __FIXADDR_TOP ++ + #ifdef CONFIG_HIGHMEM + BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUG_ON(VMALLOC_END > PKMAP_BASE); + #endif +- BUG_ON(VMALLOC_START > VMALLOC_END); ++ BUG_ON(VMALLOC_START >= VMALLOC_END); + BUG_ON((unsigned long)high_memory > VMALLOC_START); + + if (boot_cpu_data.wp_works_ok < 0) +@@ -1123,7 +1183,7 @@ int arch_add_memory(int nid, u64 start, + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + +- return __add_pages(zone, start_pfn, nr_pages); ++ return __add_pages(nid, zone, start_pfn, nr_pages); + } + #endif + +--- sle11sp1-2010-01-20.orig/arch/x86/mm/init_64-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/init_64-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -841,7 +841,7 @@ static void __init init_gbpages(void) + #endif + } + +-static unsigned long __init kernel_physical_mapping_init(unsigned long start, ++static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) + { +@@ -966,6 +966,8 @@ unsigned long __init_refok init_memory_m + pos = start_pfn << PAGE_SHIFT; + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); ++ if (end_pfn > (end >> PAGE_SHIFT)) ++ end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; +@@ -1146,7 +1148,7 @@ int arch_add_memory(int nid, u64 start, + if (last_mapped_pfn > max_pfn_mapped) + max_pfn_mapped = last_mapped_pfn; + +- ret = __add_pages(zone, start_pfn, nr_pages); ++ ret = __add_pages(nid, zone, start_pfn, nr_pages); + WARN_ON_ONCE(ret); + + return ret; +@@ -1177,6 +1179,8 @@ int devmem_is_allowed(unsigned long page + { + if (pagenr <= 256) + return 1; ++ if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) ++ return 0; + if (mfn_to_local_pfn(pagenr) >= max_pfn) + return 1; + return 0; +@@ -1192,8 +1196,6 @@ void __init mem_init(void) + unsigned long absent_pages; + unsigned long pfn; + +- start_periodic_check_for_corruption(); +- + pci_iommu_alloc(); + + /* clear_bss() already clear the empty_zero_page */ +--- sle11sp1-2010-01-20.orig/arch/x86/mm/iomap_32-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/iomap_32-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -17,9 +17,21 @@ + */ + + #include ++#include + #include + #include + ++int is_io_mapping_possible(resource_size_t base, unsigned long size) ++{ ++#ifndef CONFIG_X86_PAE ++ /* There is no way to map greater than 1 << 32 address without PAE */ ++ if (base + size > 0x100000000ULL) ++ return 0; ++#endif ++ return 1; ++} ++EXPORT_SYMBOL_GPL(is_io_mapping_possible); ++ + /* Map 'mfn' using fixed map 'type' and protections 'prot' + */ + void * +@@ -30,6 +42,15 @@ iomap_atomic_prot_pfn(unsigned long mfn, + + pagefault_disable(); + ++ /* ++ * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. ++ * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the ++ * MTRR is UC or WC. 
UC_MINUS gets the real intention, of the ++ * user, which is "WC if the MTRR is WC, UC if you can't do that." ++ */ ++ if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) ++ prot = PAGE_KERNEL_UC_MINUS; ++ + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + pgprot_val(prot) |= _PAGE_IOMAP; +--- sle11sp1-2010-01-20.orig/arch/x86/mm/ioremap-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/ioremap-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -293,25 +293,6 @@ int page_is_ram(unsigned long pagenr) + return 0; + } + +-int pagerange_is_ram(unsigned long start, unsigned long end) +-{ +- int ram_page = 0, not_rampage = 0; +- unsigned long page_nr; +- +- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); +- ++page_nr) { +- if (page_is_ram(mfn_to_local_pfn(page_nr))) +- ram_page = 1; +- else +- not_rampage = 1; +- +- if (ram_page == not_rampage) +- return -1; +- } +- +- return ram_page; +-} +- + /* + * Fix up the linear direct mapping of the kernel to avoid cache attribute + * conflicts. +@@ -402,7 +383,8 @@ static void __iomem *__ioremap_caller(re + * Check if the request spans more than any BAR in the iomem resource + * tree. + */ +- WARN_ON(iomem_map_sanity_check(phys_addr, size)); ++ WARN_ONCE(iomem_map_sanity_check(phys_addr, size), ++ KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); + + /* + * Don't allow anybody to remap normal RAM that we're using.. +@@ -746,38 +728,10 @@ void __init early_ioremap_init(void) + } + } + +-#ifdef CONFIG_X86_32 +-void __init early_ioremap_clear(void) +-{ +- pmd_t *pmd; +- +- if (early_ioremap_debug) +- printk(KERN_INFO "early_ioremap_clear()\n"); +- +- pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); +- pmd_clear(pmd); +- make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables); +- /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */ +- __flush_tlb_all(); +-} +- + void __init early_ioremap_reset(void) + { +- enum fixed_addresses idx; +- unsigned long addr, phys; +- pte_t *pte; +- + after_paging_init = 1; +- for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { +- addr = fix_to_virt(idx); +- pte = early_ioremap_pte(addr); +- if (pte_present(*pte)) { +- phys = __pte_val(*pte) & PAGE_MASK; +- set_fixmap(idx, phys); +- } +- } + } +-#endif /* CONFIG_X86_32 */ + + static void __init __early_set_fixmap(enum fixed_addresses idx, + unsigned long phys, pgprot_t flags) +--- sle11sp1-2010-01-20.orig/arch/x86/mm/pageattr-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/pageattr-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -524,22 +524,28 @@ static int split_large_page(pte_t *kpte, + set_pte(&pbase[i], pfn_pte_ma(mfn, ref_prot)); + + /* +- * Install the new, split up pagetable. Important details here: ++ * Install the new, split up pagetable. + * +- * On Intel the NX bit of all levels must be cleared to make a +- * page executable. See section 4.13.2 of Intel 64 and IA-32 +- * Architectures Software Developer's Manual). +- * +- * Mark the entry present. The current mapping might be +- * set to not present, which we preserved above. 
++ * We use the standard kernel pagetable protections for the new ++ * pagetable protections, the actual ptes set above control the ++ * primary protection behavior: + */ + if (!xen_feature(XENFEAT_writable_page_tables) && + HYPERVISOR_update_va_mapping((unsigned long)pbase, + mk_pte(base, PAGE_KERNEL_RO), 0)) + BUG(); +- ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); +- pgprot_val(ref_prot) |= _PAGE_PRESENT; +- __set_pmd_pte(kpte, address, level, mk_pte(base, ref_prot)); ++ __set_pmd_pte(kpte, address, level, mk_pte(base, __pgprot(_KERNPG_TABLE))); ++ ++ /* ++ * Intel Atom errata AAH41 workaround. ++ * ++ * The real fix should be in hw or in a microcode update, but ++ * we also probabilistically try to reduce the window of having ++ * a large TLB mixed with 4K TLBs while instruction fetches are ++ * going on. ++ */ ++ __flush_tlb_all(); ++ + base = NULL; + + out_unlock: +@@ -554,6 +560,36 @@ out_unlock: + return 0; + } + ++static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, ++ int primary) ++{ ++ /* ++ * Ignore all non primary paths. ++ */ ++ if (!primary) ++ return 0; ++ ++ /* ++ * Ignore the NULL PTE for kernel identity mapping, as it is expected ++ * to have holes. ++ * Also set numpages to '1' indicating that we processed cpa req for ++ * one virtual address page and its pfn. TBD: numpages can be set based ++ * on the initial value and the level returned by lookup_address(). ++ */ ++ if (within(vaddr, PAGE_OFFSET, ++ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { ++ cpa->numpages = 1; ++ cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; ++ return 0; ++ } else { ++ WARN(1, KERN_WARNING "CPA: called for zero pte. " ++ "vaddr = %lx cpa->vaddr = %lx\n", vaddr, ++ *cpa->vaddr); ++ ++ return -EFAULT; ++ } ++} ++ + static int __change_page_attr(struct cpa_data *cpa, int primary) + { + unsigned long address; +@@ -565,21 +601,14 @@ static int __change_page_attr(struct cpa + address = cpa->vaddr[cpa->curpage]; + else + address = *cpa->vaddr; +- + repeat: + kpte = lookup_address(address, &level); + if (!kpte) +- return 0; ++ return __cpa_process_fault(cpa, address, primary); + + old_pte = *kpte; +- if (!__pte_val(old_pte)) { +- if (!primary) +- return 0; +- WARN(1, KERN_WARNING "CPA: called for zero pte. " +- "vaddr = %lx cpa->vaddr = %lx\n", address, +- *cpa->vaddr); +- return -EINVAL; +- } ++ if (!__pte_val(old_pte)) ++ return __cpa_process_fault(cpa, address, primary); + + if (level == PG_LEVEL_4K) { + pte_t new_pte; +@@ -678,12 +707,7 @@ static int cpa_process_alias(struct cpa_ + vaddr = *cpa->vaddr; + + if (!(within(vaddr, PAGE_OFFSET, +- PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) +-#ifdef CONFIG_X86_64 +- || within(vaddr, PAGE_OFFSET + (1UL<<32), +- PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) +-#endif +- )) { ++ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { + + alias_cpa = *cpa; + temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); +@@ -814,6 +838,15 @@ static int change_page_attr_set_clr(unsi + + vm_unmap_aliases(); + ++ /* ++ * If we're called with lazy mmu updates enabled, the ++ * in-memory pte state may be stale. Flush pending updates to ++ * bring them up to date. 
++ * ++ arch_flush_lazy_mmu_mode();*/ ++ if (arch_use_lazy_mmu_mode()) ++ xen_multicall_flush(true); ++ + cpa.vaddr = addr; + cpa.numpages = numpages; + cpa.mask_set = mask_set; +@@ -856,6 +889,14 @@ static int change_page_attr_set_clr(unsi + } else + cpa_flush_all(cache); + ++ /* ++ * If we've been called with lazy mmu updates enabled, then ++ * make sure that everything gets flushed out before we ++ * return. ++ * ++ arch_flush_lazy_mmu_mode();*/ ++ WARN_ON_ONCE(arch_use_lazy_mmu_mode() && !irq_count()); ++ + out: + return ret; + } +--- sle11sp1-2010-01-20.orig/arch/x86/mm/pat-xen.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/mm/pat-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -223,6 +224,33 @@ chk_conflict(struct memtype *new, struct + static struct memtype *cached_entry; + static u64 cached_start; + ++static int pat_pagerange_is_ram(unsigned long start, unsigned long end) ++{ ++ int ram_page = 0, not_rampage = 0; ++ unsigned long page_nr; ++ ++ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); ++ ++page_nr) { ++ /* ++ * For legacy reasons, physical address range in the legacy ISA ++ * region is tracked as non-RAM. This will allow users of ++ * /dev/mem to map portions of legacy ISA region, even when ++ * some of those portions are listed(or not even listed) with ++ * different e820 types(RAM/reserved/..) ++ */ ++ if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && ++ page_is_ram(mfn_to_local_pfn(page_nr))) ++ ram_page = 1; ++ else ++ not_rampage = 1; ++ ++ if (ram_page == not_rampage) ++ return -1; ++ } ++ ++ return ram_page; ++} ++ + /* + * For RAM pages, mark the pages as non WB memory type using + * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or +@@ -345,9 +373,13 @@ int reserve_memtype(u64 start, u64 end, + req_type & _PAGE_CACHE_MASK); + } + +- is_range_ram = pagerange_is_ram(start, end); ++ if (new_type) ++ *new_type = actual_type; ++ ++ is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) +- return reserve_ram_pages_type(start, end, req_type, new_type); ++ return reserve_ram_pages_type(start, end, req_type, ++ new_type); + else if (is_range_ram < 0) + return -EINVAL; + +@@ -359,9 +391,6 @@ int reserve_memtype(u64 start, u64 end, + new->end = end; + new->type = actual_type; + +- if (new_type) +- *new_type = actual_type; +- + spin_lock(&memtype_lock); + + if (cached_entry && start >= cached_start) +@@ -449,7 +478,7 @@ int free_memtype(u64 start, u64 end) + if (is_ISA_range(start, end - 1)) + return 0; + +- is_range_ram = pagerange_is_ram(start, end); ++ is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) +@@ -608,6 +637,254 @@ void unmap_devmem(unsigned long mfn, uns + free_memtype(addr, addr + size); + } + ++#ifndef CONFIG_XEN ++/* ++ * Internal interface to reserve a range of physical memory with prot. ++ * Reserved non RAM regions only and after successful reserve_memtype, ++ * this func also keeps identity mapping (if any) in sync with this new prot. ++ */ ++static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, ++ int strict_prot) ++{ ++ int is_ram = 0; ++ int id_sz, ret; ++ unsigned long flags; ++ unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); ++ ++ is_ram = pat_pagerange_is_ram(paddr, paddr + size); ++ ++ /* ++ * reserve_pfn_range() doesn't support RAM pages. 
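++ * RAM ranges are tracked separately, via reserve_ram_pages_type().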
++ */
++ if (is_ram != 0)
++ return -EINVAL;
++
++ ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
++ if (ret)
++ return ret;
++
++ if (flags != want_flags) {
++ if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
++ free_memtype(paddr, paddr + size);
++ printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
++ " for %Lx-%Lx, got %s\n",
++ current->comm, current->pid,
++ cattr_name(want_flags),
++ (unsigned long long)paddr,
++ (unsigned long long)(paddr + size),
++ cattr_name(flags));
++ return -EINVAL;
++ }
++ /*
++ * We allow returning a different type than the one requested in
++ * the non-strict case.
++ */
++ *vma_prot = __pgprot((pgprot_val(*vma_prot) &
++ (~_PAGE_CACHE_MASK)) |
++ flags);
++ }
++
++ /* Need to keep identity mapping in sync */
++ if (paddr >= __pa(high_memory))
++ return 0;
++
++ id_sz = (__pa(high_memory) < paddr + size) ?
++ __pa(high_memory) - paddr :
++ size;
++
++ if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
++ free_memtype(paddr, paddr + size);
++ printk(KERN_ERR
++ "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
++ "for %Lx-%Lx\n",
++ current->comm, current->pid,
++ cattr_name(flags),
++ (unsigned long long)paddr,
++ (unsigned long long)(paddr + size));
++ return -EINVAL;
++ }
++ return 0;
++}
++
++/*
++ * Internal interface to free a range of physical memory.
++ * Frees non-RAM regions only.
++ */
++static void free_pfn_range(u64 paddr, unsigned long size)
++{
++ int is_ram;
++
++ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
++ if (is_ram == 0)
++ free_memtype(paddr, paddr + size);
++}
++
++/*
++ * track_pfn_vma_copy is called when a vma that covers a pfnmap gets
++ * copied through copy_page_range().
++ *
++ * If the vma has a linear pfn mapping for the entire range, we get the prot
++ * from the pte and reserve the entire vma range with a single reserve_pfn_range call.
++ * Otherwise, we reserve the entire vma range, going through the PTEs page
++ * by page to get the physical address and protection.
++ */
++int track_pfn_vma_copy(struct vm_area_struct *vma)
++{
++ int retval = 0;
++ unsigned long i, j;
++ resource_size_t paddr;
++ unsigned long prot;
++ unsigned long vma_start = vma->vm_start;
++ unsigned long vma_end = vma->vm_end;
++ unsigned long vma_size = vma_end - vma_start;
++ pgprot_t pgprot;
++
++ if (!pat_enabled)
++ return 0;
++
++ if (is_linear_pfn_mapping(vma)) {
++ /*
++ * reserve the whole chunk covered by vma. We need the
++ * starting address and protection from pte.
++ */
++ if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
++ WARN_ON_ONCE(1);
++ return -EINVAL;
++ }
++ pgprot = __pgprot(prot);
++ return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
++ }
++
++ /* reserve entire vma page by page, using pfn and prot from pte */
++ for (i = 0; i < vma_size; i += PAGE_SIZE) {
++ if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
++ continue;
++
++ pgprot = __pgprot(prot);
++ retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
++ if (retval)
++ goto cleanup_ret;
++ }
++ return 0;
++
++cleanup_ret:
++ /* Reserve error: Cleanup partial reservation and return error */
++ for (j = 0; j < i; j += PAGE_SIZE) {
++ if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
++ continue;
++
++ free_pfn_range(paddr, PAGE_SIZE);
++ }
++
++ return retval;
++}
++
++/*
++ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
++ * for the physical range indicated by pfn and size.
++ *
++ * prot is passed in as a parameter for the new mapping. 
If the vma has a
++ * linear pfn mapping for the entire range, reserve the entire vma range with
++ * a single reserve_pfn_range call.
++ * Otherwise, we look at the pfn and size and reserve only the specified range
++ * page by page.
++ *
++ * Note that this function can be called with the caller trying to map only a
++ * subrange/page inside the vma.
++ */
++int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
++ unsigned long pfn, unsigned long size)
++{
++ int retval = 0;
++ unsigned long i, j;
++ resource_size_t base_paddr;
++ resource_size_t paddr;
++ unsigned long vma_start = vma->vm_start;
++ unsigned long vma_end = vma->vm_end;
++ unsigned long vma_size = vma_end - vma_start;
++
++ if (!pat_enabled)
++ return 0;
++
++ if (is_linear_pfn_mapping(vma)) {
++ /* reserve the whole chunk starting from vm_pgoff */
++ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
++ return reserve_pfn_range(paddr, vma_size, prot, 0);
++ }
++
++ /* reserve page by page using pfn and size */
++ base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
++ for (i = 0; i < size; i += PAGE_SIZE) {
++ paddr = base_paddr + i;
++ retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
++ if (retval)
++ goto cleanup_ret;
++ }
++ return 0;
++
++cleanup_ret:
++ /* Reserve error: Cleanup partial reservation and return error */
++ for (j = 0; j < i; j += PAGE_SIZE) {
++ paddr = base_paddr + j;
++ free_pfn_range(paddr, PAGE_SIZE);
++ }
++
++ return retval;
++}
++
++/*
++ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
++ * untrack can be called for a specific region indicated by pfn and size, or
++ * for the entire vma (in which case size can be zero).
++ */
++void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
++ unsigned long size)
++{
++ unsigned long i;
++ resource_size_t paddr;
++ unsigned long prot;
++ unsigned long vma_start = vma->vm_start;
++ unsigned long vma_end = vma->vm_end;
++ unsigned long vma_size = vma_end - vma_start;
++
++ if (!pat_enabled)
++ return;
++
++ if (is_linear_pfn_mapping(vma)) {
++ /* free the whole chunk starting from vm_pgoff */
++ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
++ free_pfn_range(paddr, vma_size);
++ return;
++ }
++
++ if (size != 0 && size != vma_size) {
++ /* free page by page, using pfn and size */
++ paddr = (resource_size_t)pfn << PAGE_SHIFT;
++ for (i = 0; i < size; i += PAGE_SIZE) {
++ /* offset from the base address, not the previous page */
++ free_pfn_range(paddr + i, PAGE_SIZE);
++ }
++ } else {
++ /* free entire vma, page by page, using the pfn from pte */
++ for (i = 0; i < vma_size; i += PAGE_SIZE) {
++ if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
++ continue;
++
++ free_pfn_range(paddr, PAGE_SIZE);
++ }
++ }
++}
++#endif /* !CONFIG_XEN */
++
++pgprot_t pgprot_writecombine(pgprot_t prot)
++{
++ if (pat_enabled)
++ return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
++ else
++ return pgprot_noncached(prot);
++}
++EXPORT_SYMBOL_GPL(pgprot_writecombine);
++
+ #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
+ 
+ /* get Nth element of the linked list */
+--- sle11sp1-2010-01-20.orig/arch/x86/pci/irq-xen.c 2009-11-06 10:51:47.000000000 +0100
++++ sle11sp1-2010-01-20/arch/x86/pci/irq-xen.c 2009-11-06 10:51:55.000000000 +0100
+@@ -16,8 +16,7 @@
+ #include
+ #include
+ #include
+-
+-#include "pci.h"
++#include
+ 
+ #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+ #define PIRQ_VERSION 0x0100
+@@ -540,7 +539,7 @@ static int pirq_bios_set(struct pci_dev 
+ {
+ struct pci_dev *bridge;
+ int pin = 
pci_get_interrupt_pin(dev, &bridge); +- return pcibios_set_irq_routing(bridge, pin, irq); ++ return pcibios_set_irq_routing(bridge, pin - 1, irq); + } + + #endif +@@ -579,6 +578,7 @@ static __init int intel_router_probe(str + case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: ++ case PCI_DEVICE_ID_INTEL_TGP_LPC: + case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: +@@ -894,7 +894,6 @@ static int pcibios_lookup_irq(struct pci + dev_dbg(&dev->dev, "no interrupt pin\n"); + return 0; + } +- pin = pin - 1; + + /* Find IRQ routing entry */ + +@@ -904,17 +903,17 @@ static int pcibios_lookup_irq(struct pci + info = pirq_get_info(dev); + if (!info) { + dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", +- 'A' + pin); ++ 'A' + pin - 1); + return 0; + } +- pirq = info->irq[pin].link; +- mask = info->irq[pin].bitmap; ++ pirq = info->irq[pin - 1].link; ++ mask = info->irq[pin - 1].bitmap; + if (!pirq) { +- dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); ++ dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1); + return 0; + } + dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", +- 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); ++ 'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs); + mask &= pcibios_irq_mask; + + /* Work around broken HP Pavilion Notebooks which assign USB to +@@ -956,7 +955,7 @@ static int pcibios_lookup_irq(struct pci + newirq = i; + } + } +- dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); ++ dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq); + + /* Check if it is hardcoded */ + if ((pirq & 0xf0) == 0xf0) { +@@ -984,18 +983,18 @@ static int pcibios_lookup_irq(struct pci + return 0; + } + } +- dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); ++ dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); + + /* Update IRQ for all devices with the same pirq value */ + while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { + pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); + if (!pin) + continue; +- pin--; ++ + info = pirq_get_info(dev2); + if (!info) + continue; +- if (info->irq[pin].link == pirq) { ++ if (info->irq[pin - 1].link == pirq) { + /* + * We refuse to override the dev->irq + * information. Give a warning! +@@ -1049,6 +1048,9 @@ static void __init pcibios_fixup_irqs(vo + dev = NULL; + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); ++ if (!pin) ++ continue; ++ + #ifdef CONFIG_X86_IO_APIC + /* + * Recalculate IRQ numbers if we use the I/O APIC. +@@ -1056,15 +1058,11 @@ static void __init pcibios_fixup_irqs(vo + if (io_apic_assign_pci_irqs) { + int irq; + +- if (!pin) +- continue; +- + /* + * interrupt pins are numbered starting from 1 + */ +- pin--; + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, +- PCI_SLOT(dev->devfn), pin); ++ PCI_SLOT(dev->devfn), pin - 1); + /* + * Busses behind bridges are typically not listed in the + * MP-table. 
In this case we have to look up the IRQ +@@ -1077,22 +1075,22 @@ static void __init pcibios_fixup_irqs(vo + struct pci_dev *bridge = dev->bus->self; + int bus; + +- pin = (pin + PCI_SLOT(dev->devfn)) % 4; ++ pin = pci_swizzle_interrupt_pin(dev, pin); + bus = bridge->bus->number; + irq = IO_APIC_get_PCI_irq_vector(bus, +- PCI_SLOT(bridge->devfn), pin); ++ PCI_SLOT(bridge->devfn), pin - 1); + if (irq >= 0) + dev_warn(&dev->dev, + "using bridge %s INT %c to " + "get IRQ %d\n", + pci_name(bridge), +- 'A' + pin, irq); ++ 'A' + pin - 1, irq); + } + if (irq >= 0) { + dev_info(&dev->dev, + "PCI->APIC IRQ transform: INT %c " + "-> IRQ %d\n", +- 'A' + pin, irq); ++ 'A' + pin - 1, irq); + dev->irq = irq; + } + } +@@ -1100,7 +1098,7 @@ static void __init pcibios_fixup_irqs(vo + /* + * Still no IRQ? Try to lookup one... + */ +- if (pin && !dev->irq) ++ if (!dev->irq) + pcibios_lookup_irq(dev, 0); + } + } +@@ -1227,12 +1225,10 @@ static int pirq_enable_irq(struct pci_de + if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { + char *msg = ""; + +- pin--; /* interrupt pins are numbered starting from 1 */ +- + if (io_apic_assign_pci_irqs) { + int irq; + +- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); ++ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1); + /* + * Busses behind bridges are typically not listed in the MP-table. + * In this case we have to look up the IRQ based on the parent bus, +@@ -1243,20 +1239,20 @@ static int pirq_enable_irq(struct pci_de + while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; + +- pin = (pin + PCI_SLOT(dev->devfn)) % 4; ++ pin = pci_swizzle_interrupt_pin(dev, pin); + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, +- PCI_SLOT(bridge->devfn), pin); ++ PCI_SLOT(bridge->devfn), pin - 1); + if (irq >= 0) + dev_warn(&dev->dev, "using bridge %s " + "INT %c to get IRQ %d\n", +- pci_name(bridge), 'A' + pin, ++ pci_name(bridge), 'A' + pin - 1, + irq); + dev = bridge; + } + dev = temp_dev; + if (irq >= 0) { + dev_info(&dev->dev, "PCI->APIC IRQ transform: " +- "INT %c -> IRQ %d\n", 'A' + pin, irq); ++ "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); + dev->irq = irq; + return 0; + } else +@@ -1275,7 +1271,7 @@ static int pirq_enable_irq(struct pci_de + return 0; + + dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", +- 'A' + pin, msg); ++ 'A' + pin - 1, msg); + } + return 0; + } +--- sle11sp1-2010-01-20.orig/arch/x86/pci/pcifront.c 2009-03-18 10:39:31.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/pci/pcifront.c 2009-11-06 10:51:55.000000000 +0100 +@@ -8,8 +8,8 @@ + #include + #include + #include ++#include + #include +-#include "pci.h" + + static int pcifront_enable_irq(struct pci_dev *dev) + { +--- sle11sp1-2010-01-20.orig/arch/x86/vdso/vdso32-setup-xen.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/arch/x86/vdso/vdso32-setup-xen.c 2009-11-06 10:51:55.000000000 +0100 +@@ -349,7 +349,7 @@ int __init sysenter_setup(void) + } + + /* Setup a VMA at program startup for the vsyscall page */ +-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) ++int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) + { + struct mm_struct *mm = current->mm; + unsigned long addr; +--- sle11sp1-2010-01-20.orig/drivers/acpi/Kconfig 2009-12-04 10:44:40.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/acpi/Kconfig 2009-11-06 10:51:55.000000000 +0100 +@@ -9,7 +9,7 @@ menuconfig ACPI + depends on PCI + depends on PM + 
select PNP +- select CPU_IDLE ++ select CPU_IDLE if !PROCESSOR_EXTERNAL_CONTROL + default y + help + Advanced Configuration and Power Interface (ACPI) support for +--- sle11sp1-2010-01-20.orig/drivers/acpi/processor_extcntl.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/acpi/processor_extcntl.c 2009-11-06 10:51:55.000000000 +0100 +@@ -230,3 +230,117 @@ err_out: + kfree(perf); + return ret; + } ++ ++/* ++ * Objects and functions removed in native 2.6.29, and thus moved here. ++ */ ++#ifdef CONFIG_SMP ++static void smp_callback(void *v) ++{ ++ /* we already woke the CPU up, nothing more to do */ ++} ++ ++/* ++ * This function gets called when a part of the kernel has a new latency ++ * requirement. This means we need to get all processors out of their C-state, ++ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that ++ * wakes them all right up. ++ */ ++static int acpi_processor_latency_notify(struct notifier_block *b, ++ unsigned long l, void *v) ++{ ++ smp_call_function(smp_callback, NULL, 1); ++ return NOTIFY_OK; ++} ++ ++struct notifier_block acpi_processor_latency_notifier = { ++ .notifier_call = acpi_processor_latency_notify, ++}; ++#endif ++ ++/* ++ * bm_history -- bit-mask with a bit per jiffy of bus-master activity ++ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms ++ * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms ++ * 100 HZ: 0x0000000F: 4 jiffies = 40ms ++ * reduce history for more aggressive entry into C3 ++ */ ++static unsigned int bm_history __read_mostly = ++ (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); ++module_param(bm_history, uint, 0644); ++ ++int acpi_processor_set_power_policy(struct acpi_processor *pr) ++{ ++ unsigned int i; ++ unsigned int state_is_set = 0; ++ struct acpi_processor_cx *lower = NULL; ++ struct acpi_processor_cx *higher = NULL; ++ struct acpi_processor_cx *cx; ++ ++ ++ if (!pr) ++ return -EINVAL; ++ ++ /* ++ * This function sets the default Cx state policy (OS idle handler). ++ * Our scheme is to promote quickly to C2 but more conservatively ++ * to C3. We're favoring C2 for its characteristics of low latency ++ * (quick response), good power savings, and ability to allow bus ++ * mastering activity. Note that the Cx state policy is completely ++ * customizable and can be altered dynamically. 
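++ * The loops below pick the startup state first, then wire up
++ * demotion and promotion targets between adjacent valid C-states.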
++ */ ++ ++ /* startup state */ ++ for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { ++ cx = &pr->power.states[i]; ++ if (!cx->valid) ++ continue; ++ ++ if (!state_is_set) ++ pr->power.state = cx; ++ state_is_set++; ++ break; ++ } ++ ++ if (!state_is_set) ++ return -ENODEV; ++ ++ /* demotion */ ++ for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { ++ cx = &pr->power.states[i]; ++ if (!cx->valid) ++ continue; ++ ++ if (lower) { ++ cx->demotion.state = lower; ++ cx->demotion.threshold.ticks = cx->latency_ticks; ++ cx->demotion.threshold.count = 1; ++ if (cx->type == ACPI_STATE_C3) ++ cx->demotion.threshold.bm = bm_history; ++ } ++ ++ lower = cx; ++ } ++ ++ /* promotion */ ++ for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) { ++ cx = &pr->power.states[i]; ++ if (!cx->valid) ++ continue; ++ ++ if (higher) { ++ cx->promotion.state = higher; ++ cx->promotion.threshold.ticks = cx->latency_ticks; ++ if (cx->type >= ACPI_STATE_C2) ++ cx->promotion.threshold.count = 4; ++ else ++ cx->promotion.threshold.count = 10; ++ if (higher->type == ACPI_STATE_C3) ++ cx->promotion.threshold.bm = bm_history; ++ } ++ ++ higher = cx; ++ } ++ ++ return 0; ++} +--- sle11sp1-2010-01-20.orig/drivers/acpi/processor_idle.c 2009-11-06 10:51:17.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/acpi/processor_idle.c 2009-11-06 10:51:55.000000000 +0100 +@@ -114,6 +114,7 @@ static struct dmi_system_id __cpuinitdat + }; + + ++#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL + /* + * Callers should disable interrupts before the call and enable + * interrupts after return. +@@ -132,6 +133,7 @@ static void acpi_safe_halt(void) + } + current_thread_info()->status |= TS_POLLING; + } ++#endif + + #ifdef ARCH_APICTIMER_STOPS_ON_C3 + +@@ -196,7 +198,7 @@ static void lapic_timer_state_broadcast( + static void lapic_timer_check_state(int state, struct acpi_processor *pr, + struct acpi_processor_cx *cstate) { } + static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { } +-static void lapic_timer_state_broadcast(struct acpi_processor *pr, ++static inline void lapic_timer_state_broadcast(struct acpi_processor *pr, + struct acpi_processor_cx *cx, + int broadcast) + { +@@ -244,7 +246,8 @@ int acpi_processor_resume(struct acpi_de + return 0; + } + +-#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) ++#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) \ ++ && !defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) + static void tsc_check_state(int state) + { + switch (boot_cpu_data.x86_vendor) { +@@ -517,7 +520,11 @@ static void acpi_processor_power_verify_ + */ + cx->valid = 1; + ++#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL + cx->latency_ticks = cx->latency; ++#else ++ cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); ++#endif + + return; + } +@@ -600,7 +607,11 @@ static void acpi_processor_power_verify_ + */ + cx->valid = 1; + ++#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL + cx->latency_ticks = cx->latency; ++#else ++ cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); ++#endif + /* + * On older chipsets, BM_RLD needs to be set + * in order for Bus Master activity to wake the +@@ -674,6 +685,20 @@ static int acpi_processor_get_power_info + + pr->power.count = acpi_processor_power_verify(pr); + ++#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++ /* ++ * Set Default Policy ++ * ------------------ ++ * Now that we know which states are supported, set the default ++ * policy. Note that this policy can be changed dynamically ++ * (e.g. encourage deeper sleeps to conserve battery life when ++ * not on AC). 
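++ * A nonzero result here means no valid C-state was found, so
++ * the error is propagated to the caller.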
++ */ ++ result = acpi_processor_set_power_policy(pr); ++ if (result) ++ return result; ++#endif ++ + /* + * if one state of type C2 or C3 is available, mark this + * CPU as being "idle manageable" +@@ -771,6 +796,7 @@ static const struct file_operations acpi + }; + #endif + ++#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL + /** + * acpi_idle_bm_check - checks if bus master activity was detected + */ +@@ -1136,6 +1162,13 @@ static int acpi_processor_setup_cpuidle( + return 0; + } + ++#else /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ ++static inline int acpi_processor_setup_cpuidle(struct acpi_processor *pr) ++{ ++ return 0; ++} ++#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ ++ + int acpi_processor_cst_has_changed(struct acpi_processor *pr) + { + int ret = 0; +@@ -1202,6 +1235,10 @@ int __cpuinit acpi_processor_power_init( + "ACPI: processor limited to max C-state %d\n", + max_cstate); + first_run++; ++#if defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) && defined(CONFIG_SMP) ++ pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, ++ &acpi_processor_latency_notifier); ++#endif + } + + if (!pr) +@@ -1261,5 +1298,12 @@ int acpi_processor_power_exit(struct acp + acpi_device_dir(device)); + #endif + ++#if defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) && defined(CONFIG_SMP) ++ /* Unregister the idle handler when processor #0 is removed. */ ++ if (pr->id == 0) ++ pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, ++ &acpi_processor_latency_notifier); ++#endif ++ + return 0; + } +--- sle11sp1-2010-01-20.orig/drivers/gpu/drm/i915/i915_drv.c 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/gpu/drm/i915/i915_drv.c 2009-11-06 10:51:55.000000000 +0100 +@@ -330,7 +330,7 @@ static struct drm_driver driver = { + .open = drm_open, + .release = drm_release, + .ioctl = drm_ioctl, +- .mmap = drm_gem_mmap, ++ .mmap = i915_gem_mmap, + .poll = drm_poll, + .fasync = drm_fasync, + #ifdef CONFIG_COMPAT +--- sle11sp1-2010-01-20.orig/drivers/gpu/drm/i915/i915_drv.h 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/gpu/drm/i915/i915_drv.h 2010-01-20 10:23:43.000000000 +0100 +@@ -825,6 +825,11 @@ uint32_t i915_add_request(struct drm_dev + uint32_t flush_domains); + int i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible); + int i915_lp_ring_sync(struct drm_device *dev); ++#ifdef CONFIG_XEN ++int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma); ++#else ++#define i915_gem_mmap drm_gem_mmap ++#endif + int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); + int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, + int write); +--- sle11sp1-2010-01-20.orig/drivers/gpu/drm/i915/i915_gem.c 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/gpu/drm/i915/i915_gem.c 2010-01-07 10:00:50.000000000 +0100 +@@ -1165,6 +1165,17 @@ i915_gem_mmap_ioctl(struct drm_device *d + return 0; + } + ++#ifdef CONFIG_XEN ++int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ int ret = drm_gem_mmap(filp, vma); ++ ++ pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP; ++ ++ return ret; ++} ++#endif ++ + /** + * i915_gem_fault - fault a page into the GTT + * vma: VMA in question +--- sle11sp1-2010-01-20.orig/drivers/oprofile/buffer_sync.c 2009-11-06 10:45:37.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/oprofile/buffer_sync.c 2009-11-06 10:51:55.000000000 +0100 +@@ -536,7 +536,6 @@ void sync_buffer(int cpu) + int cpu_mode = CPU_MODE_KERNEL; + sync_buffer_state state = sb_buffer_start; + unsigned int i; +- int domain_switch = 0; + unsigned 
long available; + unsigned long flags; + struct op_entry entry; +@@ -561,15 +560,6 @@ void sync_buffer(int cpu) + if (!sample) + break; + +-#ifdef CONFIG_XEN +- if (domain_switch) { +- cpu_current_domain[cpu] = sample->eip; +- add_domain_switch(sample->eip); +- domain_switch = 0; +- continue; +- } +-#endif +- + if (is_code(sample->eip)) { + flags = sample->event; + if (flags & TRACE_BEGIN) { +@@ -595,8 +585,11 @@ void sync_buffer(int cpu) + add_user_ctx_switch(new, cookie); + } + #ifdef CONFIG_XEN +- if (flags & DOMAIN_SWITCH) +- domain_switch = 1; ++ if ((flags & DOMAIN_SWITCH) ++ && op_cpu_buffer_get_data(&entry, &val)) { ++ cpu_current_domain[cpu] = val; ++ add_domain_switch(val); ++ } + #endif + if (op_cpu_buffer_get_size(&entry)) + add_data(&entry, mm); +--- sle11sp1-2010-01-20.orig/drivers/oprofile/cpu_buffer.c 2009-11-06 10:51:32.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/oprofile/cpu_buffer.c 2009-11-06 10:51:55.000000000 +0100 +@@ -445,34 +445,15 @@ void oprofile_add_pc(unsigned long pc, i + + #ifdef CONFIG_XEN + /* +- * This is basically log_sample(b, ESCAPE_CODE, cpu_mode, CPU_TRACE_BEGIN), ++ * This is basically log_sample(b, ESCAPE_CODE, 1, cpu_mode, CPU_TRACE_BEGIN), + * as was previously accessible through oprofile_add_pc(). + */ + void oprofile_add_mode(int cpu_mode) + { + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); +- struct task_struct *task; + +- if (nr_available_slots(cpu_buf) < 3) { ++ if (op_add_code(cpu_buf, 1, cpu_mode, current)) + cpu_buf->sample_lost_overflow++; +- return; +- } +- +- task = current; +- +- /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_cpu_mode != cpu_mode) { +- cpu_buf->last_cpu_mode = cpu_mode; +- add_code(cpu_buf, cpu_mode); +- } +- +- /* notice a task switch */ +- if (cpu_buf->last_task != task) { +- cpu_buf->last_task = task; +- add_code(cpu_buf, (unsigned long)task); +- } +- +- add_code(cpu_buf, CPU_TRACE_BEGIN); + } + #endif + +@@ -503,17 +484,18 @@ fail: + #ifdef CONFIG_XEN + int oprofile_add_domain_switch(int32_t domain_id) + { +- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); ++ struct op_entry entry; ++ struct op_sample *sample; + +- /* should have space for switching into and out of domain +- (2 slots each) plus one sample and one cpu mode switch */ +- if (((nr_available_slots(cpu_buf) < 6) && +- (domain_id != COORDINATOR_DOMAIN)) || +- (nr_available_slots(cpu_buf) < 2)) ++ sample = op_cpu_buffer_write_reserve(&entry, 1); ++ if (!sample) + return 0; + +- add_code(cpu_buf, DOMAIN_SWITCH); +- add_sample(cpu_buf, domain_id, 0); ++ sample->eip = ESCAPE_CODE; ++ sample->event = DOMAIN_SWITCH; ++ ++ op_cpu_buffer_add_data(&entry, domain_id); ++ op_cpu_buffer_write_commit(&entry); + + current_domain = domain_id; + +--- sle11sp1-2010-01-20.orig/drivers/pci/msi-xen.c 2009-12-04 11:28:36.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/pci/msi-xen.c 2009-12-04 11:28:44.000000000 +0100 +@@ -763,30 +763,21 @@ void pci_no_msi(void) + pci_msi_enable = 0; + } + ++/** ++ * pci_msi_enabled - is MSI enabled? ++ * ++ * Returns true if MSI has not been disabled by the command-line option ++ * pci=nomsi. 
++ **/ ++int pci_msi_enabled(void) ++{ ++ return pci_msi_enable; ++} ++EXPORT_SYMBOL(pci_msi_enabled); ++ + void pci_msi_init_pci_dev(struct pci_dev *dev) + { + #ifndef CONFIG_XEN + INIT_LIST_HEAD(&dev->msi_list); + #endif + } +- +-#ifdef CONFIG_ACPI +-#include +-#include +-static void __devinit msi_acpi_init(void) +-{ +- if (acpi_pci_disabled) +- return; +- pci_osc_support_set(OSC_MSI_SUPPORT); +- pcie_osc_support_set(OSC_MSI_SUPPORT); +-} +-#else +-static inline void msi_acpi_init(void) { } +-#endif /* CONFIG_ACPI */ +- +-void __devinit msi_init(void) +-{ +- if (!pci_msi_enable) +- return; +- msi_acpi_init(); +-} +--- sle11sp1-2010-01-20.orig/drivers/xen/Kconfig 2009-11-06 10:51:32.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/Kconfig 2009-12-18 12:27:38.000000000 +0100 +@@ -386,7 +386,7 @@ config XEN_DEV_EVTCHN + + config XENFS + tristate "Xen filesystem" +- depends on XEN ++ depends on PARAVIRT_XEN + default y + help + The xen filesystem provides a way for domains to share +--- sle11sp1-2010-01-20.orig/drivers/xen/Makefile 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/Makefile 2009-11-06 10:51:55.000000000 +0100 +@@ -14,6 +14,7 @@ obj-$(CONFIG_XEN) += features.o util.o + obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y) + obj-$(CONFIG_XEN_XENCOMM) += $(xen-xencomm-y) + obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y) ++obj-$(CONFIG_XENFS) += xenfs/ + obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ + obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ + obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ +--- sle11sp1-2010-01-20.orig/drivers/xen/balloon/sysfs.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/balloon/sysfs.c 2009-11-06 10:51:55.000000000 +0100 +@@ -67,7 +67,7 @@ static ssize_t store_target_kb(struct sy + struct sysdev_attribute *attr, + const char *buf, size_t count) + { +- char memstring[64], *endchar; ++ char *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) +@@ -75,11 +75,8 @@ static ssize_t store_target_kb(struct sy + + if (count <= 1) + return -EBADMSG; /* runt */ +- if (count > sizeof(memstring)) +- return -EFBIG; /* too long */ +- strcpy(memstring, buf); + +- target_bytes = memparse(memstring, &endchar); ++ target_bytes = simple_strtoull(buf, &endchar, 0) << 10; + balloon_set_new_target(target_bytes >> PAGE_SHIFT); + + return count; +@@ -88,8 +85,40 @@ static ssize_t store_target_kb(struct sy + static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, + show_target_kb, store_target_kb); + ++static ssize_t show_target(struct sys_device *dev, ++ struct sysdev_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", ++ (unsigned long long)balloon_stats.target_pages ++ << PAGE_SHIFT); ++} ++ ++static ssize_t store_target(struct sys_device *dev, ++ struct sysdev_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ char *endchar; ++ unsigned long long target_bytes; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ if (count <= 1) ++ return -EBADMSG; /* runt */ ++ ++ target_bytes = memparse(buf, &endchar); ++ balloon_set_new_target(target_bytes >> PAGE_SHIFT); ++ ++ return count; ++} ++ ++static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, ++ show_target, store_target); ++ + static struct sysdev_attribute *balloon_attrs[] = { + &attr_target_kb, ++ &attr_target, + }; + + static struct attribute *balloon_info_attrs[] = { +--- sle11sp1-2010-01-20.orig/drivers/xen/blkfront/vbd.c 2010-01-18 16:49:11.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/blkfront/vbd.c 2010-01-18 16:50:37.000000000 +0100 +@@ -308,6 
+308,10 @@ xlvbd_init_blk_queue(struct gendisk *gd, + if (rq == NULL) + return -1; + ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) ++ queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); ++#endif ++ + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_hardsect_size(rq, sector_size); + blk_queue_max_sectors(rq, 512); +--- sle11sp1-2010-01-20.orig/drivers/xen/core/cpu_hotplug.c 2009-04-07 13:58:48.000000000 +0200 ++++ sle11sp1-2010-01-20/drivers/xen/core/cpu_hotplug.c 2009-11-06 10:51:55.000000000 +0100 +@@ -10,10 +10,10 @@ + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +-static cpumask_t xenbus_allowed_cpumask; ++static cpumask_var_t xenbus_allowed_cpumask; + + /* Set of CPUs that local admin will allow us to bring online. */ +-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; ++static cpumask_var_t local_allowed_cpumask; + + static int local_cpu_hotplug_request(void) + { +@@ -40,10 +40,10 @@ static void vcpu_hotplug(unsigned int cp + } + + if (strcmp(state, "online") == 0) { +- cpu_set(cpu, xenbus_allowed_cpumask); ++ cpumask_set_cpu(cpu, xenbus_allowed_cpumask); + (void)cpu_up(cpu); + } else if (strcmp(state, "offline") == 0) { +- cpu_clear(cpu, xenbus_allowed_cpumask); ++ cpumask_clear_cpu(cpu, xenbus_allowed_cpumask); + (void)cpu_down(cpu); + } else { + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", +@@ -75,7 +75,7 @@ static int smpboot_cpu_notify(struct not + * as it's always executed from within a stopmachine kthread. + */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) +- cpu_clear(cpu, local_allowed_cpumask); ++ cpumask_clear_cpu(cpu, local_allowed_cpumask); + + return NOTIFY_OK; + } +@@ -156,21 +156,26 @@ int cpu_up_check(unsigned int cpu) + int rc = 0; + + if (local_cpu_hotplug_request()) { +- cpu_set(cpu, local_allowed_cpumask); +- if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { ++ cpumask_set_cpu(cpu, local_allowed_cpumask); ++ if (!cpumask_test_cpu(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + rc = -EBUSY; + } +- } else if (!cpu_isset(cpu, local_allowed_cpumask) || +- !cpu_isset(cpu, xenbus_allowed_cpumask)) { ++ } else if (!cpumask_test_cpu(cpu, local_allowed_cpumask) || ++ !cpumask_test_cpu(cpu, xenbus_allowed_cpumask)) { + rc = -EBUSY; + } + + return rc; + } + +-void init_xenbus_allowed_cpumask(void) ++void __init init_xenbus_allowed_cpumask(void) + { +- xenbus_allowed_cpumask = cpu_present_map; ++ if (!alloc_cpumask_var(&xenbus_allowed_cpumask, GFP_KERNEL)) ++ BUG(); ++ cpumask_copy(xenbus_allowed_cpumask, cpu_present_mask); ++ if (!alloc_cpumask_var(&local_allowed_cpumask, GFP_KERNEL)) ++ BUG(); ++ cpumask_setall(local_allowed_cpumask); + } +--- sle11sp1-2010-01-20.orig/drivers/xen/core/evtchn.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/core/evtchn.c 2009-11-06 10:51:55.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -57,9 +58,6 @@ static DEFINE_SPINLOCK(irq_mapping_updat + static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 }; + +-/* Packed IRQ information: binding type, sub-type index, and event channel. */ +-static u32 irq_info[NR_IRQS]; +- + /* Binding types. 
*/ + enum { + IRQT_UNBOUND, +@@ -75,6 +73,30 @@ enum { + #define _EVTCHN_BITS 12 + #define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS) + ++/* Convenient shorthand for packed representation of an unbound IRQ. */ ++#define IRQ_UNBOUND (IRQT_UNBOUND << (32 - _IRQT_BITS)) ++ ++static struct irq_cfg _irq_cfg[] = { ++ [0 ... ++#ifdef CONFIG_SPARSE_IRQ ++ BUILD_BUG_ON_ZERO(PIRQ_BASE) + NR_IRQS_LEGACY ++#else ++ NR_IRQS ++#endif ++ - 1].info = IRQ_UNBOUND ++}; ++ ++static inline struct irq_cfg *__pure irq_cfg(unsigned int irq) ++{ ++#ifdef CONFIG_SPARSE_IRQ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return desc ? desc->chip_data : NULL; ++#else ++ return irq < NR_IRQS ? _irq_cfg + irq : NULL; ++#endif ++} ++ + /* Constructor for packed IRQ information. */ + static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn) + { +@@ -90,26 +112,30 @@ static inline u32 mk_irq_info(u32 type, + return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn); + } + +-/* Convenient shorthand for packed representation of an unbound IRQ. */ +-#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) +- + /* + * Accessors for packed IRQ information. + */ + + static inline unsigned int evtchn_from_irq(int irq) + { +- return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1); ++ const struct irq_cfg *cfg = irq_cfg(irq); ++ ++ return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0; + } + + static inline unsigned int index_from_irq(int irq) + { +- return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1); ++ const struct irq_cfg *cfg = irq_cfg(irq); ++ ++ return cfg ? (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1) ++ : 0; + } + + static inline unsigned int type_from_irq(int irq) + { +- return irq_info[irq] >> (32 - _IRQT_BITS); ++ const struct irq_cfg *cfg = irq_cfg(irq); ++ ++ return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND; + } + + /* IRQ <-> VIRQ mapping. */ +@@ -121,9 +147,6 @@ DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS + #endif + DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1}; + +-/* Reference counts for bindings to IRQs. */ +-static int irq_bindcount[NR_IRQS]; +- + #ifdef CONFIG_SMP + + static u8 cpu_evtchn[NR_EVENT_CHANNELS]; +@@ -157,8 +180,12 @@ static void init_evtchn_cpu_bindings(voi + int i; + + /* By default all event channels notify CPU#0. */ +- for (i = 0; i < NR_IRQS; i++) +- irq_to_desc(i)->affinity = cpumask_of_cpu(0); ++ for (i = 0; i < nr_irqs; i++) { ++ struct irq_desc *desc = irq_to_desc(i); ++ ++ if (desc) ++ desc->affinity = cpumask_of_cpu(0); ++ } + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +@@ -232,7 +259,7 @@ static DEFINE_PER_CPU(unsigned int, last + static DEFINE_PER_CPU(unsigned int, last_processed_l2i) = { BITS_PER_LONG - 1 }; + + /* NB. Interrupts are disabled on entry. 
*/ +-asmlinkage void evtchn_do_upcall(struct pt_regs *regs) ++asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs) + { + unsigned long l1, l2; + unsigned long masked_l1, masked_l2; +@@ -314,14 +341,25 @@ asmlinkage void evtchn_do_upcall(struct + irq_exit(); + } + +-static int find_unbound_irq(void) ++static struct irq_chip dynirq_chip; ++ ++static int find_unbound_irq(unsigned int cpu) + { + static int warned; + int irq; + +- for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++) +- if (irq_bindcount[irq] == 0) ++ for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++) { ++ struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu); ++ struct irq_cfg *cfg = desc->chip_data; ++ ++ if (!cfg->bindcount) { ++ desc->status |= IRQ_NOPROBE; ++ set_irq_chip_and_handler_name(irq, &dynirq_chip, ++ handle_level_irq, ++ "level"); + return irq; ++ } ++ } + + if (!warned) { + warned = 1; +@@ -339,14 +377,15 @@ static int bind_caller_port_to_irq(unsig + spin_lock(&irq_mapping_update_lock); + + if ((irq = evtchn_to_irq[caller_port]) == -1) { +- if ((irq = find_unbound_irq()) < 0) ++ if ((irq = find_unbound_irq(smp_processor_id())) < 0) + goto out; + + evtchn_to_irq[caller_port] = irq; +- irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_CALLER_PORT, ++ 0, caller_port); + } + +- irq_bindcount[irq]++; ++ irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); +@@ -361,7 +400,7 @@ static int bind_local_port_to_irq(unsign + + BUG_ON(evtchn_to_irq[local_port] != -1); + +- if ((irq = find_unbound_irq()) < 0) { ++ if ((irq = find_unbound_irq(smp_processor_id())) < 0) { + struct evtchn_close close = { .port = local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + BUG(); +@@ -369,8 +408,8 @@ static int bind_local_port_to_irq(unsign + } + + evtchn_to_irq[local_port] = irq; +- irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); +- irq_bindcount[irq]++; ++ irq_cfg(irq)->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); ++ irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); +@@ -414,7 +453,7 @@ static int bind_virq_to_irq(unsigned int + spin_lock(&irq_mapping_update_lock); + + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { +- if ((irq = find_unbound_irq()) < 0) ++ if ((irq = find_unbound_irq(cpu)) < 0) + goto out; + + bind_virq.virq = virq; +@@ -425,14 +464,14 @@ static int bind_virq_to_irq(unsigned int + evtchn = bind_virq.port; + + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + +- irq_bindcount[irq]++; ++ irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); +@@ -447,7 +486,7 @@ static int bind_ipi_to_irq(unsigned int + spin_lock(&irq_mapping_update_lock); + + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) { +- if ((irq = find_unbound_irq()) < 0) ++ if ((irq = find_unbound_irq(cpu)) < 0) + goto out; + + bind_ipi.vcpu = cpu; +@@ -457,14 +496,14 @@ static int bind_ipi_to_irq(unsigned int + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + +- irq_bindcount[irq]++; ++ irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); 
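The hunks above retire the global irq_info[] array in favor of a per-IRQ struct irq_cfg, but the packed encoding itself is unchanged: binding type, sub-type index, and event channel still share a single 32-bit word. Below is a minimal standalone sketch of that pack/unpack scheme; _IRQT_BITS is defined earlier in evtchn.c, and its value of 4 here is an assumption made only for illustration.

#include <stdio.h>
#include <stdint.h>

#define _IRQT_BITS   4                 /* assumed for this sketch */
#define _EVTCHN_BITS 12
#define _INDEX_BITS  (32 - _IRQT_BITS - _EVTCHN_BITS)

/* Mirrors mk_irq_info(): [ type | index | evtchn ], high bits first. */
static uint32_t mk_info(uint32_t type, uint32_t index, uint32_t evtchn)
{
        return (type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn;
}

int main(void)
{
        uint32_t info = mk_info(2, 5, 0x123);  /* arbitrary sample values */

        /* Mirrors evtchn_from_irq(), index_from_irq(), type_from_irq(). */
        printf("evtchn = %u\n", info & ((1U << _EVTCHN_BITS) - 1));
        printf("index  = %u\n", (info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1));
        printf("type   = %u\n", info >> (32 - _IRQT_BITS));
        return 0;
}

Note how the converted accessors stay total: when irq_cfg() returns NULL for an unpopulated descriptor, they fall back to 0 or IRQT_UNBOUND instead of dereferencing it.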
+@@ -479,7 +518,7 @@ static void unbind_from_irq(unsigned int + + spin_lock(&irq_mapping_update_lock); + +- if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { ++ if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) { + close.port = evtchn; + if ((type_from_irq(irq) != IRQT_CALLER_PORT) && + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) +@@ -502,11 +541,15 @@ static void unbind_from_irq(unsigned int + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; +- irq_info[irq] = IRQ_UNBOUND; ++ irq_cfg(irq)->info = IRQ_UNBOUND; + + /* Zap stats across IRQ changes of use. */ + for_each_possible_cpu(cpu) ++#ifdef CONFIG_SPARSE_IRQ ++ irq_to_desc(irq)->kstat_irqs[cpu] = 0; ++#else + kstat_cpu(cpu).irqs[irq] = 0; ++#endif + } + + spin_unlock(&irq_mapping_update_lock); +@@ -658,10 +701,9 @@ static void rebind_irq_to_cpu(unsigned i + rebind_evtchn_to_cpu(evtchn, tcpu); + } + +-static void set_affinity_irq(unsigned int irq, cpumask_t dest) ++static void set_affinity_irq(unsigned int irq, const struct cpumask *dest) + { +- unsigned tcpu = first_cpu(dest); +- rebind_irq_to_cpu(irq, tcpu); ++ rebind_irq_to_cpu(irq, cpumask_first(dest)); + } + #endif + +@@ -830,7 +872,7 @@ static unsigned int startup_pirq(unsigne + + evtchn_to_irq[evtchn] = irq; + bind_evtchn_to_cpu(evtchn, 0); +- irq_info[irq] = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn); + + out: + pirq_unmask_and_notify(evtchn, irq); +@@ -854,7 +896,7 @@ static void shutdown_pirq(unsigned int i + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = -1; +- irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0); + } + + static void unmask_pirq(unsigned int irq) +@@ -1021,7 +1063,7 @@ static void restore_cpu_virqs(unsigned i + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + +- BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0)); ++ BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_VIRQ, virq, 0)); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; +@@ -1033,7 +1075,7 @@ static void restore_cpu_virqs(unsigned i + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ +@@ -1050,7 +1092,7 @@ static void restore_cpu_ipis(unsigned in + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + +- BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0)); ++ BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_IPI, ipi, 0)); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; +@@ -1061,7 +1103,7 @@ static void restore_cpu_ipis(unsigned in + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ +@@ -1073,6 +1115,7 @@ static void restore_cpu_ipis(unsigned in + void irq_resume(void) + { + unsigned int cpu, irq, evtchn; ++ struct irq_cfg *cfg; + + init_evtchn_cpu_bindings(); + +@@ -1089,12 +1132,17 @@ void irq_resume(void) + mask_evtchn(evtchn); + + /* Check that no PIRQs are still bound. 
*/ +- for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++) +- BUG_ON(irq_info[irq] != IRQ_UNBOUND); ++ for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++) { ++ cfg = irq_cfg(irq); ++ BUG_ON(cfg && cfg->info != IRQ_UNBOUND); ++ } + + /* No IRQ <-> event-channel mappings. */ +- for (irq = 0; irq < NR_IRQS; irq++) +- irq_info[irq] &= ~((1U << _EVTCHN_BITS) - 1); ++ for (irq = 0; irq < nr_irqs; irq++) { ++ cfg = irq_cfg(irq); ++ if (cfg) ++ cfg->info &= ~((1U << _EVTCHN_BITS) - 1); ++ } + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + +@@ -1106,10 +1154,56 @@ void irq_resume(void) + } + #endif + ++int __init arch_early_irq_init(void) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++) ++ irq_to_desc(i)->chip_data = _irq_cfg + i; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_SPARSE_IRQ ++int arch_init_chip_data(struct irq_desc *desc, int cpu) ++{ ++ if (!desc->chip_data) { ++ /* By default all event channels notify CPU#0. */ ++ desc->affinity = cpumask_of_cpu(0); ++ ++ desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC); ++ } ++ if (!desc->chip_data) { ++ printk(KERN_ERR "cannot alloc irq_cfg\n"); ++ BUG(); ++ } ++ ++ return 0; ++} ++#endif ++ + #if defined(CONFIG_X86_IO_APIC) ++int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) ++{ ++ struct physdev_irq irq_op; ++ ++ if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS) ++ return -EINVAL; ++ ++ if (cfg->vector) ++ return 0; ++ ++ irq_op.irq = irq; ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) ++ return -ENOSPC; ++ ++ cfg->vector = irq_op.vector; ++ ++ return 0; ++} + #define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE)) + #elif defined(CONFIG_X86) +-#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < 16) ++#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < NR_IRQS_LEGACY) + #else + #define identity_mapped_irq(irq) (1) + #endif +@@ -1119,7 +1213,7 @@ void evtchn_register_pirq(int irq) + BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS); + if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND) + return; +- irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0); ++ irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0); + set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq, + "level"); + } +@@ -1132,12 +1226,17 @@ int evtchn_map_pirq(int irq, int xen_pir + irq = PIRQ_BASE + NR_PIRQS - 1; + spin_lock(&irq_alloc_lock); + do { ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ + if (identity_mapped_irq(irq)) + continue; ++ desc = irq_to_desc_alloc_cpu(irq, smp_processor_id()); ++ cfg = desc->chip_data; + if (!index_from_irq(irq)) { + BUG_ON(type_from_irq(irq) != IRQT_UNBOUND); +- irq_info[irq] = mk_irq_info(IRQT_PIRQ, +- xen_pirq, 0); ++ cfg->info = mk_irq_info(IRQT_PIRQ, ++ xen_pirq, 0); + break; + } + } while (--irq >= PIRQ_BASE); +@@ -1156,7 +1255,7 @@ int evtchn_map_pirq(int irq, int xen_pir + * then causes a warning in dynamic_irq_cleanup(). + */ + set_irq_chip_and_handler(irq, NULL, NULL); +- irq_info[irq] = IRQ_UNBOUND; ++ irq_cfg(irq)->info = IRQ_UNBOUND; + return 0; + } else if (type_from_irq(irq) != IRQT_PIRQ + || index_from_irq(irq) != xen_pirq) { +@@ -1193,23 +1292,17 @@ void __init xen_init_IRQ(void) + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + +- /* No IRQ -> event-channel mappings. */ +- for (i = 0; i < NR_IRQS; i++) +- irq_info[i] = IRQ_UNBOUND; +- +- /* Dynamic IRQ space is currently unbound. Zero the refcnts. 
*/ ++#ifndef CONFIG_SPARSE_IRQ + for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) { +- irq_bindcount[i] = 0; +- + irq_to_desc(i)->status |= IRQ_NOPROBE; + set_irq_chip_and_handler_name(i, &dynirq_chip, + handle_level_irq, "level"); + } + +- /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */ + for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) { +- irq_bindcount[i] = 1; +- ++#else ++ for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_IRQS_LEGACY); i++) { ++#endif + if (!identity_mapped_irq(i)) + continue; + +--- sle11sp1-2010-01-20.orig/drivers/xen/core/machine_reboot.c 2009-11-23 10:40:43.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/core/machine_reboot.c 2009-12-18 13:34:27.000000000 +0100 +@@ -19,6 +19,9 @@ + #include + + #if defined(__i386__) || defined(__x86_64__) ++#include ++/* TBD: Dom0 should propagate the determined value to Xen. */ ++bool port_cf9_safe = false; + + /* + * Power off function, if any +@@ -84,7 +87,7 @@ static void post_suspend(int suspend_can + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { + #ifdef CONFIG_SMP +- cpu_initialized_map = cpu_online_map; ++ cpumask_copy(vcpu_initialized_mask, cpu_online_mask); + #endif + for_each_possible_cpu(i) + setup_runstate_area(i); +@@ -222,6 +225,12 @@ int __xen_suspend(int fast_suspend, void + if (num_possible_cpus() == 1) + fast_suspend = 0; + ++ if (fast_suspend) { ++ err = stop_machine_create(); ++ if (err) ++ return err; ++ } ++ + suspend.fast_suspend = fast_suspend; + suspend.resume_notifier = resume_notifier; + +@@ -248,6 +257,8 @@ int __xen_suspend(int fast_suspend, void + + if (!fast_suspend) + smp_resume(); ++ else ++ stop_machine_destroy(); + + return 0; + } +--- sle11sp1-2010-01-20.orig/drivers/xen/core/smpboot.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/core/smpboot.c 2009-11-06 10:51:55.000000000 +0100 +@@ -36,11 +36,7 @@ extern void smp_trap_init(trap_info_t *) + /* Number of siblings per CPU package */ + int smp_num_siblings = 1; + +-cpumask_t cpu_online_map; +-EXPORT_SYMBOL(cpu_online_map); +-cpumask_t cpu_possible_map; +-EXPORT_SYMBOL(cpu_possible_map); +-cpumask_t cpu_initialized_map; ++cpumask_var_t vcpu_initialized_mask; + + DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info); + EXPORT_PER_CPU_SYMBOL(cpu_info); +@@ -72,7 +68,7 @@ void __init prefill_possible_map(void) + for (i = 0; i < NR_CPUS; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) { +- cpu_set(i, cpu_possible_map); ++ set_cpu_possible(i, true); + nr_cpu_ids = i + 1; + } + } +@@ -199,7 +195,7 @@ static void __cpuinit cpu_initialize_con + + struct task_struct *idle = idle_task(cpu); + +- if (cpu_test_and_set(cpu, cpu_initialized_map)) ++ if (cpumask_test_and_set_cpu(cpu, vcpu_initialized_mask)) + return; + + spin_lock(&ctxt_lock); +@@ -280,13 +276,15 @@ void __init smp_prepare_cpus(unsigned in + if (xen_smp_intr_init(0)) + BUG(); + +- cpu_initialized_map = cpumask_of_cpu(0); ++ if (!alloc_cpumask_var(&vcpu_initialized_mask, GFP_KERNEL)) ++ BUG(); ++ cpumask_copy(vcpu_initialized_mask, cpumask_of(0)); + + /* Restrict the possible_map according to max_cpus. 
*/ + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { +- for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--) ++ for (cpu = nr_cpu_ids-1; !cpumask_test_cpu(cpu, cpu_possible_mask); cpu--) + continue; +- cpu_clear(cpu, cpu_possible_map); ++ set_cpu_possible(cpu, false); + } + + for_each_possible_cpu (cpu) { +@@ -324,10 +322,8 @@ void __init smp_prepare_cpus(unsigned in + + #ifdef CONFIG_HOTPLUG_CPU + if (is_initial_xendomain()) +- cpu_set(cpu, cpu_present_map); +-#else +- cpu_set(cpu, cpu_present_map); + #endif ++ set_cpu_present(cpu, true); + } + + init_xenbus_allowed_cpumask(); +@@ -360,14 +356,17 @@ void __init smp_prepare_boot_cpu(void) + */ + static int __init initialize_cpu_present_map(void) + { +- cpu_present_map = cpu_possible_map; ++ unsigned int cpu; ++ ++ for_each_possible_cpu(cpu) ++ set_cpu_present(cpu, true); ++ + return 0; + } + core_initcall(initialize_cpu_present_map); + + int __cpuexit __cpu_disable(void) + { +- cpumask_t map = cpu_online_map; + unsigned int cpu = smp_processor_id(); + + if (cpu == 0) +@@ -375,9 +374,8 @@ int __cpuexit __cpu_disable(void) + + remove_siblinginfo(cpu); + +- cpu_clear(cpu, map); +- fixup_irqs(map); +- cpu_clear(cpu, cpu_online_map); ++ set_cpu_online(cpu, false); ++ fixup_irqs(); + + return 0; + } +@@ -420,7 +418,7 @@ int __cpuinit __cpu_up(unsigned int cpu) + return rc; + } + +- cpu_set(cpu, cpu_online_map); ++ set_cpu_online(cpu, true); + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + BUG_ON(rc); +@@ -432,7 +430,7 @@ void __ref play_dead(void) + { + idle_task_exit(); + local_irq_disable(); +- cpu_clear(smp_processor_id(), cpu_initialized); ++ cpumask_clear_cpu(smp_processor_id(), cpu_initialized_mask); + preempt_enable_no_resched(); + VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); + #ifdef CONFIG_HOTPLUG_CPU +--- sle11sp1-2010-01-20.orig/drivers/xen/core/spinlock.c 2009-11-17 15:19:07.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/core/spinlock.c 2009-11-17 14:57:38.000000000 +0100 +@@ -99,7 +99,8 @@ int xen_spin_wait(raw_spinlock_t *lock, + + /* Leave the irq pending so that any interrupted blocker will + * re-check. 
*/ +- kstat_this_cpu.irqs[irq] += !rc; ++ if (!rc) ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + + /* announce we're done */ + x86_write_percpu(spinning, spinning.prev); +--- sle11sp1-2010-01-20.orig/drivers/xen/netback/interface.c 2010-01-04 12:26:37.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/netback/interface.c 2010-01-04 12:40:36.000000000 +0100 +@@ -176,6 +176,14 @@ static struct ethtool_ops network_ethtoo + .get_strings = netbk_get_strings, + }; + ++static const struct net_device_ops netif_be_netdev_ops = { ++ .ndo_open = net_open, ++ .ndo_stop = net_close, ++ .ndo_start_xmit = netif_be_start_xmit, ++ .ndo_change_mtu = netbk_change_mtu, ++ .ndo_get_stats = netif_be_get_stats, ++}; ++ + netif_t *netif_alloc(struct device *parent, domid_t domid, unsigned int handle) + { + int err = 0; +@@ -210,11 +218,7 @@ netif_t *netif_alloc(struct device *pare + + init_timer(&netif->tx_queue_timeout); + +- dev->hard_start_xmit = netif_be_start_xmit; +- dev->get_stats = netif_be_get_stats; +- dev->open = net_open; +- dev->stop = net_close; +- dev->change_mtu = netbk_change_mtu; ++ dev->netdev_ops = &netif_be_netdev_ops; + dev->features = NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); +--- sle11sp1-2010-01-20.orig/drivers/xen/netback/loopback.c 2009-11-06 10:51:17.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/netback/loopback.c 2009-11-06 10:51:55.000000000 +0100 +@@ -201,19 +201,21 @@ static void loopback_set_multicast_list( + { + } + ++static const struct net_device_ops loopback_netdev_ops = { ++ .ndo_open = loopback_open, ++ .ndo_stop = loopback_close, ++ .ndo_start_xmit = loopback_start_xmit, ++ .ndo_set_multicast_list = loopback_set_multicast_list, ++ .ndo_change_mtu = NULL, /* allow arbitrary mtu */ ++ .ndo_get_stats = loopback_get_stats, ++}; ++ + static void loopback_construct(struct net_device *dev, struct net_device *lo) + { + struct net_private *np = netdev_priv(dev); + + np->loopback_dev = lo; +- +- dev->open = loopback_open; +- dev->stop = loopback_close; +- dev->hard_start_xmit = loopback_start_xmit; +- dev->get_stats = loopback_get_stats; +- dev->set_multicast_list = loopback_set_multicast_list; +- dev->change_mtu = NULL; /* allow arbitrary mtu */ +- ++ dev->netdev_ops = &loopback_netdev_ops; + dev->tx_queue_len = 0; + + dev->features = (NETIF_F_HIGHDMA | +--- sle11sp1-2010-01-20.orig/drivers/xen/netback/netback.c 2009-11-06 10:51:17.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/netback/netback.c 2009-11-06 10:51:55.000000000 +0100 +@@ -354,7 +354,7 @@ static void xen_network_done_notify(void + static struct net_device *eth0_dev = NULL; + if (unlikely(eth0_dev == NULL)) + eth0_dev = __dev_get_by_name(&init_net, "eth0"); +- netif_rx_schedule(eth0_dev, ???); ++ netif_rx_schedule(???); + } + /* + * Add following to poll() function in NAPI driver (Tigon3 is example): +--- sle11sp1-2010-01-20.orig/drivers/xen/netfront/netfront.c 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/netfront/netfront.c 2009-11-06 10:51:55.000000000 +0100 +@@ -635,7 +635,7 @@ static int network_open(struct net_devic + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ + netfront_accelerator_call_stop_napi_irq(np, dev); + +- netif_rx_schedule(dev, &np->napi); ++ netif_rx_schedule(&np->napi); + } + } + spin_unlock_bh(&np->rx_lock); +@@ -707,7 +707,7 @@ static void rx_refill_timeout(unsigned l + + netfront_accelerator_call_stop_napi_irq(np, dev); + +- netif_rx_schedule(dev, &np->napi); ++ netif_rx_schedule(&np->napi); + } + + static void 
network_alloc_rx_buffers(struct net_device *dev) +@@ -1064,8 +1064,7 @@ static irqreturn_t netif_int(int irq, vo + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { + netfront_accelerator_call_stop_napi_irq(np, dev); + +- netif_rx_schedule(dev, &np->napi); +- dev->last_rx = jiffies; ++ netif_rx_schedule(&np->napi); + } + } + +@@ -1481,7 +1480,6 @@ err: + + /* Pass it up. */ + netif_receive_skb(skb); +- dev->last_rx = jiffies; + } + + /* If we get a callback with very few responses, reduce fill target. */ +@@ -1523,7 +1521,7 @@ err: + } + + if (!more_to_do && !accel_more_to_do) +- __netif_rx_complete(dev, napi); ++ __netif_rx_complete(napi); + + local_irq_restore(flags); + } +@@ -2024,6 +2022,18 @@ static void network_set_multicast_list(s + { + } + ++static const struct net_device_ops xennet_netdev_ops = { ++ .ndo_uninit = netif_uninit, ++ .ndo_open = network_open, ++ .ndo_stop = network_close, ++ .ndo_start_xmit = network_start_xmit, ++ .ndo_set_multicast_list = network_set_multicast_list, ++ .ndo_set_mac_address = xennet_set_mac_address, ++ .ndo_validate_addr = eth_validate_addr, ++ .ndo_change_mtu = xennet_change_mtu, ++ .ndo_get_stats = network_get_stats, ++}; ++ + static struct net_device * __devinit create_netdev(struct xenbus_device *dev) + { + int i, err = 0; +@@ -2080,15 +2090,8 @@ static struct net_device * __devinit cre + goto exit_free_tx; + } + +- netdev->open = network_open; +- netdev->hard_start_xmit = network_start_xmit; +- netdev->stop = network_close; +- netdev->get_stats = network_get_stats; ++ netdev->netdev_ops = &xennet_netdev_ops; + netif_napi_add(netdev, &np->napi, netif_poll, 64); +- netdev->set_multicast_list = network_set_multicast_list; +- netdev->uninit = netif_uninit; +- netdev->set_mac_address = xennet_set_mac_address; +- netdev->change_mtu = xennet_change_mtu; + netdev->features = NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); +@@ -2119,7 +2122,7 @@ inetdev_notify(struct notifier_block *th + struct net_device *dev = ifa->ifa_dev->dev; + + /* UP event and is it one of our devices? */ +- if (event == NETDEV_UP && dev->open == network_open) ++ if (event == NETDEV_UP && dev->netdev_ops->ndo_open == network_open) + send_fake_arp(dev); + + return NOTIFY_DONE; +--- sle11sp1-2010-01-20.orig/drivers/xen/sfc_netfront/accel_msg.c 2009-11-06 10:51:17.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/sfc_netfront/accel_msg.c 2009-11-06 10:51:55.000000000 +0100 +@@ -47,7 +47,7 @@ static void vnic_start_interrupts(netfro + netfront_accel_disable_net_interrupts(vnic); + vnic->irq_enabled = 0; + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); +- netif_rx_schedule(vnic->net_dev, &np->napi); ++ netif_rx_schedule(&np->napi); + } else { + /* + * Nothing yet, make sure we get interrupts through +@@ -532,7 +532,7 @@ irqreturn_t netfront_accel_net_channel_i + vnic->stats.event_count_since_irq; + vnic->stats.event_count_since_irq = 0; + #endif +- netif_rx_schedule(net_dev, &np->napi); ++ netif_rx_schedule(&np->napi); + } + else { + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); +--- sle11sp1-2010-01-20.orig/drivers/xen/xenbus/xenbus_client.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/xenbus/xenbus_client.c 2009-11-06 10:51:55.000000000 +0100 +@@ -170,7 +170,6 @@ EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); + /** + * xenbus_switch_state + * @dev: xenbus device +- * @xbt: transaction handle + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. 
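The xenbus_client.c kernel-doc fixes in this file track an older API change: xenbus_switch_state() takes only the device and the new state, with no transaction handle. A hedged sketch of the calling pattern the corrected comments describe; the myfront_connect() helper is hypothetical, while the two xenbus calls are the real API.

/* Hypothetical frontend helper illustrating the documented pattern. */
static int myfront_connect(struct xenbus_device *dev)
{
        int err = xenbus_switch_state(dev, XenbusStateConnected);

        if (err)
                /* Logs the error, then switches to XenbusStateClosing to
                 * schedule an orderly closedown of driver and peer. */
                xenbus_dev_fatal(dev, err, "switching to Connected");

        return err;
}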
+@@ -304,7 +303,7 @@ EXPORT_SYMBOL_GPL(xenbus_dev_error); + * @fmt: error message format + * + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by +- * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly ++ * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ + void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, +--- sle11sp1-2010-01-20.orig/drivers/xen/xenbus/xenbus_probe.c 2009-11-06 10:51:42.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/xenbus/xenbus_probe.c 2009-12-04 11:28:50.000000000 +0100 +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -73,6 +74,10 @@ + #endif + + int xen_store_evtchn; ++#if !defined(CONFIG_XEN) && !defined(MODULE) ++EXPORT_SYMBOL(xen_store_evtchn); ++#endif ++ + struct xenstore_domain_interface *xen_store_interface; + static unsigned long xen_store_mfn; + +@@ -197,6 +202,12 @@ static int xenbus_uevent_frontend(struct + } + #endif + ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) ++static struct device_attribute xenbus_dev_attrs[] = { ++ __ATTR_NULL ++}; ++#endif ++ + /* Bus type for frontend drivers. */ + static struct xen_bus_type xenbus_frontend = { + .root = "device", +@@ -205,13 +216,16 @@ static struct xen_bus_type xenbus_fronte + .probe = xenbus_probe_frontend, + .error = -ENODEV, + .bus = { +- .name = "xen", +- .match = xenbus_match, ++ .name = "xen", ++ .match = xenbus_match, + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +- .probe = xenbus_dev_probe, +- .remove = xenbus_dev_remove, +- .shutdown = xenbus_dev_shutdown, +- .uevent = xenbus_uevent_frontend, ++ .probe = xenbus_dev_probe, ++ .remove = xenbus_dev_remove, ++ .shutdown = xenbus_dev_shutdown, ++ .uevent = xenbus_uevent_frontend, ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) ++ .dev_attrs = xenbus_dev_attrs, + #endif + }, + #if defined(CONFIG_XEN) || defined(MODULE) +@@ -584,7 +598,17 @@ int xenbus_probe_node(struct xen_bus_typ + xendev->dev.bus = &bus->bus; + xendev->dev.release = xenbus_dev_release; + ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++ { ++ char devname[XEN_BUS_ID_SIZE]; ++ ++ err = bus->get_bus_id(devname, xendev->nodename); ++ if (!err) ++ dev_set_name(&xendev->dev, devname); ++ } ++#else + err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); ++#endif + if (err) + goto fail; + +@@ -770,7 +794,7 @@ static int suspend_dev(struct device *de + err = drv->suspend(xdev); + if (err) + printk(KERN_WARNING +- "xenbus: suspend %s failed: %i\n", dev->bus_id, err); ++ "xenbus: suspend %s failed: %i\n", dev_name(dev), err); + return 0; + } + +@@ -791,7 +815,7 @@ static int suspend_cancel_dev(struct dev + if (err) + printk(KERN_WARNING + "xenbus: suspend_cancel %s failed: %i\n", +- dev->bus_id, err); ++ dev_name(dev), err); + return 0; + } + +@@ -813,7 +837,7 @@ static int resume_dev(struct device *dev + if (err) { + printk(KERN_WARNING + "xenbus: resume (talk_to_otherend) %s failed: %i\n", +- dev->bus_id, err); ++ dev_name(dev), err); + return err; + } + +@@ -824,7 +848,7 @@ static int resume_dev(struct device *dev + if (err) { + printk(KERN_WARNING + "xenbus: resume %s failed: %i\n", +- dev->bus_id, err); ++ dev_name(dev), err); + return err; + } + } +@@ -833,7 +857,7 @@ static int resume_dev(struct device *dev + if (err) { + printk(KERN_WARNING + "xenbus_probe: resume (watch_otherend) %s failed: " +- "%d.\n", dev->bus_id, err); ++ "%d.\n", dev_name(dev), err); + return err; + } 
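The printk conversions in suspend_dev(), suspend_cancel_dev(), and resume_dev() all follow one pattern: the bus_id member of struct device went away upstream, so messages must be routed through dev_name(). A minimal sketch of the idiom, assuming a hypothetical report_failure() helper; the pre-2.6.26 fallback that maps dev_name() back to dev->bus_id appears in the xenbus_probe.h hunk below.

/* Hypothetical helper showing the portable dev_name() logging idiom. */
static int report_failure(struct device *dev, const char *what, int err)
{
        if (err)
                printk(KERN_WARNING "xenbus: %s %s failed: %i\n",
                       what, dev_name(dev), err);
        return err;
}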
+ +@@ -1145,6 +1169,14 @@ static int __devinit xenbus_probe_init(v + if (!is_initial_xendomain()) + xenbus_probe(NULL); + ++#if defined(CONFIG_XEN_COMPAT_XENFS) && !defined(MODULE) ++ /* ++ * Create xenfs mountpoint in /proc for compatibility with ++ * utilities that expect to find "xenbus" under "/proc/xen". ++ */ ++ proc_mkdir("xen", NULL); ++#endif ++ + return 0; + + err: +--- sle11sp1-2010-01-20.orig/drivers/xen/xenbus/xenbus_probe.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/xenbus/xenbus_probe.h 2009-11-06 10:51:55.000000000 +0100 +@@ -45,6 +45,10 @@ + #define is_initial_xendomain() xen_initial_domain() + #endif + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ++#define dev_name(dev) ((dev)->bus_id) ++#endif ++ + #if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE) + extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); + extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); +--- sle11sp1-2010-01-20.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2009-11-06 10:51:17.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/xenbus/xenbus_probe_backend.c 2009-11-06 10:51:55.000000000 +0100 +@@ -36,6 +36,7 @@ + __FUNCTION__, __LINE__, ##args) + + #include ++#include + #include + #include + #include +@@ -108,6 +109,10 @@ static int backend_bus_id(char bus_id[BU + return 0; + } + ++static struct device_attribute xenbus_backend_attrs[] = { ++ __ATTR_NULL ++}; ++ + static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type// */ +@@ -115,12 +120,13 @@ static struct xen_bus_type xenbus_backen + .probe = xenbus_probe_backend, + .error = -ENODEV, + .bus = { +- .name = "xen-backend", +- .match = xenbus_match, +- .probe = xenbus_dev_probe, +- .remove = xenbus_dev_remove, +-// .shutdown = xenbus_dev_shutdown, +- .uevent = xenbus_uevent_backend, ++ .name = "xen-backend", ++ .match = xenbus_match, ++ .probe = xenbus_dev_probe, ++ .remove = xenbus_dev_remove, ++// .shutdown = xenbus_dev_shutdown, ++ .uevent = xenbus_uevent_backend, ++ .dev_attrs = xenbus_backend_attrs, + }, + .dev = { + .bus_id = "xen-backend", +--- sle11sp1-2010-01-20.orig/drivers/xen/xenbus/xenbus_xs.c 2009-11-06 10:51:07.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/xenbus/xenbus_xs.c 2009-11-06 10:51:55.000000000 +0100 +@@ -227,6 +227,9 @@ void *xenbus_dev_request_and_reply(struc + + return ret; + } ++#if !defined(CONFIG_XEN) && !defined(MODULE) ++EXPORT_SYMBOL(xenbus_dev_request_and_reply); ++#endif + + /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. 
*/ + static void *xs_talkv(struct xenbus_transaction t, +--- sle11sp1-2010-01-20.orig/drivers/xen/xenoprof/xenoprofile.c 2010-01-07 09:58:35.000000000 +0100 ++++ sle11sp1-2010-01-20/drivers/xen/xenoprof/xenoprofile.c 2010-01-07 09:59:32.000000000 +0100 +@@ -50,7 +50,7 @@ static int xenoprof_enabled = 0; + static int xenoprof_is_primary = 0; + static int active_defined; + +-extern unsigned long backtrace_depth; ++extern unsigned long oprofile_backtrace_depth; + + /* Number of buffers in shared area (one per VCPU) */ + static int nbuf; +@@ -339,11 +339,11 @@ static int xenoprof_setup(void) + active_defined = 1; + } + +- if (backtrace_depth > 0) { ++ if (oprofile_backtrace_depth > 0) { + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace, +- &backtrace_depth); ++ &oprofile_backtrace_depth); + if (ret) +- backtrace_depth = 0; ++ oprofile_backtrace_depth = 0; + } + + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL); +--- sle11sp1-2010-01-20.orig/fs/xfs/linux-2.6/xfs_buf.c 2010-01-20 10:20:50.000000000 +0100 ++++ sle11sp1-2010-01-20/fs/xfs/linux-2.6/xfs_buf.c 2009-11-06 10:51:55.000000000 +0100 +@@ -194,7 +194,7 @@ free_address( + { + a_list_t *aentry; + +-#ifdef CONFIG_XEN ++#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) + /* + * Xen needs to be able to make sure it can get an exclusive + * RO mapping of pages it wants to turn into a pagetable. If +--- sle11sp1-2010-01-20.orig/include/acpi/processor.h 2009-12-04 10:44:41.000000000 +0100 ++++ sle11sp1-2010-01-20/include/acpi/processor.h 2009-11-06 10:51:55.000000000 +0100 +@@ -438,6 +438,13 @@ extern int processor_extcntl_prepare(str + extern int acpi_processor_get_performance_info(struct acpi_processor *pr); + extern int acpi_processor_get_psd(struct acpi_processor *pr); + void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **); ++ ++/* ++ * Declarations for objects and functions removed in native 2.6.29, and ++ * thus moved to drivers/acpi/processor_extcntl.c. ++ */ ++extern struct notifier_block acpi_processor_latency_notifier; ++int acpi_processor_set_power_policy(struct acpi_processor *); + #else + static inline int processor_cntl_external(void) {return 0;} + static inline int processor_pm_external(void) {return 0;} +--- sle11sp1-2010-01-20.orig/include/xen/cpu_hotplug.h 2009-11-06 10:51:47.000000000 +0100 ++++ sle11sp1-2010-01-20/include/xen/cpu_hotplug.h 2009-11-06 10:51:55.000000000 +0100 +@@ -5,7 +5,7 @@ + #include + + #if defined(CONFIG_X86) && defined(CONFIG_SMP) +-extern cpumask_t cpu_initialized_map; ++extern cpumask_var_t vcpu_initialized_mask; + #endif + + #if defined(CONFIG_HOTPLUG_CPU) +--- sle11sp1-2010-01-20.orig/include/xen/evtchn.h 2009-12-18 10:09:52.000000000 +0100 ++++ sle11sp1-2010-01-20/include/xen/evtchn.h 2009-12-18 10:10:04.000000000 +0100 +@@ -47,6 +47,17 @@ + /* + * LOW-LEVEL DEFINITIONS + */ ++struct irq_cfg { ++ u32 info; ++ union { ++ int bindcount; /* for dynamic IRQs */ ++#ifdef CONFIG_X86_IO_APIC ++ u8 vector; /* for physical IRQs */ ++#endif ++ }; ++}; ++ ++int assign_irq_vector(int irq, struct irq_cfg *, const struct cpumask *); + + /* + * Dynamically bind an event source to an IRQ-like callback handler. 
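The union inside the new struct irq_cfg is safe because its two members are never live at the same time: only dynamic (event-channel) IRQs maintain a bind count, and only physical IRQs carry an allocated vector. A standalone sketch of that invariant, with the member uses modeled on the evtchn.c hunks above:

#include <stdint.h>

struct irq_cfg_sketch {
        uint32_t info;               /* packed type/index/evtchn word */
        union {
                int bindcount;       /* dynamic IRQs: references held */
                uint8_t vector;      /* physical IRQs: assigned vector */
        };
};

/* Dynamic path, cf. bind_virq_to_irq() and unbind_from_irq(). */
static void dyn_irq_get(struct irq_cfg_sketch *cfg) { cfg->bindcount++; }
static int dyn_irq_put(struct irq_cfg_sketch *cfg) { return --cfg->bindcount == 0; }

/* Physical path, cf. assign_irq_vector(): written once, then reused. */
static int pirq_set_vector(struct irq_cfg_sketch *cfg, uint8_t vec)
{
        if (cfg->vector)
                return 0;            /* already assigned, keep it */
        cfg->vector = vec;
        return 1;
}

int main(void)
{
        struct irq_cfg_sketch dyn = { 0 }, phys = { 0 };

        dyn_irq_get(&dyn);                 /* first binding */
        pirq_set_vector(&phys, 0x31);      /* hypothetical vector value */
        return dyn_irq_put(&dyn) ? 0 : 1;  /* last unbind drops count to 0 */
}

Overlaying the two saves a little space per IRQ and, more importantly, documents that a single IRQ is one kind or the other, never both.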
+--- sle11sp1-2010-01-20.orig/include/xen/xenbus.h 2009-11-06 10:51:17.000000000 +0100 ++++ sle11sp1-2010-01-20/include/xen/xenbus.h 2009-11-06 10:51:55.000000000 +0100 +@@ -322,7 +322,9 @@ void xenbus_dev_error(struct xenbus_devi + void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...); + ++#if defined(CONFIG_XEN) || defined(MODULE) + int xenbus_dev_init(void); ++#endif + + const char *xenbus_strstate(enum xenbus_state state); + int xenbus_dev_is_online(struct xenbus_device *dev); +--- sle11sp1-2010-01-20.orig/lib/swiotlb-xen.c 2009-12-14 17:24:14.000000000 +0100 ++++ sle11sp1-2010-01-20/lib/swiotlb-xen.c 2009-12-14 17:27:59.000000000 +0100 +@@ -8,6 +8,7 @@ + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2005 Keir Fraser ++ * 08/12/11 beckyb Add highmem support + */ + + #include +@@ -16,6 +17,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -30,27 +33,11 @@ + #include + #include + +-int swiotlb; +- + #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) + +-/* +- * Maximum allowable number of contiguous slabs to map, +- * must be a power of 2. What is the appropriate value ? +- * The complexity of {map,unmap}_single is linearly dependent on this value. +- */ +-#define IO_TLB_SEGSIZE 128 +- +-/* +- * log of the size of each IO TLB slab. The number of slabs is command line +- * controllable. +- */ +-#define IO_TLB_SHIFT 11 +- ++int swiotlb; + int swiotlb_force; + +-static unsigned long iotlb_nslabs; +- + /* + * Used to do a quick range check in swiotlb_unmap_single and + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this +@@ -59,6 +46,12 @@ static unsigned long iotlb_nslabs; + static char *io_tlb_start, *io_tlb_end; + + /* ++ * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and ++ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. ++ */ ++static unsigned long io_tlb_nslabs; ++ ++/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. + */ + static unsigned long io_tlb_overflow = 32*1024; +@@ -76,10 +69,7 @@ static unsigned int io_tlb_index; + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +-static struct phys_addr { +- struct page *page; +- unsigned int offset; +-} *io_tlb_orig_addr; ++static phys_addr_t *io_tlb_orig_addr; + + /* + * Protect the above data structures in the map and unmap calls +@@ -101,9 +91,9 @@ setup_io_tlb_npages(char *str) + { + /* Unlike ia64, the size is aperture in megabytes, not 'slabs'! */ + if (isdigit(*str)) { +- iotlb_nslabs = simple_strtoul(str, &str, 0) << ++ io_tlb_nslabs = simple_strtoul(str, &str, 0) << + (20 - IO_TLB_SHIFT); +- iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); ++ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + if (*str == ',') + ++str; +@@ -121,35 +111,17 @@ setup_io_tlb_npages(char *str) + __setup("swiotlb=", setup_io_tlb_npages); + /* make io_tlb_overflow tunable too? */ + +-/* +- * Statically reserve bounce buffer space and initialize bounce buffer data +- * structures for the software IO TLB used to implement the PCI DMA API. 
+- */ +-void __init +-swiotlb_init_with_default_size(size_t default_size) ++void *__init swiotlb_alloc_boot(size_t size, unsigned long nslabs) + { +- unsigned long i, bytes; ++ void *start = alloc_bootmem_pages(size); ++ unsigned int i; + int rc; + +- if (!iotlb_nslabs) { +- iotlb_nslabs = (default_size >> IO_TLB_SHIFT); +- iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); +- } +- +- bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT); +- +- /* +- * Get IO TLB memory from the low pages +- */ +- io_tlb_start = alloc_bootmem_pages(bytes); +- if (!io_tlb_start) +- panic("Cannot allocate SWIOTLB buffer!\n"); +- + dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; +- for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) { ++ for (i = 0; i < nslabs; i += IO_TLB_SEGSIZE) { + do { + rc = xen_create_contiguous_region( +- (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT), ++ (unsigned long)start + (i << IO_TLB_SHIFT), + get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); +@@ -158,12 +130,12 @@ swiotlb_init_with_default_size(size_t de + panic("No suitable physical memory available for SWIOTLB buffer!\n" + "Use dom0_mem Xen boot parameter to reserve\n" + "some DMA memory (e.g., dom0_mem=-128M).\n"); +- iotlb_nslabs = i; ++ io_tlb_nslabs = i; + i <<= IO_TLB_SHIFT; +- free_bootmem(__pa(io_tlb_start + i), bytes - i); +- bytes = i; ++ free_bootmem(__pa(start + i), size - i); ++ size = i; + for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { +- unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1)); ++ unsigned int bits = fls64(virt_to_bus(start + i - 1)); + + if (bits > dma_bits) + dma_bits = bits; +@@ -171,18 +143,88 @@ swiotlb_init_with_default_size(size_t de + break; + } + } ++ ++ return start; ++} ++ ++#ifndef CONFIG_XEN ++void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) ++{ ++ return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); ++} ++#endif ++ ++dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) ++{ ++ return phys_to_machine(paddr); ++} ++ ++phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) ++{ ++ return machine_to_phys(baddr); ++} ++ ++static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, ++ volatile void *address) ++{ ++ return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); ++} ++ ++static void *swiotlb_bus_to_virt(dma_addr_t address) ++{ ++ return phys_to_virt(swiotlb_bus_to_phys(address)); ++} ++ ++int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) ++{ ++ return 0; ++} ++ ++static void swiotlb_print_info(unsigned long bytes) ++{ ++ printk(KERN_INFO "Software IO TLB enabled: \n" ++ " Aperture: %lu megabytes\n" ++ " Address size: %u bits\n" ++ " Kernel range: %p - %p\n", ++ bytes >> 20, dma_bits, ++ io_tlb_start, io_tlb_end); ++} ++ ++/* ++ * Statically reserve bounce buffer space and initialize bounce buffer data ++ * structures for the software IO TLB used to implement the PCI DMA API. 
++ */ ++void __init ++swiotlb_init_with_default_size(size_t default_size) ++{ ++ unsigned long i, bytes; ++ int rc; ++ ++ if (!io_tlb_nslabs) { ++ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); ++ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); ++ } ++ ++ bytes = io_tlb_nslabs << IO_TLB_SHIFT; ++ ++ /* ++ * Get IO TLB memory from the low pages ++ */ ++ io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs); ++ if (!io_tlb_start) ++ panic("Cannot allocate SWIOTLB buffer!\n"); ++ bytes = io_tlb_nslabs << IO_TLB_SHIFT; + io_tlb_end = io_tlb_start + bytes; + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE. + */ +- io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int)); +- for (i = 0; i < iotlb_nslabs; i++) ++ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); ++ for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; +- io_tlb_orig_addr = alloc_bootmem( +- iotlb_nslabs * sizeof(*io_tlb_orig_addr)); ++ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); + + /* + * Get the overflow emergency buffer +@@ -200,13 +242,7 @@ swiotlb_init_with_default_size(size_t de + if (rc) + panic("No suitable physical memory available for SWIOTLB overflow buffer!\n"); + +- printk(KERN_INFO "Software IO TLB enabled: \n" +- " Aperture: %lu megabytes\n" +- " Kernel range: %p - %p\n" +- " Address size: %u bits\n", +- bytes >> 20, +- io_tlb_start, io_tlb_end, +- dma_bits); ++ swiotlb_print_info(bytes); + } + + void __init +@@ -233,6 +269,11 @@ swiotlb_init(void) + printk(KERN_INFO "Software IO TLB disabled\n"); + } + ++static inline int range_needs_mapping(phys_addr_t pa, size_t size) ++{ ++ return range_straddles_page_boundary(pa, size); ++} ++ + static int is_swiotlb_buffer(dma_addr_t addr) + { + unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr)); +@@ -246,46 +287,50 @@ static int is_swiotlb_buffer(dma_addr_t + } + + /* ++ * Bounce: copy the swiotlb buffer back to the original dma location ++ * + * We use __copy_to_user_inatomic to transfer to the host buffer because the + * buffer may be mapped read-only (e.g, in blkback driver) but lower-level + * drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an + * unnecessary copy from the aperture to the host buffer, and a page fault. + */ +-static void +-__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir) ++static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, ++ enum dma_data_direction dir) + { +- if (PageHighMem(buffer.page)) { +- size_t len, bytes; +- char *dev, *host, *kmp; +- len = size; +- while (len != 0) { +- unsigned long flags; +- +- if (((bytes = len) + buffer.offset) > PAGE_SIZE) +- bytes = PAGE_SIZE - buffer.offset; +- local_irq_save(flags); /* protects KM_BOUNCE_READ */ +- kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ); +- dev = dma_addr + size - len; +- host = kmp + buffer.offset; +- if (dir == DMA_FROM_DEVICE) { +- if (__copy_to_user_inatomic(host, dev, bytes)) +- /* inaccessible */; +- } else +- memcpy(dev, host, bytes); +- kunmap_atomic(kmp, KM_BOUNCE_READ); ++ unsigned long pfn = PFN_DOWN(phys); ++ ++ if (PageHighMem(pfn_to_page(pfn))) { ++ /* The buffer does not have a mapping. 
Map it in and copy */ ++ unsigned int offset = phys & ~PAGE_MASK; ++ char *buffer; ++ unsigned int sz = 0; ++ unsigned long flags; ++ ++ while (size) { ++ sz = min((size_t)(PAGE_SIZE - offset), size); ++ ++ local_irq_save(flags); ++ buffer = kmap_atomic(pfn_to_page(pfn), ++ KM_BOUNCE_READ); ++ if (dir == DMA_TO_DEVICE) ++ memcpy(dma_addr, buffer + offset, sz); ++ else if (__copy_to_user_inatomic(buffer + offset, ++ dma_addr, sz)) ++ /* inaccessible */; ++ kunmap_atomic(buffer, KM_BOUNCE_READ); + local_irq_restore(flags); +- len -= bytes; +- buffer.page++; +- buffer.offset = 0; ++ ++ size -= sz; ++ pfn++; ++ dma_addr += sz; ++ offset = 0; + } + } else { +- char *host = (char *)phys_to_virt( +- page_to_pseudophys(buffer.page)) + buffer.offset; +- if (dir == DMA_FROM_DEVICE) { +- if (__copy_to_user_inatomic(host, dma_addr, size)) +- /* inaccessible */; +- } else if (dir == DMA_TO_DEVICE) +- memcpy(dma_addr, host, size); ++ if (dir == DMA_TO_DEVICE) ++ memcpy(dma_addr, phys_to_virt(phys), size); ++ else if (__copy_to_user_inatomic(phys_to_virt(phys), ++ dma_addr, size)) ++ /* inaccessible */; + } + } + +@@ -293,12 +338,11 @@ __sync_single(struct phys_addr buffer, c + * Allocates bounce buffer and returns its kernel virtual address. + */ + static void * +-map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir) ++map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) + { + unsigned long flags; + char *dma_addr; + unsigned int nslots, stride, index, wrap; +- struct phys_addr slot_buf; + int i; + unsigned long mask; + unsigned long offset_slots; +@@ -306,6 +350,10 @@ map_single(struct device *hwdev, struct + + mask = dma_get_seg_boundary(hwdev); + offset_slots = -IO_TLB_SEGSIZE; ++ ++ /* ++ * Carefully handle integer overflow which can occur when mask == ~0UL. ++ */ + max_slots = mask + 1 + ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); +@@ -328,7 +376,7 @@ map_single(struct device *hwdev, struct + */ + spin_lock_irqsave(&io_tlb_lock, flags); + index = ALIGN(io_tlb_index, stride); +- if (index >= iotlb_nslabs) ++ if (index >= io_tlb_nslabs) + index = 0; + wrap = index; + +@@ -336,7 +384,7 @@ map_single(struct device *hwdev, struct + while (iommu_is_span_boundary(index, nslots, offset_slots, + max_slots)) { + index += stride; +- if (index >= iotlb_nslabs) ++ if (index >= io_tlb_nslabs) + index = 0; + if (index == wrap) + goto not_found; +@@ -360,13 +408,13 @@ map_single(struct device *hwdev, struct + * Update the indices to avoid searching in the next + * round. + */ +- io_tlb_index = ((index + nslots) < iotlb_nslabs ++ io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; +- if (index >= iotlb_nslabs) ++ if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + +@@ -381,29 +429,14 @@ found: + * This is needed when we sync the memory. Then we sync the buffer if + * needed. 
+ */ +- slot_buf = buffer; +- for (i = 0; i < nslots; i++) { +- slot_buf.page += slot_buf.offset >> PAGE_SHIFT; +- slot_buf.offset &= PAGE_SIZE - 1; +- io_tlb_orig_addr[index+i] = slot_buf; +- slot_buf.offset += 1 << IO_TLB_SHIFT; +- } +- if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL)) +- __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); ++ for (i = 0; i < nslots; i++) ++ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); ++ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) ++ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + + return dma_addr; + } + +-static struct phys_addr dma_addr_to_phys_addr(char *dma_addr) +-{ +- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; +- struct phys_addr buffer = io_tlb_orig_addr[index]; +- buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1); +- buffer.page += buffer.offset >> PAGE_SHIFT; +- buffer.offset &= PAGE_SIZE - 1; +- return buffer; +-} +- + /* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +@@ -413,13 +446,13 @@ unmap_single(struct device *hwdev, char + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; +- struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); ++ phys_addr_t phys = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ +- if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) +- __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); ++ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) ++ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + + /* + * Return the buffer to the free list by setting the corresponding +@@ -453,9 +486,13 @@ unmap_single(struct device *hwdev, char + static void + sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) + { +- struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); ++ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; ++ phys_addr_t phys = io_tlb_orig_addr[index]; ++ ++ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); ++ + BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE)); +- __sync_single(buffer, dma_addr, size, dir); ++ swiotlb_bounce(phys, dma_addr, size, dir); + } + + static void +@@ -469,7 +506,7 @@ swiotlb_full(struct device *dev, size_t + * the damage, or panic when the transfer is too big. + */ + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at " +- "device %s\n", size, dev ? dev->bus_id : "?"); ++ "device %s\n", size, dev ? dev_name(dev) : "?"); + + if (size > io_tlb_overflow && do_panic) { + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) +@@ -494,7 +531,6 @@ _swiotlb_map_single(struct device *hwdev + dma_addr_t dev_addr = gnttab_dma_map_page(page) + + offset_in_page(paddr); + void *map; +- struct phys_addr buffer; + + BUG_ON(dir == DMA_NONE); + +@@ -503,23 +539,21 @@ _swiotlb_map_single(struct device *hwdev + * we can safely return the device addr and not worry about bounce + * buffering it. + */ +- if (!range_straddles_page_boundary(paddr, size) && +- !address_needs_mapping(hwdev, dev_addr, size)) ++ if (!address_needs_mapping(hwdev, dev_addr, size) && ++ !range_needs_mapping(paddr, size)) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. 
+ */ + gnttab_dma_unmap_page(dev_addr); +- buffer.page = page; +- buffer.offset = offset_in_page(paddr); +- map = map_single(hwdev, buffer, size, dir); ++ map = map_single(hwdev, paddr, size, dir); + if (!map) { + swiotlb_full(hwdev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + +- dev_addr = virt_to_bus(map); ++ dev_addr = swiotlb_virt_to_bus(hwdev, map); + return dev_addr; + } + +@@ -536,6 +570,7 @@ swiotlb_map_single(struct device *hwdev, + { + return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL); + } ++EXPORT_SYMBOL(swiotlb_map_single); + + dma_addr_t + swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) +@@ -555,7 +590,7 @@ void + swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir, struct dma_attrs *attrs) + { +- char *dma_addr = bus_to_virt(dev_addr); ++ char *dma_addr = swiotlb_bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (is_swiotlb_buffer(dev_addr)) +@@ -571,6 +606,8 @@ swiotlb_unmap_single(struct device *hwde + { + return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL); + } ++EXPORT_SYMBOL(swiotlb_unmap_single); ++ + /* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. +@@ -585,48 +622,50 @@ void + swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) + { +- char *dma_addr = bus_to_virt(dev_addr); ++ char *dma_addr = swiotlb_bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (is_swiotlb_buffer(dev_addr)) + sync_single(hwdev, dma_addr, size, dir); + } ++EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); + + void + swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) + { +- char *dma_addr = bus_to_virt(dev_addr); ++ char *dma_addr = swiotlb_bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (is_swiotlb_buffer(dev_addr)) + sync_single(hwdev, dma_addr, size, dir); + } ++EXPORT_SYMBOL(swiotlb_sync_single_for_device); + + void + swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir) + { +- char *dma_addr = bus_to_virt(dev_addr); ++ char *dma_addr = swiotlb_bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (is_swiotlb_buffer(dev_addr)) + sync_single(hwdev, dma_addr + offset, size, dir); + } ++EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); + + void + swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir) + { +- char *dma_addr = bus_to_virt(dev_addr); ++ char *dma_addr = swiotlb_bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (is_swiotlb_buffer(dev_addr)) + sync_single(hwdev, dma_addr + offset, size, dir); + } ++EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); + +-void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int, +- struct dma_attrs *); + /* + * Map a set of buffers described by scatterlist in streaming mode for DMA. 
+ * This is the scatter-gather version of the above swiotlb_map_single +@@ -648,23 +687,23 @@ swiotlb_map_sg_attrs(struct device *hwde + int dir, struct dma_attrs *attrs) + { + struct scatterlist *sg; +- struct phys_addr buffer; +- dma_addr_t dev_addr; +- char *map; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- dev_addr = gnttab_dma_map_page(sg_page(sg)) + sg->offset; ++ dma_addr_t dev_addr = gnttab_dma_map_page(sg_page(sg)) ++ + sg->offset; ++ phys_addr_t paddr = page_to_pseudophys(sg_page(sg)) ++ + sg->offset; + +- if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg)) +- + sg->offset, sg->length) ++ if (range_needs_mapping(paddr, sg->length) + || address_needs_mapping(hwdev, dev_addr, sg->length)) { ++ void *map; ++ + gnttab_dma_unmap_page(dev_addr); +- buffer.page = sg_page(sg); +- buffer.offset = sg->offset; +- map = map_single(hwdev, buffer, sg->length, dir); ++ map = map_single(hwdev, paddr, ++ sg->length, dir); + if (!map) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ +@@ -674,7 +713,7 @@ swiotlb_map_sg_attrs(struct device *hwde + sgl[0].dma_length = 0; + return 0; + } +- sg->dma_address = virt_to_bus(map); ++ sg->dma_address = swiotlb_virt_to_bus(hwdev, map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; +@@ -689,6 +728,7 @@ swiotlb_map_sg(struct device *hwdev, str + { + return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); + } ++EXPORT_SYMBOL(swiotlb_map_sg); + + /* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules +@@ -705,7 +745,7 @@ swiotlb_unmap_sg_attrs(struct device *hw + + for_each_sg(sgl, sg, nelems, i) { + if (sg->dma_address != sg_phys(sg)) +- unmap_single(hwdev, bus_to_virt(sg->dma_address), ++ unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), + sg->dma_length, dir); + else + gnttab_dma_unmap_page(sg->dma_address); +@@ -719,6 +759,7 @@ swiotlb_unmap_sg(struct device *hwdev, s + { + return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); + } ++EXPORT_SYMBOL(swiotlb_unmap_sg); + + /* + * Make physical memory consistent for a set of streaming mode DMA translations +@@ -738,10 +779,11 @@ swiotlb_sync_sg_for_cpu(struct device *h + + for_each_sg(sgl, sg, nelems, i) { + if (sg->dma_address != sg_phys(sg)) +- sync_single(hwdev, bus_to_virt(sg->dma_address), ++ sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } + } ++EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); + + void + swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl, +@@ -754,16 +796,18 @@ swiotlb_sync_sg_for_device(struct device + + for_each_sg(sgl, sg, nelems, i) { + if (sg->dma_address != sg_phys(sg)) +- sync_single(hwdev, bus_to_virt(sg->dma_address), ++ sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } + } ++EXPORT_SYMBOL(swiotlb_sync_sg_for_device); + + int + swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) + { +- return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); ++ return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer)); + } ++EXPORT_SYMBOL(swiotlb_dma_mapping_error); + + /* + * Return whether the given PCI device DMA address mask can be supported +@@ -776,14 +820,4 @@ swiotlb_dma_supported (struct device *hw + { + return (mask >= ((1UL << dma_bits) - 1)); + } +- +-EXPORT_SYMBOL(swiotlb_map_single); +-EXPORT_SYMBOL(swiotlb_unmap_single); +-EXPORT_SYMBOL(swiotlb_map_sg); +-EXPORT_SYMBOL(swiotlb_unmap_sg); 
+-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); +-EXPORT_SYMBOL(swiotlb_sync_single_for_device); +-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); +-EXPORT_SYMBOL(swiotlb_sync_sg_for_device); +-EXPORT_SYMBOL(swiotlb_dma_mapping_error); + EXPORT_SYMBOL(swiotlb_dma_supported); --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-ipi-per-cpu-irq +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-ipi-per-cpu-irq @@ -0,0 +1,797 @@ +From: jbeulich@novell.com +Subject: fold IPIs onto a single IRQ each +Patch-mainline: obsolete + +--- head-2010-01-18.orig/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/ipi-xen.c 2009-11-06 11:10:20.000000000 +0100 +@@ -21,31 +21,22 @@ + + #include + +-DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); +- +-static inline void __send_IPI_one(unsigned int cpu, int vector) +-{ +- int irq = per_cpu(ipi_to_irq, cpu)[vector]; +- BUG_ON(irq < 0); +- notify_remote_via_irq(irq); +-} +- + static void __send_IPI_shortcut(unsigned int shortcut, int vector) + { + unsigned int cpu; + + switch (shortcut) { + case APIC_DEST_SELF: +- __send_IPI_one(smp_processor_id(), vector); ++ notify_remote_via_ipi(vector, smp_processor_id()); + break; + case APIC_DEST_ALLBUT: + for_each_online_cpu(cpu) + if (cpu != smp_processor_id()) +- __send_IPI_one(cpu, vector); ++ notify_remote_via_ipi(vector, cpu); + break; + case APIC_DEST_ALLINC: + for_each_online_cpu(cpu) +- __send_IPI_one(cpu, vector); ++ notify_remote_via_ipi(vector, cpu); + break; + default: + printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, +@@ -63,7 +54,7 @@ void xen_send_IPI_mask_allbutself(const + WARN_ON(!cpumask_subset(cpumask, cpu_online_mask)); + for_each_cpu_and(cpu, cpumask, cpu_online_mask) + if (cpu != smp_processor_id()) +- __send_IPI_one(cpu, vector); ++ notify_remote_via_ipi(vector, cpu); + local_irq_restore(flags); + } + +@@ -75,7 +66,7 @@ void xen_send_IPI_mask(const struct cpum + local_irq_save(flags); + WARN_ON(!cpumask_subset(cpumask, cpu_online_mask)); + for_each_cpu_and(cpu, cpumask, cpu_online_mask) +- __send_IPI_one(cpu, vector); ++ notify_remote_via_ipi(vector, cpu); + local_irq_restore(flags); + } + +--- head-2010-01-18.orig/arch/x86/kernel/irq-xen.c 2010-01-07 11:22:00.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq-xen.c 2010-01-07 11:22:50.000000000 +0100 +@@ -312,6 +312,7 @@ void fixup_irqs(void) + + affinity = desc->affinity; + if (!irq_has_action(irq) || ++ (desc->status & IRQ_PER_CPU) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; +--- head-2010-01-18.orig/drivers/xen/Kconfig 2009-12-18 12:30:46.000000000 +0100 ++++ head-2010-01-18/drivers/xen/Kconfig 2009-12-18 12:30:54.000000000 +0100 +@@ -4,6 +4,7 @@ + + config XEN + bool ++ select IRQ_PER_CPU if SMP + + if XEN + config XEN_INTERFACE_VERSION +@@ -349,6 +350,9 @@ endmenu + config HAVE_IRQ_IGNORE_UNHANDLED + def_bool y + ++config IRQ_PER_CPU ++ bool ++ + config NO_IDLE_HZ + def_bool y + +--- head-2010-01-18.orig/drivers/xen/core/evtchn.c 2009-11-06 11:10:15.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/evtchn.c 2009-11-06 11:10:20.000000000 +0100 +@@ -58,6 +58,22 @@ static DEFINE_SPINLOCK(irq_mapping_updat + static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 }; + ++/* IRQ <-> IPI mapping. */ ++#ifndef NR_IPIS ++#define NR_IPIS 1 ++#endif ++#if defined(CONFIG_SMP) && defined(CONFIG_X86) ++static int ipi_to_irq[NR_IPIS] __read_mostly = {[0 ... 
NR_IPIS-1] = -1}; ++static DEFINE_PER_CPU(int[NR_IPIS], ipi_to_evtchn); ++#else ++#define PER_CPU_IPI_IRQ ++#endif ++#if !defined(CONFIG_SMP) || !defined(PER_CPU_IPI_IRQ) ++#define BUG_IF_IPI(irq) BUG_ON(type_from_irq(irq) == IRQT_IPI) ++#else ++#define BUG_IF_IPI(irq) ((void)(irq)) ++#endif ++ + /* Binding types. */ + enum { + IRQT_UNBOUND, +@@ -116,12 +132,14 @@ static inline u32 mk_irq_info(u32 type, + * Accessors for packed IRQ information. + */ + ++#ifdef PER_CPU_IPI_IRQ + static inline unsigned int evtchn_from_irq(int irq) + { + const struct irq_cfg *cfg = irq_cfg(irq); + + return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0; + } ++#endif + + static inline unsigned int index_from_irq(int irq) + { +@@ -138,14 +156,32 @@ static inline unsigned int type_from_irq + return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND; + } + ++#ifndef PER_CPU_IPI_IRQ ++static inline unsigned int evtchn_from_per_cpu_irq(unsigned int irq, ++ unsigned int cpu) ++{ ++ BUG_ON(type_from_irq(irq) != IRQT_IPI); ++ return per_cpu(ipi_to_evtchn, cpu)[index_from_irq(irq)]; ++} ++ ++static inline unsigned int evtchn_from_irq(unsigned int irq) ++{ ++ if (type_from_irq(irq) != IRQT_IPI) { ++ const struct irq_cfg *cfg = irq_cfg(irq); ++ ++ return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0; ++ } ++ return evtchn_from_per_cpu_irq(irq, smp_processor_id()); ++} ++#endif ++ + /* IRQ <-> VIRQ mapping. */ + DEFINE_PER_CPU(int[NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; + ++#if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ) + /* IRQ <-> IPI mapping. */ +-#ifndef NR_IPIS +-#define NR_IPIS 1 +-#endif + DEFINE_PER_CPU(int[NR_IPIS], ipi_to_irq) = {[0 ... NR_IPIS-1] = -1}; ++#endif + + #ifdef CONFIG_SMP + +@@ -169,8 +205,14 @@ static void bind_evtchn_to_cpu(unsigned + + BUG_ON(!test_bit(chn, s->evtchn_mask)); + +- if (irq != -1) +- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); ++ if (irq != -1) { ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!(desc->status & IRQ_PER_CPU)) ++ cpumask_copy(desc->affinity, cpumask_of(cpu)); ++ else ++ cpumask_set_cpu(cpu, desc->affinity); ++ } + + clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn])); + set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); +@@ -338,7 +380,7 @@ asmlinkage void __irq_entry evtchn_do_up + + static struct irq_chip dynirq_chip; + +-static int find_unbound_irq(unsigned int cpu) ++static int find_unbound_irq(unsigned int cpu, bool percpu) + { + static int warned; + int irq; +@@ -348,10 +390,19 @@ static int find_unbound_irq(unsigned int + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->bindcount) { ++ irq_flow_handler_t handle; ++ const char *name; ++ + desc->status |= IRQ_NOPROBE; ++ if (!percpu) { ++ handle = handle_level_irq; ++ name = "level"; ++ } else { ++ handle = handle_percpu_irq; ++ name = "percpu"; ++ } + set_irq_chip_and_handler_name(irq, &dynirq_chip, +- handle_level_irq, +- "level"); ++ handle, name); + return irq; + } + } +@@ -372,7 +423,7 @@ static int bind_caller_port_to_irq(unsig + spin_lock(&irq_mapping_update_lock); + + if ((irq = evtchn_to_irq[caller_port]) == -1) { +- if ((irq = find_unbound_irq(smp_processor_id())) < 0) ++ if ((irq = find_unbound_irq(smp_processor_id(), false)) < 0) + goto out; + + evtchn_to_irq[caller_port] = irq; +@@ -395,7 +446,7 @@ static int bind_local_port_to_irq(unsign + + BUG_ON(evtchn_to_irq[local_port] != -1); + +- if ((irq = find_unbound_irq(smp_processor_id())) < 0) { ++ if ((irq = find_unbound_irq(smp_processor_id(), false)) < 0) { + struct evtchn_close close = { .port = 
local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + BUG(); +@@ -448,7 +499,7 @@ static int bind_virq_to_irq(unsigned int + spin_lock(&irq_mapping_update_lock); + + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { +- if ((irq = find_unbound_irq(cpu)) < 0) ++ if ((irq = find_unbound_irq(cpu, false)) < 0) + goto out; + + bind_virq.virq = virq; +@@ -473,6 +524,7 @@ static int bind_virq_to_irq(unsigned int + return irq; + } + ++#if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ) + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) + { + struct evtchn_bind_ipi bind_ipi; +@@ -481,7 +533,7 @@ static int bind_ipi_to_irq(unsigned int + spin_lock(&irq_mapping_update_lock); + + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) { +- if ((irq = find_unbound_irq(cpu)) < 0) ++ if ((irq = find_unbound_irq(cpu, false)) < 0) + goto out; + + bind_ipi.vcpu = cpu; +@@ -504,6 +556,7 @@ static int bind_ipi_to_irq(unsigned int + spin_unlock(&irq_mapping_update_lock); + return irq; + } ++#endif + + static void unbind_from_irq(unsigned int irq) + { +@@ -511,6 +564,7 @@ static void unbind_from_irq(unsigned int + unsigned int cpu; + int evtchn = evtchn_from_irq(irq); + ++ BUG_IF_IPI(irq); + spin_lock(&irq_mapping_update_lock); + + if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) { +@@ -524,10 +578,12 @@ static void unbind_from_irq(unsigned int + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; ++#if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ) + case IRQT_IPI: + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; ++#endif + default: + break; + } +@@ -550,6 +606,46 @@ static void unbind_from_irq(unsigned int + spin_unlock(&irq_mapping_update_lock); + } + ++#if defined(CONFIG_SMP) && !defined(PER_CPU_IPI_IRQ) ++void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu) ++{ ++ struct evtchn_close close; ++ int evtchn = evtchn_from_per_cpu_irq(irq, cpu); ++ ++ spin_lock(&irq_mapping_update_lock); ++ ++ if (VALID_EVTCHN(evtchn)) { ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ mask_evtchn(evtchn); ++ ++ BUG_ON(irq_cfg(irq)->bindcount <= 1); ++ irq_cfg(irq)->bindcount--; ++ cpumask_clear_cpu(cpu, desc->affinity); ++ ++ close.port = evtchn; ++ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) ++ BUG(); ++ ++ switch (type_from_irq(irq)) { ++ case IRQT_IPI: ++ per_cpu(ipi_to_evtchn, cpu)[index_from_irq(irq)] = 0; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ ++ /* Closed ports are implicitly re-bound to VCPU0. 
*/ ++ bind_evtchn_to_cpu(evtchn, 0); ++ ++ evtchn_to_irq[evtchn] = -1; ++ } ++ ++ spin_unlock(&irq_mapping_update_lock); ++} ++#endif /* CONFIG_SMP && !PER_CPU_IPI_IRQ */ ++ + int bind_caller_port_to_irqhandler( + unsigned int caller_port, + irq_handler_t handler, +@@ -644,6 +740,8 @@ int bind_virq_to_irqhandler( + } + EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); + ++#ifdef CONFIG_SMP ++#ifdef PER_CPU_IPI_IRQ + int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, +@@ -667,7 +765,71 @@ int bind_ipi_to_irqhandler( + + return irq; + } +-EXPORT_SYMBOL_GPL(bind_ipi_to_irqhandler); ++#else ++int __cpuinit bind_ipi_to_irqaction( ++ unsigned int ipi, ++ unsigned int cpu, ++ struct irqaction *action) ++{ ++ struct evtchn_bind_ipi bind_ipi; ++ int evtchn, irq, retval = 0; ++ ++ spin_lock(&irq_mapping_update_lock); ++ ++ if (VALID_EVTCHN(per_cpu(ipi_to_evtchn, cpu)[ipi])) { ++ spin_unlock(&irq_mapping_update_lock); ++ return -EBUSY; ++ } ++ ++ if ((irq = ipi_to_irq[ipi]) == -1) { ++ if ((irq = find_unbound_irq(cpu, true)) < 0) { ++ spin_unlock(&irq_mapping_update_lock); ++ return irq; ++ } ++ ++ /* Extra reference so count will never drop to zero. */ ++ irq_cfg(irq)->bindcount++; ++ ++ ipi_to_irq[ipi] = irq; ++ irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, 0); ++ retval = 1; ++ } ++ ++ bind_ipi.vcpu = cpu; ++ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, ++ &bind_ipi) != 0) ++ BUG(); ++ ++ evtchn = bind_ipi.port; ++ evtchn_to_irq[evtchn] = irq; ++ per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; ++ ++ bind_evtchn_to_cpu(evtchn, cpu); ++ ++ irq_cfg(irq)->bindcount++; ++ ++ spin_unlock(&irq_mapping_update_lock); ++ ++ if (retval == 0) { ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ unmask_evtchn(evtchn); ++ local_irq_restore(flags); ++ } else { ++ action->flags |= IRQF_PERCPU | IRQF_NO_SUSPEND; ++ retval = setup_irq(irq, action); ++ if (retval) { ++ unbind_from_per_cpu_irq(irq, cpu); ++ BUG_ON(retval > 0); ++ irq = retval; ++ } ++ } ++ ++ return irq; ++} ++#endif /* PER_CPU_IPI_IRQ */ ++#endif /* CONFIG_SMP */ + + void unbind_from_irqhandler(unsigned int irq, void *dev_id) + { +@@ -693,6 +855,7 @@ static void rebind_irq_to_cpu(unsigned i + { + int evtchn = evtchn_from_irq(irq); + ++ BUG_IF_IPI(irq); + if (VALID_EVTCHN(evtchn)) + rebind_evtchn_to_cpu(evtchn, tcpu); + } +@@ -778,6 +941,7 @@ static struct irq_chip dynirq_chip = { + .unmask = unmask_dynirq, + .mask_ack = ack_dynirq, + .ack = ack_dynirq, ++ .eoi = end_dynirq, + .end = end_dynirq, + #ifdef CONFIG_SMP + .set_affinity = set_affinity_irq, +@@ -957,10 +1121,21 @@ int irq_ignore_unhandled(unsigned int ir + return !!(irq_status.flags & XENIRQSTAT_shared); + } + ++#if defined(CONFIG_SMP) && !defined(PER_CPU_IPI_IRQ) ++void notify_remote_via_ipi(unsigned int ipi, unsigned int cpu) ++{ ++ int evtchn = evtchn_from_per_cpu_irq(ipi_to_irq[ipi], cpu); ++ ++ if (VALID_EVTCHN(evtchn)) ++ notify_remote_via_evtchn(evtchn); ++} ++#endif ++ + void notify_remote_via_irq(int irq) + { + int evtchn = evtchn_from_irq(irq); + ++ BUG_IF_IPI(irq); + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); + } +@@ -968,6 +1143,7 @@ EXPORT_SYMBOL_GPL(notify_remote_via_irq) + + int irq_to_evtchn_port(int irq) + { ++ BUG_IF_IPI(irq); + return evtchn_from_irq(irq); + } + EXPORT_SYMBOL_GPL(irq_to_evtchn_port); +@@ -1083,11 +1259,17 @@ static void restore_cpu_virqs(unsigned i + + static void restore_cpu_ipis(unsigned int cpu) + { ++#ifdef CONFIG_SMP + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < NR_IPIS; 
ipi++) { ++#ifdef PER_CPU_IPI_IRQ + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) ++#else ++ if ((irq = ipi_to_irq[ipi]) == -1 ++ || !VALID_EVTCHN(per_cpu(ipi_to_evtchn, cpu)[ipi])) ++#endif + continue; + + BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_IPI, ipi, 0)); +@@ -1101,13 +1283,18 @@ static void restore_cpu_ipis(unsigned in + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; ++#ifdef PER_CPU_IPI_IRQ + irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn); ++#else ++ per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; ++#endif + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) + unmask_evtchn(evtchn); + } ++#endif + } + + static int evtchn_resume(struct sys_device *dev) +--- head-2010-01-18.orig/drivers/xen/core/smpboot.c 2009-11-06 11:09:19.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/smpboot.c 2009-11-06 11:10:20.000000000 +0100 +@@ -40,14 +40,10 @@ cpumask_var_t vcpu_initialized_mask; + DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info); + EXPORT_PER_CPU_SYMBOL(cpu_info); + +-static DEFINE_PER_CPU(int, resched_irq); +-static DEFINE_PER_CPU(int, callfunc_irq); +-static DEFINE_PER_CPU(int, call1func_irq); +-static DEFINE_PER_CPU(int, reboot_irq); +-static char resched_name[NR_CPUS][15]; +-static char callfunc_name[NR_CPUS][15]; +-static char call1func_name[NR_CPUS][15]; +-static char reboot_name[NR_CPUS][15]; ++static int __read_mostly resched_irq = -1; ++static int __read_mostly callfunc_irq = -1; ++static int __read_mostly call1func_irq = -1; ++static int __read_mostly reboot_irq = -1; + + #ifdef CONFIG_X86_LOCAL_APIC + #define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid)) +@@ -101,58 +97,68 @@ remove_siblinginfo(unsigned int cpu) + + static int __cpuinit xen_smp_intr_init(unsigned int cpu) + { ++ static struct irqaction resched_action = { ++ .handler = smp_reschedule_interrupt, ++ .flags = IRQF_DISABLED, ++ .name = "resched" ++ }, callfunc_action = { ++ .handler = smp_call_function_interrupt, ++ .flags = IRQF_DISABLED, ++ .name = "callfunc" ++ }, call1func_action = { ++ .handler = smp_call_function_single_interrupt, ++ .flags = IRQF_DISABLED, ++ .name = "call1func" ++ }, reboot_action = { ++ .handler = smp_reboot_interrupt, ++ .flags = IRQF_DISABLED, ++ .name = "reboot" ++ }; + int rc; + +- per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = +- per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1; +- +- sprintf(resched_name[cpu], "resched%u", cpu); +- rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, +- cpu, +- smp_reschedule_interrupt, +- IRQF_DISABLED|IRQF_NOBALANCING, +- resched_name[cpu], +- NULL); ++ rc = bind_ipi_to_irqaction(RESCHEDULE_VECTOR, ++ cpu, ++ &resched_action); + if (rc < 0) +- goto fail; +- per_cpu(resched_irq, cpu) = rc; +- +- sprintf(callfunc_name[cpu], "callfunc%u", cpu); +- rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, +- cpu, +- smp_call_function_interrupt, +- IRQF_DISABLED|IRQF_NOBALANCING, +- callfunc_name[cpu], +- NULL); ++ return rc; ++ if (resched_irq < 0) ++ resched_irq = rc; ++ else ++ BUG_ON(resched_irq != rc); ++ ++ rc = bind_ipi_to_irqaction(CALL_FUNCTION_VECTOR, ++ cpu, ++ &callfunc_action); + if (rc < 0) +- goto fail; +- per_cpu(callfunc_irq, cpu) = rc; +- +- sprintf(call1func_name[cpu], "call1func%u", cpu); +- rc = bind_ipi_to_irqhandler(CALL_FUNC_SINGLE_VECTOR, +- cpu, +- smp_call_function_single_interrupt, +- IRQF_DISABLED|IRQF_NOBALANCING, +- call1func_name[cpu], +- NULL); ++ goto unbind_resched; ++ if (callfunc_irq < 0) ++ 
callfunc_irq = rc; ++ else ++ BUG_ON(callfunc_irq != rc); ++ ++ rc = bind_ipi_to_irqaction(CALL_FUNC_SINGLE_VECTOR, ++ cpu, ++ &call1func_action); + if (rc < 0) +- goto fail; +- per_cpu(call1func_irq, cpu) = rc; +- +- sprintf(reboot_name[cpu], "reboot%u", cpu); +- rc = bind_ipi_to_irqhandler(REBOOT_VECTOR, +- cpu, +- smp_reboot_interrupt, +- IRQF_DISABLED|IRQF_NOBALANCING, +- reboot_name[cpu], +- NULL); ++ goto unbind_call; ++ if (call1func_irq < 0) ++ call1func_irq = rc; ++ else ++ BUG_ON(call1func_irq != rc); ++ ++ rc = bind_ipi_to_irqaction(REBOOT_VECTOR, ++ cpu, ++ &reboot_action); + if (rc < 0) +- goto fail; +- per_cpu(reboot_irq, cpu) = rc; ++ goto unbind_call1; ++ if (reboot_irq < 0) ++ reboot_irq = rc; ++ else ++ BUG_ON(reboot_irq != rc); + + rc = xen_spinlock_init(cpu); + if (rc < 0) +- goto fail; ++ goto unbind_reboot; + + if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0)) + goto fail; +@@ -160,15 +166,15 @@ static int __cpuinit xen_smp_intr_init(u + return 0; + + fail: +- if (per_cpu(resched_irq, cpu) >= 0) +- unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); +- if (per_cpu(callfunc_irq, cpu) >= 0) +- unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); +- if (per_cpu(call1func_irq, cpu) >= 0) +- unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL); +- if (per_cpu(reboot_irq, cpu) >= 0) +- unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL); + xen_spinlock_cleanup(cpu); ++ unbind_reboot: ++ unbind_from_per_cpu_irq(reboot_irq, cpu); ++ unbind_call1: ++ unbind_from_per_cpu_irq(call1func_irq, cpu); ++ unbind_call: ++ unbind_from_per_cpu_irq(callfunc_irq, cpu); ++ unbind_resched: ++ unbind_from_per_cpu_irq(resched_irq, cpu); + return rc; + } + +@@ -178,10 +184,10 @@ static void __cpuinit xen_smp_intr_exit( + if (cpu != 0) + local_teardown_timer(cpu); + +- unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); +- unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); +- unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL); +- unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL); ++ unbind_from_per_cpu_irq(resched_irq, cpu); ++ unbind_from_per_cpu_irq(callfunc_irq, cpu); ++ unbind_from_per_cpu_irq(call1func_irq, cpu); ++ unbind_from_per_cpu_irq(reboot_irq, cpu); + xen_spinlock_cleanup(cpu); + } + #endif +--- head-2010-01-18.orig/drivers/xen/core/spinlock.c 2010-01-18 16:53:32.000000000 +0100 ++++ head-2010-01-18/drivers/xen/core/spinlock.c 2010-01-18 17:11:17.000000000 +0100 +@@ -14,8 +14,7 @@ + + #ifdef TICKET_SHIFT + +-static DEFINE_PER_CPU(int, spinlock_irq) = -1; +-static char spinlock_name[NR_CPUS][15]; ++static int __read_mostly spinlock_irq = -1; + + struct spinning { + raw_spinlock_t *lock; +@@ -32,34 +31,36 @@ static DEFINE_PER_CPU(raw_rwlock_t, spin + + int __cpuinit xen_spinlock_init(unsigned int cpu) + { ++ static struct irqaction spinlock_action = { ++ .handler = smp_reschedule_interrupt, ++ .flags = IRQF_DISABLED, ++ .name = "spinlock" ++ }; + int rc; + +- sprintf(spinlock_name[cpu], "spinlock%u", cpu); +- rc = bind_ipi_to_irqhandler(SPIN_UNLOCK_VECTOR, +- cpu, +- smp_reschedule_interrupt, +- IRQF_DISABLED|IRQF_NOBALANCING, +- spinlock_name[cpu], +- NULL); ++ rc = bind_ipi_to_irqaction(SPIN_UNLOCK_VECTOR, ++ cpu, ++ &spinlock_action); + if (rc < 0) + return rc; + +- disable_irq(rc); /* make sure it's never delivered */ +- per_cpu(spinlock_irq, cpu) = rc; ++ if (spinlock_irq < 0) { ++ disable_irq(rc); /* make sure it's never delivered */ ++ spinlock_irq = rc; ++ } else ++ BUG_ON(spinlock_irq != rc); + + return 0; + } + + void __cpuinit 
xen_spinlock_cleanup(unsigned int cpu) + { +- if (per_cpu(spinlock_irq, cpu) >= 0) +- unbind_from_irqhandler(per_cpu(spinlock_irq, cpu), NULL); +- per_cpu(spinlock_irq, cpu) = -1; ++ unbind_from_per_cpu_irq(spinlock_irq, cpu); + } + + int xen_spin_wait(raw_spinlock_t *lock, unsigned int token) + { +- int rc = 0, irq = percpu_read(spinlock_irq); ++ int rc = 0, irq = spinlock_irq; + raw_rwlock_t *rm_lock; + unsigned long flags; + struct spinning spinning; +@@ -133,7 +134,7 @@ unsigned int xen_spin_adjust(raw_spinloc + int xen_spin_wait_flags(raw_spinlock_t *lock, unsigned int *ptok, + unsigned int flags) + { +- int rc = 0, irq = __get_cpu_var(spinlock_irq); ++ int rc = 0, irq = spinlock_irq; + raw_rwlock_t *rm_lock; + struct spinning spinning, *nested; + +@@ -221,7 +222,7 @@ void xen_spin_kick(raw_spinlock_t *lock, + raw_local_irq_restore(flags); + + if (unlikely(spinning)) { +- notify_remote_via_irq(per_cpu(spinlock_irq, cpu)); ++ notify_remote_via_ipi(SPIN_UNLOCK_VECTOR, cpu); + return; + } + } +--- head-2010-01-18.orig/include/xen/evtchn.h 2009-12-18 10:13:12.000000000 +0100 ++++ head-2010-01-18/include/xen/evtchn.h 2009-12-18 10:13:26.000000000 +0100 +@@ -92,6 +92,8 @@ int bind_virq_to_irqhandler( + unsigned long irqflags, + const char *devname, + void *dev_id); ++#if defined(CONFIG_SMP) && !defined(MODULE) ++#ifndef CONFIG_X86 + int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, +@@ -99,6 +101,13 @@ int bind_ipi_to_irqhandler( + unsigned long irqflags, + const char *devname, + void *dev_id); ++#else ++int bind_ipi_to_irqaction( ++ unsigned int ipi, ++ unsigned int cpu, ++ struct irqaction *action); ++#endif ++#endif + + /* + * Common unbind function for all event sources. Takes IRQ to unbind from. +@@ -107,6 +116,11 @@ int bind_ipi_to_irqhandler( + */ + void unbind_from_irqhandler(unsigned int irq, void *dev_id); + ++#if defined(CONFIG_SMP) && !defined(MODULE) && defined(CONFIG_X86) ++/* Specialized unbind function for per-CPU IRQs. */ ++void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu); ++#endif ++ + #ifndef CONFIG_XEN + void irq_resume(void); + #endif +@@ -184,5 +198,9 @@ void xen_poll_irq(int irq); + void notify_remote_via_irq(int irq); + int irq_to_evtchn_port(int irq); + ++#if defined(CONFIG_SMP) && !defined(MODULE) && defined(CONFIG_X86) ++void notify_remote_via_ipi(unsigned int ipi, unsigned int cpu); ++#endif ++ + #endif /* __ASM_EVTCHN_H__ */ + #endif /* CONFIG_PARAVIRT_XEN */ --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-x86-per-cpu-vcpu-info +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-x86-per-cpu-vcpu-info @@ -0,0 +1,611 @@ +From: jbeulich@novell.com +Subject: x86: use per-cpu storage for shared vcpu_info structure +Patch-mainline: obsolete + +... reducing access code size and latency, as well as being the +prerequisite for removing the limitation on 32 vCPU-s per guest. 
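The mechanism underlying all of the following hunks is a single registration hypercall: the guest places a struct vcpu_info in per-CPU storage and asks Xen to maintain it there from then on. A minimal sketch, condensed from this patch's own setup_vcpu_info() — the x86-64 early-boot path via early_arbitrary_virt_to_mfn() and the boot-CPU MFN swap in adjust_boot_vcpu_info() are left out here:

static DEFINE_PER_CPU(struct vcpu_info, vcpu_info)
	__aligned(sizeof(struct vcpu_info));	/* keep it within one page */

void __ref setup_vcpu_info(unsigned int cpu)
{
	struct vcpu_info *v = &per_cpu(vcpu_info, cpu);
	struct vcpu_register_vcpu_info info;

	/* Hand Xen the machine frame and in-page offset of the copy. */
	info.mfn = arbitrary_virt_to_mfn(v);
	info.offset = offset_in_page(v);

	/* No fallback: the accessor macros assume this succeeded. */
	if (HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info))
		BUG();
}

Once registered, field accesses go through percpu_read()/percpu_write() and compile to a single segment-relative instruction, which is where the access code-size and latency reduction cited in the description above comes from.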
+ +--- head-2010-01-04.orig/arch/x86/include/asm/percpu.h 2010-01-05 16:46:11.000000000 +0100 ++++ head-2010-01-04/arch/x86/include/asm/percpu.h 2009-11-06 11:12:01.000000000 +0100 +@@ -133,6 +133,38 @@ do { \ + ret__; \ + }) + ++#define percpu_xchg_op(op, var, val) \ ++({ \ ++ typedef typeof(var) T__; \ ++ T__ ret__; \ ++ if (0) \ ++ ret__ = (val); \ ++ switch (sizeof(var)) { \ ++ case 1: \ ++ asm(op "b %0,"__percpu_arg(1) \ ++ : "=q" (ret__), "+m" (var) \ ++ : "0" ((T__)(val))); \ ++ break; \ ++ case 2: \ ++ asm(op "w %0,"__percpu_arg(1) \ ++ : "=r" (ret__), "+m" (var) \ ++ : "0" ((T__)(val))); \ ++ break; \ ++ case 4: \ ++ asm(op "l %0,"__percpu_arg(1) \ ++ : "=r" (ret__), "+m" (var) \ ++ : "0" ((T__)(val))); \ ++ break; \ ++ case 8: \ ++ asm(op "q %0,"__percpu_arg(1) \ ++ : "=r" (ret__), "+m" (var) \ ++ : "0" ((T__)(val))); \ ++ break; \ ++ default: __bad_percpu_size(); \ ++ } \ ++ ret__; \ ++}) ++ + /* + * percpu_read() makes gcc load the percpu variable every time it is + * accessed while percpu_read_stable() allows the value to be cached. +@@ -152,6 +184,10 @@ do { \ + #define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) + #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) + #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) ++#define percpu_xchg(var, val) percpu_xchg_op("xchg", per_cpu__##var, val) ++#if defined(CONFIG_X86_XADD) || defined(CONFIG_X86_64) ++#define percpu_xadd(var, val) percpu_xchg_op("xadd", per_cpu__##var, val) ++#endif + + /* This is not atomic against other CPUs -- CPU preemption needs to be off */ + #define x86_test_and_clear_bit_percpu(bit, var) \ +--- head-2010-01-04.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:49:39.000000000 +0100 ++++ head-2010-01-04/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:53:45.000000000 +0100 +@@ -50,12 +50,26 @@ + + extern shared_info_t *HYPERVISOR_shared_info; + ++#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT ++DECLARE_PER_CPU(struct vcpu_info, vcpu_info); ++#define vcpu_info(cpu) (&per_cpu(vcpu_info, cpu)) ++#define current_vcpu_info() (&__get_cpu_var(vcpu_info)) ++#define vcpu_info_read(fld) percpu_read(vcpu_info.fld) ++#define vcpu_info_write(fld, val) percpu_write(vcpu_info.fld, val) ++#define vcpu_info_xchg(fld, val) percpu_xchg(vcpu_info.fld, val) ++void setup_vcpu_info(unsigned int cpu); ++void adjust_boot_vcpu_info(void); ++#else + #define vcpu_info(cpu) (HYPERVISOR_shared_info->vcpu_info + (cpu)) + #ifdef CONFIG_SMP + #define current_vcpu_info() vcpu_info(smp_processor_id()) + #else + #define current_vcpu_info() vcpu_info(0) + #endif ++#define vcpu_info_read(fld) (current_vcpu_info()->fld) ++#define vcpu_info_write(fld, val) (current_vcpu_info()->fld = (val)) ++static inline void setup_vcpu_info(unsigned int cpu) {} ++#endif + + #ifdef CONFIG_X86_32 + extern unsigned long hypervisor_virt_start; +--- head-2010-01-04.orig/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:52:22.000000000 +0100 ++++ head-2010-01-04/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 11:12:01.000000000 +0100 +@@ -12,7 +12,7 @@ + * includes these barriers, for example. 
+ */ + +-#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask) ++#define xen_save_fl(void) vcpu_info_read(evtchn_upcall_mask) + + #define xen_restore_fl(f) \ + do { \ +@@ -28,7 +28,7 @@ do { \ + + #define xen_irq_disable() \ + do { \ +- current_vcpu_info()->evtchn_upcall_mask = 1; \ ++ vcpu_info_write(evtchn_upcall_mask, 1); \ + barrier(); \ + } while (0) + +@@ -90,8 +90,6 @@ static inline void halt(void) + #define evtchn_upcall_pending /* 0 */ + #define evtchn_upcall_mask 1 + +-#define sizeof_vcpu_shift 6 +- + #ifdef CONFIG_X86_64 + # define __REG_si %rsi + # define __CPU_num PER_CPU_VAR(cpu_number) +@@ -100,6 +98,22 @@ static inline void halt(void) + # define __CPU_num TI_cpu(%ebp) + #endif + ++#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT ++ ++#define GET_VCPU_INFO PER_CPU(vcpu_info, __REG_si) ++#define __DISABLE_INTERRUPTS movb $1,PER_CPU_VAR(vcpu_info+evtchn_upcall_mask) ++#define __ENABLE_INTERRUPTS movb $0,PER_CPU_VAR(vcpu_info+evtchn_upcall_mask) ++#define __TEST_PENDING cmpb $0,PER_CPU_VAR(vcpu_info+evtchn_upcall_pending+0) ++#define DISABLE_INTERRUPTS(clb) __DISABLE_INTERRUPTS ++#define ENABLE_INTERRUPTS(clb) __ENABLE_INTERRUPTS ++ ++#define __SIZEOF_DISABLE_INTERRUPTS 8 ++#define __SIZEOF_TEST_PENDING 8 ++ ++#else /* CONFIG_XEN_VCPU_INFO_PLACEMENT */ ++ ++#define sizeof_vcpu_shift 6 ++ + #ifdef CONFIG_SMP + #define GET_VCPU_INFO movl __CPU_num,%esi ; \ + shl $sizeof_vcpu_shift,%esi ; \ +@@ -116,15 +130,21 @@ static inline void halt(void) + #define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ + __ENABLE_INTERRUPTS + ++#define __SIZEOF_DISABLE_INTERRUPTS 4 ++#define __SIZEOF_TEST_PENDING 3 ++ ++#endif /* CONFIG_XEN_VCPU_INFO_PLACEMENT */ ++ + #ifndef CONFIG_X86_64 + #define INTERRUPT_RETURN iret +-#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ ++#define ENABLE_INTERRUPTS_SYSEXIT \ ++ movb $0,evtchn_upcall_mask(%esi) /* __ENABLE_INTERRUPTS */ ; \ + sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ +- __TEST_PENDING ; \ ++ cmpb $0,evtchn_upcall_pending(%esi) /* __TEST_PENDING */ ; \ + jnz 14f /* process more events if necessary... */ ; \ + movl PT_ESI(%esp), %esi ; \ + sysexit ; \ +-14: __DISABLE_INTERRUPTS ; \ ++14: movb $1,evtchn_upcall_mask(%esi) /* __DISABLE_INTERRUPTS */ ; \ + TRACE_IRQS_OFF ; \ + sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ + mov $__KERNEL_PERCPU, %ecx ; \ +--- head-2010-01-04.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-10-13 17:22:09.000000000 +0200 ++++ head-2010-01-04/arch/x86/include/mach-xen/asm/pgtable_64.h 2009-11-06 11:12:01.000000000 +0100 +@@ -117,6 +117,8 @@ static inline void xen_set_pgd(pgd_t *pg + + #define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT) + ++extern unsigned long early_arbitrary_virt_to_mfn(void *va); ++ + /* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. 
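The system.h hunk below moves the cr2 shadow over to the new accessors. For orientation, this is the accessor pair introduced by the hypervisor.h hunk earlier in this patch, shown together with its effect — a restatement of definitions already present above, not new code:

#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT
/* one %fs-/%gs-relative access into the per-CPU copy */
#define vcpu_info_read(fld)		percpu_read(vcpu_info.fld)
#define vcpu_info_write(fld, val)	percpu_write(vcpu_info.fld, val)
#else
/* fallback: index into the shared-info page */
#define vcpu_info_read(fld)		(current_vcpu_info()->fld)
#define vcpu_info_write(fld, val)	(current_vcpu_info()->fld = (val))
#endif

/* which turns the cr2 shadow accessors into, e.g.: */
#define xen_read_cr2()		vcpu_info_read(arch.cr2)
#define xen_write_cr2(val)	vcpu_info_write(arch.cr2, val)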
+--- head-2010-01-04.orig/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:52:22.000000000 +0100 ++++ head-2010-01-04/arch/x86/include/mach-xen/asm/system.h 2009-11-06 11:12:01.000000000 +0100 +@@ -233,8 +233,8 @@ static inline void xen_write_cr0(unsigne + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); + } + +-#define xen_read_cr2() (current_vcpu_info()->arch.cr2) +-#define xen_write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val))) ++#define xen_read_cr2() vcpu_info_read(arch.cr2) ++#define xen_write_cr2(val) vcpu_info_write(arch.cr2, val) + + static inline unsigned long xen_read_cr3(void) + { +--- head-2010-01-04.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:52:22.000000000 +0100 ++++ head-2010-01-04/arch/x86/kernel/cpu/common-xen.c 2009-11-06 11:12:01.000000000 +0100 +@@ -335,8 +335,16 @@ static const char *__cpuinit table_looku + __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; + __u32 cpu_caps_set[NCAPINTS] __cpuinitdata; + +-void load_percpu_segment(int cpu) ++void __ref load_percpu_segment(int cpu) + { ++#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT ++ static bool done; ++ ++ if (!done) { ++ done = true; ++ adjust_boot_vcpu_info(); ++ } ++#endif + #ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); + #else +--- head-2010-01-04.orig/arch/x86/kernel/entry_32-xen.S 2009-10-13 17:01:47.000000000 +0200 ++++ head-2010-01-04/arch/x86/kernel/entry_32-xen.S 2009-11-06 11:12:01.000000000 +0100 +@@ -463,6 +463,9 @@ sysenter_exit: + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp ++#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT ++ GET_VCPU_INFO ++#endif + TRACE_IRQS_ON + 1: mov PT_FS(%esp), %fs + PTGS_TO_GS +@@ -975,7 +978,9 @@ critical_region_fixup: + + .section .rodata,"a" + critical_fixup_table: +- .byte -1,-1,-1 # testb $0xff,(%esi) = __TEST_PENDING ++ .rept __SIZEOF_TEST_PENDING ++ .byte -1 ++ .endr + .byte -1,-1 # jnz 14f + .byte 0 # pop %ebx + .byte 1 # pop %ecx +@@ -994,7 +999,9 @@ critical_fixup_table: + .byte 10,10,10 # add $8,%esp + #endif + .byte 12 # iret +- .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS ++ .rept __SIZEOF_DISABLE_INTERRUPTS ++ .byte -1 ++ .endr + .previous + + # Hypervisor uses this for application faults while it executes. +--- head-2010-01-04.orig/arch/x86/kernel/head-xen.c 2009-10-15 15:32:46.000000000 +0200 ++++ head-2010-01-04/arch/x86/kernel/head-xen.c 2009-11-06 11:12:01.000000000 +0100 +@@ -151,6 +151,8 @@ void __init xen_start_kernel(void) + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); + ++ setup_vcpu_info(0); ++ + /* Set up mapping of lowest 1MB of physical memory. 
*/ + for (i = 0; i < NR_FIX_ISAMAPS; i++) + if (is_initial_xendomain()) +--- head-2010-01-04.orig/arch/x86/kernel/time-xen.c 2009-11-23 10:49:09.000000000 +0100 ++++ head-2010-01-04/arch/x86/kernel/time-xen.c 2009-11-23 10:53:51.000000000 +0100 +@@ -267,16 +267,10 @@ static void get_time_values_from_xen(uns + local_irq_restore(flags); + } + +-static inline int time_values_up_to_date(unsigned int cpu) ++static inline int time_values_up_to_date(void) + { +- struct vcpu_time_info *src; +- struct shadow_time_info *dst; +- +- src = &vcpu_info(cpu)->time; +- dst = &per_cpu(shadow_time, cpu); +- + rmb(); +- return (dst->version == src->version); ++ return percpu_read(shadow_time.version) == vcpu_info_read(time.version); + } + + static void sync_xen_wallclock(unsigned long dummy); +@@ -322,7 +316,7 @@ static unsigned long long local_clock(vo + local_time_version = shadow->version; + rdtsc_barrier(); + time = shadow->system_timestamp + get_nsec_offset(shadow); +- if (!time_values_up_to_date(cpu)) ++ if (!time_values_up_to_date()) + get_time_values_from_xen(cpu); + barrier(); + } while (local_time_version != shadow->version); +@@ -446,7 +440,7 @@ irqreturn_t timer_interrupt(int irq, voi + delta_cpu -= per_cpu(processed_system_time, cpu); + + get_runstate_snapshot(&runstate); +- } while (!time_values_up_to_date(cpu)); ++ } while (!time_values_up_to_date()); + + if ((unlikely(delta < -(s64)permitted_clock_jitter) || + unlikely(delta_cpu < -(s64)permitted_clock_jitter)) +--- head-2010-01-04.orig/arch/x86/mm/hypervisor.c 2009-12-11 15:27:37.000000000 +0100 ++++ head-2010-01-04/arch/x86/mm/hypervisor.c 2010-01-05 16:47:18.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -50,7 +51,105 @@ + EXPORT_SYMBOL(hypercall_page); + + shared_info_t *__read_mostly HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; ++#ifndef CONFIG_XEN_VCPU_INFO_PLACEMENT + EXPORT_SYMBOL(HYPERVISOR_shared_info); ++#else ++DEFINE_PER_CPU(struct vcpu_info, vcpu_info) __aligned(sizeof(struct vcpu_info)); ++EXPORT_PER_CPU_SYMBOL(vcpu_info); ++ ++void __ref setup_vcpu_info(unsigned int cpu) ++{ ++ struct vcpu_info *v = &per_cpu(vcpu_info, cpu); ++ struct vcpu_register_vcpu_info info; ++#ifdef CONFIG_X86_64 ++ static bool first = true; ++ ++ if (first) { ++ first = false; ++ info.mfn = early_arbitrary_virt_to_mfn(v); ++ } else ++#endif ++ info.mfn = arbitrary_virt_to_mfn(v); ++ info.offset = offset_in_page(v); ++ ++ if (HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, ++ cpu, &info)) ++ BUG(); ++} ++ ++void __init adjust_boot_vcpu_info(void) ++{ ++ unsigned long lpfn, rpfn, lmfn, rmfn; ++ pte_t *lpte, *rpte; ++ unsigned int level; ++ mmu_update_t mmu[2]; ++ ++ /* ++ * setup_vcpu_info() cannot be used more than once for a given (v)CPU, ++ * hence we must swap the underlying MFNs of the two pages holding old ++ * and new vcpu_info of the boot CPU. ++ * ++ * Do *not* use __get_cpu_var() or percpu_{write,...}() here, as the per- ++ * CPU segment didn't get reloaded yet. Using percpu_read(), as in ++ * arch_use_lazy_mmu_mode(), though undesirable, is safe except for the ++ * accesses to variables that were updated in setup_percpu_areas(). 
++ */ ++ lpte = lookup_address((unsigned long)&per_cpu_var(vcpu_info) ++ + (__per_cpu_load - __per_cpu_start), ++ &level); ++ rpte = lookup_address((unsigned long)&per_cpu(vcpu_info, 0), &level); ++ BUG_ON(!lpte || !(pte_flags(*lpte) & _PAGE_PRESENT)); ++ BUG_ON(!rpte || !(pte_flags(*rpte) & _PAGE_PRESENT)); ++ lmfn = __pte_mfn(*lpte); ++ rmfn = __pte_mfn(*rpte); ++ ++ if (lmfn == rmfn) ++ return; ++ ++ lpfn = mfn_to_local_pfn(lmfn); ++ rpfn = mfn_to_local_pfn(rmfn); ++ ++ printk(KERN_INFO ++ "Swapping MFNs for PFN %lx and %lx (MFN %lx and %lx)\n", ++ lpfn, rpfn, lmfn, rmfn); ++ ++ xen_l1_entry_update(lpte, pfn_pte_ma(rmfn, pte_pgprot(*lpte))); ++ xen_l1_entry_update(rpte, pfn_pte_ma(lmfn, pte_pgprot(*rpte))); ++#ifdef CONFIG_X86_64 ++ if (HYPERVISOR_update_va_mapping((unsigned long)__va(lpfn<> PAGE_SHIFT; ++} ++ + #ifndef CONFIG_XEN + static int __init parse_direct_gbpages_off(char *arg) + { +--- head-2010-01-04.orig/drivers/xen/Kconfig 2009-12-18 12:31:08.000000000 +0100 ++++ head-2010-01-04/drivers/xen/Kconfig 2009-12-18 12:31:21.000000000 +0100 +@@ -365,6 +365,18 @@ config XEN_COMPAT + default 0x030002 if XEN_COMPAT_030002_AND_LATER + default 0 + ++config XEN_VCPU_INFO_PLACEMENT ++ bool "Place shared vCPU info in per-CPU storage" ++# depends on X86 && (XEN_COMPAT >= 0x00030101) ++ depends on X86 ++ depends on !XEN_COMPAT_030002_AND_LATER ++ depends on !XEN_COMPAT_030004_AND_LATER ++ depends on !XEN_COMPAT_030100_AND_LATER ++ default SMP ++ ---help--- ++ This allows faster access to the per-vCPU shared info ++ structure. ++ + endmenu + + config HAVE_IRQ_IGNORE_UNHANDLED +--- head-2010-01-04.orig/drivers/xen/core/evtchn.c 2009-11-06 11:10:30.000000000 +0100 ++++ head-2010-01-04/drivers/xen/core/evtchn.c 2009-11-06 11:12:01.000000000 +0100 +@@ -316,6 +316,24 @@ static DEFINE_PER_CPU(unsigned int, upca + static DEFINE_PER_CPU(unsigned int, last_processed_l1i) = { BITS_PER_LONG - 1 }; + static DEFINE_PER_CPU(unsigned int, last_processed_l2i) = { BITS_PER_LONG - 1 }; + ++#ifndef vcpu_info_xchg ++#define vcpu_info_xchg(fld, val) xchg(¤t_vcpu_info()->fld, val) ++#endif ++ ++#ifndef percpu_xadd ++#define percpu_xadd(var, val) \ ++({ \ ++ typeof(per_cpu_var(var)) __tmp_var__; \ ++ unsigned long flags; \ ++ local_irq_save(flags); \ ++ __tmp_var__ = get_cpu_var(var); \ ++ __get_cpu_var(var) += (val); \ ++ put_cpu_var(var); \ ++ local_irq_restore(flags); \ ++ __tmp_var__; \ ++}) ++#endif ++ + /* NB. Interrupts are disabled on entry. */ + asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs) + { +@@ -324,25 +342,25 @@ asmlinkage void __irq_entry evtchn_do_up + unsigned long masked_l1, masked_l2; + unsigned int l1i, l2i, port, count; + int irq; +- vcpu_info_t *vcpu_info = current_vcpu_info(); + + exit_idle(); + irq_enter(); + + do { + /* Avoid a callback storm when we reenable delivery. */ +- vcpu_info->evtchn_upcall_pending = 0; ++ vcpu_info_write(evtchn_upcall_pending, 0); + + /* Nested invocations bail immediately. */ +- percpu_add(upcall_count, 1); +- if (unlikely(percpu_read(upcall_count) != 1)) ++ if (unlikely(percpu_xadd(upcall_count, 1))) + break; + + #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. 
*/ + wmb(); ++#else ++ barrier(); + #endif +- l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); ++ l1 = vcpu_info_xchg(evtchn_pending_sel, 0); + + l1i = percpu_read(last_processed_l1i); + l2i = percpu_read(last_processed_l2i); +@@ -1363,7 +1381,6 @@ void unmask_evtchn(int port) + { + shared_info_t *s = HYPERVISOR_shared_info; + unsigned int cpu = smp_processor_id(); +- vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + BUG_ON(!irqs_disabled()); + +@@ -1377,10 +1394,13 @@ void unmask_evtchn(int port) + synch_clear_bit(port, s->evtchn_mask); + + /* Did we miss an interrupt 'edge'? Re-fire if so. */ +- if (synch_test_bit(port, s->evtchn_pending) && +- !synch_test_and_set_bit(port / BITS_PER_LONG, +- &vcpu_info->evtchn_pending_sel)) +- vcpu_info->evtchn_upcall_pending = 1; ++ if (synch_test_bit(port, s->evtchn_pending)) { ++ vcpu_info_t *vcpu_info = current_vcpu_info(); ++ ++ if (!synch_test_and_set_bit(port / BITS_PER_LONG, ++ &vcpu_info->evtchn_pending_sel)) ++ vcpu_info->evtchn_upcall_pending = 1; ++ } + } + EXPORT_SYMBOL_GPL(unmask_evtchn); + +--- head-2010-01-04.orig/drivers/xen/core/machine_reboot.c 2009-12-18 14:15:04.000000000 +0100 ++++ head-2010-01-04/drivers/xen/core/machine_reboot.c 2009-12-18 14:15:17.000000000 +0100 +@@ -73,7 +73,7 @@ static void pre_suspend(void) + mfn_to_pfn(xen_start_info->console.domU.mfn); + } + +-static void post_suspend(int suspend_cancelled) ++static void post_suspend(int suspend_cancelled, int fast_suspend) + { + int i, j, k, fpp; + unsigned long shinfo_mfn; +@@ -90,8 +90,21 @@ static void post_suspend(int suspend_can + #ifdef CONFIG_SMP + cpumask_copy(vcpu_initialized_mask, cpu_online_mask); + #endif +- for_each_possible_cpu(i) ++ for_each_possible_cpu(i) { + setup_runstate_area(i); ++ ++#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT ++ if (fast_suspend && i != smp_processor_id() ++ && HYPERVISOR_vcpu_op(VCPUOP_down, i, NULL)) ++ BUG(); ++ ++ setup_vcpu_info(i); ++ ++ if (fast_suspend && i != smp_processor_id() ++ && HYPERVISOR_vcpu_op(VCPUOP_up, i, NULL)) ++ BUG(); ++#endif ++ } + } + + shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT; +@@ -133,7 +146,7 @@ static void post_suspend(int suspend_can + #define switch_idle_mm() ((void)0) + #define mm_pin_all() ((void)0) + #define pre_suspend() xen_pre_suspend() +-#define post_suspend(x) xen_post_suspend(x) ++#define post_suspend(x, f) xen_post_suspend(x) + + #endif + +@@ -164,7 +177,7 @@ static int take_machine_down(void *_susp + BUG_ON(suspend_cancelled > 0); + suspend->resume_notifier(suspend_cancelled); + if (suspend_cancelled >= 0) { +- post_suspend(suspend_cancelled); ++ post_suspend(suspend_cancelled, suspend->fast_suspend); + sysdev_resume(); + } + if (!suspend_cancelled) { +--- head-2010-01-04.orig/drivers/xen/core/smpboot.c 2009-10-15 11:12:11.000000000 +0200 ++++ head-2010-01-04/drivers/xen/core/smpboot.c 2009-11-06 11:12:01.000000000 +0100 +@@ -361,8 +361,13 @@ void __init smp_prepare_cpus(unsigned in + + void __init smp_prepare_boot_cpu(void) + { ++ unsigned int cpu; ++ + switch_to_new_gdt(smp_processor_id()); + prefill_possible_map(); ++ for_each_possible_cpu(cpu) ++ if (cpu != smp_processor_id()) ++ setup_vcpu_info(cpu); + } + + #ifdef CONFIG_HOTPLUG_CPU --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.3-4 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.3-4 @@ -0,0 +1,61 @@ +From: Greg Kroah-Hartman +Subject: Linux 2.6.32.4 +Patch-mainline: 2.6.32.4 + +Signed-off-by: Greg Kroah-Hartman + +Automatically created from "patches.kernel.org/patch-2.6.32.3-4" by 
xen-port-patches.py + +--- sle11sp1-2010-10-26.orig/arch/x86/ia32/ia32entry-xen.S 2009-11-06 14:53:39.000000000 +0100 ++++ sle11sp1-2010-10-26/arch/x86/ia32/ia32entry-xen.S 2010-01-20 10:28:42.000000000 +0100 +@@ -589,7 +589,7 @@ ia32_sys_call_table: + .quad quiet_ni_syscall /* streams2 */ + .quad stub32_vfork /* 190 */ + .quad compat_sys_getrlimit +- .quad sys32_mmap2 ++ .quad sys_mmap_pgoff + .quad sys32_truncate64 + .quad sys32_ftruncate64 + .quad sys32_stat64 /* 195 */ +--- sle11sp1-2010-10-26.orig/drivers/hwmon/coretemp-xen.c 2010-10-26 09:19:16.000000000 +0200 ++++ sle11sp1-2010-10-26/drivers/hwmon/coretemp-xen.c 2010-10-26 09:19:30.000000000 +0200 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #include + #include + #include "../xen/core/domctl.h" +@@ -166,6 +167,7 @@ static int adjust_tjmax(struct coretemp_ + int usemsr_ee = 1; + int err; + u32 eax, edx; ++ struct pci_dev *host_bridge; + + /* Early chips have no MSR for TjMax */ + +@@ -173,11 +175,21 @@ static int adjust_tjmax(struct coretemp_ + usemsr_ee = 0; + } + +- /* Atoms seems to have TjMax at 90C */ ++ /* Atom CPUs */ + + if (c->x86_model == 0x1c) { + usemsr_ee = 0; +- tjmax = 90000; ++ ++ host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); ++ ++ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL ++ && (host_bridge->device == 0xa000 /* NM10 based nettop */ ++ || host_bridge->device == 0xa010)) /* NM10 based netbook */ ++ tjmax = 100000; ++ else ++ tjmax = 90000; ++ ++ pci_dev_put(host_bridge); + } + + if ((c->x86_model > 0xe) && (usemsr_ee)) { --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen-vmalloc_32 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen-vmalloc_32 @@ -0,0 +1,66 @@ +From: jbeulich@novell.com +Subject: guarantee 32-bit (bus-)addressability of vmalloc_32() output +Patch-mainline: non-Xen part in 2.6.33 +References: bnc#548010, bnc#552492 + +--- head-2009-12-16.orig/mm/vmalloc.c 2009-12-16 11:52:01.000000000 +0100 ++++ head-2009-12-16/mm/vmalloc.c 2009-12-16 12:15:18.000000000 +0100 +@@ -1416,6 +1416,14 @@ static void *__vmalloc_area_node(struct + { + struct page **pages; + unsigned int nr_pages, array_size, i; ++ gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; ++#ifdef CONFIG_XEN ++ gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32); ++ ++ BUILD_BUG_ON((__GFP_DMA | __GFP_DMA32) != (__GFP_DMA + __GFP_DMA32)); ++ if (dma_mask == (__GFP_DMA | __GFP_DMA32)) ++ gfp_mask &= ~(__GFP_DMA | __GFP_DMA32); ++#endif + + nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; + array_size = (nr_pages * sizeof(struct page *)); +@@ -1423,13 +1431,12 @@ static void *__vmalloc_area_node(struct + area->nr_pages = nr_pages; + /* Please note that the recursion is strictly bounded. 
*/ + if (array_size > PAGE_SIZE) { +- pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO, +- PAGE_KERNEL, node, caller); ++ pages = __vmalloc_node(array_size, 1, ++ nested_gfp | __GFP_HIGHMEM, ++ PAGE_KERNEL, node, caller); + area->flags |= VM_VPAGES; + } else { +- pages = kmalloc_node(array_size, +- (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO, +- node); ++ pages = kmalloc_node(array_size, nested_gfp, node); + } + area->pages = pages; + area->caller = caller; +@@ -1453,6 +1460,16 @@ static void *__vmalloc_area_node(struct + goto fail; + } + area->pages[i] = page; ++#ifdef CONFIG_XEN ++ if (dma_mask) { ++ if (xen_limit_pages_to_max_mfn(page, 0, 32)) { ++ area->nr_pages = i + 1; ++ goto fail; ++ } ++ if (gfp_mask & __GFP_ZERO) ++ clear_highpage(page); ++ } ++#endif + } + + if (map_vm_area(area, prot, &pages)) +@@ -1612,6 +1629,8 @@ void *vmalloc_exec(unsigned long size) + #define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL + #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) + #define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL ++#elif defined(CONFIG_XEN) ++#define GFP_VMALLOC32 __GFP_DMA | __GFP_DMA32 | GFP_KERNEL + #else + #define GFP_VMALLOC32 GFP_KERNEL + #endif --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32.7-8 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32.7-8 @@ -0,0 +1,104 @@ +From: Greg Kroah-Hartman +Subject: Linux 2.6.32.8 +Patch-mainline: 2.6.32.8 + +Signed-off-by: Greg Kroah-Hartman + +Automatically created from "patches.kernel.org/patch-2.6.32.7-8" by xen-port-patches.py + +--- sle11sp1-2010-03-11.orig/arch/x86/kernel/process-xen.c 2009-11-06 10:52:23.000000000 +0100 ++++ sle11sp1-2010-03-11/arch/x86/kernel/process-xen.c 2010-02-09 17:12:56.000000000 +0100 +@@ -93,18 +93,6 @@ void flush_thread(void) + { + struct task_struct *tsk = current; + +-#ifdef CONFIG_X86_64 +- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { +- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); +- if (test_tsk_thread_flag(tsk, TIF_IA32)) { +- clear_tsk_thread_flag(tsk, TIF_IA32); +- } else { +- set_tsk_thread_flag(tsk, TIF_IA32); +- current_thread_info()->status |= TS_COMPAT; +- } +- } +-#endif +- + clear_tsk_thread_flag(tsk, TIF_DEBUG); + + tsk->thread.debugreg0 = 0; +--- sle11sp1-2010-03-11.orig/arch/x86/kernel/process_64-xen.c 2010-03-17 14:37:05.000000000 +0100 ++++ sle11sp1-2010-03-11/arch/x86/kernel/process_64-xen.c 2010-03-17 14:38:41.000000000 +0100 +@@ -615,6 +615,17 @@ sys_clone(unsigned long clone_flags, uns + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + } + ++void set_personality_ia32(void) ++{ ++ /* inherit personality from parent */ ++ ++ /* Make sure to be in 32bit mode */ ++ set_thread_flag(TIF_IA32); ++ ++ /* Prepare the first "return" to user space */ ++ current_thread_info()->status |= TS_COMPAT; ++} ++ + unsigned long get_wchan(struct task_struct *p) + { + unsigned long stack; +--- sle11sp1-2010-03-11.orig/arch/x86/kernel/quirks-xen.c 2009-11-06 10:52:23.000000000 +0100 ++++ sle11sp1-2010-03-11/arch/x86/kernel/quirks-xen.c 2010-02-09 17:12:56.000000000 +0100 +@@ -492,6 +492,19 @@ void force_hpet_resume(void) + break; + } + } ++ ++/* ++ * HPET MSI on some boards (ATI SB700/SB800) has side effect on ++ * floppy DMA. Disable HPET MSI on such platforms. 
++ */ ++static void force_disable_hpet_msi(struct pci_dev *unused) ++{ ++ hpet_msi_disable = 1; ++} ++ ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, ++ force_disable_hpet_msi); ++ + #endif + + #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) +--- sle11sp1-2010-03-11.orig/arch/x86/kernel/setup-xen.c 2010-01-04 12:50:03.000000000 +0100 ++++ sle11sp1-2010-03-11/arch/x86/kernel/setup-xen.c 2010-02-09 17:12:56.000000000 +0100 +@@ -736,19 +736,27 @@ static struct dmi_system_id __initdata b + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), + }, + }, +- { + /* +- * AMI BIOS with low memory corruption was found on Intel DG45ID board. +- * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will ++ * AMI BIOS with low memory corruption was found on Intel DG45ID and ++ * DG45FC boards. ++ * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will + * match only DMI_BOARD_NAME and see if there is more bad products + * with this vendor. + */ ++ { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), + }, + }, ++ { ++ .callback = dmi_low_memory_corruption, ++ .ident = "AMI BIOS", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), ++ }, ++ }, + #endif + {} + }; --- linux-ec2-2.6.32.orig/debian.ec2/patches.xen/xen3-patch-2.6.32 +++ linux-ec2-2.6.32/debian.ec2/patches.xen/xen3-patch-2.6.32 @@ -0,0 +1,6481 @@ +From: Linux Kernel Mailing List +Subject: Linux: 2.6.32 +Patch-mainline: 2.6.32 + + This patch contains the differences between 2.6.31 and 2.6.32. + +Acked-by: Jeff Mahoney +Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches.py + +--- head-2010-01-18.orig/arch/x86/ia32/ia32entry-xen.S 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/ia32/ia32entry-xen.S 2009-11-06 14:53:39.000000000 +0100 +@@ -20,18 +20,15 @@ + #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) + #define __AUDIT_ARCH_LE 0x40000000 + +-#ifndef CONFIG_AUDITSYSCALL +-#define sysexit_audit int_ret_from_sys_call +-#define sysretl_audit int_ret_from_sys_call +-#endif +- + #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) + + .macro IA32_ARG_FIXUP noebp=0 + movl %edi,%r8d + .if \noebp ++ jmp ia32_common + .else + movl %ebp,%r9d ++ia32_common: + .endif + xchg %ecx,%esi + movl %ebx,%edi +@@ -39,12 +36,12 @@ + .endm + + /* clobbers %eax */ +- .macro CLEAR_RREGS _r9=rax ++ .macro CLEAR_RREGS offset=0, _r9=rax + xorl %eax,%eax +- movq %rax,R11(%rsp) +- movq %rax,R10(%rsp) +- movq %\_r9,R9(%rsp) +- movq %rax,R8(%rsp) ++ movq %rax,\offset+R11(%rsp) ++ movq %rax,\offset+R10(%rsp) ++ movq %\_r9,\offset+R9(%rsp) ++ movq %rax,\offset+R8(%rsp) + .endm + + /* +@@ -144,17 +141,7 @@ ENTRY(ia32_sysenter_target) + jnz sysenter_tracesys + cmpl $(IA32_NR_syscalls-1),%eax + ja ia32_badsys +-sysenter_do_call: +- IA32_ARG_FIXUP +-sysenter_dispatch: +- call *ia32_sys_call_table(,%rax,8) +- movq %rax,RAX-ARGOFFSET(%rsp) +- GET_THREAD_INFO(%r10) +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- testl $_TIF_ALLWORK_MASK,TI_flags(%r10) +- jnz sysexit_audit +- jmp int_ret_from_sys_call ++ jmp ia32_do_call + + #ifdef CONFIG_AUDITSYSCALL + .macro auditsys_entry_common +@@ -175,31 +162,10 @@ sysenter_dispatch: + movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ + .endm + +- .macro auditsys_exit exit,ebpsave=RBP +- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) +- jnz int_ret_from_sys_call +- TRACE_IRQS_ON +- ENABLE_INTERRUPTS(CLBR_NONE) +- movl %eax,%esi /* second 
arg, syscall return value */ +- cmpl $0,%eax /* is it < 0? */ +- setl %al /* 1 if so, 0 if not */ +- movzbl %al,%edi /* zero-extend that into %edi */ +- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ +- call audit_syscall_exit +- movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ +- movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- jmp int_with_check +- .endm +- + sysenter_auditsys: + auditsys_entry_common + movl %ebp,%r9d /* reload 6th syscall arg */ +- jmp sysenter_dispatch +- +-sysexit_audit: +- auditsys_exit sysexit_from_sys_call ++ jmp ia32_dispatch + #endif + + sysenter_tracesys: +@@ -216,7 +182,7 @@ sysenter_tracesys: + RESTORE_REST + cmpl $(IA32_NR_syscalls-1),%eax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ +- jmp sysenter_do_call ++ jmp ia32_do_call + CFI_ENDPROC + ENDPROC(ia32_sysenter_target) + +@@ -272,24 +238,13 @@ ENTRY(ia32_cstar_target) + ja ia32_badsys + cstar_do_call: + IA32_ARG_FIXUP 1 +-cstar_dispatch: +- call *ia32_sys_call_table(,%rax,8) +- movq %rax,RAX-ARGOFFSET(%rsp) +- GET_THREAD_INFO(%r10) +- DISABLE_INTERRUPTS(CLBR_NONE) +- testl $_TIF_ALLWORK_MASK,TI_flags(%r10) +- jnz sysretl_audit +- jmp int_ret_from_sys_call + + #ifdef CONFIG_AUDITSYSCALL + cstar_auditsys: + movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ + auditsys_entry_common + movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ +- jmp cstar_dispatch +- +-sysretl_audit: +- auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ ++ jmp ia32_dispatch + #endif + + cstar_tracesys: +@@ -299,7 +254,7 @@ cstar_tracesys: + #endif + xchgl %r9d,%ebp + SAVE_REST +- CLEAR_RREGS r9 ++ CLEAR_RREGS 0, r9 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter +@@ -367,9 +322,11 @@ ENTRY(ia32_syscall) + ja ia32_badsys + ia32_do_call: + IA32_ARG_FIXUP ++ia32_dispatch: + call *ia32_sys_call_table(,%rax,8) # xxx: rip relative + ia32_sysret: + movq %rax,RAX-ARGOFFSET(%rsp) ++ CLEAR_RREGS -ARGOFFSET + jmp int_ret_from_sys_call + + ia32_tracesys: +@@ -387,8 +344,8 @@ END(ia32_syscall) + + ia32_badsys: + movq $0,ORIG_RAX-ARGOFFSET(%rsp) +- movq $-ENOSYS,RAX-ARGOFFSET(%rsp) +- jmp int_ret_from_sys_call ++ movq $-ENOSYS,%rax ++ jmp ia32_sysret + + quiet_ni_syscall: + movq $-ENOSYS,%rax +@@ -482,7 +439,7 @@ ia32_sys_call_table: + .quad sys_mkdir + .quad sys_rmdir /* 40 */ + .quad sys_dup +- .quad sys32_pipe ++ .quad sys_pipe + .quad compat_sys_times + .quad quiet_ni_syscall /* old prof syscall holder */ + .quad sys_brk /* 45 */ +@@ -776,5 +733,5 @@ ia32_sys_call_table: + .quad compat_sys_preadv + .quad compat_sys_pwritev + .quad compat_sys_rt_tgsigqueueinfo /* 335 */ +- .quad sys_perf_counter_open ++ .quad sys_perf_event_open + ia32_syscall_end: +--- head-2010-01-18.orig/arch/x86/include/asm/time.h 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/asm/time.h 2009-11-06 10:52:22.000000000 +0100 +@@ -8,8 +8,9 @@ extern void hpet_time_init(void); + extern void time_init(void); + + #ifdef CONFIG_XEN ++struct timespec; + extern int xen_independent_wallclock(void); +-extern unsigned long xen_read_persistent_clock(void); ++extern void xen_read_persistent_clock(struct timespec *); + extern int xen_update_persistent_clock(void); + #endif + +--- head-2010-01-18.orig/arch/x86/include/asm/uv/uv_hub.h 2010-01-18 15:20:20.000000000 +0100 ++++ 
head-2010-01-18/arch/x86/include/asm/uv/uv_hub.h 2009-11-06 11:32:17.000000000 +0100 +@@ -11,7 +11,7 @@ + #ifndef _ASM_X86_UV_UV_HUB_H + #define _ASM_X86_UV_UV_HUB_H + +-#ifdef CONFIG_X86_64 ++#ifdef CONFIG_X86_UV + #include + #include + #include +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/agp.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/agp.h 2009-11-06 10:52:22.000000000 +0100 +@@ -28,10 +28,7 @@ + */ + #define flush_agp_cache() wbinvd() + +-/* Convert a physical address to an address suitable for the GART. */ +-#define phys_to_gart(x) phys_to_machine(x) +-#define gart_to_phys(x) machine_to_phys(x) +-#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x)) ++#define virt_to_gart virt_to_machine + + /* GATT allocation. Returns/accepts GATT kernel virtual address. */ + #define alloc_gatt_pages(order) ({ \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/desc.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/desc.h 2009-11-18 14:54:16.000000000 +0100 +@@ -312,7 +312,14 @@ static inline void load_LDT(mm_context_t + + static inline unsigned long get_desc_base(const struct desc_struct *desc) + { +- return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); ++ return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); ++} ++ ++static inline void set_desc_base(struct desc_struct *desc, unsigned long base) ++{ ++ desc->base0 = base & 0xffff; ++ desc->base1 = (base >> 16) & 0xff; ++ desc->base2 = (base >> 24) & 0xff; + } + + static inline unsigned long get_desc_limit(const struct desc_struct *desc) +@@ -320,6 +327,12 @@ static inline unsigned long get_desc_lim + return desc->limit0 | (desc->limit << 16); + } + ++static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) ++{ ++ desc->limit0 = limit & 0xffff; ++ desc->limit = (limit >> 16) & 0xf; ++} ++ + #ifndef CONFIG_X86_NO_IDT + static inline void _set_gate(int gate, unsigned type, void *addr, + unsigned dpl, unsigned ist, unsigned seg) +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/dma-mapping.h 2009-11-06 10:51:47.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/dma-mapping.h 2009-11-06 10:52:22.000000000 +0100 +@@ -1,11 +1,24 @@ + #ifndef _ASM_X86_DMA_MAPPING_H_ + ++#define phys_to_dma _phys_to_dma_ ++#define dma_to_phys _dma_to_phys_ ++ + #include_next + +-void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t); ++#undef phys_to_dma ++#undef dma_to_phys ++ ++static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) ++{ ++ return phys_to_machine(paddr); ++} + +-#define address_needs_mapping(hwdev, addr, size) \ +- !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size) ++static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) ++{ ++ return machine_to_phys(daddr); ++} ++ ++void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t); + + extern int range_straddles_page_boundary(paddr_t p, size_t size); + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/fixmap.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/fixmap.h 2009-11-06 10:52:22.000000000 +0100 +@@ -139,6 +139,9 @@ enum fixed_addresses { + #ifdef CONFIG_X86_32 + FIX_WP_TEST, + #endif ++#ifdef CONFIG_INTEL_TXT ++ FIX_TBOOT_BASE, ++#endif + __end_of_fixed_addresses + }; + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:43:12.000000000 +0100 ++++ 
head-2010-01-18/arch/x86/include/mach-xen/asm/hypervisor.h 2009-11-23 10:44:04.000000000 +0100 +@@ -70,6 +70,7 @@ extern start_info_t *xen_start_info; + #endif + + #define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN)) ++#define init_hypervisor_platform() init_hypervisor(&boot_cpu_data) + + struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu); + +@@ -351,6 +352,6 @@ MULTI_grant_table_op(multicall_entry_t * + + #endif + +-#define uvm_multi(cpumask) ((unsigned long)cpus_addr(cpumask) | UVMF_MULTI) ++#define uvm_multi(cpumask) ((unsigned long)cpumask_bits(cpumask) | UVMF_MULTI) + + #endif /* __HYPERVISOR_H__ */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/irqflags.h 2009-11-06 10:52:22.000000000 +0100 +@@ -1,7 +1,7 @@ + #ifndef _X86_IRQFLAGS_H_ + #define _X86_IRQFLAGS_H_ + +-#include ++#include + + #ifndef __ASSEMBLY__ + /* +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/mmu_context.h 2009-11-06 10:52:22.000000000 +0100 +@@ -88,12 +88,12 @@ static inline void switch_mm(struct mm_s + !PagePinned(virt_to_page(next->pgd))); + + /* stop flush ipis for the previous mm */ +- cpu_clear(cpu, prev->cpu_vm_mask); ++ cpumask_clear_cpu(cpu, mm_cpumask(prev)); + #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); + #endif +- cpu_set(cpu, next->cpu_vm_mask); ++ cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* Re-load page tables: load_cr3(next->pgd) */ + op->cmd = MMUEXT_NEW_BASEPTR; +@@ -125,7 +125,7 @@ static inline void switch_mm(struct mm_s + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + +- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { ++ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pci.h 2009-11-06 10:52:22.000000000 +0100 +@@ -151,7 +151,11 @@ static inline int __pcibus_to_node(const + static inline const struct cpumask * + cpumask_of_pcibus(const struct pci_bus *bus) + { +- return cpumask_of_node(__pcibus_to_node(bus)); ++ int node; ++ ++ node = __pcibus_to_node(bus); ++ return (node == -1) ? 
cpu_online_mask : ++ cpumask_of_node(node); + } + #endif + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-20 11:17:56.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable.h 2009-11-20 11:18:13.000000000 +0100 +@@ -53,16 +53,6 @@ extern struct list_head pgd_list; + #define pte_update(mm, addr, ptep) do { } while (0) + #define pte_update_defer(mm, addr, ptep) do { } while (0) + +-static inline void __init paravirt_pagetable_setup_start(pgd_t *base) +-{ +- xen_pagetable_setup_start(base); +-} +- +-static inline void __init paravirt_pagetable_setup_done(pgd_t *base) +-{ +- xen_pagetable_setup_done(base); +-} +- + #define pgd_val(x) xen_pgd_val(x) + #define __pgd(x) xen_make_pgd(x) + +@@ -134,6 +124,11 @@ static inline int pte_special(pte_t pte) + + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) + ++static inline unsigned long pmd_pfn(pmd_t pmd) ++{ ++ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; ++} ++ + static inline int pmd_large(pmd_t pte) + { + return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == +@@ -363,7 +358,7 @@ static inline unsigned long pmd_page_vad + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +-static inline unsigned pmd_index(unsigned long address) ++static inline unsigned long pmd_index(unsigned long address) + { + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); + } +@@ -383,7 +378,7 @@ static inline unsigned pmd_index(unsigne + * this function returns the index of the entry in the pte page which would + * control the given virtual address + */ +-static inline unsigned pte_index(unsigned long address) ++static inline unsigned long pte_index(unsigned long address) + { + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + } +@@ -439,11 +434,6 @@ static inline pmd_t *pmd_offset(pud_t *p + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); + } + +-static inline unsigned long pmd_pfn(pmd_t pmd) +-{ +- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; +-} +- + static inline int pud_large(pud_t pud) + { + return (__pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == +@@ -479,7 +469,7 @@ static inline unsigned long pgd_page_vad + #define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) + + /* to find an entry in a page-table-directory. */ +-static inline unsigned pud_index(unsigned long address) ++static inline unsigned long pud_index(unsigned long address) + { + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); + } +@@ -600,7 +590,7 @@ extern int ptep_clear_flush_young(struct + if (!pte_none(__res) && \ + ((vma)->vm_mm != current->mm || \ + HYPERVISOR_update_va_mapping(addr, __pte(0), \ +- uvm_multi((vma)->vm_mm->cpu_vm_mask) | \ ++ uvm_multi(mm_cpumask((vma)->vm_mm)) | \ + UVMF_INVLPG))) { \ + __xen_pte_clear(__ptep); \ + flush_tlb_page(vma, addr); \ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/pgtable_types.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/pgtable_types.h 2009-11-06 10:52:22.000000000 +0100 +@@ -334,6 +334,7 @@ static inline pteval_t pte_flags(pte_t p + typedef struct page *pgtable_t; + + extern pteval_t __supported_pte_mask; ++extern void set_nx(void); + extern int nx_enabled; + + #define pgprot_writecombine pgprot_writecombine +@@ -354,14 +355,6 @@ int phys_mem_access_prot_allowed(struct + /* Install a pte for a particular vaddr in kernel space. 
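
Several hunks above only widen pmd_index()/pte_index()/pud_index() from unsigned to unsigned long; the bit arithmetic is untouched. For reference, the extraction those helpers perform, with the standard x86-64 4 KiB/4-level constants filled in (a standalone sketch, not kernel code):

    #include <stdio.h>

    #define PAGE_SHIFT 12      /* 4 KiB pages */
    #define PMD_SHIFT  21
    #define PUD_SHIFT  30
    #define PTRS_PER_PTE 512
    #define PTRS_PER_PMD 512
    #define PTRS_PER_PUD 512

    /* Each helper isolates one 9-bit slice of the virtual address. */
    static unsigned long pte_index(unsigned long a)
    { return (a >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); }

    static unsigned long pmd_index(unsigned long a)
    { return (a >> PMD_SHIFT) & (PTRS_PER_PMD - 1); }

    static unsigned long pud_index(unsigned long a)
    { return (a >> PUD_SHIFT) & (PTRS_PER_PUD - 1); }

    int main(void)
    {
        unsigned long addr = 0xffff8800075a3000UL;  /* arbitrary example */

        printf("pud=%lu pmd=%lu pte=%lu\n",
               pud_index(addr), pmd_index(addr), pte_index(addr));
        return 0;
    }
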
*/ + void set_pte_vaddr(unsigned long vaddr, pte_t pte); + +-#ifndef CONFIG_XEN +-extern void native_pagetable_setup_start(pgd_t *base); +-extern void native_pagetable_setup_done(pgd_t *base); +-#else +-static inline void xen_pagetable_setup_start(pgd_t *base) {} +-static inline void xen_pagetable_setup_done(pgd_t *base) {} +-#endif +- + struct seq_file; + extern void arch_report_meminfo(struct seq_file *m); + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/processor.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/processor.h 2009-11-18 14:54:16.000000000 +0100 +@@ -27,6 +27,7 @@ struct mm_struct; + #include + #include + #include ++#include + #include + #include + +@@ -411,7 +412,17 @@ extern unsigned long kernel_eflags; + extern asmlinkage void ignore_sysret(void); + #else /* X86_64 */ + #ifdef CONFIG_CC_STACKPROTECTOR +-DECLARE_PER_CPU(unsigned long, stack_canary); ++/* ++ * Make sure stack canary segment base is cached-aligned: ++ * "For Intel Atom processors, avoid non zero segment base address ++ * that is not aligned to cache line boundary at all cost." ++ * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) ++ */ ++struct stack_canary { ++ char __pad[20]; /* canary at %gs:20 */ ++ unsigned long canary; ++}; ++DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); + #endif + #endif /* X86_64 */ + +@@ -648,13 +659,23 @@ static inline void cpu_relax(void) + rep_nop(); + } + +-/* Stop speculative execution: */ ++/* Stop speculative execution and prefetching of modified code. */ + static inline void sync_core(void) + { + int tmp; + +- asm volatile("cpuid" : "=a" (tmp) : "0" (1) +- : "ebx", "ecx", "edx", "memory"); ++#if defined(CONFIG_M386) || defined(CONFIG_M486) ++ if (boot_cpu_data.x86 < 5) ++ /* There is no speculative execution. ++ * jmp is a barrier to prefetching. */ ++ asm volatile("jmp 1f\n1:\n" ::: "memory"); ++ else ++#endif ++ /* cpuid is a barrier to speculative execution. ++ * Prefetched instructions are automatically ++ * invalidated when modified. */ ++ asm volatile("cpuid" : "=a" (tmp) : "0" (1) ++ : "ebx", "ecx", "edx", "memory"); + } + + static inline void __monitor(const void *eax, unsigned long ecx, +@@ -926,7 +947,7 @@ extern unsigned long thread_saved_pc(str + #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) + + #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) +-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. 
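
The calc_aperfmperf_ratio() helper added a few lines below is plain fixed-point arithmetic: it returns (delta APERF / delta MPERF) scaled by 2^APERFMPERF_SHIFT, so 1024 means the CPU averaged exactly its reference frequency over the sampling interval. A runnable extract, with div64_u64() replaced by a native 64-bit divide:

    #include <stdio.h>
    #include <stdint.h>

    #define APERFMPERF_SHIFT 10

    struct aperfmperf { uint64_t aperf, mperf; };

    static unsigned long calc_aperfmperf_ratio(const struct aperfmperf *old,
                                               const struct aperfmperf *new)
    {
        uint64_t aperf = new->aperf - old->aperf;
        uint64_t mperf = new->mperf - old->mperf;
        unsigned long ratio = aperf;

        /* Pre-shifting the divisor keeps the quotient scaled by 1024
         * without risking overflow in a multiply. */
        mperf >>= APERFMPERF_SHIFT;
        if (mperf)
            ratio = aperf / mperf;  /* div64_u64() in the kernel */

        return ratio;
    }

    int main(void)
    {
        struct aperfmperf prev = { 0, 0 };
        struct aperfmperf next = { 1500000, 1000000 }; /* 1.5x reference */

        printf("ratio = %lu (1024 == reference frequency)\n",
               calc_aperfmperf_ratio(&prev, &next));  /* prints 1536 */
        return 0;
    }
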
*/ ++extern unsigned long KSTK_ESP(struct task_struct *task); + #endif /* CONFIG_X86_64 */ + + extern void start_thread(struct pt_regs *regs, unsigned long new_ip, +@@ -947,4 +968,35 @@ extern void start_thread(struct pt_regs + extern int get_tsc_mode(unsigned long adr); + extern int set_tsc_mode(unsigned int val); + ++extern int amd_get_nb_id(int cpu); ++ ++struct aperfmperf { ++ u64 aperf, mperf; ++}; ++ ++static inline void get_aperfmperf(struct aperfmperf *am) ++{ ++ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); ++ ++ rdmsrl(MSR_IA32_APERF, am->aperf); ++ rdmsrl(MSR_IA32_MPERF, am->mperf); ++} ++ ++#define APERFMPERF_SHIFT 10 ++ ++static inline ++unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, ++ struct aperfmperf *new) ++{ ++ u64 aperf = new->aperf - old->aperf; ++ u64 mperf = new->mperf - old->mperf; ++ unsigned long ratio = aperf; ++ ++ mperf >>= APERFMPERF_SHIFT; ++ if (mperf) ++ ratio = div64_u64(aperf, mperf); ++ ++ return ratio; ++} ++ + #endif /* _ASM_X86_PROCESSOR_H */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/setup.h 2009-11-06 10:52:22.000000000 +0100 +@@ -0,0 +1,8 @@ ++#ifndef __ASSEMBLY__ ++ ++void xen_start_kernel(void); ++void xen_arch_setup(void); ++ ++#endif ++ ++#include_next +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp-processor-id.h 2009-11-06 10:52:22.000000000 +0100 +@@ -0,0 +1,36 @@ ++#ifndef _ASM_X86_SMP_PROCESSOR_ID_H ++#define _ASM_X86_SMP_PROCESSOR_ID_H ++ ++#if defined(CONFIG_SMP) && !defined(__ASSEMBLY__) ++ ++#include ++ ++DECLARE_PER_CPU(int, cpu_number); ++ ++/* ++ * This function is needed by all SMP systems. It must _always_ be valid ++ * from the initial startup. We map APIC_BASE very early in page_setup(), ++ * so this is correct in the x86 case. ++ */ ++#define raw_smp_processor_id() percpu_read(cpu_number) ++#define safe_smp_processor_id() smp_processor_id() ++ ++#ifdef CONFIG_X86_64_SMP ++#define stack_smp_processor_id() \ ++({ \ ++ struct thread_info *ti; \ ++ __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ ++ ti->cpu; \ ++}) ++#endif ++ ++#ifdef CONFIG_DEBUG_PREEMPT ++extern unsigned int debug_smp_processor_id(void); ++# define smp_processor_id() debug_smp_processor_id() ++#else ++# define smp_processor_id() raw_smp_processor_id() ++#endif ++ ++#endif /* SMP && !__ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_SMP_PROCESSOR_ID_H */ +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:17:59.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/smp.h 2009-11-20 11:18:10.000000000 +0100 +@@ -121,7 +121,6 @@ static inline void arch_send_call_functi + smp_ops.send_call_func_single_ipi(cpu); + } + +-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask + static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) + { + smp_ops.send_call_func_ipi(mask); +@@ -167,27 +166,7 @@ static inline int num_booting_cpus(void) + + extern unsigned disabled_cpus __cpuinitdata; + +-#ifdef CONFIG_X86_32_SMP +-/* +- * This function is needed by all SMP systems. It must _always_ be valid +- * from the initial startup. We map APIC_BASE very early in page_setup(), +- * so this is correct in the x86 case. 
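
stack_smp_processor_id() in the new header above works because kernel stacks are THREAD_SIZE-aligned with struct thread_info at the base, so masking any in-stack address with CURRENT_MASK, i.e. ~(THREAD_SIZE - 1), lands on the thread_info. The same trick in a userspace sketch, assuming an 8 KiB THREAD_SIZE:

    #include <stdio.h>
    #include <stdint.h>

    #define THREAD_SIZE  8192UL
    #define CURRENT_MASK (~(THREAD_SIZE - 1))

    struct thread_info { int cpu; };

    /* A fake THREAD_SIZE-aligned kernel stack with thread_info at
     * its base, as the real stack allocator guarantees. */
    static union {
        struct thread_info ti;
        char space[THREAD_SIZE];
    } stack __attribute__((aligned(8192)));

    int main(void)
    {
        stack.ti.cpu = 3;

        /* Any address inside the stack... */
        uintptr_t sp = (uintptr_t)&stack.space[5000];

        /* ...masked down to the base recovers thread_info
         * (the "andq %rsp,%0" in the macro). */
        struct thread_info *ti = (struct thread_info *)(sp & CURRENT_MASK);

        printf("cpu = %d\n", ti->cpu);  /* 3 */
        return 0;
    }
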
+- */ +-#define raw_smp_processor_id() (percpu_read(cpu_number)) +-#define safe_smp_processor_id() smp_processor_id() +- +-#elif defined(CONFIG_X86_64_SMP) +-#define raw_smp_processor_id() (percpu_read(cpu_number)) +- +-#define stack_smp_processor_id() \ +-({ \ +- struct thread_info *ti; \ +- __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ +- ti->cpu; \ +-}) +-#define safe_smp_processor_id() smp_processor_id() +- +-#endif ++#include + + #ifdef CONFIG_X86_LOCAL_APIC + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/system.h 2009-11-06 10:52:22.000000000 +0100 +@@ -30,7 +30,7 @@ void __switch_to_xtra(struct task_struct + "movl %P[task_canary](%[next]), %%ebx\n\t" \ + "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" + #define __switch_canary_oparam \ +- , [stack_canary] "=m" (per_cpu_var(stack_canary)) ++ , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) + #define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) + #else /* CC_STACKPROTECTOR */ +@@ -149,33 +149,6 @@ do { \ + #endif + + #ifdef __KERNEL__ +-#define _set_base(addr, base) do { unsigned long __pr; \ +-__asm__ __volatile__ ("movw %%dx,%1\n\t" \ +- "rorl $16,%%edx\n\t" \ +- "movb %%dl,%2\n\t" \ +- "movb %%dh,%3" \ +- :"=&d" (__pr) \ +- :"m" (*((addr)+2)), \ +- "m" (*((addr)+4)), \ +- "m" (*((addr)+7)), \ +- "0" (base) \ +- ); } while (0) +- +-#define _set_limit(addr, limit) do { unsigned long __lr; \ +-__asm__ __volatile__ ("movw %%dx,%1\n\t" \ +- "rorl $16,%%edx\n\t" \ +- "movb %2,%%dh\n\t" \ +- "andb $0xf0,%%dh\n\t" \ +- "orb %%dh,%%dl\n\t" \ +- "movb %%dl,%2" \ +- :"=&d" (__lr) \ +- :"m" (*(addr)), \ +- "m" (*((addr)+6)), \ +- "0" (limit) \ +- ); } while (0) +- +-#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) +-#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) + + extern void xen_load_gs_index(unsigned); + +--- head-2010-01-18.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/include/mach-xen/asm/tlbflush.h 2009-11-06 10:52:22.000000000 +0100 +@@ -74,9 +74,9 @@ static inline void reset_lazy_tlbstate(v + #define local_flush_tlb() __flush_tlb() + + #define flush_tlb_all xen_tlb_flush_all +-#define flush_tlb_current_task() xen_tlb_flush_mask(¤t->mm->cpu_vm_mask) +-#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask) +-#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va) ++#define flush_tlb_current_task() xen_tlb_flush_mask(mm_cpumask(current->mm)) ++#define flush_tlb_mm(mm) xen_tlb_flush_mask(mm_cpumask(mm)) ++#define flush_tlb_page(vma, va) xen_invlpg_mask(mm_cpumask((vma)->vm_mm), va) + + #define flush_tlb() flush_tlb_current_task() + +--- head-2010-01-18.orig/arch/x86/kernel/Makefile 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/Makefile 2009-11-06 10:52:22.000000000 +0100 +@@ -132,8 +132,6 @@ ifeq ($(CONFIG_X86_64),y) + + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-y += vsmp_64.o +- +- time_64-$(CONFIG_XEN) += time_32.o + endif + + disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \ +--- head-2010-01-18.orig/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/apic/io_apic-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -79,6 +79,8 @@ unsigned long io_apic_irqs; + #endif /* CONFIG_XEN */ + + #define 
__apicdebuginit(type) static type __init ++#define for_each_irq_pin(entry, head) \ ++ for (entry = head; entry; entry = entry->next) + + /* + * Is the SiS APIC rmw bug present ? +@@ -100,12 +102,24 @@ int nr_ioapic_registers[MAX_IO_APICS]; + struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; + int nr_ioapics; + ++/* IO APIC gsi routing info */ ++struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; ++ + /* MP IRQ source entries */ + struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; + + /* # of MP IRQ source entries */ + int mp_irq_entries; + ++#ifndef CONFIG_XEN ++/* Number of legacy interrupts */ ++static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; ++/* GSI interrupts */ ++static int nr_irqs_gsi = NR_IRQS_LEGACY; ++#else ++#define nr_legacy_irqs NR_IRQS_LEGACY ++#endif ++ + #if defined (CONFIG_MCA) || defined (CONFIG_EISA) + int mp_bus_id_to_type[MAX_MP_BUSSES]; + #endif +@@ -132,15 +146,6 @@ static int __init parse_noapic(char *str + early_param("noapic", parse_noapic); + + #ifndef CONFIG_XEN +-struct irq_pin_list; +- +-/* +- * This is performance-critical, we want to do it O(1) +- * +- * the indexing order of this array favors 1:1 mappings +- * between pins and IRQs. +- */ +- + struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +@@ -155,6 +160,11 @@ static struct irq_pin_list *get_one_free + return pin; + } + ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * Most irqs are mapped 1:1 with pins. ++ */ + struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; +@@ -188,6 +198,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] + [15] = { .vector = IRQ15_VECTOR, }, + }; + ++void __init io_apic_disable_legacy(void) ++{ ++ nr_legacy_irqs = 0; ++ nr_irqs_gsi = 0; ++} ++ + int __init arch_early_irq_init(void) + { + struct irq_cfg *cfg; +@@ -205,7 +221,7 @@ int __init arch_early_irq_init(void) + desc->chip_data = &cfg[i]; + zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); + zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); +- if (i < NR_IRQS_LEGACY) ++ if (i < nr_legacy_irqs) + cpumask_setall(cfg[i].domain); + } + +@@ -231,17 +247,14 @@ static struct irq_cfg *get_one_free_irq_ + + cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + if (cfg) { +- if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { ++ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { + kfree(cfg); + cfg = NULL; +- } else if (!alloc_cpumask_var_node(&cfg->old_domain, ++ } else if (!zalloc_cpumask_var_node(&cfg->old_domain, + GFP_ATOMIC, node)) { + free_cpumask_var(cfg->domain); + kfree(cfg); + cfg = NULL; +- } else { +- cpumask_clear(cfg->domain); +- cpumask_clear(cfg->old_domain); + } + } + +@@ -455,13 +468,10 @@ static bool io_apic_level_ack_pending(st + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); +- entry = cfg->irq_2_pin; +- for (;;) { ++ for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + int pin; + +- if (!entry) +- break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ +@@ -469,9 +479,6 @@ static bool io_apic_level_ack_pending(st + spin_unlock_irqrestore(&ioapic_lock, flags); + return true; + } +- if (!entry->next) +- break; +- entry = entry->next; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + +@@ -543,72 +550,68 @@ static void ioapic_mask_entry(int apic, + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. 
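
The add_pin_to_irq_node_nopanic() rewrite below replaces the old special-cased list walk with the classic pointer-to-pointer append: by advancing a struct irq_pin_list **last cursor, the empty-list and tail-append cases collapse into a single *last = entry store. The idiom distilled into a standalone sketch:

    #include <stdio.h>
    #include <stdlib.h>

    struct irq_pin_list {
        int apic, pin;
        struct irq_pin_list *next;
    };

    #define for_each_irq_pin(entry, head) \
        for (entry = head; entry; entry = entry->next)

    /* Walking with `last = &entry->next` means *last is always the
     * link to patch, whether the list is empty or not. */
    static int add_pin(struct irq_pin_list **head, int apic, int pin)
    {
        struct irq_pin_list **last = head, *entry;

        for_each_irq_pin(entry, *head) {
            if (entry->apic == apic && entry->pin == pin)
                return 0;               /* don't allow duplicates */
            last = &entry->next;
        }

        entry = calloc(1, sizeof(*entry));
        if (!entry)
            return -1;                  /* -ENOMEM in the kernel */
        entry->apic = apic;
        entry->pin = pin;
        *last = entry;
        return 0;
    }

    int main(void)
    {
        struct irq_pin_list *head = NULL, *e;

        add_pin(&head, 2, 19);
        add_pin(&head, 3, 7);
        add_pin(&head, 2, 19);          /* duplicate, ignored */

        for_each_irq_pin(e, head)
            printf("apic %d pin %d\n", e->apic, e->pin);
        return 0;
    }
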
+ */ +-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) ++static int ++add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) + { +- struct irq_pin_list *entry; ++ struct irq_pin_list **last, *entry; + +- entry = cfg->irq_2_pin; +- if (!entry) { +- entry = get_one_free_irq_2_pin(node); +- if (!entry) { +- printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", +- apic, pin); +- return; +- } +- cfg->irq_2_pin = entry; +- entry->apic = apic; +- entry->pin = pin; +- return; +- } +- +- while (entry->next) { +- /* not again, please */ ++ /* don't allow duplicates */ ++ last = &cfg->irq_2_pin; ++ for_each_irq_pin(entry, cfg->irq_2_pin) { + if (entry->apic == apic && entry->pin == pin) +- return; +- +- entry = entry->next; ++ return 0; ++ last = &entry->next; + } + +- entry->next = get_one_free_irq_2_pin(node); +- entry = entry->next; ++ entry = get_one_free_irq_2_pin(node); ++ if (!entry) { ++ printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", ++ node, apic, pin); ++ return -ENOMEM; ++ } + entry->apic = apic; + entry->pin = pin; ++ ++ *last = entry; ++ return 0; ++} ++ ++static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) ++{ ++ if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) ++ panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); + } + + /* + * Reroute an IRQ to a different pin. + */ + static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, +- int oldapic, int oldpin, +- int newapic, int newpin) ++ int oldapic, int oldpin, ++ int newapic, int newpin) + { +- struct irq_pin_list *entry = cfg->irq_2_pin; +- int replaced = 0; ++ struct irq_pin_list *entry; + +- while (entry) { ++ for_each_irq_pin(entry, cfg->irq_2_pin) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; +- replaced = 1; + /* every one is different, right? */ +- break; ++ return; + } +- entry = entry->next; + } + +- /* why? call replace before add? 
*/ +- if (!replaced) +- add_pin_to_irq_node(cfg, node, newapic, newpin); ++ /* old apic/pin didn't exist, so just add new ones */ ++ add_pin_to_irq_node(cfg, node, newapic, newpin); + } + +-static inline void io_apic_modify_irq(struct irq_cfg *cfg, +- int mask_and, int mask_or, +- void (*final)(struct irq_pin_list *entry)) ++static void io_apic_modify_irq(struct irq_cfg *cfg, ++ int mask_and, int mask_or, ++ void (*final)(struct irq_pin_list *entry)) + { + int pin; + struct irq_pin_list *entry; + +- for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { ++ for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); +@@ -625,7 +628,6 @@ static void __unmask_IO_APIC_irq(struct + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); + } + +-#ifdef CONFIG_X86_64 + static void io_apic_sync(struct irq_pin_list *entry) + { + /* +@@ -641,11 +643,6 @@ static void __mask_IO_APIC_irq(struct ir + { + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + } +-#else /* CONFIG_X86_32 */ +-static void __mask_IO_APIC_irq(struct irq_cfg *cfg) +-{ +- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); +-} + + static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) + { +@@ -658,7 +655,6 @@ static void __unmask_and_level_IO_APIC_i + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); + } +-#endif /* CONFIG_X86_32 */ + + static void mask_IO_APIC_irq_desc(struct irq_desc *desc) + { +@@ -719,6 +715,7 @@ static void clear_IO_APIC (void) + } + #else + #define add_pin_to_irq_node(cfg, node, apic, pin) ++#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0 + #endif /* CONFIG_XEN */ + + #ifdef CONFIG_X86_32 +@@ -935,7 +932,7 @@ static int __init find_isa_irq_apic(int + */ + static int EISA_ELCR(unsigned int irq) + { +- if (irq < NR_IRQS_LEGACY) { ++ if (irq < nr_legacy_irqs) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } +@@ -1547,7 +1544,7 @@ static void setup_IO_APIC_irq(int apic_i + } + + ioapic_register_intr(irq, desc, trigger); +- if (irq < NR_IRQS_LEGACY) ++ if (irq < nr_legacy_irqs) + disable_8259A_irq(irq); + + ioapic_write_entry(apic_id, pin, entry); +@@ -1775,12 +1772,8 @@ __apicdebuginit(void) print_IO_APIC(void + if (!entry) + continue; + printk(KERN_DEBUG "IRQ%d ", irq); +- for (;;) { ++ for_each_irq_pin(entry, cfg->irq_2_pin) + printk("-> %d:%d", entry->apic, entry->pin); +- if (!entry->next) +- break; +- entry = entry->next; +- } + printk("\n"); + } + +@@ -1924,7 +1917,7 @@ __apicdebuginit(void) print_PIC(void) + unsigned int v; + unsigned long flags; + +- if (apic_verbosity == APIC_QUIET) ++ if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + return; + + printk(KERN_DEBUG "\nprinting PIC contents\n"); +@@ -1956,7 +1949,7 @@ __apicdebuginit(int) print_all_ICs(void) + print_PIC(); + + /* don't print out if apic is not there */ +- if (!cpu_has_apic || disable_apic) ++ if (!cpu_has_apic && !apic_from_smp_config()) + return 0; + + print_all_local_APICs(); +@@ -1990,6 +1983,10 @@ void __init enable_IO_APIC(void) + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } ++ ++ if (!nr_legacy_irqs) ++ return; ++ + #ifndef CONFIG_XEN + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; +@@ -2049,6 +2046,9 @@ void disable_IO_APIC(void) + */ + clear_IO_APIC(); + ++ if (!nr_legacy_irqs) ++ return; ++ + /* + * If the i8259 is routed through an IOAPIC + * Put that IOAPIC in 
virtual wire mode +@@ -2082,7 +2082,7 @@ void disable_IO_APIC(void) + /* + * Use virtual wire A mode when interrupt remapping is enabled. + */ +- if (cpu_has_apic) ++ if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(!intr_remapping_enabled && + ioapic_i8259.pin != -1); + } +@@ -2095,7 +2095,7 @@ void disable_IO_APIC(void) + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +-static void __init setup_ioapic_ids_from_mpc(void) ++void __init setup_ioapic_ids_from_mpc(void) + { + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; +@@ -2104,9 +2104,8 @@ static void __init setup_ioapic_ids_from + unsigned char old_id; + unsigned long flags; + +- if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) ++ if (acpi_ioapic) + return; +- + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. +@@ -2280,7 +2279,7 @@ static unsigned int startup_ioapic_irq(u + struct irq_cfg *cfg; + + spin_lock_irqsave(&ioapic_lock, flags); +- if (irq < NR_IRQS_LEGACY) { ++ if (irq < nr_legacy_irqs) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; +@@ -2292,7 +2291,6 @@ static unsigned int startup_ioapic_irq(u + return was_pending; + } + +-#ifdef CONFIG_X86_64 + static int ioapic_retrigger_irq(unsigned int irq) + { + +@@ -2305,14 +2303,6 @@ static int ioapic_retrigger_irq(unsigned + + return 1; + } +-#else +-static int ioapic_retrigger_irq(unsigned int irq) +-{ +- apic->send_IPI_self(irq_cfg(irq)->vector); +- +- return 1; +-} +-#endif + + /* + * Level and edge triggered IO-APIC interrupts need different handling, +@@ -2350,13 +2340,9 @@ static void __target_IO_APIC_irq(unsigne + struct irq_pin_list *entry; + u8 vector = cfg->vector; + +- entry = cfg->irq_2_pin; +- for (;;) { ++ for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + +- if (!entry) +- break; +- + apic = entry->apic; + pin = entry->pin; + /* +@@ -2369,9 +2355,6 @@ static void __target_IO_APIC_irq(unsigne + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); +- if (!entry->next) +- break; +- entry = entry->next; + } + } + +@@ -2596,11 +2579,8 @@ atomic_t irq_mis_count; + static void ack_apic_level(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); +- +-#ifdef CONFIG_X86_32 + unsigned long v; + int i; +-#endif + struct irq_cfg *cfg; + int do_unmask_irq = 0; + +@@ -2613,31 +2593,28 @@ static void ack_apic_level(unsigned int + } + #endif + +-#ifdef CONFIG_X86_32 + /* +- * It appears there is an erratum which affects at least version 0x11 +- * of I/O APIC (that's the 82093AA and cores integrated into various +- * chipsets). Under certain conditions a level-triggered interrupt is +- * erroneously delivered as edge-triggered one but the respective IRR +- * bit gets set nevertheless. As a result the I/O unit expects an EOI +- * message but it will never arrive and further interrupts are blocked +- * from the source. The exact reason is so far unknown, but the +- * phenomenon was observed when two consecutive interrupt requests +- * from a given source get delivered to the same CPU and the source is +- * temporarily disabled in between. +- * +- * A workaround is to simulate an EOI message manually. We achieve it +- * by setting the trigger mode to edge and then to level when the edge +- * trigger mode gets detected in the TMR of a local APIC for a +- * level-triggered interrupt. 
We mask the source for the time of the +- * operation to prevent an edge-triggered interrupt escaping meanwhile. +- * The idea is from Manfred Spraul. --macro +- */ ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. --macro ++ */ + cfg = desc->chip_data; + i = cfg->vector; +- + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); +-#endif + + /* + * We must acknowledge the irq before we move it or the acknowledge will +@@ -2679,7 +2656,7 @@ static void ack_apic_level(unsigned int + unmask_IO_APIC_irq_desc(desc); + } + +-#ifdef CONFIG_X86_32 ++ /* Tail end of version 0x11 I/O APIC bug workaround */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); +@@ -2687,26 +2664,15 @@ static void ack_apic_level(unsigned int + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } +-#endif + } + + #ifdef CONFIG_INTR_REMAP + static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) + { +- int apic, pin; + struct irq_pin_list *entry; + +- entry = cfg->irq_2_pin; +- for (;;) { +- +- if (!entry) +- break; +- +- apic = entry->apic; +- pin = entry->pin; +- io_apic_eoi(apic, pin); +- entry = entry->next; +- } ++ for_each_irq_pin(entry, cfg->irq_2_pin) ++ io_apic_eoi(entry->apic, entry->pin); + } + + static void +@@ -2796,7 +2762,7 @@ static inline void init_IO_APIC_traps(vo + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ +- if (irq < NR_IRQS_LEGACY) ++ if (irq < nr_legacy_irqs) + make_8259A_irq(irq); + else + /* Strange. Oh, well.. */ +@@ -3136,7 +3102,7 @@ out: + * the I/O APIC in all cases now. No actual device should request + * it anyway. --macro + */ +-#define PIC_IRQS (1 << PIC_CASCADE_IR) ++#define PIC_IRQS (1UL << PIC_CASCADE_IR) + + void __init setup_IO_APIC(void) + { +@@ -3148,23 +3114,21 @@ void __init setup_IO_APIC(void) + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ + #endif +- +- io_apic_irqs = ~PIC_IRQS; ++ io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; + + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + /* + * Set up IO-APIC IRQ routing. 
+ */ + #ifndef CONFIG_XEN +-#ifdef CONFIG_X86_32 +- if (!acpi_ioapic) +- setup_ioapic_ids_from_mpc(); +-#endif ++ x86_init.mpparse.setup_ioapic_ids(); ++ + sync_Arb_IDs(); + #endif + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); +- check_timer(); ++ if (nr_legacy_irqs) ++ check_timer(); + } + + /* +@@ -3274,7 +3238,6 @@ static int __init ioapic_init_sysfs(void + + device_initcall(ioapic_init_sysfs); + +-static int nr_irqs_gsi = NR_IRQS_LEGACY; + /* + * Dynamic irq allocate and deallocation + */ +@@ -3346,8 +3309,7 @@ void destroy_irq(unsigned int irq) + cfg = desc->chip_data; + dynamic_irq_cleanup(irq); + /* connect back irq_cfg */ +- if (desc) +- desc->chip_data = cfg; ++ desc->chip_data = cfg; + + free_irte(irq); + spin_lock_irqsave(&vector_lock, flags); +@@ -4025,9 +3987,13 @@ static int __io_apic_set_pci_routing(str + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ +- if (irq >= NR_IRQS_LEGACY) { ++ if (irq >= nr_legacy_irqs) { + cfg = desc->chip_data; +- add_pin_to_irq_node(cfg, node, ioapic, pin); ++ if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { ++ printk(KERN_INFO "can not add pin %d for irq %d\n", ++ pin, irq); ++ return 0; ++ } + } + + setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); +@@ -4056,11 +4022,28 @@ int io_apic_set_pci_routing(struct devic + return __io_apic_set_pci_routing(dev, irq, irq_attr); + } + +-/* -------------------------------------------------------------------------- +- ACPI-based IOAPIC Configuration +- -------------------------------------------------------------------------- */ ++u8 __init io_apic_unique_id(u8 id) ++{ ++#ifdef CONFIG_X86_32 ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ return io_apic_get_unique_id(nr_ioapics, id); ++ else ++ return id; ++#else ++ int i; ++ DECLARE_BITMAP(used, 256); + +-#ifdef CONFIG_ACPI ++ bitmap_zero(used, 256); ++ for (i = 0; i < nr_ioapics; i++) { ++ struct mpc_ioapic *ia = &mp_ioapics[i]; ++ __set_bit(ia->apicid, used); ++ } ++ if (!test_bit(id, used)) ++ return id; ++ return find_first_zero_bit(used, 256); ++#endif ++} + + #ifdef CONFIG_X86_32 + int __init io_apic_get_unique_id(int ioapic, int apic_id) +@@ -4171,8 +4154,6 @@ int acpi_get_override_irq(int bus_irq, i + return 0; + } + +-#endif /* CONFIG_ACPI */ +- + #ifndef CONFIG_XEN + /* + * This function currently is only a helper for the i386 smp boot process where +@@ -4227,7 +4208,7 @@ void __init setup_ioapic_dest(void) + + static struct resource *ioapic_resources; + +-static struct resource * __init ioapic_setup_resources(void) ++static struct resource * __init ioapic_setup_resources(int nr_ioapics) + { + unsigned long n; + struct resource *res; +@@ -4243,15 +4224,13 @@ static struct resource * __init ioapic_s + mem = alloc_bootmem(n); + res = (void *)mem; + +- if (mem != NULL) { +- mem += sizeof(struct resource) * nr_ioapics; ++ mem += sizeof(struct resource) * nr_ioapics; + +- for (i = 0; i < nr_ioapics; i++) { +- res[i].name = mem; +- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; +- sprintf(mem, "IOAPIC %u", i); +- mem += IOAPIC_RESOURCE_NAME_SIZE; +- } ++ for (i = 0; i < nr_ioapics; i++) { ++ res[i].name = mem; ++ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ sprintf(mem, "IOAPIC %u", i); ++ mem += IOAPIC_RESOURCE_NAME_SIZE; + } + + ioapic_resources = res; +@@ -4265,7 +4244,7 @@ void __init ioapic_init_mappings(void) + struct resource *ioapic_res; + int i; + +- ioapic_res = ioapic_setup_resources(); ++ ioapic_res = 
ioapic_setup_resources(nr_ioapics); + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].apicaddr; +@@ -4294,11 +4273,9 @@ fake_ioapic_page: + __fix_to_virt(idx), ioapic_phys); + idx++; + +- if (ioapic_res != NULL) { +- ioapic_res->start = ioapic_phys; +- ioapic_res->end = ioapic_phys + (4 * 1024) - 1; +- ioapic_res++; +- } ++ ioapic_res->start = ioapic_phys; ++ ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ++ ioapic_res++; + } + } + +@@ -4320,3 +4297,78 @@ void __init ioapic_insert_resources(void + } + } + #endif /* !CONFIG_XEN */ ++ ++int mp_find_ioapic(int gsi) ++{ ++ int i = 0; ++ ++ /* Find the IOAPIC that manages this GSI. */ ++ for (i = 0; i < nr_ioapics; i++) { ++ if ((gsi >= mp_gsi_routing[i].gsi_base) ++ && (gsi <= mp_gsi_routing[i].gsi_end)) ++ return i; ++ } ++ ++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); ++ return -1; ++} ++ ++int mp_find_ioapic_pin(int ioapic, int gsi) ++{ ++ if (WARN_ON(ioapic == -1)) ++ return -1; ++ if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) ++ return -1; ++ ++ return gsi - mp_gsi_routing[ioapic].gsi_base; ++} ++ ++static int bad_ioapic(unsigned long address) ++{ ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " ++ "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); ++ return 1; ++ } ++ if (!address) { ++ printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" ++ " found in table, skipping!\n"); ++ return 1; ++ } ++ return 0; ++} ++ ++void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) ++{ ++ int idx = 0; ++ ++ if (bad_ioapic(address)) ++ return; ++ ++ idx = nr_ioapics; ++ ++ mp_ioapics[idx].type = MP_IOAPIC; ++ mp_ioapics[idx].flags = MPC_APIC_USABLE; ++ mp_ioapics[idx].apicaddr = address; ++ ++#ifndef CONFIG_XEN ++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); ++#endif ++ mp_ioapics[idx].apicid = io_apic_unique_id(id); ++ mp_ioapics[idx].apicver = io_apic_get_version(idx); ++ ++ /* ++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups ++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
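
The mp_find_ioapic()/mp_find_ioapic_pin() helpers added above are nothing more than a range scan over this gsi_base/gsi_end table, with the pin falling out as an offset from the matching base. A self-contained two-IOAPIC example of the lookup:

    #include <stdio.h>

    struct mp_ioapic_gsi { int gsi_base, gsi_end; };

    static struct mp_ioapic_gsi mp_gsi_routing[] = {
        { 0, 23 },   /* IOAPIC 0 serves GSIs 0-23 */
        { 24, 47 },  /* IOAPIC 1 serves GSIs 24-47 */
    };
    #define NR_IOAPICS \
        (int)(sizeof(mp_gsi_routing) / sizeof(mp_gsi_routing[0]))

    static int mp_find_ioapic(int gsi)
    {
        int i;

        for (i = 0; i < NR_IOAPICS; i++)
            if (gsi >= mp_gsi_routing[i].gsi_base &&
                gsi <= mp_gsi_routing[i].gsi_end)
                return i;
        return -1;
    }

    static int mp_find_ioapic_pin(int ioapic, int gsi)
    {
        if (ioapic == -1)
            return -1;
        return gsi - mp_gsi_routing[ioapic].gsi_base;
    }

    int main(void)
    {
        int gsi = 29;
        int apic = mp_find_ioapic(gsi);

        printf("GSI %d -> IOAPIC %d pin %d\n",
               gsi, apic, mp_find_ioapic_pin(apic, gsi)); /* IOAPIC 1, pin 5 */
        return 0;
    }
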
++ */ ++ mp_gsi_routing[idx].gsi_base = gsi_base; ++ mp_gsi_routing[idx].gsi_end = gsi_base + ++ io_apic_get_redir_entries(idx); ++ ++ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " ++ "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, ++ mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, ++ mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); ++ ++ nr_ioapics++; ++} +--- head-2010-01-18.orig/arch/x86/kernel/cpu/Makefile 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/Makefile 2009-11-06 10:52:22.000000000 +0100 +@@ -35,7 +35,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ + + obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + +-disabled-obj-$(CONFIG_XEN) := hypervisor.o vmware.o ++disabled-obj-$(CONFIG_XEN) := hypervisor.o vmware.o sched.o + + quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ +--- head-2010-01-18.orig/arch/x86/kernel/cpu/amd.c 2010-01-18 16:53:52.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/amd.c 2010-01-18 16:55:14.000000000 +0100 +@@ -336,7 +336,7 @@ static void __cpuinit amd_detect_cmp(str + int amd_get_nb_id(int cpu) + { + int id = 0; +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) + id = per_cpu(cpu_llc_id, cpu); + #endif + return id; +@@ -492,8 +492,10 @@ static void __cpuinit init_amd(struct cp + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + ++#ifndef CONFIG_XEN + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); ++#endif + #else + + /* +--- head-2010-01-18.orig/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/common-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -13,13 +13,13 @@ + #include + + #include +-#include ++#include + #include + #include + #include + #include +-#include +-#include ++#include ++#include + #include + #include + #include +@@ -28,13 +28,12 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include + #include + #include +-#include + + #ifdef CONFIG_X86_LOCAL_APIC + #include +@@ -102,17 +101,17 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p + * TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) 
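
The hunk just below rewrites each open-coded descriptor pair as GDT_ENTRY_INIT(flags, base, limit). Expanding the macro (its definition reproduced here from the 2.6.32 desc_defs.h; treat the reproduction as an assumption) shows the encodings are bit-for-bit identical, e.g. for the 64-bit kernel code and data segments:

    #include <stdio.h>
    #include <inttypes.h>

    /* GDT_ENTRY_INIT(flags, base, limit) expanded to the raw
     * descriptor words. */
    static void gdt_entry_init(uint16_t flags, uint32_t base, uint32_t limit,
                               uint32_t *a, uint32_t *b)
    {
        *a = (limit & 0xffff) | ((base & 0xffff) << 16);
        *b = ((base & 0xff0000) >> 16) |
             ((uint32_t)(flags & 0xf0ff) << 8) |
             (limit & 0xf0000) |
             (base & 0xff000000);
    }

    int main(void)
    {
        uint32_t a, b;

        gdt_entry_init(0xa09b, 0, 0xfffff, &a, &b);
        printf("KERNEL_CS: %08" PRIx32 " %08" PRIx32 "\n", a, b);
        /* -> 0000ffff 00af9b00, matching the old initializer */

        gdt_entry_init(0xc093, 0, 0xfffff, &a, &b);
        printf("KERNEL_DS: %08" PRIx32 " %08" PRIx32 "\n", a, b);
        /* -> 0000ffff 00cf9300 */
        return 0;
    }
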
+ */ +- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, +- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, ++ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), + #else +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), + #ifndef CONFIG_XEN + /* + * Segments used for calling PnP BIOS have byte granularity. +@@ -120,29 +119,29 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ +- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, ++ [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), + /* 16-bit code */ +- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, ++ [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. 
+ */ + /* 32-bit code */ +- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, ++ [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), + /* 16-bit code */ +- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, ++ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), + /* data */ +- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, ++ [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), + +- [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, ++ [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + #endif +- [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, ++ [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + GDT_STACK_CANARY_INIT + #endif + } }; +@@ -900,7 +899,7 @@ void __init identify_boot_cpu(void) + #else + vgetcpu_set_mode(); + #endif +- init_hw_perf_counters(); ++ init_hw_perf_events(); + } + + void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +@@ -1013,7 +1012,7 @@ __setup("clearcpuid=", setup_disablecpui + + #ifdef CONFIG_X86_64 + #ifndef CONFIG_X86_NO_IDT +-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; ++struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; + #endif + + DEFINE_PER_CPU_FIRST(union irq_stack_union, +@@ -1027,13 +1026,21 @@ void xen_switch_pt(void) + #endif + } + +-DEFINE_PER_CPU(char *, irq_stack_ptr) = +- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; ++/* ++ * The following four percpu variables are hot. Align current_task to ++ * cacheline size such that all four fall in the same cacheline. ++ */ ++DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = ++ &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); + + DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; + EXPORT_PER_CPU_SYMBOL(kernel_stack); + ++DEFINE_PER_CPU(char *, irq_stack_ptr) = ++ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; ++ + DEFINE_PER_CPU(unsigned int, irq_count) = -1; + + #ifndef CONFIG_X86_NO_TSS +@@ -1049,8 +1056,7 @@ static const unsigned int exception_stac + }; + + static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks +- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) +- __aligned(PAGE_SIZE); ++ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); + #endif + + void __cpuinit syscall_init(void) +@@ -1097,8 +1103,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist + + #else /* CONFIG_X86_64 */ + ++DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++ + #ifdef CONFIG_CC_STACKPROTECTOR +-DEFINE_PER_CPU(unsigned long, stack_canary); ++DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); + #endif + + /* Make sure %fs and %gs are initialized properly in idle threads */ +--- head-2010-01-18.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2010-01-18 15:20:20.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/mcheck/mce-inject.c 2009-11-06 10:52:22.000000000 +0100 +@@ -143,7 +143,7 @@ static void raise_mce(struct mce *m) + if (context == MCJ_CTX_RANDOM) + return; + +-#ifdef CONFIG_X86_LOCAL_APIC ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) + if (m->inject_flags & MCJ_NMI_BROADCAST) { + unsigned long start; + int cpu; +--- head-2010-01-18.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -1,10 
+1,9 @@ +-#include +-#include +-#include ++#define DEBUG ++ ++#include + #include +-#include +-#include + #include ++#include + + #include + #include "mtrr.h" +@@ -58,7 +57,7 @@ static void __init init_table(void) + mtrr_usage_table[i] = 0; + } + +-int mtrr_add_page(unsigned long base, unsigned long size, ++int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, bool increment) + { + int error; +@@ -88,25 +87,23 @@ int mtrr_add_page(unsigned long base, un + static int mtrr_check(unsigned long base, unsigned long size) + { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { +- printk(KERN_WARNING +- "mtrr: size and base must be multiples of 4 kiB\n"); +- printk(KERN_DEBUG +- "mtrr: size: 0x%lx base: 0x%lx\n", size, base); ++ pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); ++ pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; + } + +-int +-mtrr_add(unsigned long base, unsigned long size, unsigned int type, +- bool increment) ++int mtrr_add(unsigned long base, unsigned long size, unsigned int type, ++ bool increment) + { + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, + increment); + } ++EXPORT_SYMBOL(mtrr_add); + + int mtrr_del_page(int reg, unsigned long base, unsigned long size) + { +@@ -128,13 +125,13 @@ int mtrr_del_page(int reg, unsigned long + } + } + if (reg < 0) { +- printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, +- size); ++ pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", ++ base, size); + goto out; + } + } + if (mtrr_usage_table[reg] < 1) { +- printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); ++ pr_warning("mtrr: reg: %d has count=0\n", reg); + goto out; + } + if (--mtrr_usage_table[reg] < 1) { +@@ -153,15 +150,12 @@ int mtrr_del_page(int reg, unsigned long + return error; + } + +-int +-mtrr_del(int reg, unsigned long base, unsigned long size) ++int mtrr_del(int reg, unsigned long base, unsigned long size) + { + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); + } +- +-EXPORT_SYMBOL(mtrr_add); + EXPORT_SYMBOL(mtrr_del); + + /* +--- head-2010-01-18.orig/arch/x86/kernel/e820-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/e820-xen.c 2009-12-04 11:31:40.000000000 +0100 +@@ -134,7 +134,7 @@ static void __init __e820_add_region(str + { + int x = e820x->nr_map; + +- if (x == ARRAY_SIZE(e820x->map)) { ++ if (x >= ARRAY_SIZE(e820x->map)) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } +@@ -1455,7 +1455,7 @@ void __init e820_reserve_resources(void) + struct resource *res; + u64 end; + +- res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); ++ res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); + e820_res = res; + for (i = 0; i < e820.nr_map; i++) { + end = e820.map[i].addr + e820.map[i].size - 1; +@@ -1502,8 +1502,8 @@ static unsigned long ram_alignment(resou + if (mb < 16) + return 1024*1024; + +- /* To 32MB for anything above that */ +- return 32*1024*1024; ++ /* To 64MB for anything above that */ ++ return 64*1024*1024; + } + + #define MAX_RESOURCE_SIZE ((resource_size_t)-1) +@@ -1543,59 +1543,8 @@ void __init e820_reserve_resources_late( + + #undef e820 + +-#ifndef CONFIG_XEN + char *__init default_machine_specific_memory_setup(void) + { +- char *who = "BIOS-e820"; +- u32 new_nr; +- /* +- * Try to copy the BIOS-supplied E820-map. 
+- * +- * Otherwise fake a memory map; one section from 0k->640k, +- * the next section from 1mb->appropriate_mem_k +- */ +- new_nr = boot_params.e820_entries; +- sanitize_e820_map(boot_params.e820_map, +- ARRAY_SIZE(boot_params.e820_map), +- &new_nr); +- boot_params.e820_entries = new_nr; +- if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) +- < 0) { +- u64 mem_size; +- +- /* compare results from other methods and take the greater */ +- if (boot_params.alt_mem_k +- < boot_params.screen_info.ext_mem_k) { +- mem_size = boot_params.screen_info.ext_mem_k; +- who = "BIOS-88"; +- } else { +- mem_size = boot_params.alt_mem_k; +- who = "BIOS-e801"; +- } +- +- e820.nr_map = 0; +- e820_add_region(0, LOWMEMSIZE(), E820_RAM); +- e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); +- } +- +- /* In case someone cares... */ +- return who; +-} +- +-char *__init __attribute__((weak)) machine_specific_memory_setup(void) +-{ +- if (x86_quirks->arch_memory_setup) { +- char *who = x86_quirks->arch_memory_setup(); +- +- if (who) +- return who; +- } +- return default_machine_specific_memory_setup(); +-} +-#endif +- +-static char * __init _memory_setup(void) +-{ + int rc, nr_map; + struct xen_memory_map memmap; + static struct e820entry __initdata map[E820MAX]; +@@ -1639,7 +1588,7 @@ void __init setup_memory_map(void) + { + char *who; + +- who = _memory_setup(); ++ who = x86_init.resources.memory_setup(); + #ifdef CONFIG_XEN + if (is_initial_xendomain()) { + printk(KERN_INFO "Xen-provided machine memory map:\n"); +--- head-2010-01-18.orig/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/early_printk-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -178,7 +178,6 @@ static __init void early_serial_init(cha + * mappings. Someone should fix this for domain 0. For now, use fake serial. 
+ */ + #define early_vga_console early_serial_console +-#define xenboot_console early_serial_console + + #endif + +@@ -189,721 +188,6 @@ static struct console early_serial_conso + .index = -1, + }; + +-#ifdef CONFIG_EARLY_PRINTK_DBGP +- +-static struct ehci_caps __iomem *ehci_caps; +-static struct ehci_regs __iomem *ehci_regs; +-static struct ehci_dbg_port __iomem *ehci_debug; +-static unsigned int dbgp_endpoint_out; +- +-struct ehci_dev { +- u32 bus; +- u32 slot; +- u32 func; +-}; +- +-static struct ehci_dev ehci_dev; +- +-#define USB_DEBUG_DEVNUM 127 +- +-#define DBGP_DATA_TOGGLE 0x8800 +- +-static inline u32 dbgp_pid_update(u32 x, u32 tok) +-{ +- return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); +-} +- +-static inline u32 dbgp_len_update(u32 x, u32 len) +-{ +- return (x & ~0x0f) | (len & 0x0f); +-} +- +-/* +- * USB Packet IDs (PIDs) +- */ +- +-/* token */ +-#define USB_PID_OUT 0xe1 +-#define USB_PID_IN 0x69 +-#define USB_PID_SOF 0xa5 +-#define USB_PID_SETUP 0x2d +-/* handshake */ +-#define USB_PID_ACK 0xd2 +-#define USB_PID_NAK 0x5a +-#define USB_PID_STALL 0x1e +-#define USB_PID_NYET 0x96 +-/* data */ +-#define USB_PID_DATA0 0xc3 +-#define USB_PID_DATA1 0x4b +-#define USB_PID_DATA2 0x87 +-#define USB_PID_MDATA 0x0f +-/* Special */ +-#define USB_PID_PREAMBLE 0x3c +-#define USB_PID_ERR 0x3c +-#define USB_PID_SPLIT 0x78 +-#define USB_PID_PING 0xb4 +-#define USB_PID_UNDEF_0 0xf0 +- +-#define USB_PID_DATA_TOGGLE 0x88 +-#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) +- +-#define PCI_CAP_ID_EHCI_DEBUG 0xa +- +-#define HUB_ROOT_RESET_TIME 50 /* times are in msec */ +-#define HUB_SHORT_RESET_TIME 10 +-#define HUB_LONG_RESET_TIME 200 +-#define HUB_RESET_TIMEOUT 500 +- +-#define DBGP_MAX_PACKET 8 +- +-static int dbgp_wait_until_complete(void) +-{ +- u32 ctrl; +- int loop = 0x100000; +- +- do { +- ctrl = readl(&ehci_debug->control); +- /* Stop when the transaction is finished */ +- if (ctrl & DBGP_DONE) +- break; +- } while (--loop > 0); +- +- if (!loop) +- return -1; +- +- /* +- * Now that we have observed the completed transaction, +- * clear the done bit. +- */ +- writel(ctrl | DBGP_DONE, &ehci_debug->control); +- return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); +-} +- +-static void __init dbgp_mdelay(int ms) +-{ +- int i; +- +- while (ms--) { +- for (i = 0; i < 1000; i++) +- outb(0x1, 0x80); +- } +-} +- +-static void dbgp_breath(void) +-{ +- /* Sleep to give the debug port a chance to breathe */ +-} +- +-static int dbgp_wait_until_done(unsigned ctrl) +-{ +- u32 pids, lpid; +- int ret; +- int loop = 3; +- +-retry: +- writel(ctrl | DBGP_GO, &ehci_debug->control); +- ret = dbgp_wait_until_complete(); +- pids = readl(&ehci_debug->pids); +- lpid = DBGP_PID_GET(pids); +- +- if (ret < 0) +- return ret; +- +- /* +- * If the port is getting full or it has dropped data +- * start pacing ourselves, not necessary but it's friendly. 
+- */ +- if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) +- dbgp_breath(); +- +- /* If I get a NACK reissue the transmission */ +- if (lpid == USB_PID_NAK) { +- if (--loop > 0) +- goto retry; +- } +- +- return ret; +-} +- +-static void dbgp_set_data(const void *buf, int size) +-{ +- const unsigned char *bytes = buf; +- u32 lo, hi; +- int i; +- +- lo = hi = 0; +- for (i = 0; i < 4 && i < size; i++) +- lo |= bytes[i] << (8*i); +- for (; i < 8 && i < size; i++) +- hi |= bytes[i] << (8*(i - 4)); +- writel(lo, &ehci_debug->data03); +- writel(hi, &ehci_debug->data47); +-} +- +-static void __init dbgp_get_data(void *buf, int size) +-{ +- unsigned char *bytes = buf; +- u32 lo, hi; +- int i; +- +- lo = readl(&ehci_debug->data03); +- hi = readl(&ehci_debug->data47); +- for (i = 0; i < 4 && i < size; i++) +- bytes[i] = (lo >> (8*i)) & 0xff; +- for (; i < 8 && i < size; i++) +- bytes[i] = (hi >> (8*(i - 4))) & 0xff; +-} +- +-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, +- const char *bytes, int size) +-{ +- u32 pids, addr, ctrl; +- int ret; +- +- if (size > DBGP_MAX_PACKET) +- return -1; +- +- addr = DBGP_EPADDR(devnum, endpoint); +- +- pids = readl(&ehci_debug->pids); +- pids = dbgp_pid_update(pids, USB_PID_OUT); +- +- ctrl = readl(&ehci_debug->control); +- ctrl = dbgp_len_update(ctrl, size); +- ctrl |= DBGP_OUT; +- ctrl |= DBGP_GO; +- +- dbgp_set_data(bytes, size); +- writel(addr, &ehci_debug->address); +- writel(pids, &ehci_debug->pids); +- +- ret = dbgp_wait_until_done(ctrl); +- if (ret < 0) +- return ret; +- +- return ret; +-} +- +-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, +- int size) +-{ +- u32 pids, addr, ctrl; +- int ret; +- +- if (size > DBGP_MAX_PACKET) +- return -1; +- +- addr = DBGP_EPADDR(devnum, endpoint); +- +- pids = readl(&ehci_debug->pids); +- pids = dbgp_pid_update(pids, USB_PID_IN); +- +- ctrl = readl(&ehci_debug->control); +- ctrl = dbgp_len_update(ctrl, size); +- ctrl &= ~DBGP_OUT; +- ctrl |= DBGP_GO; +- +- writel(addr, &ehci_debug->address); +- writel(pids, &ehci_debug->pids); +- ret = dbgp_wait_until_done(ctrl); +- if (ret < 0) +- return ret; +- +- if (size > ret) +- size = ret; +- dbgp_get_data(data, size); +- return ret; +-} +- +-static int __init dbgp_control_msg(unsigned devnum, int requesttype, +- int request, int value, int index, void *data, int size) +-{ +- u32 pids, addr, ctrl; +- struct usb_ctrlrequest req; +- int read; +- int ret; +- +- read = (requesttype & USB_DIR_IN) != 0; +- if (size > (read ? 
DBGP_MAX_PACKET:0)) +- return -1; +- +- /* Compute the control message */ +- req.bRequestType = requesttype; +- req.bRequest = request; +- req.wValue = cpu_to_le16(value); +- req.wIndex = cpu_to_le16(index); +- req.wLength = cpu_to_le16(size); +- +- pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); +- addr = DBGP_EPADDR(devnum, 0); +- +- ctrl = readl(&ehci_debug->control); +- ctrl = dbgp_len_update(ctrl, sizeof(req)); +- ctrl |= DBGP_OUT; +- ctrl |= DBGP_GO; +- +- /* Send the setup message */ +- dbgp_set_data(&req, sizeof(req)); +- writel(addr, &ehci_debug->address); +- writel(pids, &ehci_debug->pids); +- ret = dbgp_wait_until_done(ctrl); +- if (ret < 0) +- return ret; +- +- /* Read the result */ +- return dbgp_bulk_read(devnum, 0, data, size); +-} +- +- +-/* Find a PCI capability */ +-static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) +-{ +- u8 pos; +- int bytes; +- +- if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & +- PCI_STATUS_CAP_LIST)) +- return 0; +- +- pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); +- for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { +- u8 id; +- +- pos &= ~3; +- id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); +- if (id == 0xff) +- break; +- if (id == cap) +- return pos; +- +- pos = read_pci_config_byte(num, slot, func, +- pos+PCI_CAP_LIST_NEXT); +- } +- return 0; +-} +- +-static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) +-{ +- u32 class; +- +- class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); +- if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) +- return 0; +- +- return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); +-} +- +-static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) +-{ +- u32 bus, slot, func; +- +- for (bus = 0; bus < 256; bus++) { +- for (slot = 0; slot < 32; slot++) { +- for (func = 0; func < 8; func++) { +- unsigned cap; +- +- cap = __find_dbgp(bus, slot, func); +- +- if (!cap) +- continue; +- if (ehci_num-- != 0) +- continue; +- *rbus = bus; +- *rslot = slot; +- *rfunc = func; +- return cap; +- } +- } +- } +- return 0; +-} +- +-static int __init ehci_reset_port(int port) +-{ +- u32 portsc; +- u32 delay_time, delay; +- int loop; +- +- /* Reset the usb debug port */ +- portsc = readl(&ehci_regs->port_status[port - 1]); +- portsc &= ~PORT_PE; +- portsc |= PORT_RESET; +- writel(portsc, &ehci_regs->port_status[port - 1]); +- +- delay = HUB_ROOT_RESET_TIME; +- for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; +- delay_time += delay) { +- dbgp_mdelay(delay); +- +- portsc = readl(&ehci_regs->port_status[port - 1]); +- if (portsc & PORT_RESET) { +- /* force reset to complete */ +- loop = 2; +- writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), +- &ehci_regs->port_status[port - 1]); +- do { +- portsc = readl(&ehci_regs->port_status[port-1]); +- } while ((portsc & PORT_RESET) && (--loop > 0)); +- } +- +- /* Device went away? 
*/ +- if (!(portsc & PORT_CONNECT)) +- return -ENOTCONN; +- +- /* bomb out completely if something weird happend */ +- if ((portsc & PORT_CSC)) +- return -EINVAL; +- +- /* If we've finished resetting, then break out of the loop */ +- if (!(portsc & PORT_RESET) && (portsc & PORT_PE)) +- return 0; +- } +- return -EBUSY; +-} +- +-static int __init ehci_wait_for_port(int port) +-{ +- u32 status; +- int ret, reps; +- +- for (reps = 0; reps < 3; reps++) { +- dbgp_mdelay(100); +- status = readl(&ehci_regs->status); +- if (status & STS_PCD) { +- ret = ehci_reset_port(port); +- if (ret == 0) +- return 0; +- } +- } +- return -ENOTCONN; +-} +- +-#ifdef DBGP_DEBUG +-# define dbgp_printk early_printk +-#else +-static inline void dbgp_printk(const char *fmt, ...) { } +-#endif +- +-typedef void (*set_debug_port_t)(int port); +- +-static void __init default_set_debug_port(int port) +-{ +-} +- +-static set_debug_port_t __initdata set_debug_port = default_set_debug_port; +- +-static void __init nvidia_set_debug_port(int port) +-{ +- u32 dword; +- dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, +- 0x74); +- dword &= ~(0x0f<<12); +- dword |= ((port & 0x0f)<<12); +- write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74, +- dword); +- dbgp_printk("set debug port to %d\n", port); +-} +- +-static void __init detect_set_debug_port(void) +-{ +- u32 vendorid; +- +- vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, +- 0x00); +- +- if ((vendorid & 0xffff) == 0x10de) { +- dbgp_printk("using nvidia set_debug_port\n"); +- set_debug_port = nvidia_set_debug_port; +- } +-} +- +-static int __init ehci_setup(void) +-{ +- struct usb_debug_descriptor dbgp_desc; +- u32 cmd, ctrl, status, portsc, hcs_params; +- u32 debug_port, new_debug_port = 0, n_ports; +- u32 devnum; +- int ret, i; +- int loop; +- int port_map_tried; +- int playtimes = 3; +- +-try_next_time: +- port_map_tried = 0; +- +-try_next_port: +- +- hcs_params = readl(&ehci_caps->hcs_params); +- debug_port = HCS_DEBUG_PORT(hcs_params); +- n_ports = HCS_N_PORTS(hcs_params); +- +- dbgp_printk("debug_port: %d\n", debug_port); +- dbgp_printk("n_ports: %d\n", n_ports); +- +- for (i = 1; i <= n_ports; i++) { +- portsc = readl(&ehci_regs->port_status[i-1]); +- dbgp_printk("portstatus%d: %08x\n", i, portsc); +- } +- +- if (port_map_tried && (new_debug_port != debug_port)) { +- if (--playtimes) { +- set_debug_port(new_debug_port); +- goto try_next_time; +- } +- return -1; +- } +- +- loop = 10; +- /* Reset the EHCI controller */ +- cmd = readl(&ehci_regs->command); +- cmd |= CMD_RESET; +- writel(cmd, &ehci_regs->command); +- do { +- cmd = readl(&ehci_regs->command); +- } while ((cmd & CMD_RESET) && (--loop > 0)); +- +- if (!loop) { +- dbgp_printk("can not reset ehci\n"); +- return -1; +- } +- dbgp_printk("ehci reset done\n"); +- +- /* Claim ownership, but do not enable yet */ +- ctrl = readl(&ehci_debug->control); +- ctrl |= DBGP_OWNER; +- ctrl &= ~(DBGP_ENABLED | DBGP_INUSE); +- writel(ctrl, &ehci_debug->control); +- +- /* Start the ehci running */ +- cmd = readl(&ehci_regs->command); +- cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET); +- cmd |= CMD_RUN; +- writel(cmd, &ehci_regs->command); +- +- /* Ensure everything is routed to the EHCI */ +- writel(FLAG_CF, &ehci_regs->configured_flag); +- +- /* Wait until the controller is no longer halted */ +- loop = 10; +- do { +- status = readl(&ehci_regs->status); +- } while ((status & STS_HALT) && (--loop > 0)); +- +- if (!loop) { +- dbgp_printk("ehci can be 
started\n");
+- return -1;
+- }
+- dbgp_printk("ehci started\n");
+-
+- /* Wait for a device to show up in the debug port */
+- ret = ehci_wait_for_port(debug_port);
+- if (ret < 0) {
+- dbgp_printk("No device found in debug port\n");
+- goto next_debug_port;
+- }
+- dbgp_printk("ehci wait for port done\n");
+-
+- /* Enable the debug port */
+- ctrl = readl(&ehci_debug->control);
+- ctrl |= DBGP_CLAIM;
+- writel(ctrl, &ehci_debug->control);
+- ctrl = readl(&ehci_debug->control);
+- if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
+- dbgp_printk("No device in debug port\n");
+- writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
+- goto err;
+- }
+- dbgp_printk("debug ported enabled\n");
+-
+- /* Completely transfer the debug device to the debug controller */
+- portsc = readl(&ehci_regs->port_status[debug_port - 1]);
+- portsc &= ~PORT_PE;
+- writel(portsc, &ehci_regs->port_status[debug_port - 1]);
+-
+- dbgp_mdelay(100);
+-
+- /* Find the debug device and make it device number 127 */
+- for (devnum = 0; devnum <= 127; devnum++) {
+- ret = dbgp_control_msg(devnum,
+- USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+- USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
+- &dbgp_desc, sizeof(dbgp_desc));
+- if (ret > 0)
+- break;
+- }
+- if (devnum > 127) {
+- dbgp_printk("Could not find attached debug device\n");
+- goto err;
+- }
+- if (ret < 0) {
+- dbgp_printk("Attached device is not a debug device\n");
+- goto err;
+- }
+- dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
+-
+- /* Move the device to 127 if it isn't already there */
+- if (devnum != USB_DEBUG_DEVNUM) {
+- ret = dbgp_control_msg(devnum,
+- USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+- USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
+- if (ret < 0) {
+- dbgp_printk("Could not move attached device to %d\n",
+- USB_DEBUG_DEVNUM);
+- goto err;
+- }
+- devnum = USB_DEBUG_DEVNUM;
+- dbgp_printk("debug device renamed to 127\n");
+- }
+-
+- /* Enable the debug interface */
+- ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
+- USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+- USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
+- if (ret < 0) {
+- dbgp_printk(" Could not enable the debug device\n");
+- goto err;
+- }
+- dbgp_printk("debug interface enabled\n");
+-
+- /* Perform a small write to get the even/odd data state in sync
+- */
+- ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
+- if (ret < 0) {
+- dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
+- goto err;
+- }
+- dbgp_printk("small write doned\n");
+-
+- return 0;
+-err:
+- /* Things didn't work so remove my claim */
+- ctrl = readl(&ehci_debug->control);
+- ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
+- writel(ctrl, &ehci_debug->control);
+- return -1;
+-
+-next_debug_port:
+- port_map_tried |= (1<<(debug_port - 1));
+- new_debug_port = ((debug_port-1+1)%n_ports) + 1;
+- if (port_map_tried != ((1<<n_ports) - 1)) {
+- set_debug_port(new_debug_port);
+- goto try_next_port;
+- }
+- if (--playtimes) {
+- set_debug_port(new_debug_port);
+- goto try_next_time;
+- }
+-
+- return -1;
+-}
+-
+-static int __init early_dbgp_init(char *s)
+-{
+- u32 debug_port, bar, offset;
+- u32 bus, slot, func, cap;
+- void __iomem *ehci_bar;
+- u32 dbgp_num;
+- u32 bar_val;
+- char *e;
+- int ret;
+- u8 byte;
+-
+- if (!early_pci_allowed())
+- return -1;
+-
+- dbgp_num = 0;
+- if (*s)
+- dbgp_num = simple_strtoul(s, &e, 10);
+- dbgp_printk("dbgp_num: %d\n", dbgp_num);
+-
+- cap = find_dbgp(dbgp_num, &bus, &slot, &func);
+- if (!cap)
+- return -1;
+-
+- dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
+- func);
+-
+- debug_port = read_pci_config(bus, slot, func, cap);
+- bar = (debug_port >> 29) & 0x7;
+- bar = (bar * 4) + 0xc;
+- offset = (debug_port >> 16) & 0xfff;
+- dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
+- if (bar != PCI_BASE_ADDRESS_0) {
+- dbgp_printk("only debug ports on bar 1 handled.\n");
+-
+- return -1;
+- }
+-
+- bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+- dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
+- if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
+- dbgp_printk("only simple 32bit mmio bars supported\n");
+-
+- return -1;
+- }
+-
+- /* double check if the mem space is enabled */
+- byte = read_pci_config_byte(bus, slot, func, 0x04);
+- if (!(byte & 0x2)) {
+- byte |= 0x02;
+- 
write_pci_config_byte(bus, slot, func, 0x04, byte); +- dbgp_printk("mmio for ehci enabled\n"); +- } +- +- /* +- * FIXME I don't have the bar size so just guess PAGE_SIZE is more +- * than enough. 1K is the biggest I have seen. +- */ +- set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); +- ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); +- ehci_bar += bar_val & ~PAGE_MASK; +- dbgp_printk("ehci_bar: %p\n", ehci_bar); +- +- ehci_caps = ehci_bar; +- ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); +- ehci_debug = ehci_bar + offset; +- ehci_dev.bus = bus; +- ehci_dev.slot = slot; +- ehci_dev.func = func; +- +- detect_set_debug_port(); +- +- ret = ehci_setup(); +- if (ret < 0) { +- dbgp_printk("ehci_setup failed\n"); +- ehci_debug = NULL; +- +- return -1; +- } +- +- return 0; +-} +- +-static void early_dbgp_write(struct console *con, const char *str, u32 n) +-{ +- int chunk, ret; +- +- if (!ehci_debug) +- return; +- while (n > 0) { +- chunk = n; +- if (chunk > DBGP_MAX_PACKET) +- chunk = DBGP_MAX_PACKET; +- ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, +- dbgp_endpoint_out, str, chunk); +- str += chunk; +- n -= chunk; +- } +-} +- +-static struct console early_dbgp_console = { +- .name = "earlydbg", +- .write = early_dbgp_write, +- .flags = CON_PRINTBUFFER, +- .index = -1, +-}; +-#endif +- + /* Direct interface for emergencies */ + static struct console *early_console = &early_vga_console; + static int __initdata early_console_initialized; +@@ -920,10 +204,24 @@ asmlinkage void early_printk(const char + va_end(ap); + } + ++static inline void early_console_register(struct console *con, int keep_early) ++{ ++ if (early_console->index != -1) { ++ printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", ++ con->name); ++ return; ++ } ++ early_console = con; ++ if (keep_early) ++ early_console->flags &= ~CON_BOOT; ++ else ++ early_console->flags |= CON_BOOT; ++ register_console(early_console); ++} + + static int __init setup_early_printk(char *buf) + { +- int keep_early; ++ int keep; + + if (!buf) + return 0; +@@ -932,44 +230,41 @@ static int __init setup_early_printk(cha + return 0; + early_console_initialized = 1; + +- keep_early = (strstr(buf, "keep") != NULL); ++ keep = (strstr(buf, "keep") != NULL); + +- if (!strncmp(buf, "serial", 6)) { +- early_serial_init(buf + 6); +- early_console = &early_serial_console; +- } else if (!strncmp(buf, "ttyS", 4)) { +- early_serial_init(buf); +- early_console = &early_serial_console; +- } else if (!strncmp(buf, "vga", 3)) { ++ while (*buf != '\0') { ++ if (!strncmp(buf, "serial", 6)) { ++ buf += 6; ++ early_serial_init(buf); ++ early_console_register(&early_serial_console, keep); ++ if (!strncmp(buf, ",ttyS", 5)) ++ buf += 5; ++ } ++ if (!strncmp(buf, "ttyS", 4)) { ++ early_serial_init(buf + 4); ++ early_console_register(&early_serial_console, keep); ++ } + #ifndef CONFIG_XEN +- && boot_params.screen_info.orig_video_isVGA == 1) { +- max_xpos = boot_params.screen_info.orig_video_cols; +- max_ypos = boot_params.screen_info.orig_video_lines; +- current_ypos = boot_params.screen_info.orig_y; ++ if (!strncmp(buf, "vga", 3) && ++ boot_params.screen_info.orig_video_isVGA == 1) { ++ max_xpos = boot_params.screen_info.orig_video_cols; ++ max_ypos = boot_params.screen_info.orig_video_lines; ++ current_ypos = boot_params.screen_info.orig_y; ++#else ++ if (!strncmp(buf, "vga", 3) || !strncmp(buf, "xen", 3)) { + #endif +- early_console = &early_vga_console; ++ early_console_register(&early_vga_console, keep); ++ } + #ifdef CONFIG_EARLY_PRINTK_DBGP +- 
} else if (!strncmp(buf, "dbgp", 4)) { +- if (early_dbgp_init(buf+4) < 0) +- return 0; +- early_console = &early_dbgp_console; +- /* +- * usb subsys will reset ehci controller, so don't keep +- * that early console +- */ +- keep_early = 0; ++ if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4)) ++ early_console_register(&early_dbgp_console, keep); + #endif +-#ifdef CONFIG_XEN +- } else if (!strncmp(buf, "xen", 3)) { +- early_console = &xenboot_console; ++#ifdef CONFIG_HVC_XEN ++ if (!strncmp(buf, "xen", 3)) ++ early_console_register(&xenboot_console, keep); + #endif ++ buf++; + } +- +- if (keep_early) +- early_console->flags &= ~CON_BOOT; +- else +- early_console->flags |= CON_BOOT; +- register_console(early_console); + return 0; + } + +--- head-2010-01-18.orig/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/entry_64-xen.S 2009-11-06 10:52:22.000000000 +0100 +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -150,7 +151,7 @@ ENTRY(ftrace_graph_caller) + END(ftrace_graph_caller) + + GLOBAL(return_to_handler) +- subq $80, %rsp ++ subq $24, %rsp + + /* Save the return values */ + movq %rax, (%rsp) +@@ -159,10 +160,10 @@ GLOBAL(return_to_handler) + + call ftrace_return_to_handler + +- movq %rax, 72(%rsp) ++ movq %rax, 16(%rsp) + movq 8(%rsp), %rdx + movq (%rsp), %rax +- addq $72, %rsp ++ addq $16, %rsp + retq + #endif + +@@ -553,20 +554,13 @@ sysret_signal: + bt $TIF_SYSCALL_AUDIT,%edx + jc sysret_audit + #endif +- /* edx: work flags (arg3) */ +- leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 +- xorl %esi,%esi # oldset -> arg2 +- SAVE_REST +- FIXUP_TOP_OF_STACK %r11 +- call do_notify_resume +- RESTORE_TOP_OF_STACK %r11 +- RESTORE_REST +- movl $_TIF_WORK_MASK,%edi +- /* Use IRET because user could have changed frame. This +- works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- jmp int_with_check ++ /* ++ * We have a signal, or exit tracing or single-step. ++ * These all wind up with the iret return path anyway, ++ * so just join that path right now. 
++ */ ++ FIXUP_TOP_OF_STACK %r11, -ARGOFFSET ++ jmp int_check_syscall_exit_work + + badsys: + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) +@@ -675,6 +669,7 @@ int_careful: + int_very_careful: + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) ++int_check_syscall_exit_work: + SAVE_REST + /* Check for syscall exit trace */ + testl $_TIF_WORK_SYSCALL_EXIT,%edx +@@ -921,7 +916,7 @@ apicinterrupt ERROR_APIC_VECTOR \ + apicinterrupt SPURIOUS_APIC_VECTOR \ + spurious_interrupt smp_spurious_interrupt + +-#ifdef CONFIG_PERF_COUNTERS ++#ifdef CONFIG_PERF_EVENTS + apicinterrupt LOCAL_PENDING_VECTOR \ + perf_pending_interrupt smp_perf_pending_interrupt + #endif +--- head-2010-01-18.orig/arch/x86/kernel/head-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -59,7 +59,6 @@ void __init reserve_ebda_region(void) + #include + #include + #include +-#include + #include + #include + +@@ -163,7 +162,7 @@ void __init xen_start_kernel(void) + + } + +-void __init machine_specific_arch_setup(void) ++void __init xen_arch_setup(void) + { + int ret; + static const struct callback_register __initconst event = { +--- head-2010-01-18.orig/arch/x86/kernel/head32-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head32-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -9,11 +9,26 @@ + #include + + #include +-#include + #include + #include +-#include ++#include + #include ++#include ++#include ++#include ++ ++static void __init i386_default_early_setup(void) ++{ ++ /* Initialize 32bit specific setup functions */ ++ if (is_initial_xendomain()) ++ x86_init.resources.probe_roms = probe_roms; ++ x86_init.resources.reserve_resources = i386_reserve_resources; ++#ifndef CONFIG_XEN ++ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; ++ ++ reserve_ebda_region(); ++#endif ++} + + void __init i386_start_kernel(void) + { +@@ -31,7 +46,16 @@ void __init i386_start_kernel(void) + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } + #endif +- reserve_ebda_region(); ++ ++ /* Call the subarch specific early setup function */ ++ switch (boot_params.hdr.hardware_subarch) { ++ case X86_SUBARCH_MRST: ++ x86_mrst_early_setup(); ++ break; ++ default: ++ i386_default_early_setup(); ++ break; ++ } + #else + { + int max_cmdline; +@@ -42,6 +66,7 @@ void __init i386_start_kernel(void) + boot_command_line[max_cmdline-1] = '\0'; + } + ++ i386_default_early_setup(); + xen_start_kernel(); + #endif + +--- head-2010-01-18.orig/arch/x86/kernel/head64-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head64-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -20,15 +20,14 @@ + #include + #include + #include +-#include + #include + #include + #include + #include + #include + #include +-#include + #include ++#include + + #ifndef CONFIG_XEN + static void __init zap_identity_mappings(void) +--- head-2010-01-18.orig/arch/x86/kernel/head_32-xen.S 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head_32-xen.S 2009-11-06 10:52:22.000000000 +0100 +@@ -30,7 +30,7 @@ + #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability + #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id + +-.section .text.head,"ax",@progbits ++__HEAD + #define VIRT_ENTRY_OFFSET 0x0 + .org VIRT_ENTRY_OFFSET + ENTRY(startup_32) +@@ -69,7 +69,6 @@ ENTRY(startup_32) + */ + movl $per_cpu__gdt_page,%eax + movl $per_cpu__stack_canary,%ecx +- subl $20, %ecx + movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) + shrl $16, %ecx + movb %cl, 8 * 
GDT_ENTRY_STACK_CANARY + 4(%eax) +@@ -122,7 +121,7 @@ ENTRY(hypercall_page) + /* + * BSS section + */ +-.section ".bss.page_aligned","wa" ++__PAGE_ALIGNED_BSS + .align PAGE_SIZE_asm + ENTRY(swapper_pg_fixmap) + .fill 1024,4,0 +--- head-2010-01-18.orig/arch/x86/kernel/head_64-xen.S 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/head_64-xen.S 2009-11-06 10:52:22.000000000 +0100 +@@ -23,7 +23,7 @@ + #include + #include + +- .section .text.head, "ax", @progbits ++ __HEAD + .code64 + .globl startup_64 + startup_64: +@@ -51,7 +51,7 @@ startup_64: + + #define NEXT_PAGE(name) \ + .balign PAGE_SIZE; \ +- phys_##name = . - .text.head; \ ++ phys_##name = . - .head.text; \ + ENTRY(name) + + NEXT_PAGE(init_level4_pgt) +@@ -104,7 +104,7 @@ NEXT_PAGE(hypercall_page) + + #undef NEXT_PAGE + +- .section .bss.page_aligned, "aw", @nobits ++ __PAGE_ALIGNED_BSS + .align PAGE_SIZE + ENTRY(empty_zero_page) + .skip PAGE_SIZE +--- head-2010-01-18.orig/arch/x86/kernel/irq-xen.c 2009-12-18 09:58:56.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq-xen.c 2009-12-18 09:59:05.000000000 +0100 +@@ -67,10 +67,10 @@ static int show_other_interrupts(struct + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); +- seq_printf(p, "%*s: ", prec, "CNT"); ++ seq_printf(p, "%*s: ", prec, "PMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); +- seq_printf(p, " Performance counter interrupts\n"); ++ seq_printf(p, " Performance monitoring interrupts\n"); + seq_printf(p, "%*s: ", prec, "PND"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); +@@ -112,7 +112,7 @@ static int show_other_interrupts(struct + seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); + #endif +-#ifdef CONFIG_X86_NEW_MCE ++#ifdef CONFIG_X86_MCE + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); +@@ -212,7 +212,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) + #ifdef CONFIG_X86_MCE_THRESHOLD + sum += irq_stats(cpu)->irq_threshold_count; + #endif +-#ifdef CONFIG_X86_NEW_MCE ++#ifdef CONFIG_X86_MCE + sum += per_cpu(mce_exception_count, cpu); + sum += per_cpu(mce_poll_count, cpu); + #endif +--- head-2010-01-18.orig/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/irq_32-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_ + void fixup_irqs(void) + { + unsigned int irq; +- static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { +@@ -236,8 +235,8 @@ void fixup_irqs(void) + } + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, affinity); +- else if (desc->action && !(warned++)) +- printk("Cannot set affinity for irq %i\n", irq); ++ else if (desc->action) ++ printk_once("Cannot set affinity for irq %i\n", irq); + } + + #if 0 +--- head-2010-01-18.orig/arch/x86/kernel/ldt-xen.c 2009-11-06 10:51:55.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/ldt-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -70,8 +70,8 @@ static int alloc_ldt(mm_context_t *pc, i + XENFEAT_writable_descriptor_tables); + load_LDT(pc); + #ifdef CONFIG_SMP +- if (!cpus_equal(current->mm->cpu_vm_mask, +- cpumask_of_cpu(smp_processor_id()))) ++ if (!cpumask_equal(mm_cpumask(current->mm), ++ cpumask_of(smp_processor_id()))) + smp_call_function(flush_ldt, current->mm, 1); + 
preempt_enable();
+ #endif
+--- head-2010-01-18.orig/arch/x86/kernel/microcode_core-xen.c 2009-11-06 10:52:09.000000000 +0100
++++ head-2010-01-18/arch/x86/kernel/microcode_core-xen.c 2009-11-06 10:52:22.000000000 +0100
+@@ -97,8 +97,8 @@ static ssize_t microcode_write(struct fi
+ {
+ ssize_t ret = -EINVAL;
+ 
+- if ((len >> PAGE_SHIFT) > num_physpages) {
+- pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
++ if ((len >> PAGE_SHIFT) > totalram_pages) {
++ pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
+ return ret;
+ }
+ 
+@@ -121,7 +121,7 @@ static const struct file_operations micr
+ static struct miscdevice microcode_dev = {
+ .minor = MICROCODE_MINOR,
+ .name = "microcode",
+- .devnode = "cpu/microcode",
++ .nodename = "cpu/microcode",
+ .fops = &microcode_fops,
+ };
+ 
+--- head-2010-01-18.orig/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:52:09.000000000 +0100
++++ head-2010-01-18/arch/x86/kernel/mpparse-xen.c 2009-11-06 10:52:22.000000000 +0100
+@@ -51,6 +51,13 @@ static int __init mpf_checksum(unsigned
+ return sum & 0xFF;
+ }
+ 
++#ifndef CONFIG_XEN
++int __init default_mpc_apic_id(struct mpc_cpu *m)
++{
++ return m->apicid;
++}
++#endif
++
+ static void __init MP_processor_info(struct mpc_cpu *m)
+ {
+ #ifndef CONFIG_XEN
+@@ -62,10 +69,7 @@ static void __init MP_processor_info(str
+ return;
+ }
+ 
+- if (x86_quirks->mpc_apic_id)
+- apicid = x86_quirks->mpc_apic_id(m);
+- else
+- apicid = m->apicid;
++ apicid = x86_init.mpparse.mpc_apic_id(m);
+ 
+ if (m->cpuflag & CPU_BOOTPROCESSOR) {
+ bootup_cpu = " (Bootup-CPU)";
+@@ -80,16 +84,18 @@ static void __init MP_processor_info(str
+ }
+ 
+ #ifdef CONFIG_X86_IO_APIC
+-static void __init MP_bus_info(struct mpc_bus *m)
++void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
+ {
+- char str[7];
+ memcpy(str, m->bustype, 6);
+ str[6] = 0;
++ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
++}
+ 
+- if (x86_quirks->mpc_oem_bus_info)
+- x86_quirks->mpc_oem_bus_info(m, str);
+- else
+- apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
++static void __init MP_bus_info(struct mpc_bus *m)
++{
++ char str[7];
++
++ x86_init.mpparse.mpc_oem_bus_info(m, str);
+ 
+ #if MAX_MP_BUSSES < 256
+ if (m->busid >= MAX_MP_BUSSES) {
+@@ -106,8 +112,8 @@ static void __init MP_bus_info(struct mp
+ mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
+ #endif
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
+- if (x86_quirks->mpc_oem_pci_bus)
+- x86_quirks->mpc_oem_pci_bus(m);
++ if (x86_init.mpparse.mpc_oem_pci_bus)
++ x86_init.mpparse.mpc_oem_pci_bus(m);
+ 
+ clear_bit(m->busid, mp_bus_not_pci);
+ #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+@@ -301,6 +307,8 @@ static void __init smp_dump_mptable(stru
+ 1, mpc, mpc->length, 1);
+ }
+ 
++void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
++
+ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
+ {
+ char str[16];
+@@ -322,16 +330,13 @@ static int __init smp_read_mpc(struct mp
+ if (early)
+ return 1;
+ 
+- if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
+- struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
+- x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
+- }
++ if (mpc->oemptr)
++ x86_init.mpparse.smp_read_mpc_oem(mpc);
+ 
+ /*
+ * Now process the configuration blocks. 
+ */ +- if (x86_quirks->mpc_record) +- *x86_quirks->mpc_record = 0; ++ x86_init.mpparse.mpc_record(0); + + while (count < mpc->length) { + switch (*mpt) { +@@ -363,8 +368,7 @@ static int __init smp_read_mpc(struct mp + count = mpc->length; + break; + } +- if (x86_quirks->mpc_record) +- (*x86_quirks->mpc_record)++; ++ x86_init.mpparse.mpc_record(1); + } + + #ifdef CONFIG_X86_BIGSMP +@@ -492,11 +496,11 @@ static void __init construct_ioapic_tabl + MP_bus_info(&bus); + } + +- ioapic.type = MP_IOAPIC; +- ioapic.apicid = 2; +- ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; +- ioapic.flags = MPC_APIC_USABLE; +- ioapic.apicaddr = 0xFEC00000; ++ ioapic.type = MP_IOAPIC; ++ ioapic.apicid = 2; ++ ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ ioapic.flags = MPC_APIC_USABLE; ++ ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; + MP_ioapic_info(&ioapic); + + /* +@@ -618,7 +622,7 @@ static int __init check_physptr(struct m + /* + * Scan the memory blocks for an SMP configuration block. + */ +-static void __init __get_smp_config(unsigned int early) ++void __init default_get_smp_config(unsigned int early) + { + struct mpf_intel *mpf = mpf_found; + +@@ -635,11 +639,6 @@ static void __init __get_smp_config(unsi + if (acpi_lapic && acpi_ioapic) + return; + +- if (x86_quirks->mach_get_smp_config) { +- if (x86_quirks->mach_get_smp_config(early)) +- return; +- } +- + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", + mpf->specification); + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN) +@@ -680,16 +679,6 @@ static void __init __get_smp_config(unsi + */ + } + +-void __init early_get_smp_config(void) +-{ +- __get_smp_config(1); +-} +- +-void __init get_smp_config(void) +-{ +- __get_smp_config(0); +-} +- + #ifndef CONFIG_XEN + static void __init smp_reserve_bootmem(struct mpf_intel *mpf) + { +@@ -761,16 +750,12 @@ static int __init smp_scan_config(unsign + return 0; + } + +-static void __init __find_smp_config(unsigned int reserve) ++void __init default_find_smp_config(unsigned int reserve) + { + #ifndef CONFIG_XEN + unsigned int address; + #endif + +- if (x86_quirks->mach_find_smp_config) { +- if (x86_quirks->mach_find_smp_config(reserve)) +- return; +- } + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... +@@ -807,16 +792,6 @@ static void __init __find_smp_config(uns + #endif + } + +-void __init early_find_smp_config(void) +-{ +- __find_smp_config(0); +-} +- +-void __init find_smp_config(void) +-{ +- __find_smp_config(1); +-} +- + #ifdef CONFIG_X86_IO_APIC + static u8 __initdata irq_used[MAX_IRQ_SOURCES]; + +--- head-2010-01-18.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-dma-xen.c 2009-11-18 14:54:16.000000000 +0100 +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -32,17 +33,22 @@ int no_iommu __read_mostly; + /* Set this to 1 if there is a HW IOMMU in the system */ + int iommu_detected __read_mostly = 0; + +-int iommu_pass_through; ++/* ++ * This variable becomes 1 if iommu=pt is passed on the kernel command line. ++ * If this variable is 1, IOMMU implementations do no DMA translation for ++ * devices and allow every device to access to whole physical memory. This is ++ * useful if a user want to use an IOMMU only for KVM device assignment to ++ * guests and not for driver dma translation. 
++ */ ++int iommu_pass_through __read_mostly; + + dma_addr_t bad_dma_address __read_mostly = 0; + EXPORT_SYMBOL(bad_dma_address); + +-/* Dummy device used for NULL arguments (normally ISA). Better would +- be probably a smaller DMA mask, but this is bug-to-bug compatible +- to older i386. */ ++/* Dummy device used for NULL arguments (normally ISA). */ + struct device x86_dma_fallback_dev = { + .init_name = "fallback device", +- .coherent_dma_mask = DMA_BIT_MASK(32), ++ .coherent_dma_mask = ISA_DMA_BIT_MASK, + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, + }; + EXPORT_SYMBOL(x86_dma_fallback_dev); +@@ -88,6 +94,11 @@ void __init dma32_reserve_bootmem(void) + size = roundup(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + 512ULL<<20); ++ /* ++ * Kmemleak should not scan this block as it may not be mapped via the ++ * kernel direct mapping. ++ */ ++ kmemleak_ignore(dma32_bootmem_ptr); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else +@@ -178,7 +189,7 @@ again: + + #ifndef CONFIG_XEN + addr = page_to_phys(page); +- if (!is_buffer_dma_capable(dma_mask, addr, size)) { ++ if (addr + size > dma_mask) { + __free_pages(page, order); + + if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { +@@ -266,10 +277,8 @@ static __init int iommu_setup(char *p) + if (!strncmp(p, "soft", 4)) + swiotlb = 1; + #endif +- if (!strncmp(p, "pt", 2)) { ++ if (!strncmp(p, "pt", 2)) + iommu_pass_through = 1; +- return 1; +- } + + gart_parse_options(p); + +@@ -381,7 +390,7 @@ void pci_iommu_shutdown(void) + amd_iommu_shutdown(); + } + /* Must execute after PCI subsystem */ +-fs_initcall(pci_iommu_init); ++rootfs_initcall(pci_iommu_init); + + #ifdef CONFIG_PCI + /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ +--- head-2010-01-18.orig/arch/x86/kernel/pci-nommu-xen.c 2009-11-06 10:52:02.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/pci-nommu-xen.c 2009-11-06 10:52:22.000000000 +0100 +@@ -36,7 +36,7 @@ gnttab_map_sg(struct device *hwdev, stru + sg->dma_address = + gnttab_dma_map_page(sg_page(sg)) + sg->offset; + sg->dma_length = sg->length; +- IOMMU_BUG_ON(address_needs_mapping( ++ IOMMU_BUG_ON(!dma_capable( + hwdev, sg->dma_address, sg->length)); + IOMMU_BUG_ON(range_straddles_page_boundary( + page_to_pseudophys(sg_page(sg)) + sg->offset, +@@ -68,7 +68,7 @@ gnttab_map_page(struct device *dev, stru + + dma = gnttab_dma_map_page(page) + offset; + IOMMU_BUG_ON(range_straddles_page_boundary(offset, size)); +- IOMMU_BUG_ON(address_needs_mapping(dev, dma, size)); ++ IOMMU_BUG_ON(!dma_capable(dev, dma, size)); + + return dma; + } +@@ -80,14 +80,31 @@ gnttab_unmap_page(struct device *dev, dm + gnttab_dma_unmap_page(dma_addr); + } + ++static void nommu_sync_single_for_device(struct device *dev, ++ dma_addr_t addr, size_t size, ++ enum dma_data_direction dir) ++{ ++ flush_write_buffers(); ++} ++ ++ ++static void nommu_sync_sg_for_device(struct device *dev, ++ struct scatterlist *sg, int nelems, ++ enum dma_data_direction dir) ++{ ++ flush_write_buffers(); ++} ++ + struct dma_map_ops nommu_dma_ops = { +- .alloc_coherent = dma_generic_alloc_coherent, +- .free_coherent = dma_generic_free_coherent, +- .map_page = gnttab_map_page, +- .unmap_page = gnttab_unmap_page, +- .map_sg = gnttab_map_sg, +- .unmap_sg = gnttab_unmap_sg, +- .dma_supported = swiotlb_dma_supported, ++ .alloc_coherent = dma_generic_alloc_coherent, ++ .free_coherent = dma_generic_free_coherent, ++ .map_page = gnttab_map_page, ++ .unmap_page = gnttab_unmap_page, ++ .map_sg = 
gnttab_map_sg,
++ .unmap_sg = gnttab_unmap_sg,
++ .sync_single_for_device = nommu_sync_single_for_device,
++ .sync_sg_for_device = nommu_sync_sg_for_device,
++ .dma_supported = swiotlb_dma_supported,
+ };
+ 
+ void __init no_iommu_init(void)
+--- head-2010-01-18.orig/arch/x86/kernel/process-xen.c 2009-11-06 10:52:09.000000000 +0100
++++ head-2010-01-18/arch/x86/kernel/process-xen.c 2009-11-06 10:52:23.000000000 +0100
+@@ -9,7 +9,7 @@
+ #include <linux/pm.h>
+ #include <linux/clockchips.h>
+ #include <linux/random.h>
+-#include <trace/power.h>
++#include <trace/events/power.h>
+ #include <asm/system.h>
+ #include <asm/apic.h>
+ #include <asm/syscalls.h>
+@@ -26,9 +26,6 @@ EXPORT_SYMBOL(idle_nomwait);
+ 
+ struct kmem_cache *task_xstate_cachep;
+ 
+-DEFINE_TRACE(power_start);
+-DEFINE_TRACE(power_end);
+-
+ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+ {
+ *dst = *src;
+@@ -285,9 +282,7 @@ static inline int hlt_use_halt(void)
+ */
+ void xen_idle(void)
+ {
+- struct power_trace it;
+-
+- trace_power_start(&it, POWER_CSTATE, 1);
++ trace_power_start(POWER_CSTATE, 1);
+ current_thread_info()->status &= ~TS_POLLING;
+ /*
+ * TS_POLLING-cleared state must be visible before we
+@@ -300,7 +295,6 @@ void xen_idle(void)
+ else
+ local_irq_enable();
+ current_thread_info()->status |= TS_POLLING;
+- trace_power_end(&it);
+ }
+ #ifdef CONFIG_APM_MODULE
+ EXPORT_SYMBOL(default_idle);
+@@ -354,9 +348,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
+ */
+ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+ {
+- struct power_trace it;
+-
+- trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
++ trace_power_start(POWER_CSTATE, (ax>>4)+1);
+ if (!need_resched()) {
+ if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ clflush((void *)&current_thread_info()->flags);
+@@ -366,15 +358,13 @@ void mwait_idle_with_hints(unsigned long
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
+- trace_power_end(&it);
+ }
+ 
+ /* Default MONITOR/MWAIT with no hints, used for default C1 state */
+ static void mwait_idle(void)
+ {
+- struct power_trace it;
+ if (!need_resched()) {
+- trace_power_start(&it, POWER_CSTATE, 1);
++ trace_power_start(POWER_CSTATE, 1);
+ if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ clflush((void *)&current_thread_info()->flags);
+ 
+@@ -384,7 +374,6 @@ static void mwait_idle(void)
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+- trace_power_end(&it);
+ } else
+ local_irq_enable();
+ }
+@@ -397,13 +386,11 @@ static void mwait_idle(void)
+ */
+ static void poll_idle(void)
+ {
+- struct power_trace it;
+-
+- trace_power_start(&it, POWER_CSTATE, 0);
++ trace_power_start(POWER_CSTATE, 0);
+ local_irq_enable();
+ while (!need_resched())
+ cpu_relax();
+- trace_power_end(&it);
++ trace_power_end(0);
+ }
+ 
+ #ifndef CONFIG_XEN
+@@ -556,10 +543,8 @@ void __init init_c1e_mask(void)
+ {
+ #ifndef CONFIG_XEN
+ /* If we're using c1e_idle, we need to allocate c1e_mask. */
+- if (pm_idle == c1e_idle) {
+- alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
+- cpumask_clear(c1e_mask);
+- }
++ if (pm_idle == c1e_idle)
++ zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
+ #endif
+ }
+ 
+--- head-2010-01-18.orig/arch/x86/kernel/process_32-xen.c 2009-11-06 10:52:09.000000000 +0100
++++ head-2010-01-18/arch/x86/kernel/process_32-xen.c 2009-11-06 10:52:23.000000000 +0100
+@@ -66,9 +66,6 @@
+ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+ asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
+ 
+-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+-EXPORT_PER_CPU_SYMBOL(current_task);
+-
+ /*
+ * Return saved PC of a blocked thread. 
+ 
+ */ +@@ -360,6 +357,7 @@ __switch_to(struct task_struct *prev_p, + #ifndef CONFIG_X86_NO_TSS + struct tss_struct *tss = &per_cpu(init_tss, cpu); + #endif ++ bool preload_fpu; + #if CONFIG_XEN_COMPAT > 0x030002 + struct physdev_set_iopl iopl_op; + struct physdev_set_iobitmap iobmp_op; +@@ -373,15 +371,24 @@ __switch_to(struct task_struct *prev_p, + /* XEN NOTE: FS/GS saved in switch_mm(), not here. */ + + /* ++ * If the task has used fpu the last 5 timeslices, just do a full ++ * restore of the math state immediately to avoid the trap; the ++ * chances of needing FPU soon are obviously high now ++ */ ++ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; ++ ++ /* + * This is basically '__unlazy_fpu', except that we queue a + * multicall to indicate FPU task switch, rather than + * synchronously trapping to Xen. + */ + if (task_thread_info(prev_p)->status & TS_USEDFPU) { + __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ +- mcl->op = __HYPERVISOR_fpu_taskswitch; +- mcl->args[0] = 1; +- mcl++; ++ if (!preload_fpu) { ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 1; ++ mcl++; ++ } + } + #if 0 /* lazy fpu sanity check */ + else BUG_ON(!(read_cr0() & 8)); +@@ -427,6 +434,14 @@ __switch_to(struct task_struct *prev_p, + mcl++; + } + ++ /* If we're going to preload the fpu context, make sure clts ++ is run while we're batching the cpu state updates. */ ++ if (preload_fpu) { ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 0; ++ mcl++; ++ } ++ + if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { + set_xen_guest_handle(iobmp_op.bitmap, + (char *)next->io_bitmap_ptr); +@@ -451,7 +466,7 @@ __switch_to(struct task_struct *prev_p, + BUG(); + + /* we're going to use this soon, after a few expensive things */ +- if (next_p->fpu_counter > 5) ++ if (preload_fpu) + prefetch(next->xstate); + + /* +@@ -470,15 +485,8 @@ __switch_to(struct task_struct *prev_p, + */ + arch_end_context_switch(next_p); + +- /* If the task has used fpu the last 5 timeslices, just do a full +- * restore of the math state immediately to avoid the trap; the +- * chances of needing FPU soon are obviously high now +- * +- * tsk_used_math() checks prevent calling math_state_restore(), +- * which can sleep in the case of !tsk_used_math() +- */ +- if (tsk_used_math(next_p) && next_p->fpu_counter > 5) +- math_state_restore(); ++ if (preload_fpu) ++ __math_state_restore(); + + /* + * Restore %gs if needed (which is common) +--- head-2010-01-18.orig/arch/x86/kernel/process_64-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/process_64-xen.c 2009-11-18 14:54:16.000000000 +0100 +@@ -64,9 +64,6 @@ + + asmlinkage extern void ret_from_fork(void); + +-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +-EXPORT_PER_CPU_SYMBOL(current_task); +- + DEFINE_PER_CPU(unsigned long, old_rsp); + static DEFINE_PER_CPU(unsigned char, is_idle); + +@@ -402,6 +399,7 @@ __switch_to(struct task_struct *prev_p, + #ifndef CONFIG_X86_NO_TSS + struct tss_struct *tss = &per_cpu(init_tss, cpu); + #endif ++ bool preload_fpu; + #if CONFIG_XEN_COMPAT > 0x030002 + struct physdev_set_iopl iopl_op; + struct physdev_set_iobitmap iobmp_op; +@@ -412,8 +410,15 @@ __switch_to(struct task_struct *prev_p, + #endif + multicall_entry_t _mcl[8], *mcl = _mcl; + ++ /* ++ * If the task has used fpu the last 5 timeslices, just do a full ++ * restore of the math state immediately to avoid the trap; the ++ * chances of needing FPU soon are obviously high now ++ */ ++ preload_fpu = tsk_used_math(next_p) 
&& next_p->fpu_counter > 5; ++ + /* we're going to use this soon, after a few expensive things */ +- if (next_p->fpu_counter > 5) ++ if (preload_fpu) + prefetch(next->xstate); + + /* +@@ -425,12 +430,21 @@ __switch_to(struct task_struct *prev_p, + */ + if (task_thread_info(prev_p)->status & TS_USEDFPU) { + __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ +- mcl->op = __HYPERVISOR_fpu_taskswitch; +- mcl->args[0] = 1; +- mcl++; ++ if (!preload_fpu) { ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 1; ++ mcl++; ++ } + } else + prev_p->fpu_counter = 0; + ++ /* Make sure cpu is ready for new context */ ++ if (preload_fpu) { ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 0; ++ mcl++; ++ } ++ + /* + * Reload sp0. + * This is load_sp0(tss, next) with a multicall. +@@ -550,15 +564,12 @@ __switch_to(struct task_struct *prev_p, + task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev_p, next_p); + +- /* If the task has used fpu the last 5 timeslices, just do a full +- * restore of the math state immediately to avoid the trap; the +- * chances of needing FPU soon are obviously high now +- * +- * tsk_used_math() checks prevent calling math_state_restore(), +- * which can sleep in the case of !tsk_used_math() ++ /* ++ * Preload the FPU context, now that we've determined that the ++ * task is likely to be using it. + */ +- if (tsk_used_math(next_p) && next_p->fpu_counter > 5) +- math_state_restore(); ++ if (preload_fpu) ++ __math_state_restore(); + return prev_p; + } + +@@ -730,3 +741,8 @@ long sys_arch_prctl(int code, unsigned l + return do_arch_prctl(current, code, addr); + } + ++unsigned long KSTK_ESP(struct task_struct *task) ++{ ++ return (test_tsk_thread_flag(task, TIF_IA32)) ? ++ (task_pt_regs(task)->sp) : ((task)->thread.usersp); ++} +--- head-2010-01-18.orig/arch/x86/kernel/quirks-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/quirks-xen.c 2009-11-06 10:52:23.000000000 +0100 +@@ -509,7 +509,7 @@ static void __init quirk_amd_nb_node(str + + pci_read_config_dword(nb_ht, 0x60, &val); + set_dev_node(&dev->dev, val & 7); +- pci_dev_put(dev); ++ pci_dev_put(nb_ht); + } + + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, +--- head-2010-01-18.orig/arch/x86/kernel/rtc.c 2009-11-06 10:51:25.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/rtc.c 2009-11-06 10:52:23.000000000 +0100 +@@ -189,8 +189,10 @@ void read_persistent_clock(struct timesp + unsigned long retval, flags; + + #ifdef CONFIG_XEN +- if (!is_initial_xendomain()) +- return xen_read_persistent_clock(); ++ if (!is_initial_xendomain()) { ++ xen_read_persistent_clock(ts); ++ return; ++ } + #endif + spin_lock_irqsave(&rtc_lock, flags); + retval = x86_platform.get_wallclock(); +--- head-2010-01-18.orig/arch/x86/kernel/setup-xen.c 2009-11-06 10:52:09.000000000 +0100 ++++ head-2010-01-18/arch/x86/kernel/setup-xen.c 2009-11-18 14:54:16.000000000 +0100 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -66,6 +67,7 @@ + + #include + #include ++#include + + #include