]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - arch/x86/kernel/io_apic_64.c
Merge branch 'linus' into x86/x2apic
[mirror_ubuntu-zesty-kernel.git] / arch / x86 / kernel / io_apic_64.c
index f1e1ae3e5c7de1fd81a05c0d2c28e37bb12f44bf..39f0be37e9a16b7f0b10df3af743b6338f0532fa 100644 (file)
@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -48,6 +49,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -61,7 +63,7 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
        [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
        [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
        [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
@@ -94,7 +96,7 @@ static int no_timer_check;
 
 static int disable_timer_pin_1 __initdata;
 
-int timer_over_8254 __initdata = 1;
+int timer_through_8259 __initdata;
 
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -107,16 +109,21 @@ DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -144,7 +151,7 @@ struct io_apic {
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 {
        return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+               + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
 }
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -187,7 +194,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
                        break;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
-               if ((reg >> 14) & 1) {
+               if (reg & IO_APIC_REDIR_REMOTE_IRR) {
                        spin_unlock_irqrestore(&ioapic_lock, flags);
                        return true;
                }
@@ -300,9 +307,14 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
                pin = entry->pin;
                if (pin == -1)
                        break;
-               io_apic_write(apic, 0x11 + pin*2, dest);
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(apic, 0x11 + pin*2, dest);
                reg = io_apic_read(apic, 0x10 + pin*2);
-               reg &= ~0x000000ff;
+               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
                io_apic_modify(apic, reg);
                if (!entry->next)
@@ -364,16 +376,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
        entry->pin = pin;
 }
 
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+                                     int oldapic, int oldpin,
+                                     int newapic, int newpin)
+{
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (1) {
+               if (entry->apic == oldapic && entry->pin == oldpin) {
+                       entry->apic = newapic;
+                       entry->pin = newpin;
+               }
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+}
+
 
 #define DO_ACTION(name,R,ACTION, FINAL)                                        \
                                                                        \
        static void name##_IO_APIC_irq (unsigned int irq)               \
        __DO_ACTION(R, ACTION, FINAL)
 
-DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
-                                               /* mask = 1 */
-DO_ACTION( __unmask,           0, &= 0xfffeffff, )
-                                               /* mask = 0 */
+/* mask = 1 */
+DO_ACTION(__mask,      0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+
+/* mask = 0 */
+DO_ACTION(__unmask,    0, &= ~IO_APIC_REDIR_MASKED, )
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
@@ -416,6 +449,69 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       unsigned long flags;
+       int apic, pin;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               early_ioapic_entries[apic] =
+                       kzalloc(sizeof(struct IO_APIC_route_entry) *
+                               nr_ioapic_registers[apic], GFP_KERNEL);
+               if (!early_ioapic_entries[apic])
+                       return -ENOMEM;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+
+                       entry = early_ioapic_entries[apic][pin] =
+                               ioapic_read_entry(apic, pin);
+                       if (!entry.mask) {
+                               entry.mask = 1;
+                               ioapic_write_entry(apic, pin, entry);
+                       }
+               }
+       return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       ioapic_write_entry(apic, pin,
+                                          early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+       /*
+        * for now plain restore of previous settings.
+        * TBD: In the case of OS enabling interrupt-remapping,
+        * IO-APIC RTE's need to be setup to point to interrupt-remapping
+        * table entries. for now, do a plain restore, and wait for
+        * the setup_IO_APIC_irqs() to do proper initialization.
+        */
+       restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -434,20 +530,6 @@ static int __init disable_timer_pin_setup(char *arg)
 }
 __setup("disable_timer_pin_1", disable_timer_pin_setup);
 
-static int __init setup_disable_8254_timer(char *s)
-{
-       timer_over_8254 = -1;
-       return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
-       timer_over_8254 = 2;
-       return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -457,10 +539,10 @@ static int find_irq_entry(int apic, int pin, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mpc_irqtype == type &&
-                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
-                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
-                   mp_irqs[i].mpc_dstirq == pin)
+               if (mp_irqs[i].mp_irqtype == type &&
+                   (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+                    mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mp_dstirq == pin)
                        return i;
 
        return -1;
@@ -474,13 +556,13 @@ static int __init find_isa_irq_pin(int irq, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mpc_irqtype == type) &&
-                   (mp_irqs[i].mpc_srcbusirq == irq))
+                   (mp_irqs[i].mp_irqtype == type) &&
+                   (mp_irqs[i].mp_srcbusirq == irq))
 
-                       return mp_irqs[i].mpc_dstirq;
+                       return mp_irqs[i].mp_dstirq;
        }
        return -1;
 }
@@ -490,17 +572,17 @@ static int __init find_isa_irq_apic(int irq, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mpc_irqtype == type) &&
-                   (mp_irqs[i].mpc_srcbusirq == irq))
+                   (mp_irqs[i].mp_irqtype == type) &&
+                   (mp_irqs[i].mp_srcbusirq == irq))
                        break;
        }
        if (i < mp_irq_entries) {
                int apic;
                for(apic = 0; apic < nr_ioapics; apic++) {
-                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
                                return apic;
                }
        }
@@ -520,28 +602,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
        apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
                bus, slot, pin);
-       if (mp_bus_id_to_pci_bus[bus] == -1) {
+       if (test_bit(bus, mp_bus_not_pci)) {
                apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                return -1;
        }
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
-                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+                           mp_irqs[i].mp_dstapic == MP_APIC_ALL)
                                break;
 
                if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].mpc_irqtype &&
+                   !mp_irqs[i].mp_irqtype &&
                    (bus == lbus) &&
-                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+                   (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
                        if (!(apic || IO_APIC_IRQ(irq)))
                                continue;
 
-                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                       if (pin == (mp_irqs[i].mp_srcbusirq & 3))
                                return irq;
                        /*
                         * Use the first all-but-pin matching entry as a
@@ -569,13 +651,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
 static int MPBIOS_polarity(int idx)
 {
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
        int polarity;
 
        /*
         * Determine IRQ line polarity (high active or low active):
         */
-       switch (mp_irqs[idx].mpc_irqflag & 3)
+       switch (mp_irqs[idx].mp_irqflag & 3)
        {
                case 0: /* conforms, ie. bus-type dependent polarity */
                        if (test_bit(bus, mp_bus_not_pci))
@@ -611,13 +693,13 @@ static int MPBIOS_polarity(int idx)
 
 static int MPBIOS_trigger(int idx)
 {
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
        int trigger;
 
        /*
         * Determine IRQ trigger mode (edge or level sensitive):
         */
-       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
        {
                case 0: /* conforms, ie. bus-type dependent */
                        if (test_bit(bus, mp_bus_not_pci))
@@ -664,16 +746,16 @@ static inline int irq_trigger(int idx)
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
 
        /*
         * Debugging check, we are in big trouble if this message pops up!
         */
-       if (mp_irqs[idx].mpc_dstirq != pin)
+       if (mp_irqs[idx].mp_dstirq != pin)
                printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
        if (test_bit(bus, mp_bus_not_pci)) {
-               irq = mp_irqs[idx].mpc_srcbusirq;
+               irq = mp_irqs[idx].mp_srcbusirq;
        } else {
                /*
                 * PCI IRQs are mapped in order
@@ -792,7 +874,7 @@ static void __clear_irq_vector(int irq)
        cpus_clear(cfg->domain);
 }
 
-void __setup_vector_irq(int cpu)
+static void __setup_vector_irq(int cpu)
 {
        /* Initialize vector_irq on a new cpu */
        /* This function must be called with vector_lock held */
@@ -815,20 +897,107 @@ void __setup_vector_irq(int cpu)
        }
 }
 
+void setup_vector_irq(int cpu)
+{
+       spin_lock(&vector_lock);
+       __setup_vector_irq(smp_processor_id());
+       spin_unlock(&vector_lock);
+}
+
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-       if (trigger) {
+       if (trigger)
                irq_desc[irq].status |= IRQ_LEVEL;
-               set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                             handle_fasteoi_irq, "fasteoi");
-       } else {
+       else
                irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+               if (trigger)
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_fasteoi_irq,
+                                                    "fasteoi");
+               else
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_edge_irq, "edge");
+               return;
+       }
+#endif
+       if (trigger)
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                             handle_fasteoi_irq,
+                                             "fasteoi");
+       else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+                             struct IO_APIC_route_entry *entry,
+                             unsigned int destination, int trigger,
+                             int polarity, int vector)
+{
+       /*
+        * add it to the IO-APIC irq-routing table:
+        */
+       memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+               struct irte irte;
+               struct IR_IO_APIC_route_entry *ir_entry =
+                       (struct IR_IO_APIC_route_entry *) entry;
+               int index;
+
+               if (!iommu)
+                       panic("No mapping iommu for ioapic %d\n", apic);
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+               memset(&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = trigger;
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = vector;
+               irte.dest_id = IRTE_DEST(destination);
+
+               modify_irte(irq, &irte);
+
+               ir_entry->index2 = (index >> 15) & 0x1;
+               ir_entry->zero = 0;
+               ir_entry->format = 1;
+               ir_entry->index = (index & 0x7fff);
+       } else
+#endif
+       {
+               entry->delivery_mode = INT_DELIVERY_MODE;
+               entry->dest_mode = INT_DEST_MODE;
+               entry->dest = destination;
        }
+
+       entry->mask = 0;                                /* enable IRQ */
+       entry->trigger = trigger;
+       entry->polarity = polarity;
+       entry->vector = vector;
+
+       /* Mask level triggered irqs.
+        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+        */
+       if (trigger)
+               entry->mask = 1;
+       return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -850,27 +1019,18 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
        apic_printk(APIC_VERBOSE,KERN_DEBUG
                    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
                    "IRQ %d Mode:%i Active:%i)\n",
-                   apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
+                   apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
 
-       /*
-        * add it to the IO-APIC irq-routing table:
-        */
-       memset(&entry,0,sizeof(entry));
 
-       entry.delivery_mode = INT_DELIVERY_MODE;
-       entry.dest_mode = INT_DEST_MODE;
-       entry.dest = cpu_mask_to_apicid(mask);
-       entry.mask = 0;                         /* enable IRQ */
-       entry.trigger = trigger;
-       entry.polarity = polarity;
-       entry.vector = cfg->vector;
-
-       /* Mask level triggered irqs.
-        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-        */
-       if (trigger)
-               entry.mask = 1;
+       if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+                              cpu_mask_to_apicid(mask), trigger, polarity,
+                              cfg->vector)) {
+               printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+                      mp_ioapics[apic].mp_apicid, pin);
+               __clear_irq_vector(irq);
+               return;
+       }
 
        ioapic_register_intr(irq, trigger);
        if (irq < 16)
@@ -891,10 +1051,10 @@ static void __init setup_IO_APIC_irqs(void)
                idx = find_irq_entry(apic,pin,mp_INT);
                if (idx == -1) {
                        if (first_notcon) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
                                first_notcon = 0;
                        } else
-                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
                        continue;
                }
                if (!first_notcon) {
@@ -915,26 +1075,24 @@ static void __init setup_IO_APIC_irqs(void)
 }
 
 /*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
+ * Set up the timer pin, possibly with the 8259A-master behind.
  */
-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
+                                       int vector)
 {
        struct IO_APIC_route_entry entry;
 
-       memset(&entry, 0, sizeof(entry));
-
-       disable_8259A_irq(0);
+       if (intr_remapping_enabled)
+               return;
 
-       /* mask LVT0 */
-       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       memset(&entry, 0, sizeof(entry));
 
        /*
         * We use logical delivery to get the timer IRQ
         * to the first CPU.
         */
        entry.dest_mode = INT_DEST_MODE;
-       entry.mask = 0;                                 /* unmask IRQ now */
+       entry.mask = 1;                                 /* mask IRQ now */
        entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
        entry.delivery_mode = INT_DELIVERY_MODE;
        entry.polarity = 0;
@@ -943,7 +1101,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
 
        /*
         * The timer IRQ doesn't have to know that behind the
-        * scene we have a 8259A-master in AEOI mode ...
+        * scene we may have a 8259A-master in AEOI mode ...
         */
        set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
 
@@ -951,8 +1109,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
         * Add it to the IO-APIC irq-routing table:
         */
        ioapic_write_entry(apic, pin, entry);
-
-       enable_8259A_irq(0);
 }
 
 void __apicdebuginit print_IO_APIC(void)
@@ -969,7 +1125,7 @@ void __apicdebuginit print_IO_APIC(void)
        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
        for (i = 0; i < nr_ioapics; i++)
                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+                      mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
 
        /*
         * We are a bit conservative about what we expect.  We have to
@@ -987,7 +1143,7 @@ void __apicdebuginit print_IO_APIC(void)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        printk("\n");
-       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 
@@ -1075,13 +1231,15 @@ static __apicdebuginit void print_APIC_bitfield (int base)
 void __apicdebuginit print_local_APIC(void * dummy)
 {
        unsigned int v, ver, maxlvt;
+       unsigned long icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
 
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
-       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+       v = apic_read(APIC_ID);
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
@@ -1117,10 +1275,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
        v = apic_read(APIC_ESR);
        printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-       v = apic_read(APIC_ICR);
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-       v = apic_read(APIC_ICR2);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+       icr = apic_icr_read();
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
 
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1150,7 +1307,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 
 void print_all_local_APICs (void)
 {
-       on_each_cpu(print_local_APIC, NULL, 1, 1);
+       on_each_cpu(print_local_APIC, NULL, 1);
 }
 
 void __apicdebuginit print_PIC(void)
@@ -1275,7 +1432,7 @@ void disable_IO_APIC(void)
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
-               entry.dest          = GET_APIC_ID(read_apic_id());
+               entry.dest            = read_apic_id();
 
                /*
                 * Add it to the IO-APIC irq-routing table:
@@ -1383,6 +1540,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little  bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_desc *desc = irq_desc + irq;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+       int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       unsigned int dest;
+       unsigned long flags;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       if (modify_ioapic_rte) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * Modified the IRTE and flushes the Interrupt entry cache.
+        */
+       modify_irte(irq, &irte);
+
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+       int ret = -1;
+
+       mask_IO_APIC_irq(irq);
+
+       if (io_apic_level_ack_pending(irq)) {
+               /*
+                * Interrupt in progress. Migrating irq now will change the
+                * vector information in the IO-APIC RTE and that will confuse
+                * the EOI broadcast performed by cpu.
+                * So, delay the irq migration to the next instance.
+                */
+               schedule_delayed_work(&ir_migration_work, 1);
+               goto unmask;
+       }
+
+       /* everthing is clear. we have right of way */
+       migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+       ret = 0;
+       irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+       unmask_IO_APIC_irq(irq);
+       return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+       int irq;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               struct irq_desc *desc = irq_desc + irq;
+               if (desc->status & IRQ_MOVE_PENDING) {
+                       unsigned long flags;
+
+                       spin_lock_irqsave(&desc->lock, flags);
+                       if (!desc->chip->set_affinity ||
+                           !(desc->status & IRQ_MOVE_PENDING)) {
+                               desc->status &= ~IRQ_MOVE_PENDING;
+                               spin_unlock_irqrestore(&desc->lock, flags);
+                               continue;
+                       }
+
+                       desc->chip->set_affinity(irq,
+                                                irq_desc[irq].pending_mask);
+                       spin_unlock_irqrestore(&desc->lock, flags);
+               }
+       }
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+       if (irq_desc[irq].status & IRQ_LEVEL) {
+               irq_desc[irq].status |= IRQ_MOVE_PENDING;
+               irq_desc[irq].pending_mask = mask;
+               migrate_irq_remapped_level(irq);
+               return;
+       }
+
+       migrate_ioapic_irq(irq, mask);
+}
+#endif
+
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
@@ -1439,6 +1737,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1513,6 +1822,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ir_ioapic_affinity_irq,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
        int irq;
@@ -1544,7 +1868,7 @@ static inline void init_IO_APIC_traps(void)
        }
 }
 
-static void enable_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
        unsigned long v;
 
@@ -1552,7 +1876,7 @@ static void enable_lapic_irq (unsigned int irq)
        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq(unsigned int irq)
 {
        unsigned long v;
 
@@ -1565,19 +1889,20 @@ static void ack_lapic_irq (unsigned int irq)
        ack_APIC_irq();
 }
 
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
-       .name = "local-APIC",
-       .typename = "local-APIC-edge",
-       .startup = NULL, /* startup_irq() not used for IRQ0 */
-       .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
-       .enable = enable_lapic_irq,
-       .disable = disable_lapic_irq,
-       .ack = ack_lapic_irq,
-       .end = end_lapic_irq,
+static struct irq_chip lapic_chip __read_mostly = {
+       .name           = "local-APIC",
+       .mask           = mask_lapic_irq,
+       .unmask         = unmask_lapic_irq,
+       .ack            = ack_lapic_irq,
 };
 
+static void lapic_register_intr(int irq)
+{
+       irq_desc[irq].status &= ~IRQ_LEVEL;
+       set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+                                     "edge");
+}
+
 static void __init setup_nmi(void)
 {
        /*
@@ -1663,6 +1988,7 @@ static inline void __init check_timer(void)
        struct irq_cfg *cfg = irq_cfg + 0;
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
+       int no_pin1 = 0;
 
        local_irq_save(flags);
 
@@ -1673,16 +1999,11 @@ static inline void __init check_timer(void)
        assign_irq_vector(0, TARGET_CPUS);
 
        /*
-        * Subtle, code in do_timer_interrupt() expects an AEOI
-        * mode for the 8259A whenever interrupts are routed
-        * through I/O APICs.  Also IRQ0 has to be enabled in
-        * the 8259A which implies the virtual wire has to be
-        * disabled in the local APIC.
+        * As IRQ0 is to be enabled in the 8259A, the virtual
+        * wire has to be disabled in the local APIC.
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        init_8259A(1);
-       if (timer_over_8254 > 0)
-               enable_8259A_irq(0);
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
@@ -1692,15 +2013,35 @@ static inline void __init check_timer(void)
        apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
                cfg->vector, apic1, pin1, apic2, pin2);
 
+       /*
+        * Some BIOS writers are clueless and report the ExtINTA
+        * I/O APIC input from the cascaded 8259A as the timer
+        * interrupt input.  So just in case, if only one pin
+        * was found above, try it both directly and through the
+        * 8259A.
+        */
+       if (pin1 == -1) {
+               if (intr_remapping_enabled)
+                       panic("BIOS bug: timer not connected to IO-APIC");
+               pin1 = pin2;
+               apic1 = apic2;
+               no_pin1 = 1;
+       } else if (pin2 == -1) {
+               pin2 = pin1;
+               apic2 = apic1;
+       }
+
        if (pin1 != -1) {
                /*
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
+               if (no_pin1) {
+                       add_pin_to_irq(0, apic1, pin1);
+                       setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+               }
                unmask_IO_APIC_irq(0);
                if (!no_timer_check && timer_irq_works()) {
-                       nmi_watchdog_default();
                        if (nmi_watchdog == NMI_IO_APIC) {
-                               disable_8259A_irq(0);
                                setup_nmi();
                                enable_8259A_irq(0);
                        }
@@ -1708,44 +2049,51 @@ static inline void __init check_timer(void)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
+               if (intr_remapping_enabled)
+                       panic("timer doesn't work through Interrupt-remapped IO-APIC");
                clear_IO_APIC_pin(apic1, pin1);
-               apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
-                               "connected to IO-APIC\n");
-       }
+               if (!no_pin1)
+                       apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
+                                   "8254 timer not connected to IO-APIC\n");
 
-       apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
-                               "through the 8259A ... ");
-       if (pin2 != -1) {
+               apic_printk(APIC_VERBOSE,KERN_INFO
+                       "...trying to set up timer (IRQ0) "
+                       "through the 8259A ... ");
                apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
                        apic2, pin2);
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
+               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+               setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+               unmask_IO_APIC_irq(0);
+               enable_8259A_irq(0);
                if (timer_irq_works()) {
                        apic_printk(APIC_VERBOSE," works.\n");
-                       nmi_watchdog_default();
+                       timer_through_8259 = 1;
                        if (nmi_watchdog == NMI_IO_APIC) {
+                               disable_8259A_irq(0);
                                setup_nmi();
+                               enable_8259A_irq(0);
                        }
                        goto out;
                }
                /*
                 * Cleanup, just in case ...
                 */
+               disable_8259A_irq(0);
                clear_IO_APIC_pin(apic2, pin2);
+               apic_printk(APIC_VERBOSE," failed.\n");
        }
-       apic_printk(APIC_VERBOSE," failed.\n");
 
        if (nmi_watchdog == NMI_IO_APIC) {
                printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = 0;
+               nmi_watchdog = NMI_NONE;
        }
 
        apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
-       disable_8259A_irq(0);
-       irq_desc[0].chip = &lapic_irq_type;
+       lapic_register_intr(0);
        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
        enable_8259A_irq(0);
 
@@ -1753,6 +2101,7 @@ static inline void __init check_timer(void)
                apic_printk(APIC_VERBOSE," works.\n");
                goto out;
        }
+       disable_8259A_irq(0);
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
        apic_printk(APIC_VERBOSE," failed.\n");
 
@@ -1782,11 +2131,21 @@ static int __init notimercheck(char *s)
 __setup("no_timer_check", notimercheck);
 
 /*
- *
- * IRQs that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- *   Linux doesn't really care, as it's not actually used
- *   for any interrupt handling anyway.
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices.  However there may be an I/O APIC pin available for
+ * this interrupt regardless.  The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A.  In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table.  With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default.  We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now.  No actual device should request
+ * it anyway.  --macro
  */
 #define PIC_IRQS       (1<<2)
 
@@ -1797,10 +2156,7 @@ void __init setup_IO_APIC(void)
         * calling enable_IO_APIC() is moved to setup_local_APIC for BP
         */
 
-       if (acpi_ioapic)
-               io_apic_irqs = ~0;      /* all IRQs go through IOAPIC */
-       else
-               io_apic_irqs = ~PIC_IRQS;
+       io_apic_irqs = ~PIC_IRQS;
 
        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
 
@@ -1845,8 +2201,8 @@ static int ioapic_resume(struct sys_device *dev)
 
        spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(dev->id, 0);
-       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
-               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
                io_apic_write(dev->id, 0, reg_00.raw);
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -1931,6 +2287,9 @@ void destroy_irq(unsigned int irq)
 
        dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+       free_irte(irq);
+#endif
        spin_lock_irqsave(&vector_lock, flags);
        __clear_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
@@ -1949,10 +2308,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 
        tmp = TARGET_CPUS;
        err = assign_irq_vector(irq, tmp);
-       if (!err) {
-               cpus_and(tmp, cfg->domain, tmp);
-               dest = cpu_mask_to_apicid(tmp);
+       if (err)
+               return err;
+
+       cpus_and(tmp, cfg->domain, tmp);
+       dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irte irte;
+               int ir_index;
+               u16 sub_handle;
+
+               ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+               BUG_ON(ir_index == -1);
 
+               memset (&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = 0; /* edge */
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = cfg->vector;
+               irte.dest_id = IRTE_DEST(dest);
+
+               modify_irte(irq, &irte);
+
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->data = sub_handle;
+               msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+                                 MSI_ADDR_IR_SHV |
+                                 MSI_ADDR_IR_INDEX1(ir_index) |
+                                 MSI_ADDR_IR_INDEX2(ir_index);
+       } else
+#endif
+       {
                msg->address_hi = MSI_ADDR_BASE_HI;
                msg->address_lo =
                        MSI_ADDR_BASE_LO |
@@ -2003,6 +2393,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        write_msi_msg(irq, &msg);
        irq_desc[irq].affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg = irq_cfg + irq;
+       unsigned int dest;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * atomically update the IRTE with the new destination and vector.
+        */
+       modify_irte(irq, &irte);
+
+       /*
+        * After this point, all the interrupts will start arriving
+        * at the new destination. So, time to cleanup the previous
+        * vector allocation.
+        */
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       irq_desc[irq].affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2020,26 +2459,157 @@ static struct irq_chip msi_chip = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+       .name           = "IR-PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity   = ir_set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 {
+       struct intel_iommu *iommu;
+       int index;
+
+       iommu = map_dev_to_ir(dev);
+       if (!iommu) {
+               printk(KERN_ERR
+                      "Unable to map PCI %s to iommu\n", pci_name(dev));
+               return -ENOENT;
+       }
+
+       index = alloc_irte(iommu, irq, nvec);
+       if (index < 0) {
+               printk(KERN_ERR
+                      "Unable to allocate %d IRTE for PCI %s\n", nvec,
+                       pci_name(dev));
+               return -ENOSPC;
+       }
+       return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+       int ret;
        struct msi_msg msg;
+
+       ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       set_irq_msi(irq, desc);
+       write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irq_desc *desc = irq_desc + irq;
+               /*
+                * irq migration in process context
+                */
+               desc->status |= IRQ_MOVE_PCNTXT;
+               set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+       } else
+#endif
+               set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+       return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
        int irq, ret;
+
        irq = create_irq();
        if (irq < 0)
                return irq;
 
-       ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+       if (!intr_remapping_enabled)
+               goto no_ir;
+
+       ret = msi_alloc_irte(dev, irq, 1);
+       if (ret < 0)
+               goto error;
+no_ir:
+#endif
+       ret = setup_msi_irq(dev, desc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
        }
+       return 0;
 
-       set_irq_msi(irq, desc);
-       write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+       destroy_irq(irq);
+       return ret;
+#endif
+}
 
-       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       int irq, ret, sub_handle;
+       struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+       struct intel_iommu *iommu = 0;
+       int index = 0;
+#endif
 
+       sub_handle = 0;
+       list_for_each_entry(desc, &dev->msi_list, list) {
+               irq = create_irq();
+               if (irq < 0)
+                       return irq;
+#ifdef CONFIG_INTR_REMAP
+               if (!intr_remapping_enabled)
+                       goto no_ir;
+
+               if (!sub_handle) {
+                       /*
+                        * allocate the consecutive block of IRTE's
+                        * for 'nvec'
+                        */
+                       index = msi_alloc_irte(dev, irq, nvec);
+                       if (index < 0) {
+                               ret = index;
+                               goto error;
+                       }
+               } else {
+                       iommu = map_dev_to_ir(dev);
+                       if (!iommu) {
+                               ret = -ENOENT;
+                               goto error;
+                       }
+                       /*
+                        * setup the mapping between the irq and the IRTE
+                        * base index, the sub_handle pointing to the
+                        * appropriate interrupt remap table entry.
+                        */
+                       set_irte_irq(irq, iommu, index, sub_handle);
+               }
+no_ir:
+#endif
+               ret = setup_msi_irq(dev, desc, irq);
+               if (ret < 0)
+                       goto error;
+               sub_handle++;
+       }
        return 0;
+
+error:
+       destroy_irq(irq);
+       return ret;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
@@ -2246,8 +2816,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
                return -1;
 
        for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mpc_irqtype == mp_INT &&
-                   mp_irqs[i].mpc_srcbusirq == bus_irq)
+               if (mp_irqs[i].mp_irqtype == mp_INT &&
+                   mp_irqs[i].mp_srcbusirq == bus_irq)
                        break;
        if (i >= mp_irq_entries)
                return -1;
@@ -2287,6 +2857,10 @@ void __init setup_ioapic_dest(void)
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+                       else if (intr_remapping_enabled)
+                               set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
                        else
                                set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }
@@ -2340,7 +2914,7 @@ void __init ioapic_init_mappings(void)
        ioapic_res = ioapic_setup_resources();
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
-                       ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+                       ioapic_phys = mp_ioapics[i].mp_apicaddr;
                } else {
                        ioapic_phys = (unsigned long)
                                alloc_bootmem_pages(PAGE_SIZE);