option saves about 4k and might cause you much additional grey
           hair.
 
+config DEBUG_PARAVIRT
+       bool "Enable some paravirtualization debugging"
+       default y
+       depends on PARAVIRT && DEBUG_KERNEL
+       help
+         Currently this deliberately clobbers the registers which inlined
+         paravirt hooks are allowed to clobber, even in native mode.
+         If turning this off solves a problem, then some DISABLE_INTERRUPTS()
+         or ENABLE_INTERRUPTS() site is lying about which registers can be
+         clobbered.
+
 endmenu
 
 
 #endif /* CONFIG_X86_64 */
 
+/*
+ * Overwrite @len bytes starting at @insns with no-op instructions taken
+ * from the table returned by find_nop_table().  Each step copies the entry
+ * for the remaining length, capped at ASM_NOP_MAX bytes.
+ */
+static void nop_out(void *insns, unsigned int len)
+{
+       unsigned char **nops = find_nop_table();
+       unsigned int chunk;
+
+       for (; len > 0; len -= chunk, insns += chunk) {
+               chunk = (len > ASM_NOP_MAX) ? ASM_NOP_MAX : len;
+               memcpy(insns, nops[chunk], chunk);
+       }
+}
+
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
 
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
-       unsigned char **noptable = find_nop_table();
        struct alt_instr *a;
        u8 *instr;
-       int diff, i, k;
+       int diff;
 
        DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
        for (a = start; a < end; a++) {
 #endif
                memcpy(instr, a->replacement, a->replacementlen);
                diff = a->instrlen - a->replacementlen;
-               /* Pad the rest with nops */
-               for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-                       k = diff;
-                       if (k > ASM_NOP_MAX)
-                               k = ASM_NOP_MAX;
-                       memcpy(a->instr + i, noptable[k], k);
-               }
+               nop_out(instr + a->replacementlen, diff);
        }
 }
 
 
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
-       unsigned char **noptable = find_nop_table();
        u8 **ptr;
 
        for (ptr = start; ptr < end; ptr++) {
                        continue;
                if (*ptr > text_end)
                        continue;
-               **ptr = noptable[1][0];
+               nop_out(*ptr, 1);
        };
 }
 
 
 #endif
 
+#ifdef CONFIG_PARAVIRT
+/*
+ * Walk the patch records in [start, end).  For each one, let
+ * paravirt_ops.patch() rewrite the recorded call site, then fill the bytes
+ * it did not use with nops.
+ *
+ * NOTE(review): this relies on patch() never returning more than the site
+ * length; the unsigned subtractions below would wrap otherwise -- confirm
+ * against every backend.
+ */
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{
+       struct paravirt_patch *site;
+
+       for (site = start; site < end; site++) {
+               unsigned int done;
+#ifdef CONFIG_DEBUG_PARAVIRT
+               int reg;
+#endif
+
+               done = paravirt_ops.patch(site->instrtype, site->clobbers,
+                                         site->instr, site->len);
+#ifdef CONFIG_DEBUG_PARAVIRT
+               /*
+                * Deliberately clobber regs using "not %reg" to find bugs:
+                * one two-byte instruction per permitted register, for as
+                * long as there is room left in the site.
+                */
+               for (reg = 0; reg < 3; reg++) {
+                       if (!(site->clobbers & (1 << reg)))
+                               continue;
+                       if (site->len - done < 2)
+                               continue;
+                       memcpy(site->instr + done, "\xf7\xd0", 2);
+                       site->instr[done + 1] |= reg;
+                       done += 2;
+               }
+#endif
+               /* Whatever remains of the original site becomes nops. */
+               nop_out(site->instr + done, site->len - done);
+       }
+
+       /* Sync to be conservative, in case we patched following instructions */
+       sync_core();
+}
+extern struct paravirt_patch __start_parainstructions[],
+       __stop_parainstructions[];
+#endif /* CONFIG_PARAVIRT */
+
 void __init alternative_instructions(void)
 {
        unsigned long flags;
                alternatives_smp_switch(0);
        }
 #endif
+       apply_paravirt(__start_parainstructions, __stop_parainstructions);
        local_irq_restore(flags);
 }
 
 #include <asm/dwarf2.h>
 #include "irq_vectors.h"
 
+/*
+ * We use macros for low-level operations which need to be overridden
+ * for paravirtualization.  The following will never clobber any registers:
+ *   INTERRUPT_RETURN (aka. "iret")
+ *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
+ * Allowing a register to be clobbered can shrink the paravirt replacement
+ * enough to patch inline, increasing performance.
+ */
+
 #define nr_syscalls ((syscall_table_size)/4)
 
 CF_MASK                = 0x00000001
 VM_MASK                = 0x00020000
 
 #ifdef CONFIG_PREEMPT
-#define preempt_stop           DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
-#define preempt_stop
+#define preempt_stop(clobbers)
 #define resume_kernel          restore_nocheck
 #endif
 
        ALIGN
        RING0_PTREGS_FRAME
 ret_from_exception:
-       preempt_stop
+       preempt_stop(CLBR_ANY)
 ret_from_intr:
        GET_THREAD_INFO(%ebp)
 check_userspace:
        jb resume_kernel                # not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
-       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+       DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
                                        # between sampling and the iret
        movl TI_flags(%ebp), %ecx
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-       DISABLE_INTERRUPTS
+       DISABLE_INTERRUPTS(CLBR_ANY)
        cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
        jnz restore_nocheck
 need_resched:
         * No need to follow this irqs on/off section: the syscall
         * disabled irqs and here we enable it straight after entry:
         */
-       ENABLE_INTERRUPTS
+       ENABLE_INTERRUPTS(CLBR_NONE)
        pushl $(__USER_DS)
        CFI_ADJUST_CFA_OFFSET 4
        /*CFI_REL_OFFSET ss, 0*/
        jae syscall_badsys
        call *sys_call_table(,%eax,4)
        movl %eax,PT_EAX(%esp)
-       DISABLE_INTERRUPTS
+       DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testw $_TIF_ALLWORK_MASK, %cx
        call *sys_call_table(,%eax,4)
        movl %eax,PT_EAX(%esp)          # store the return value
 syscall_exit:
-       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+       DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
                                        # between sampling and the iret
        TRACE_IRQS_OFF
 .section .fixup,"ax"
 iret_exc:
        TRACE_IRQS_ON
-       ENABLE_INTERRUPTS
+       ENABLE_INTERRUPTS(CLBR_NONE)
        pushl $0                        # no error code
        pushl $do_iret_error
        jmp error_code
        CFI_ADJUST_CFA_OFFSET 4
        pushl %eax
        CFI_ADJUST_CFA_OFFSET 4
-       DISABLE_INTERRUPTS
+       DISABLE_INTERRUPTS(CLBR_EAX)
        TRACE_IRQS_OFF
        lss (%esp), %esp
        CFI_ADJUST_CFA_OFFSET -8
        jz work_notifysig
 work_resched:
        call schedule
-       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+       DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
                                        # between sampling and the iret
        TRACE_IRQS_OFF
        testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
        jz work_pending
        TRACE_IRQS_ON
-       ENABLE_INTERRUPTS               # could let do_syscall_trace() call
+       ENABLE_INTERRUPTS(CLBR_ANY)     # could let do_syscall_trace() call
                                        # schedule() instead
        movl %esp, %eax
        movl $1, %edx
        GET_CR0_INTO_EAX
        testl $0x4, %eax                # EM (math emulation bit)
        jne device_not_available_emulate
-       preempt_stop
+       preempt_stop(CLBR_ANY)
        call math_state_restore
        jmp ret_from_exception
 device_not_available_emulate:
 
                    const Elf_Shdr *sechdrs,
                    struct module *me)
 {
-       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+               *para = NULL;
        char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
        for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
                        alt = s;
                if (!strcmp(".smp_locks", secstrings + s->sh_name))
                        locks= s;
+               if (!strcmp(".parainstructions", secstrings + s->sh_name))
+                       para = s;
        }
 
        if (alt) {
                                            lseg, lseg + locks->sh_size,
                                            tseg, tseg + text->sh_size);
        }
+
+       if (para) {
+               void *pseg = (void *)para->sh_addr;
+               apply_paravirt(pseg, pseg + para->sh_size);
+       }
+
        return 0;
 }
 
 
        return paravirt_ops.memory_setup();
 }
 
+/*
+ * Simple instruction patching code.
+ *
+ * DEF_NATIVE(name, code) emits the assembly for `code` bracketed by the
+ * labels start_<name> and end_<name>, and declares both labels as const
+ * char arrays so that C code can locate the bytes in between.
+ */
+#define DEF_NATIVE(name, code)                                 \
+       extern const char start_##name[], end_##name[];         \
+       asm("start_" #name ": " code "; end_" #name ":")
+DEF_NATIVE(cli, "cli");
+DEF_NATIVE(sti, "sti");
+DEF_NATIVE(popf, "push %eax; popf");
+DEF_NATIVE(pushf, "pushf; pop %eax");
+DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
+DEF_NATIVE(iret, "iret");
+DEF_NATIVE(sti_sysexit, "sti; sysexit");
+
+/*
+ * Native instruction sequence for each patchable type, indexed by the
+ * PARAVIRT_* type number.  Each entry holds the start/end labels produced
+ * by the matching DEF_NATIVE(); an index with no initializer is left with
+ * NULL pointers.
+ */
+static const struct native_insns
+{
+       const char *start, *end;
+} native_insns[] = {
+       [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
+       [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
+       [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
+       [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
+       [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
+       [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
+       [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
+};
+
+/*
+ * Patch hook for native hardware: copy the native instruction sequence for
+ * @type over the call site at @insns (which is @len bytes long).
+ *
+ * Returns the number of bytes written.  When there is no sequence for
+ * @type, or the sequence does not fit, the site is left untouched and @len
+ * is returned.  @clobbers is not consulted here.
+ */
+static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+{
+       const struct native_insns *native;
+       unsigned int native_len;
+
+       /* Unknown type: nothing to patch in. */
+       if (type >= ARRAY_SIZE(native_insns))
+               return len;
+
+       /* Known type, but no native sequence registered for it. */
+       native = &native_insns[type];
+       if (!native->start)
+               return len;
+
+       /* A sequence longer than the call site cannot be used either. */
+       native_len = native->end - native->start;
+       if (native_len > len)
+               return len;
+
+       memcpy(insns, native->start, native_len);
+       return native_len;
+}
+
 static fastcall unsigned long native_get_debugreg(int regno)
 {
        unsigned long val = 0;  /* Damn you, gcc! */
        .paravirt_enabled = 0,
        .kernel_rpl = 0,
 
+       .patch = native_patch,
        .banner = default_banner,
        .arch_setup = native_nop,
        .memory_setup = machine_specific_memory_setup,
 
   .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
        *(.altinstr_replacement)
   }
+  . = ALIGN(4);
+  __start_parainstructions = .;
+  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+       *(.parainstructions)
+  }
+  __stop_parainstructions = .;
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
   .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
 
 #ifdef __KERNEL__
 
 #include <asm/types.h>
-
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 struct alt_instr {
 #define LOCK_PREFIX ""
 #endif
 
+struct paravirt_patch;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+#else
+static inline void
+apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{}
+#define __start_parainstructions NULL
+#define __stop_parainstructions NULL
+#endif
+
 #endif /* _I386_ALTERNATIVE_H */
 
 #undef C
 }
 
-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
-{
-       __u32 *lp = (__u32 *)((char *)dt + entry*8);
-       *lp = entry_a;
-       *(lp+1) = entry_b;
-}
-
 #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 
-static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
-{
-       __u32 a, b;
-       pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
-       write_idt_entry(idt_table, gate, a, b);
-}
-
-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
+static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
 {
-       __u32 a, b;
-       pack_descriptor(&a, &b, (unsigned long)addr,
-                       offsetof(struct tss_struct, __cacheline_filler) - 1,
-                       DESCTYPE_TSS, 0);
-       write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
+       __u32 *lp = (__u32 *)((char *)dt + entry*8);
+       *lp = entry_a;
+       *(lp+1) = entry_b;
 }
 
 #define set_ldt native_set_ldt
        }
 }
 
+/*
+ * Build a gate descriptor for handler @addr in segment @seg with the given
+ * @type via pack_gate(), and store it in slot @gate of idt_table.
+ */
+static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
+{
+       __u32 word0, word1;
+
+       pack_gate(&word0, &word1, (unsigned long)addr, seg, type, 0);
+       write_idt_entry(idt_table, gate, word0, word1);
+}
+
+/*
+ * Build a DESCTYPE_TSS descriptor for the TSS at @addr and store it in slot
+ * @entry of @cpu's GDT.  The limit passed to pack_descriptor() is the offset
+ * of tss_struct's __cacheline_filler member, minus one.
+ */
+static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
+{
+       __u32 word0, word1;
+
+       pack_descriptor(&word0, &word1, (unsigned long)addr,
+                       offsetof(struct tss_struct, __cacheline_filler) - 1,
+                       DESCTYPE_TSS, 0);
+       write_gdt_entry(get_cpu_gdt_table(cpu), entry, word0, word1);
+}
+
+
 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
 
 #define LDT_entry_a(info) \
 
 }
 
 #else
-#define DISABLE_INTERRUPTS             cli
-#define ENABLE_INTERRUPTS              sti
+#define DISABLE_INTERRUPTS(clobbers)   cli
+#define ENABLE_INTERRUPTS(clobbers)    sti
 #define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
 #define INTERRUPT_RETURN               iret
 #define GET_CR0_INTO_EAX               movl %cr0, %eax
 
 /* Various instructions on x86 need to be replaced for
  * para-virtualization: those hooks are defined here. */
 #include <linux/linkage.h>
+#include <linux/stringify.h>
 
 #ifdef CONFIG_PARAVIRT
+/* These are the most performance critical ops, so we want to be able to patch
+ * callers */
+#define PARAVIRT_IRQ_DISABLE 0
+#define PARAVIRT_IRQ_ENABLE 1
+#define PARAVIRT_RESTORE_FLAGS 2
+#define PARAVIRT_SAVE_FLAGS 3
+#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4
+#define PARAVIRT_INTERRUPT_RETURN 5
+#define PARAVIRT_STI_SYSEXIT 6
+
+/* Bitmask of what can be clobbered: usually at least eax. */
+#define CLBR_NONE 0x0
+#define CLBR_EAX 0x1
+#define CLBR_ECX 0x2
+#define CLBR_EDX 0x4
+#define CLBR_ANY 0x7
+
 #ifndef __ASSEMBLY__
 struct thread_struct;
 struct Xgt_desc_struct;
        int paravirt_enabled;
        const char *name;
 
+       /*
+        * Patch may replace one of the defined code sequences with arbitrary
+        * code, subject to the same register constraints.  This generally
+        * means the code is not free to clobber any registers other than EAX.
+        * The patch function should return the number of bytes of code
+        * generated, as we nop pad the rest in generic code.
+        */
+       unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
+
        void (*arch_setup)(void);
        char *(*memory_setup)(void);
        void (*init_IRQ)(void);
 #define read_cr4_safe(x) paravirt_ops.read_cr4_safe()
 #define write_cr4(x) paravirt_ops.write_cr4(x)
 
-static inline unsigned long __raw_local_save_flags(void)
-{
-       return paravirt_ops.save_fl();
-}
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
-       return paravirt_ops.restore_fl(flags);
-}
-
-static inline void raw_local_irq_disable(void)
-{
-       paravirt_ops.irq_disable();
-}
-
-static inline void raw_local_irq_enable(void)
-{
-       paravirt_ops.irq_enable();
-}
-
-static inline unsigned long __raw_local_irq_save(void)
-{
-       unsigned long flags = paravirt_ops.save_fl();
-
-       paravirt_ops.irq_disable();
-
-       return flags;
-}
-
 static inline void raw_safe_halt(void)
 {
        paravirt_ops.safe_halt();
 #endif
 }
 
-#define CLI_STRING     "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax"
-#define STI_STRING     "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax"
+/*
+ * These all sit in the .parainstructions section to tell us what to patch.
+ * One record describes one patchable call site.
+ */
+struct paravirt_patch {
+       u8 *instr;              /* original instructions */
+       u8 instrtype;           /* type of this instruction (a PARAVIRT_* number) */
+       u8 len;                 /* length of original instruction, in bytes */
+       u16 clobbers;           /* what registers you may clobber (CLBR_* mask) */
+};
+
+/*
+ * Wrap @insn_string between the local labels 771 and 772 and append one
+ * record to .parainstructions: the address of label 771, @typenum, the
+ * byte distance between the two labels, and @clobber -- emitted in the
+ * same order as the fields of struct paravirt_patch.
+ */
+#define paravirt_alt(insn_string, typenum, clobber)    \
+       "771:\n\t" insn_string "\n" "772:\n"            \
+       ".pushsection .parainstructions,\"a\"\n"        \
+       "  .long 771b\n"                                \
+       "  .byte " __stringify(typenum) "\n"            \
+       "  .byte 772b-771b\n"                           \
+       "  .short " __stringify(clobber) "\n"           \
+       ".popsection"
+
+/*
+ * Call paravirt_ops.save_fl indirectly and return what it leaves in %eax.
+ * %ecx and %edx are pushed before and popped after the call.  The site is
+ * recorded through paravirt_alt() as PARAVIRT_SAVE_FLAGS with CLBR_NONE.
+ */
+static inline unsigned long __raw_local_save_flags(void)
+{
+       unsigned long f;
+
+       __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+                                          "call *%1;"
+                                          "popl %%edx; popl %%ecx",
+                                         PARAVIRT_SAVE_FLAGS, CLBR_NONE)
+                            : "=a"(f): "m"(paravirt_ops.save_fl)
+                            : "memory", "cc");
+       return f;
+}
+
+/*
+ * Call paravirt_ops.restore_fl indirectly with @f placed in %eax.  %eax is
+ * also declared as an output, so the compiler treats it as overwritten.
+ * %ecx and %edx are pushed before and popped after the call.  The site is
+ * recorded through paravirt_alt() as PARAVIRT_RESTORE_FLAGS with CLBR_EAX.
+ */
+static inline void raw_local_irq_restore(unsigned long f)
+{
+       __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+                                          "call *%1;"
+                                          "popl %%edx; popl %%ecx",
+                                         PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
+                            : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
+                            : "memory", "cc");
+}
+
+/*
+ * Call paravirt_ops.irq_disable indirectly.  %ecx and %edx are pushed before
+ * and popped after the call; %eax is listed as clobbered.  The site is
+ * recorded through paravirt_alt() as PARAVIRT_IRQ_DISABLE with CLBR_EAX.
+ */
+static inline void raw_local_irq_disable(void)
+{
+       __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+                                          "call *%0;"
+                                          "popl %%edx; popl %%ecx",
+                                         PARAVIRT_IRQ_DISABLE, CLBR_EAX)
+                            : : "m" (paravirt_ops.irq_disable)
+                            : "memory", "eax", "cc");
+}
+
+/*
+ * Call paravirt_ops.irq_enable indirectly.  %ecx and %edx are pushed before
+ * and popped after the call; %eax is listed as clobbered.  The site is
+ * recorded through paravirt_alt() as PARAVIRT_IRQ_ENABLE with CLBR_EAX.
+ */
+static inline void raw_local_irq_enable(void)
+{
+       __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+                                          "call *%0;"
+                                          "popl %%edx; popl %%ecx",
+                                         PARAVIRT_IRQ_ENABLE, CLBR_EAX)
+                            : : "m" (paravirt_ops.irq_enable)
+                            : "memory", "eax", "cc");
+}
+
+/*
+ * Call paravirt_ops.save_fl, then paravirt_ops.irq_disable, and return the
+ * value the first call left in %eax.  That value is pushed before the second
+ * call and popped back afterwards.  %ecx and %edx are pushed before and
+ * popped after the whole sequence.  The site is recorded through
+ * paravirt_alt() as PARAVIRT_SAVE_FLAGS_IRQ_DISABLE with CLBR_NONE.
+ */
+static inline unsigned long __raw_local_irq_save(void)
+{
+       unsigned long f;
+
+       __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+                                          "call *%1; pushl %%eax;"
+                                          "call *%2; popl %%eax;"
+                                          "popl %%edx; popl %%ecx",
+                                         PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
+                                         CLBR_NONE)
+                            : "=a"(f)
+                            : "m" (paravirt_ops.save_fl),
+                              "m" (paravirt_ops.irq_disable)
+                            : "memory", "cc");
+       return f;
+}
+
+/*
+ * Asm-string forms of the irq_disable / irq_enable call sites, meant to be
+ * pasted into a larger inline asm statement.  They refer to the named
+ * operands [irq_disable] and [irq_enable], which CLI_STI_INPUT_ARGS supplies
+ * as the offsets of those members within struct paravirt_ops.
+ * CLI_STI_CLOBBERS appends %eax to the enclosing statement's clobber list.
+ * Both helper macros begin with a comma, so they go after an existing
+ * operand or clobber.
+ */
+#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"            \
+                    "call *paravirt_ops+%c[irq_disable];"              \
+                    "popl %%edx; popl %%ecx",                          \
+                    PARAVIRT_IRQ_DISABLE, CLBR_EAX)
+
+#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"            \
+                    "call *paravirt_ops+%c[irq_enable];"               \
+                    "popl %%edx; popl %%ecx",                          \
+                    PARAVIRT_IRQ_ENABLE, CLBR_EAX)
+#define CLI_STI_CLOBBERS , "%eax"
+#define CLI_STI_INPUT_ARGS \
+       ,                                                               \
+       [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \
+       [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable))
+
 #else  /* __ASSEMBLY__ */
 
-#define INTERRUPT_RETURN       jmp *%cs:paravirt_ops+PARAVIRT_iret
-#define DISABLE_INTERRUPTS     pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax
-#define ENABLE_INTERRUPTS      pushl %eax; pushl %ecx; pushl %edx; call *%cs:paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax
-#define ENABLE_INTERRUPTS_SYSEXIT      jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit
-#define GET_CR0_INTO_EAX       call *paravirt_ops+PARAVIRT_read_cr0
+/*
+ * Assembler-side counterpart of the C paravirt_alt() macro: emit @ops
+ * between the local labels 771 and 772 and append one record to
+ * .parainstructions holding the address of 771, @ptype, the byte distance
+ * between the labels, and @clobbers.
+ */
+#define PARA_PATCH(ptype, clobbers, ops)       \
+771:;                                          \
+       ops;                                    \
+772:;                                          \
+       .pushsection .parainstructions,"a";     \
+        .long 771b;                            \
+        .byte ptype;                           \
+        .byte 772b-771b;                       \
+        .short clobbers;                       \
+       .popsection
+
+#define INTERRUPT_RETURN                               \
+       PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \
+       jmp *%cs:paravirt_ops+PARAVIRT_iret)
+
+/*
+ * Disable interrupts through paravirt_ops.irq_disable, pushing %ecx and
+ * %edx before the indirect call and popping them afterwards.  PARA_PATCH
+ * records the site as PARAVIRT_IRQ_DISABLE; @clobbers is the CLBR_* mask
+ * stored in that record.
+ *
+ * The last line must not end in a backslash: a trailing continuation would
+ * splice whatever follows the macro into its body.
+ */
+#define DISABLE_INTERRUPTS(clobbers)                   \
+       PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers,      \
+       pushl %ecx; pushl %edx;                         \
+       call *paravirt_ops+PARAVIRT_irq_disable;        \
+       popl %edx; popl %ecx)
+
+#define ENABLE_INTERRUPTS(clobbers)                    \
+       PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers,       \
+       pushl %ecx; pushl %edx;                         \
+       call *%cs:paravirt_ops+PARAVIRT_irq_enable;     \
+       popl %edx; popl %ecx)
+
+#define ENABLE_INTERRUPTS_SYSEXIT                      \
+       PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY,      \
+       jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
+
+#define GET_CR0_INTO_EAX                       \
+       call *paravirt_ops+PARAVIRT_read_cr0
+
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
 #endif /* __ASM_PARAVIRT_H */
 
                : "0" (*eax), "2" (*ecx));
 }
 
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
-{
-       *eax = op;
-       *ecx = 0;
-       __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
-                              int *edx)
-{
-       *eax = op;
-       *ecx = count;
-       __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
-       unsigned int eax, ebx, ecx, edx;
-
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-       return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
-       unsigned int eax, ebx, ecx, edx;
-
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-       return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
-       unsigned int eax, ebx, ecx, edx;
-
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-       return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
-       unsigned int eax, ebx, ecx, edx;
-
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-       return edx;
-}
-
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
        .io_bitmap      = { [ 0 ... IO_BITMAP_LONGS] = ~0 },            \
 }
 
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define paravirt_enabled() 0
-#define __cpuid native_cpuid
-
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
-{
-       tss->esp0 = thread->esp0;
-       /* This can only happen when SEP is enabled, no need to test "SEP"arately */
-       if (unlikely(tss->ss1 != thread->sysenter_cs)) {
-               tss->ss1 = thread->sysenter_cs;
-               wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-       }
-}
-
 #define start_thread(regs, new_eip, new_esp) do {              \
        __asm__("movl %0,%%fs": :"r" (0));                      \
        regs->xgs = 0;                                          \
        regs->esp = new_esp;                                    \
 } while (0)
 
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register)                            \
-               __asm__("movl %%db" #register ", %0"            \
-                       :"=r" (var))
-#define set_debugreg(value, register)                  \
-               __asm__("movl %0,%%db" #register                \
-                       : /* no output */                       \
-                       :"r" (value))
-
-#define set_iopl_mask native_set_iopl_mask
-#endif /* CONFIG_PARAVIRT */
-
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static fastcall inline void native_set_iopl_mask(unsigned mask)
-{
-       unsigned int reg;
-       __asm__ __volatile__ ("pushfl;"
-                             "popl %0;"
-                             "andl %1, %0;"
-                             "orl %2, %0;"
-                             "pushl %0;"
-                             "popfl"
-                               : "=&r" (reg)
-                               : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-}
-
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
 
 #define cpu_relax()    rep_nop()
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define paravirt_enabled() 0
+#define __cpuid native_cpuid
+
+/*
+ * Copy @thread's esp0 into @tss.  If the ss1 value held in @tss differs
+ * from @thread's sysenter_cs, store the new value in @tss and write it to
+ * the MSR_IA32_SYSENTER_CS MSR.
+ */
+static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+{
+       tss->esp0 = thread->esp0;
+       /* This can only happen when SEP is enabled, no need to test "SEP"arately */
+       if (unlikely(tss->ss1 != thread->sysenter_cs)) {
+               tss->ss1 = thread->sysenter_cs;
+               wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+       }
+}
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register)                            \
+               __asm__("movl %%db" #register ", %0"            \
+                       :"=r" (var))
+#define set_debugreg(value, register)                  \
+               __asm__("movl %0,%%db" #register                \
+                       : /* no output */                       \
+                       :"r" (value))
+
+#define set_iopl_mask native_set_iopl_mask
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * Set IOPL bits in EFLAGS from given mask
+ *
+ * EFLAGS is read through the stack into a scratch register, the
+ * X86_EFLAGS_IOPL bits are cleared, @mask is ORed in, and the result is
+ * written back with popfl.
+ */
+static fastcall inline void native_set_iopl_mask(unsigned mask)
+{
+       unsigned int reg;
+       __asm__ __volatile__ ("pushfl;"
+                             "popl %0;"
+                             "andl %1, %0;"
+                             "orl %2, %0;"
+                             "pushl %0;"
+                             "popfl"
+                               : "=&r" (reg)
+                               : "i" (~X86_EFLAGS_IOPL), "r" (mask));
+}
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ *
+ * @op is stored through @eax and 0 through @ecx before __cpuid() is
+ * called with all four pointers; results come back through the same
+ * pointers.
+ */
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
+{
+       *eax = op;
+       *ecx = 0;
+       __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * Some CPUID calls want 'count' to be placed in ecx
+ *
+ * Same as cpuid(), except that @count rather than 0 is stored through
+ * @ecx before __cpuid() is called.
+ */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+                              int *edx)
+{
+       *eax = op;
+       *ecx = count;
+       __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+
+/* Run cpuid() for function @op and return only the eax result. */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+       unsigned int a, b, c, d;
+
+       cpuid(op, &a, &b, &c, &d);
+       return a;
+}
+/* Run cpuid() for function @op and return only the ebx result. */
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+       unsigned int a, b, c, d;
+
+       cpuid(op, &a, &b, &c, &d);
+       return b;
+}
+/* Run cpuid() for function @op and return only the ecx result. */
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+       unsigned int a, b, c, d;
+
+       cpuid(op, &a, &b, &c, &d);
+       return c;
+}
+/* Run cpuid() for function @op and return only the edx result. */
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+       unsigned int a, b, c, d;
+
+       cpuid(op, &a, &b, &c, &d);
+       return d;
+}
+
 /* generic versions from gas */
 #define GENERIC_NOP1   ".byte 0x90\n"
 #define GENERIC_NOP2           ".byte 0x89,0xf6\n"
 
 #else
 #define CLI_STRING     "cli"
 #define STI_STRING     "sti"
+#define CLI_STI_CLOBBERS
+#define CLI_STI_INPUT_ARGS
 #endif /* CONFIG_PARAVIRT */
 
 /*
 {
        asm volatile(
                "\n1:\t"
-               LOCK_PREFIX " ; decb %0\n\t"
+               LOCK_PREFIX " ; decb %[slock]\n\t"
                "jns 5f\n"
                "2:\t"
-               "testl $0x200, %1\n\t"
+               "testl $0x200, %[flags]\n\t"
                "jz 4f\n\t"
                STI_STRING "\n"
                "3:\t"
                "rep;nop\n\t"
-               "cmpb $0, %0\n\t"
+               "cmpb $0, %[slock]\n\t"
                "jle 3b\n\t"
                CLI_STRING "\n\t"
                "jmp 1b\n"
                "4:\t"
                "rep;nop\n\t"
-               "cmpb $0, %0\n\t"
+               "cmpb $0, %[slock]\n\t"
                "jg 1b\n\t"
                "jmp 4b\n"
                "5:\n\t"
-               : "+m" (lock->slock) : "r" (flags) : "memory");
+               : [slock] "+m" (lock->slock)
+               : [flags] "r" (flags)
+                 CLI_STI_INPUT_ARGS
+               : "memory" CLI_STI_CLOBBERS);
 }
 #endif
 
 
 #ifdef __KERNEL__
 
 #include <linux/types.h>
+#include <linux/stddef.h>
 #include <asm/cpufeature.h>
 
 struct alt_instr {
 #define LOCK_PREFIX ""
 #endif
 
+struct paravirt_patch;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+#else
+static inline void
+apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{}
+#define __start_parainstructions NULL
+#define __stop_parainstructions NULL
+#endif
+
 #endif /* _X86_64_ALTERNATIVE_H */
 
                ".toc1",  /* used by ppc64 */
                ".stab",
                ".rodata",
+               ".parainstructions",
                ".text.lock",
                "__bug_table", /* used by powerpc for BUG() */
                ".pci_fixup_header",
                ".altinstructions",
                ".eh_frame",
                ".debug",
+               ".parainstructions",
                NULL
        };
        /* part of section name */