[3.13.y.z extended stable] Patch "x86_64/entry/xen: Do not invoke espfix64 on Xen" has been added to staging queue

Kamal Mostafa kamal at canonical.com
Tue Sep 2 21:37:13 UTC 2014

This is a note to let you know that I have just added a patch titled

    x86_64/entry/xen: Do not invoke espfix64 on Xen

to the linux-3.13.y-queue branch of the 3.13.y.z extended stable tree 
which can be found at:


This patch is scheduled to be released in version

If you, or anyone else, feels it should not be added to this tree, please 
reply to this email.

For more information about the 3.13.y.z tree, see



>From 010b8551c65fb49fcc4af5830638ceb105ca0b08 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto at amacapital.net>
Date: Wed, 23 Jul 2014 08:34:11 -0700
Subject: x86_64/entry/xen: Do not invoke espfix64 on Xen

commit 7209a75d2009dbf7745e2fd354abf25c3deb3ca3 upstream.

This moves the espfix64 logic into native_iret.  To make this work,
it gets rid of the native patch for INTERRUPT_RETURN:
INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.

This changes the 16-bit SS behavior on Xen from OOPSing to leaking
some bits of the Xen hypervisor's RSP (I think).

[ hpa: this is a nonzero cost on native, but probably not enough to
  measure. Xen needs to fix this in their own code, probably doing
  something equivalent to espfix64. ]

Signed-off-by: Andy Lutomirski <luto at amacapital.net>
Link: http://lkml.kernel.org/r/7b8f1d8ef6597cb16ae004a43c56980a7de3cf94.1406129132.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
Signed-off-by: Kamal Mostafa <kamal at canonical.com>
 arch/x86/include/asm/irqflags.h     |  2 +-
 arch/x86/kernel/entry_64.S          | 28 ++++++++++------------------
 arch/x86/kernel/paravirt_patch_64.c |  2 --
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf8..0a8b519 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)


-#define INTERRUPT_RETURN	iretq
+#define INTERRUPT_RETURN	jmp native_iret
 #define USERGS_SYSRET64				\
 	swapgs;					\
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index da0b9bd..03cd2a8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1041,27 +1041,24 @@ restore_args:

 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 #ifdef CONFIG_X86_ESPFIX64
 	testb $4,(SS-RIP)(%rsp)
-	jnz irq_return_ldt
+	jnz native_irq_return_ldt

-	_ASM_EXTABLE(irq_return_iret, bad_iret)
-	_ASM_EXTABLE(native_iret, bad_iret)
+	_ASM_EXTABLE(native_irq_return_iret, bad_iret)

 #ifdef CONFIG_X86_ESPFIX64
 	pushq_cfi %rax
 	pushq_cfi %rdi
@@ -1083,7 +1080,7 @@ irq_return_ldt:
 	movq %rax,%rsp
 	popq_cfi %rax
-	jmp irq_return_iret
+	jmp native_irq_return_iret

 	.section .fixup,"ax"
@@ -1167,13 +1164,8 @@ __do_double_fault:
 	cmpl $__KERNEL_CS,CS(%rdi)
 	jne do_double_fault
 	movq RIP(%rdi),%rax
-	cmpq $irq_return_iret,%rax
-	je 1f
-	cmpq $native_iret,%rax
+	cmpq $native_irq_return_iret,%rax
 	jne do_double_fault		/* This shouldn't happen... */
 	movq PER_CPU_VAR(kernel_stack),%rax
 	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
 	movq %rax,RSP(%rdi)
@@ -1674,7 +1666,7 @@ error_sti:
 	incl %ebx
-	leaq irq_return_iret(%rip),%rcx
+	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax	/* zero extend */
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34..a1da673 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, iret);
 		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret64);

More information about the kernel-team mailing list