kaiser: merged update
Merged fixes and cleanups, rebased to 4.9.51 tree (no 5-level paging).

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

parent 13be4483bb
commit 8f0baadf2b
15 changed files with 551 additions and 190 deletions
arch/x86/entry/entry_64.S

@@ -230,6 +230,13 @@ entry_SYSCALL_64_fastpath:
 	movq	RIP(%rsp), %rcx
 	movq	EFLAGS(%rsp), %r11
 	RESTORE_C_REGS_EXCEPT_RCX_R11
+	/*
+	 * This opens a window where we have a user CR3, but are
+	 * running in the kernel.  This makes using the CS
+	 * register useless for telling whether or not we need to
+	 * switch CR3 in NMIs.  Normal interrupts are OK because
+	 * they are off here.
+	 */
 	SWITCH_USER_CR3
 	movq	RSP(%rsp), %rsp
 	USERGS_SYSRET64
@@ -326,11 +333,25 @@ return_from_SYSCALL_64:
 syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
 	RESTORE_C_REGS_EXCEPT_RCX_R11
+	/*
+	 * This opens a window where we have a user CR3, but are
+	 * running in the kernel.  This makes using the CS
+	 * register useless for telling whether or not we need to
+	 * switch CR3 in NMIs.  Normal interrupts are OK because
+	 * they are off here.
+	 */
 	SWITCH_USER_CR3
 	movq	RSP(%rsp), %rsp
 	USERGS_SYSRET64
 
 opportunistic_sysret_failed:
+	/*
+	 * This opens a window where we have a user CR3, but are
+	 * running in the kernel.  This makes using the CS
+	 * register useless for telling whether or not we need to
+	 * switch CR3 in NMIs.  Normal interrupts are OK because
+	 * they are off here.
+	 */
 	SWITCH_USER_CR3
 	SWAPGS
 	jmp	restore_c_regs_and_iret
@@ -1087,6 +1108,13 @@ ENTRY(error_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	/*
+	 * error_entry() always returns with a kernel gsbase and
+	 * CR3.  We must also have a kernel CR3/gsbase before
+	 * calling TRACE_IRQS_*.  Just unconditionally switch to
+	 * the kernel CR3 here.
+	 */
+	SWITCH_KERNEL_CR3
 	xorl	%ebx, %ebx
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
@@ -1096,7 +1124,6 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
-	SWITCH_KERNEL_CR3
 
 .Lerror_entry_from_usermode_after_swapgs:
 	/*
@@ -1148,7 +1175,6 @@ ENTRY(error_entry)
 	 * Switch to kernel gsbase:
 	 */
 	SWAPGS
-	SWITCH_KERNEL_CR3
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1249,7 +1275,10 @@ ENTRY(nmi)
 	 */
 
 	SWAPGS_UNSAFE_STACK
-	SWITCH_KERNEL_CR3_NO_STACK
+	/*
+	 * percpu variables are mapped with user CR3, so no need
+	 * to switch CR3 here.
+	 */
 	cld
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1283,14 +1312,33 @@ ENTRY(nmi)
 
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
+#ifdef CONFIG_KAISER
+	/* Unconditionally use kernel CR3 for do_nmi() */
+	/* %rax is saved above, so OK to clobber here */
+	movq	%cr3, %rax
+	pushq	%rax
+#ifdef CONFIG_KAISER_REAL_SWITCH
+	andq	$(~0x1000), %rax
+#endif
+	movq	%rax, %cr3
+#endif
 	call	do_nmi
+	/*
+	 * Unconditionally restore CR3.  I know we return to
+	 * kernel code that needs user CR3, but do we ever return
+	 * to "user mode" where we need the kernel CR3?
+	 */
+#ifdef CONFIG_KAISER
+	popq	%rax
+	mov	%rax, %cr3
+#endif
 
 	/*
 	 * Return back to user mode. We must *not* do the normal exit
-	 * work, because we don't want to enable interrupts.  Fortunately,
-	 * do_nmi doesn't modify pt_regs.
+	 * work, because we don't want to enable interrupts.  Do not
+	 * switch to user CR3: we might be going back to kernel code
+	 * that had a user CR3 set.
 	 */
-	SWITCH_USER_CR3
 	SWAPGS
 	jmp	restore_c_regs_and_iret
 
@@ -1486,23 +1534,54 @@ end_repeat_nmi:
 	ALLOC_PT_GPREGS_ON_STACK
 
 	/*
-	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
-	 * as we should not be calling schedule in NMI context.
-	 * Even with normal interrupts enabled. An NMI should not be
-	 * setting NEED_RESCHED or anything that normal interrupts and
-	 * exceptions might do.
+	 * Use the same approach as paranoid_entry to handle SWAPGS, but
+	 * without CR3 handling since we do that differently in NMIs.  No
+	 * need to use paranoid_exit as we should not be calling schedule
+	 * in NMI context.  Even with normal interrupts enabled. An NMI
+	 * should not be setting NEED_RESCHED or anything that normal
+	 * interrupts and exceptions might do.
 	 */
-	call	paranoid_entry
+	cld
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
+	movl	$1, %ebx
+	movl	$MSR_GS_BASE, %ecx
+	rdmsr
+	testl	%edx, %edx
+	js	1f	/* negative -> in kernel */
+	SWAPGS
+	xorl	%ebx, %ebx
+1:
+#ifdef CONFIG_KAISER
+	/* Unconditionally use kernel CR3 for do_nmi() */
+	/* %rax is saved above, so OK to clobber here */
+	movq	%cr3, %rax
+	pushq	%rax
+#ifdef CONFIG_KAISER_REAL_SWITCH
+	andq	$(~0x1000), %rax
+#endif
+	movq	%rax, %cr3
+#endif
 
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
+	addq	$8, %rdi	/* point %rdi at ptregs, fixed up for CR3 */
 	movq	$-1, %rsi
 	call	do_nmi
+	/*
+	 * Unconditionally restore CR3.  We might be returning to
+	 * kernel code that needs user CR3, like just just before
+	 * a sysret.
+	 */
+#ifdef CONFIG_KAISER
+	popq	%rax
+	mov	%rax, %cr3
+#endif
 
 	testl	%ebx, %ebx	/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
-	SWITCH_USER_CR3_NO_STACK
+	/* We fixed up CR3 above, so no need to switch it here */
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_EXTRA_REGS
arch/x86/include/asm/kaiser.h

@@ -16,13 +16,17 @@
 
 .macro _SWITCH_TO_KERNEL_CR3 reg
 movq %cr3, \reg
+#ifdef CONFIG_KAISER_REAL_SWITCH
 andq $(~0x1000), \reg
+#endif
 movq \reg, %cr3
 .endm
 
 .macro _SWITCH_TO_USER_CR3 reg
 movq %cr3, \reg
+#ifdef CONFIG_KAISER_REAL_SWITCH
 orq $(0x1000), \reg
+#endif
 movq \reg, %cr3
 .endm
 
@@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
 .endm
 
 #endif /* CONFIG_KAISER */
 
 #else /* __ASSEMBLY__ */
 
 
 #ifdef CONFIG_KAISER
-// Upon kernel/user mode switch, it may happen that
-// the address space has to be switched before the registers have been stored.
-// To change the address space, another register is needed.
-// A register therefore has to be stored/restored.
-//
+/*
+ * Upon kernel/user mode switch, it may happen that the address
+ * space has to be switched before the registers have been
+ * stored.  To change the address space, another register is
+ * needed.  A register therefore has to be stored/restored.
+ */
 
 DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
 
-#endif /* CONFIG_KAISER */
 
 /**
- * shadowmem_add_mapping - map a virtual memory part to the shadow mapping
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
  * @addr: the start address of the range
  * @size: the size of the range
  * @flags: The mapping flags of the pages
  *
- * the mapping is done on a global scope, so no bigger synchronization has to be done.
- * the pages have to be manually unmapped again when they are not needed any longer.
+ * The mapping is done on a global scope, so no bigger
+ * synchronization has to be done.  the pages have to be
+ * manually unmapped again when they are not needed any longer.
  */
-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
 
 
 /**
- * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
  * @addr: the start address of the range
  * @size: the size of the range
  */
 extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
 
 /**
- * shadowmem_initialize_mapping - Initalize the shadow mapping
+ * kaiser_initialize_mapping - Initalize the shadow mapping
  *
- * most parts of the shadow mapping can be mapped upon boot time.
- * only the thread stacks have to be mapped on runtime.
- * the mapped regions are not unmapped at all.
+ * Most parts of the shadow mapping can be mapped upon boot
+ * time.  Only per-process things like the thread stacks
+ * or a new LDT have to be mapped at runtime.  These boot-
+ * time mappings are permanent and never unmapped.
  */
 extern void kaiser_init(void);
 
-#endif
+#endif /* CONFIG_KAISER */
 
+#endif /* __ASSEMBLY */
 
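The _SWITCH_TO_KERNEL_CR3/_SWITCH_TO_USER_CR3 macros above only toggle bit 12 (0x1000) of CR3, which works because the kernel PGD and its shadow (user) copy are the two 4k halves of one 8k-aligned allocation (see PGD_ALLOCATION_ORDER further down). A minimal user-space sketch of that arithmetic — illustrative only, with a made-up CR3 value:

/* Illustrative sketch: the bit-12 toggle used by the CR3-switch macros,
 * simulated on a plain integer.  The 0x1000 constant comes from the diff;
 * the example CR3 value is made up. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t kernel_cr3 = 0x12340000ULL;		/* 8k-aligned, bit 12 clear  */
	uint64_t user_cr3   = kernel_cr3 | 0x1000;	/* _SWITCH_TO_USER_CR3       */
	uint64_t back       = user_cr3 & ~0x1000ULL;	/* _SWITCH_TO_KERNEL_CR3     */

	printf("kernel CR3 %#llx <-> user CR3 %#llx, round trip ok: %d\n",
	       (unsigned long long)kernel_cr3,
	       (unsigned long long)user_cr3,
	       back == kernel_cr3);
	return 0;
}

When CONFIG_KAISER_REAL_SWITCH is off, the andq/orq above is compiled out, so the macros still write CR3 but never actually change which page tables are live.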
arch/x86/include/asm/pgtable.h

@@ -690,7 +690,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	pgdval_t ignore_flags = _PAGE_USER;
+	/*
+	 * We set NX on KAISER pgds that map userspace memory so
+	 * that userspace can not meaningfully use the kernel
+	 * page table by accident; it will fault on the first
+	 * instruction it tries to run.  See native_set_pgd().
+	 */
+	if (IS_ENABLED(CONFIG_KAISER))
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -905,8 +915,10 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
 	memcpy(dst, src, count * sizeof(pgd_t));
 #ifdef CONFIG_KAISER
-	// clone the shadow pgd part as well
-	memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
+	/* Clone the shadow pgd part as well */
+	memcpy(native_get_shadow_pgd(dst),
+	       native_get_shadow_pgd(src),
+	       count * sizeof(pgd_t));
 #endif
 }
 
arch/x86/include/asm/pgtable_64.h

@@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_t *pud)
 }
 
 #ifdef CONFIG_KAISER
-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
+{
 	return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
 }
 
-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
+{
 	return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
 }
+#else
+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
+{
+	BUILD_BUG_ON(1);
+	return NULL;
+}
+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
+{
+	return pgdp;
+}
 #endif /* CONFIG_KAISER */
 
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ * This returns true for user pages that need to get copied into
+ * both the user and kernel copies of the page tables, and false
+ * for kernel pages that should only be in the kernel copy.
+ */
+static inline bool is_userspace_pgd(void *__ptr)
+{
+	unsigned long ptr = (unsigned long)__ptr;
+
+	return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
+}
+
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
 #ifdef CONFIG_KAISER
-	// We know that a pgd is page aligned.
-	// Therefore the lower indices have to be mapped to user space.
-	// These pages are mapped to the shadow mapping.
-	if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
+	pteval_t extra_kern_pgd_flags = 0;
+	/* Do we need to also populate the shadow pgd? */
+	if (is_userspace_pgd(pgdp)) {
 		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+		/*
+		 * Even if the entry is *mapping* userspace, ensure
+		 * that userspace can not use it.  This way, if we
+		 * get out to userspace running on the kernel CR3,
+		 * userspace will crash instead of running.
+		 */
+		extra_kern_pgd_flags = _PAGE_NX;
 	}
-	pgdp->pgd = pgd.pgd & ~_PAGE_USER;
+	pgdp->pgd = pgd.pgd;
+	pgdp->pgd |= extra_kern_pgd_flags;
 #else /* CONFIG_KAISER */
 	*pgdp = pgd;
 #endif
 
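is_userspace_pgd() only looks at the pointer's offset within the 4k PGD page: the 256 userspace entries occupy the lower half and the kernel entries the upper half. A small stand-alone sketch of that test, using an ordinary aligned buffer in place of a real PGD — illustrative only:

/* Illustrative sketch: the "lower half of the PGD page" test from
 * is_userspace_pgd(), applied to entries of a 4k-aligned dummy table. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	uint64_t *pgd = aligned_alloc(PAGE_SIZE, PAGE_SIZE);	/* 512 8-byte entries */
	int idx;

	if (!pgd)
		return 1;
	for (idx = 0; idx < 512; idx += 255) {
		uintptr_t ptr = (uintptr_t)&pgd[idx];
		int user = (ptr % PAGE_SIZE) < (PAGE_SIZE / 2);

		printf("entry %3d: %s half -> %s\n", idx,
		       user ? "lower" : "upper",
		       user ? "also written to the shadow PGD" : "kernel only");
	}
	free(pgd);
	return 0;
}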
arch/x86/include/asm/pgtable_types.h

@@ -48,7 +48,7 @@
 #ifdef CONFIG_KAISER
 #define _PAGE_GLOBAL	(_AT(pteval_t, 0))
 #else
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #endif
 #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
 #define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
@@ -123,11 +123,7 @@
 #define _PAGE_DEVMAP	(_AT(pteval_t, 0))
 #endif
 
-#ifdef CONFIG_KAISER
-#define _PAGE_PROTNONE	(_AT(pteval_t, 0))
-#else
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#endif
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
arch/x86/kernel/espfix_64.c

@@ -127,11 +127,14 @@ void __init init_espfix_bsp(void)
 	/* Install the espfix pud into the kernel page directory */
 	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
 	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
-#ifdef CONFIG_KAISER
-	// add the esp stack pud to the shadow mapping here.
-	// This can be done directly, because the fixup stack has its own pud
-	set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
-#endif
+	/*
+	 * Just copy the top-level PGD that is mapping the espfix
+	 * area to ensure it is mapped into the shadow user page
+	 * tables.
+	 */
+	if (IS_ENABLED(CONFIG_KAISER))
+		set_pgd(native_get_shadow_pgd(pgd_p),
+			__pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
 
 	/* Randomize the locations */
 	init_espfix_random();
arch/x86/kernel/head_64.S

@@ -406,11 +406,24 @@ GLOBAL(early_recursion_flag)
 GLOBAL(name)
 
 #ifdef CONFIG_KAISER
+/*
+ * Each PGD needs to be 8k long and 8k aligned.  We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define KAISER_USER_PGD_FILL	512
+/* This ensures they are 8k-aligned: */
 #define NEXT_PGD_PAGE(name) \
 	.balign 2 * PAGE_SIZE; \
 GLOBAL(name)
 #else
 #define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define KAISER_USER_PGD_FILL	0
 #endif
 
 /* Automate the creation of 1 to 1 mapping pmd entries */
@@ -425,6 +438,7 @@ GLOBAL(name)
 NEXT_PGD_PAGE(early_level4_pgt)
 	.fill	511,8,0
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.fill	KAISER_USER_PGD_FILL,8,0
 
 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -433,7 +447,8 @@ NEXT_PAGE(early_dynamic_pgts)
 
 #ifndef CONFIG_XEN
 NEXT_PGD_PAGE(init_level4_pgt)
-	.fill	2*512,8,0
+	.fill	512,8,0
+	.fill	KAISER_USER_PGD_FILL,8,0
 #else
 NEXT_PGD_PAGE(init_level4_pgt)
 	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
@@ -442,6 +457,7 @@ NEXT_PGD_PAGE(init_level4_pgt)
 	.org	init_level4_pgt + L4_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.fill	KAISER_USER_PGD_FILL,8,0
 
 NEXT_PAGE(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
@@ -452,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt)
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #endif
+	.fill	KAISER_USER_PGD_FILL,8,0
 
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0
arch/x86/kernel/ldt.c

@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/ldt.h>
+#include <asm/kaiser.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
@@ -34,11 +35,21 @@ static void flush_ldt(void *current_mm)
 	set_ldt(pc->ldt->entries, pc->ldt->size);
 }
 
+static void __free_ldt_struct(struct ldt_struct *ldt)
+{
+	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+		vfree(ldt->entries);
+	else
+		free_page((unsigned long)ldt->entries);
+	kfree(ldt);
+}
+
 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
 static struct ldt_struct *alloc_ldt_struct(int size)
 {
 	struct ldt_struct *new_ldt;
 	int alloc_size;
+	int ret = 0;
 
 	if (size > LDT_ENTRIES)
 		return NULL;
@@ -66,6 +77,14 @@ static struct ldt_struct *alloc_ldt_struct(int size)
 		return NULL;
 	}
 
+	// FIXME: make kaiser_add_mapping() return an error code
+	// when it fails
+	kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+			   __PAGE_KERNEL);
+	if (ret) {
+		__free_ldt_struct(new_ldt);
+		return NULL;
+	}
 	new_ldt->size = size;
 	return new_ldt;
 }
@@ -92,12 +111,10 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 	if (likely(!ldt))
 		return;
 
+	kaiser_remove_mapping((unsigned long)ldt->entries,
+			      ldt->size * LDT_ENTRY_SIZE);
 	paravirt_free_ldt(ldt->entries, ldt->size);
-	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
-		vfree(ldt->entries);
-	else
-		free_page((unsigned long)ldt->entries);
-	kfree(ldt);
+	__free_ldt_struct(ldt);
 }
 
 /*
arch/x86/kernel/tracepoint.c

@@ -9,10 +9,12 @@
 #include <linux/atomic.h>
 
 atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+__aligned(PAGE_SIZE)
 struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
 				(unsigned long) trace_idt_table };
 
 /* No need to be aligned, but done to keep all IDTs defined the same way. */
+__aligned(PAGE_SIZE)
 gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
 
 static int trace_irq_vector_refcount;
 
arch/x86/mm/kaiser.c

@@ -1,160 +1,305 @@
+#include <linux/bug.h>
+
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 
 #include <linux/uaccess.h>
 
+#include <asm/kaiser.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/desc.h>
 #ifdef CONFIG_KAISER
 
 __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
-/**
- * Get the real ppn from a address in kernel mapping.
- * @param address The virtual adrress
- * @return the physical address
+/*
+ * At runtime, the only things we map are some things for CPU
+ * hotplug, and stacks for new processes.  No two CPUs will ever
+ * be populating the same addresses, so we only need to ensure
+ * that we protect between two CPUs trying to allocate and
+ * populate the same page table page.
+ *
+ * Only take this lock when doing a set_p[4um]d(), but it is not
+ * needed for doing a set_pte().  We assume that only the *owner*
+ * of a given allocation will be doing this for _their_
+ * allocation.
+ *
+ * This ensures that once a system has been running for a while
+ * and there have been stacks all over and these page tables
+ * are fully populated, there will be no further acquisitions of
+ * this lock.
  */
-static inline unsigned long get_pa_from_mapping (unsigned long address)
+static DEFINE_SPINLOCK(shadow_table_allocation_lock);
+
+/*
+ * Returns -1 on error.
+ */
+static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
-	pgd = pgd_offset_k(address);
-	BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
+	pgd = pgd_offset_k(vaddr);
+	/*
+	 * We made all the kernel PGDs present in kaiser_init().
+	 * We expect them to stay that way.
+	 */
+	BUG_ON(pgd_none(*pgd));
+	/*
+	 * PGDs are either 512GB or 128TB on all x86_64
+	 * configurations.  We don't handle these.
+	 */
+	BUG_ON(pgd_large(*pgd));
+
+	pud = pud_offset(pgd, vaddr);
+	if (pud_none(*pud)) {
+		WARN_ON_ONCE(1);
+		return -1;
+	}
+
+	if (pud_large(*pud))
+		return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
+
+	pmd = pmd_offset(pud, vaddr);
+	if (pmd_none(*pmd)) {
+		WARN_ON_ONCE(1);
+		return -1;
+	}
+
+	if (pmd_large(*pmd))
+		return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
+
+	pte = pte_offset_kernel(pmd, vaddr);
+	if (pte_none(*pte)) {
+		WARN_ON_ONCE(1);
+		return -1;
+	}
+
+	return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+}
+
+/*
+ * This is a relatively normal page table walk, except that it
+ * also tries to allocate page tables pages along the way.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+{
+	pmd_t *pmd;
+	pud_t *pud;
+	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+	might_sleep();
+	if (is_atomic) {
+		gfp &= ~GFP_KERNEL;
+		gfp |= __GFP_HIGH | __GFP_ATOMIC;
+	}
+
+	if (pgd_none(*pgd)) {
+		WARN_ONCE(1, "All shadow pgds should have been populated");
+		return NULL;
+	}
+	BUILD_BUG_ON(pgd_large(*pgd) != 0);
 
 	pud = pud_offset(pgd, address);
-	BUG_ON(pud_none(*pud));
+	/* The shadow page tables do not use large mappings: */
 
 	if (pud_large(*pud)) {
-		return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
+		WARN_ON(1);
+		return NULL;
+	}
+	if (pud_none(*pud)) {
+		unsigned long new_pmd_page = __get_free_page(gfp);
+		if (!new_pmd_page)
+			return NULL;
+		spin_lock(&shadow_table_allocation_lock);
+		if (pud_none(*pud))
+			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+		else
+			free_page(new_pmd_page);
+		spin_unlock(&shadow_table_allocation_lock);
 	}
 
 	pmd = pmd_offset(pud, address);
-	BUG_ON(pmd_none(*pmd));
+	/* The shadow page tables do not use large mappings: */
 
 	if (pmd_large(*pmd)) {
-		return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
+		WARN_ON(1);
+		return NULL;
+	}
+	if (pmd_none(*pmd)) {
+		unsigned long new_pte_page = __get_free_page(gfp);
+		if (!new_pte_page)
+			return NULL;
+		spin_lock(&shadow_table_allocation_lock);
+		if (pmd_none(*pmd))
+			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+		else
+			free_page(new_pte_page);
+		spin_unlock(&shadow_table_allocation_lock);
 	}
 
-	pte = pte_offset_kernel(pmd, address);
-	BUG_ON(pte_none(*pte));
-
-	return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
+	return pte_offset_kernel(pmd, address);
 }
 
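For large mappings, get_pa_from_mapping() builds the physical address by combining the huge-page frame number with the low bits of the virtual address. A stand-alone, worked version of that arithmetic for the 2MB (pmd_large) case — illustrative only, the pfn value is made up:

/* Illustrative sketch: the pfn/offset arithmetic that get_pa_from_mapping()
 * uses for a 2MB mapping.  The example vaddr and pfn are made up. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PMD_PAGE_SIZE	(1UL << 21)		/* 2MB */
#define PMD_PAGE_MASK	(~(PMD_PAGE_SIZE - 1))

int main(void)
{
	uint64_t vaddr = 0xffff880012345678ULL;	/* some kernel address        */
	uint64_t pfn   = 0x40000;		/* hypothetical pmd_pfn(*pmd) */
	uint64_t paddr = (pfn << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);

	printf("vaddr %#llx -> paddr %#llx (offset within 2MB page: %#llx)\n",
	       (unsigned long long)vaddr,
	       (unsigned long long)paddr,
	       (unsigned long long)(vaddr & ~PMD_PAGE_MASK));
	return 0;
}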
-void _kaiser_copy (unsigned long start_addr, unsigned long size,
+int kaiser_add_user_map(const void *__start_addr, unsigned long size,
 			unsigned long flags)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	int ret = 0;
 	pte_t *pte;
-	unsigned long address;
-	unsigned long end_addr = start_addr + size;
+	unsigned long start_addr = (unsigned long )__start_addr;
+	unsigned long address = start_addr & PAGE_MASK;
+	unsigned long end_addr = PAGE_ALIGN(start_addr + size);
 	unsigned long target_address;
 
-	for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
-	     address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
+	for (;address < end_addr; address += PAGE_SIZE) {
 		target_address = get_pa_from_mapping(address);
-
-		pgd = native_get_shadow_pgd(pgd_offset_k(address));
-
-		BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
-		BUG_ON(pgd_large(*pgd));
-
-		pud = pud_offset(pgd, address);
-		if (pud_none(*pud)) {
-			set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
+		if (target_address == -1) {
+			ret = -EIO;
+			break;
 		}
-		BUG_ON(pud_large(*pud));
-
-		pmd = pmd_offset(pud, address);
-		if (pmd_none(*pmd)) {
-			set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
-		}
-		BUG_ON(pmd_large(*pmd));
-
-		pte = pte_offset_kernel(pmd, address);
+		pte = kaiser_pagetable_walk(address, false);
 		if (pte_none(*pte)) {
 			set_pte(pte, __pte(flags | target_address));
 		} else {
-			BUG_ON(__pa(pte_page(*pte)) != target_address);
+			pte_t tmp;
+			set_pte(&tmp, __pte(flags | target_address));
+			WARN_ON_ONCE(!pte_same(*pte, tmp));
 		}
 	}
+	return ret;
 }
 
-// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping
-static inline void __init _kaiser_init(void)
+static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
+{
+	unsigned long size = end - start;
+
+	return kaiser_add_user_map(start, size, flags);
+}
+
+/*
+ * Ensure that the top level of the (shadow) page tables are
+ * entirely populated.  This ensures that all processes that get
+ * forked have the same entries.  This way, we do not have to
+ * ever go set up new entries in older processes.
+ *
+ * Note: we never free these, so there are no updates to them
+ * after this.
+ */
+static void __init kaiser_init_all_pgds(void)
 {
 	pgd_t *pgd;
 	int i = 0;
 
 	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
 	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
-		set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
+		pgd_t new_pgd;
+		pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
+		if (!pud) {
+			WARN_ON(1);
+			break;
+		}
+		new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+		/*
+		 * Make sure not to stomp on some other pgd entry.
+		 */
+		if (!pgd_none(pgd[i])) {
+			WARN_ON(1);
+			continue;
+		}
+		set_pgd(pgd + i, new_pgd);
 	}
 }
 
+#define kaiser_add_user_map_early(start, size, flags) do {	\
+	int __ret = kaiser_add_user_map(start, size, flags);	\
+	WARN_ON(__ret);						\
+} while (0)
+
+#define kaiser_add_user_map_ptrs_early(start, end, flags) do {		\
+	int __ret = kaiser_add_user_map_ptrs(start, end, flags);	\
+	WARN_ON(__ret);							\
+} while (0)
+
 extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
-spinlock_t shadow_table_lock;
+/*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die.  But, we
+ * will have most of the kernel up by then and should be able to
+ * get a clean warning out of it.  If we BUG_ON() here, we run
+ * the risk of being before we have good console output.
+ */
 void __init kaiser_init(void)
 {
 	int cpu;
-	spin_lock_init(&shadow_table_lock);
 
-	spin_lock(&shadow_table_lock);
+	kaiser_init_all_pgds();
 
-	_kaiser_init();
-
 	for_each_possible_cpu(cpu) {
-		// map the per cpu user variables
-		_kaiser_copy(
-			(unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
-			(unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
-			__PAGE_KERNEL);
+		void *percpu_vaddr = __per_cpu_user_mapped_start +
+				     per_cpu_offset(cpu);
+		unsigned long percpu_sz = __per_cpu_user_mapped_end -
+					  __per_cpu_user_mapped_start;
+		kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
+					  __PAGE_KERNEL);
 	}
 
-	// map the entry/exit text section, which is responsible to switch between user- and kernel mode
-	_kaiser_copy(
-			(unsigned long) __entry_text_start,
-			(unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
-			__PAGE_KERNEL_RX);
+	/*
+	 * Map the entry/exit text section, which is needed at
+	 * switches from user to and from kernel.
+	 */
+	kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
+				       __PAGE_KERNEL_RX);
+
-	// the fixed map address of the idt_table
-	_kaiser_copy(
-			(unsigned long) idt_descr.address,
-			sizeof(gate_desc) * NR_VECTORS,
-			__PAGE_KERNEL_RO);
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+	kaiser_add_user_map_ptrs_early(__irqentry_text_start,
+				       __irqentry_text_end,
+				       __PAGE_KERNEL_RX);
+#endif
+	kaiser_add_user_map_early((void *)idt_descr.address,
+				  sizeof(gate_desc) * NR_VECTORS,
+				  __PAGE_KERNEL_RO);
-	spin_unlock(&shadow_table_lock);
+#ifdef CONFIG_TRACING
+	kaiser_add_user_map_early(&trace_idt_descr,
+				  sizeof(trace_idt_descr),
+				  __PAGE_KERNEL);
+	kaiser_add_user_map_early(&trace_idt_table,
+				  sizeof(gate_desc) * NR_VECTORS,
+				  __PAGE_KERNEL);
+#endif
+	kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
+				  __PAGE_KERNEL);
+	kaiser_add_user_map_early(&debug_idt_table,
+				  sizeof(gate_desc) * NR_VECTORS,
+				  __PAGE_KERNEL);
 }
 
+extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
 // add a mapping to the shadow-mapping, and synchronize the mappings
-void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
 {
-	spin_lock(&shadow_table_lock);
-	_kaiser_copy(addr, size, flags);
-	spin_unlock(&shadow_table_lock);
+	return kaiser_add_user_map((const void *)addr, size, flags);
 }
 
-extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
 void kaiser_remove_mapping(unsigned long start, unsigned long size)
 {
-	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
-	spin_lock(&shadow_table_lock);
-	do {
-		unmap_pud_range(pgd, start, start + size);
-	} while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
-	spin_unlock(&shadow_table_lock);
+	unsigned long end = start + size;
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PGDIR_SIZE) {
+		pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+		/*
+		 * unmap_p4d_range() handles > P4D_SIZE unmaps,
+		 * so no need to trim 'end'.
+		 */
+		unmap_pud_range_nofree(pgd, addr, end);
+	}
 }
 #endif /* CONFIG_KAISER */
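kaiser_add_user_map() now rounds the start of the range down and the end up to page boundaries before walking it, replacing the old PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)) expression. A small stand-alone sketch of that rounding for an unaligned range — illustrative only, the range values are arbitrary:

/* Illustrative sketch: the start/end rounding that kaiser_add_user_map()
 * applies before mapping a range page by page. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long start_addr = 0x12345;	/* unaligned start */
	unsigned long size       = 0x2100;	/* unaligned size  */
	unsigned long address    = start_addr & PAGE_MASK;
	unsigned long end_addr   = PAGE_ALIGN(start_addr + size);
	unsigned long npages     = 0;

	for (; address < end_addr; address += PAGE_SIZE)
		npages++;

	printf("range %#lx+%#lx covers %lu pages (%#lx..%#lx)\n",
	       start_addr, size, npages, start_addr & PAGE_MASK, end_addr);
	return 0;
}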
arch/x86/mm/pageattr.c

@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock);
 #define CPA_FLUSHTLB 1
 #define CPA_ARRAY 2
 #define CPA_PAGES_ARRAY 4
+#define CPA_FREE_PAGETABLES 8
 
 #ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -729,10 +730,13 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
 	return 0;
 }
 
-static bool try_to_free_pte_page(pte_t *pte)
+static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
 {
 	int i;
 
+	if (!(cpa->flags & CPA_FREE_PAGETABLES))
+		return false;
+
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		if (!pte_none(pte[i]))
 			return false;
@@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *pte)
 	return true;
 }
 
-static bool try_to_free_pmd_page(pmd_t *pmd)
+static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
 {
 	int i;
 
+	if (!(cpa->flags & CPA_FREE_PAGETABLES))
+		return false;
+
 	for (i = 0; i < PTRS_PER_PMD; i++)
 		if (!pmd_none(pmd[i]))
 			return false;
@@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
 	return true;
 }
 
-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
+			    unsigned long start,
+			    unsigned long end)
 {
 	pte_t *pte = pte_offset_kernel(pmd, start);
 
@@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
 		pte++;
 	}
 
-	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+	if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
 		pmd_clear(pmd);
 		return true;
 	}
 	return false;
 }
 
-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
 			      unsigned long start, unsigned long end)
 {
-	if (unmap_pte_range(pmd, start, end))
-		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+	if (unmap_pte_range(cpa, pmd, start, end))
+		if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
 			pud_clear(pud);
 }
 
-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
+			    unsigned long start, unsigned long end)
 {
 	pmd_t *pmd = pmd_offset(pud, start);
 
@@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
 		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
 		unsigned long pre_end = min_t(unsigned long, end, next_page);
 
-		__unmap_pmd_range(pud, pmd, start, pre_end);
+		__unmap_pmd_range(cpa, pud, pmd, start, pre_end);
 
 		start = pre_end;
 		pmd++;
@@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
 		if (pmd_large(*pmd))
 			pmd_clear(pmd);
 		else
-			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+			__unmap_pmd_range(cpa, pud, pmd,
+					  start, start + PMD_SIZE);
 
 		start += PMD_SIZE;
 		pmd++;
@@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
 	 * 4K leftovers?
 	 */
 	if (start < end)
-		return __unmap_pmd_range(pud, pmd, start, end);
+		return __unmap_pmd_range(cpa, pud, pmd, start, end);
 
 	/*
 	 * Try again to free the PMD page if haven't succeeded above.
 	 */
 	if (!pud_none(*pud))
-		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+		if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
 			pud_clear(pud);
 }
 
-void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
+			      unsigned long start,
+			      unsigned long end)
 {
 	pud_t *pud = pud_offset(pgd, start);
 
@@ -834,7 +847,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
 		unsigned long pre_end = min_t(unsigned long, end, next_page);
 
-		unmap_pmd_range(pud, start, pre_end);
+		unmap_pmd_range(cpa, pud, start, pre_end);
 
 		start = pre_end;
 		pud++;
@@ -848,7 +861,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 		if (pud_large(*pud))
 			pud_clear(pud);
 		else
-			unmap_pmd_range(pud, start, start + PUD_SIZE);
+			unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
 
 		start += PUD_SIZE;
 		pud++;
@@ -858,7 +871,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 	 * 2M leftovers?
 	 */
 	if (start < end)
-		unmap_pmd_range(pud, start, end);
+		unmap_pmd_range(cpa, pud, start, end);
 
 	/*
 	 * No need to try to free the PUD page because we'll free it in
@@ -866,6 +879,24 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 	 */
 }
 
+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+	struct cpa_data cpa = {
+		.flags = CPA_FREE_PAGETABLES,
+	};
+
+	__unmap_pud_range(&cpa, pgd, start, end);
+}
+
+void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+	struct cpa_data cpa = {
+		.flags = 0,
+	};
+
+	__unmap_pud_range(&cpa, pgd, start, end);
+}
+
 static int alloc_pte_page(pmd_t *pmd)
 {
 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
arch/x86/mm/pgtable.c

@@ -344,40 +344,26 @@ static inline void _pgd_free(pgd_t *pgd)
 	kmem_cache_free(pgd_cache, pgd);
 }
 #else
 
+#ifdef CONFIG_KAISER
+/*
+ * Instead of one pmd, we aquire two pmds.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
 static inline pgd_t *_pgd_alloc(void)
 {
-#ifdef CONFIG_KAISER
-	// Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory
-	// block. Therefore, we have to allocate at least 3 pages. However, the
-	// __get_free_pages returns us 4 pages. Hence, we store the base pointer at
-	// the beginning of the page of our 8kb-aligned memory block in order to
-	// correctly free it afterwars.
-
-	unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
-
-	if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
-	{
-		*((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
-		return (pgd_t *) pages;
-	}
-	else
-	{
-		*((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
-		return (pgd_t *) (pages + PAGE_SIZE);
-	}
-#else
-	return (pgd_t *)__get_free_page(PGALLOC_GFP);
-#endif
+	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
 {
-#ifdef CONFIG_KAISER
-	unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
-	free_pages(pages, get_order(4*PAGE_SIZE));
-#else
-	free_page((unsigned long)pgd);
-#endif
+	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
 }
 #endif /* CONFIG_X86_PAE */
 
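The new PGD_ALLOCATION_ORDER scheme replaces the old four-page allocation with a single order-1 block, which is 8k in size and naturally 8k-aligned, so the shadow PGD always sits exactly one page above the kernel PGD. A quick stand-alone check of the sizes involved — illustrative only:

/* Illustrative sketch: sizes behind PGD_ALLOCATION_ORDER.  An order-1
 * allocation is 2 pages and naturally 8k-aligned, so bit 12 selects the
 * kernel or shadow half; the old code grabbed 4 pages instead. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long order1_bytes = PAGE_SIZE << 1;	/* PGD_ALLOCATION_ORDER == 1        */
	unsigned long old_bytes    = 4 * PAGE_SIZE;	/* old get_order(4*PAGE_SIZE) block */

	printf("kernel+shadow PGD pair: %lu bytes\n", order1_bytes);
	printf("old allocation:         %lu bytes (%lu saved per mm)\n",
	       old_bytes, old_bytes - order1_bytes);
	printf("shadow half offset:     %#lx\n", PAGE_SIZE);
	return 0;
}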
include/linux/kaiser.h (new file, 26 lines)

@@ -0,0 +1,26 @@
+#ifndef _INCLUDE_KAISER_H
+#define _INCLUDE_KAISER_H
+
+#ifdef CONFIG_KAISER
+#include <asm/kaiser.h>
+#else
+
+/*
+ * These stubs are used whenever CONFIG_KAISER is off, which
+ * includes architectures that support KAISER, but have it
+ * disabled.
+ */
+
+static inline void kaiser_init(void)
+{
+}
+static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
+{
+}
+static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+{
+	return 0;
+}
+
+#endif /* !CONFIG_KAISER */
+#endif /* _INCLUDE_KAISER_H */
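With these stubs in place, generic code can call kaiser_add_mapping() unconditionally and only check the return value, which is what dup_task_struct() does below for the new thread stack. A stand-alone sketch of that calling pattern — the stub mirrors the !CONFIG_KAISER version above, while setup_stack() and its arguments are made up for illustration:

/* Illustrative sketch: calling kaiser_add_mapping() without #ifdefs. */
#include <stdio.h>

static inline int kaiser_add_mapping(unsigned long addr, unsigned long size,
				     unsigned long flags)
{
	return 0;	/* KAISER disabled: nothing to map, never fails */
}

static int setup_stack(unsigned long stack, unsigned long thread_size)
{
	int err = kaiser_add_mapping(stack, thread_size, 0);

	if (err)
		return err;	/* the real caller would goto free_stack */
	printf("stack %#lx..%#lx usable\n", stack, stack + thread_size);
	return 0;
}

int main(void)
{
	return setup_stack(0xdead0000UL, 4 * 4096UL);
}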
kernel/fork.c

@@ -58,6 +58,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/freezer.h>
+#include <linux/kaiser.h>
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
@@ -472,7 +473,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 }
 
-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
@@ -500,9 +500,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	 * functions again.
 	 */
 	tsk->stack = stack;
-#ifdef CONFIG_KAISER
-	kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
-#endif
+
+	err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
+	if (err)
+		goto free_stack;
 #ifdef CONFIG_VMAP_STACK
 	tsk->stack_vm_area = stack_vm_area;
 #endif
 
security/Kconfig

@@ -32,12 +32,17 @@ config SECURITY
 	  If you are unsure how to answer this question, answer N.
 config KAISER
 	bool "Remove the kernel mapping in user mode"
+	default y
 	depends on X86_64
 	depends on !PARAVIRT
 	help
 	  This enforces a strict kernel and user space isolation in order to close
 	  hardware side channels on kernel address information.
 
+config KAISER_REAL_SWITCH
+	bool "KAISER: actually switch page tables"
+	default y
+
 config SECURITYFS
 	bool "Enable the securityfs filesystem"
 	help