/*********************************************************
 * Copyright (c) 1998-2014,2016,2018-2020,2022 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

/*
 * x86paging_64.h --
 *
 *      Contains definitions for the x86 page table layout specific to
 *      long mode.
 */

#ifndef _X86PAGING_64_H_
#define _X86PAGING_64_H_

/* Declare which build environments may include this header (checked by
 * includeCheck.h). */
#define INCLUDE_ALLOW_USERLEVEL
#define INCLUDE_ALLOW_MODULE
#define INCLUDE_ALLOW_VMMON
#define INCLUDE_ALLOW_VMK_MODULE
#define INCLUDE_ALLOW_VMKERNEL
#define INCLUDE_ALLOW_DISTRIBUTE
#define INCLUDE_ALLOW_VMCORE
#include "includeCheck.h"

#include "vm_basic_types.h"
#include "x86/cpu_types_arch.h"
#include "vm_pagetable.h"
#include "x86paging_common.h"
#include "vm_assert.h"
|
/* PFN field (bits 51:12) of a 4KB-page entry. */
#define LM_PTE_PFN_MASK CONST64U(0xffffffffff000)
#define LM_PTE_2_PFN(_pte) (((_pte) & LM_PTE_PFN_MASK) >> PT_PTE_PFN_SHIFT)

/* PFN fields of 2MB (bits 51:21) and 1GB (bits 51:30) large-page entries. */
#define LM_PDE_PFN_MASK 0xfffffffe00000LL
#define LM_PDPTE_PFN_MASK 0xfffffc0000000LL

/* Software-available bits 11:9 of a page table entry. */
#define LM_AVAIL_SHIFT 9

#define LM_AVAIL_MASK (CONST64(0x7) << LM_AVAIL_SHIFT)
/* Flag bits: NX (bit 63) plus the low attribute bits 8:0. */
#define LM_FLAGS_MASK CONST64(0x80000000000001ff)
/* CR3 attribute bits (bits 4:3, i.e. PCD/PWT). */
#define LM_CR3_FLAGS_MASK CONST64(0x18)
/* Bits 29:13, reserved in a 1GB L3 mapping. */
#define LM_L3_1G_RSVD_MASK CONST64(0x3fffe000)

/* Compose a CR3 value from a root-table MPN plus CR3 attribute bits. */
#define LM_MAKE_CR3(_mpfn, _flags)            \
   (((uint64)(_mpfn) << PT_PTE_PFN_SHIFT) |   \
    ((_flags) & LM_CR3_FLAGS_MASK))

/* Compose a PTE from an MPN, software-available bits, and flag bits. */
#define LM_MAKE_PTE(_mpfn, _avail, _flags)               \
   (((uint64)(_mpfn) << PT_PTE_PFN_SHIFT) |              \
    (((_avail) << LM_AVAIL_SHIFT) & LM_AVAIL_MASK) |     \
    ((uint64)(_flags) & LM_FLAGS_MASK))

/* Entries at every paging level share the same layout as a PTE. */
#define LM_MAKE_PDE(_pfn, _avail, _flags) LM_MAKE_PTE(_pfn, _avail, _flags)
#define LM_MAKE_L5E(_pfn, _avail, _flags) LM_MAKE_PTE(_pfn, _avail, _flags)
#define LM_MAKE_L4E(_pfn, _avail, _flags) LM_MAKE_PTE(_pfn, _avail, _flags)
#define LM_MAKE_L3E(_pfn, _avail, _flags) LM_MAKE_PTE(_pfn, _avail, _flags)
#define LM_MAKE_L2E(_pfn, _avail, _flags) LM_MAKE_PTE(_pfn, _avail, _flags)
#define LM_MAKE_L1E(_pfn, _avail, _flags) LM_MAKE_PTE(_pfn, _avail, _flags)
/*
 *----------------------------------------------------------------------
 *
 * LMPTEIsSafe --
 *
 *      A shadow PTE is considered "safe" if any of the following conditions
 *      are met:
 *
 *      a) It is not terminal (i.e., present with no reserved bits set)
 *      b) Terminal, but with MPN and PS fields set to zero
 *      c) Terminal, but with MPN field specifying an uncachable page
 *
 *      In practice, for condition c), we require that bits 45:43 of the EPTE
 *      are set to b'111. The position of these bits is undocumented and not
 *      architectural; they are truly magic.
 *
 *----------------------------------------------------------------------
 */
|
#ifdef VMX86_DEBUG

/* The magic non-architectural "safe" bits 45:43; see LMPTEIsSafe above. */
#define LM_SAFE_BITS_MASK (MASK64(3) << 43)
#define LM_SAFE_BITS(_v) ((_v) & LM_SAFE_BITS_MASK)
static INLINE Bool
|
|
LMPTEIsTerminal(VM_PAE_PTE pte, PT_Level level, uint64 physMask)
|
|
{
|
|
uint64 rsvd2m = (MASK(PT_LEVEL_SHIFT) << PAGE_SHIFT) & ~PTE_LARGE_PAT;
|
|
uint64 rsvd1g = (MASK(2 * PT_LEVEL_SHIFT) << PAGE_SHIFT) & ~PTE_LARGE_PAT;
|
|
uint64 rsvdl4 = PTE_PS;
|
|
uint64 rsvd = physMask & MASK64(52);
|
|
|
|
return (!PTE_PRESENT(pte) ||
|
|
(pte & LM_PTE_PFN_MASK) & rsvd) != 0 ||
|
|
((pte & PTE_PS) != 0 &&
|
|
((level == PT_LEVEL_2 && (pte & rsvd2m) != 0) ||
|
|
(level == PT_LEVEL_3 && (pte & rsvd1g) != 0))) ||
|
|
(level == PT_LEVEL_4 && (pte & rsvdl4) != 0);
|
|
}
|
|
|
|
static INLINE Bool
|
|
LMPTEIsSafe(VM_PAE_PTE pte, PT_Level level, uint64 physMask)
|
|
{
|
|
MPN mpn = (pte & LM_PTE_PFN_MASK) >> PT_PTE_PFN_SHIFT;
|
|
Bool safe = !LMPTEIsTerminal(pte, level, physMask) ||
|
|
(mpn == 0 && !PTE_LARGEPAGE(pte)) ||
|
|
LM_SAFE_BITS(pte) == LM_SAFE_BITS_MASK;
|
|
return safe;
|
|
}
|
|
#endif /* VMX86_DEBUG */
|
|
|
|
/*
 *----------------------------------------------------------------------
 *
 * NPTValidLargePage --
 *
 *      Returns TRUE iff the provided large page NPT entry is valid
 *      (i.e. no reserved bits set).
 *
 *----------------------------------------------------------------------
 */
|
static INLINE Bool
|
|
NPTValidLargePage(VM_PAE_PTE npte, PT_Level level, unsigned depth)
|
|
{
|
|
const PPN lpRsvd = MASK((level - 1) * PT_LEVEL_SHIFT) &
|
|
~(PTE_LARGE_PAT >> PAGE_SHIFT);
|
|
return (level == PT_LEVEL_2 ||
|
|
(level == PT_LEVEL_3 && depth == PT_LEVEL_4)) &&
|
|
(LM_PTE_2_PFN(npte) & lpRsvd) == 0;
|
|
}
|
|
|
|
/*
 *----------------------------------------------------------------------
 *
 * NPTEIsValid --
 *
 *      Check an NPT entry for validity at the indicated page table level
 *      and depth. Use the provided physMask as the mask of reserved PA bits.
 *
 *----------------------------------------------------------------------
 */
|
static INLINE Bool
|
|
NPTEIsValid(VM_PAE_PTE npte, PT_Level level, Bool nxOn, unsigned depth,
|
|
uint64 physMask)
|
|
{
|
|
VM_PAE_PTE rsvd;
|
|
if (depth == PT_LEVEL_3) {
|
|
rsvd = physMask & ~PTE_NX;
|
|
if (level == PT_LEVEL_3) {
|
|
rsvd |= PTE_NX | PDPTR_MBZ_MASK;
|
|
}
|
|
} else {
|
|
rsvd = physMask & MASK64(52);
|
|
if (level == PT_LEVEL_4) {
|
|
rsvd |= PTE_PS | PTE_G;
|
|
}
|
|
}
|
|
if (UNLIKELY(!nxOn)) {
|
|
/* When NX is disabled, PTE_NX is treated as reserved. */
|
|
rsvd |= PTE_NX;
|
|
}
|
|
return !PTE_PRESENT(npte) ||
|
|
((npte & rsvd) == 0 &&
|
|
(level == PT_LEVEL_1 ||
|
|
(!PTE_LARGEPAGE(npte) || NPTValidLargePage(npte, level, depth))));
|
|
}
|
|
|
|
/*
 * x86-64 architecture requires implementations supporting less than
 * full 64-bit VAs to ensure that all virtual addresses are in canonical
 * form. An address is in canonical form if the address bits from the
 * most significant implemented bit up to bit 63 are all ones or all
 * zeros. If this is not the case, the processor generates #GP/#SS. Our
 * VCPU implements 48 bits of virtual address space.
 */
|
|
|
#define VA64_IMPL_BITS            48
#define VA64_CANONICAL_MASK       (~((CONST64U(1) << (VA64_IMPL_BITS - 1)) - 1))
#define VA64_CANONICAL_HOLE_START (CONST64U(1) << (VA64_IMPL_BITS - 1))
/*
 * Parenthesized so the subtraction cannot rebind when the macro expands
 * inside a larger expression (the previous definition was bare).
 */
#define VA64_CANONICAL_HOLE_LEN   (VA64_CANONICAL_MASK - VA64_CANONICAL_HOLE_START)
/*
 * x86-64 architecture allows 57 bits of virtual address space if 5-level
 * paging is enabled.
 */
#define VA64_L5_IMPL_BITS 57
#define VA64_L5_IMPL_MASK ((CONST64U(1) << VA64_L5_IMPL_BITS) - 1)
static INLINE Bool
|
|
x86IsCanonicalC(VA64 va)
|
|
{
|
|
return (va & VA64_CANONICAL_MASK) == 0 ||
|
|
(va & VA64_CANONICAL_MASK) == VA64_CANONICAL_MASK;
|
|
}
|
|
|
|
#if defined(VM_X86_64) && defined(__GNUC__)
|
|
static INLINE Bool
|
|
x86IsCanonicalAsm(VA64 va)
|
|
{
|
|
Bool out;
|
|
/*
|
|
* sarq $48, %0: Move 17 bits from position 63:47 into 15:0 and CF,
|
|
* sign-extending as we do so.
|
|
* adcl $0, %0: Then add (0 + CF) to the shifted value. The result is zero
|
|
* iff (CF == 1 && bits 31:0 == 0xffffffff) or (CF == 0 && bits 31:0 == 0).
|
|
* That is, if original bits 47 and higher were all 1s or all 0s.
|
|
* setz %1: Create the boolean.
|
|
*
|
|
* The chain of AdcLong macros will emit one adc instruction; gcc
|
|
* has no obvious way to force the size of a quad %0 to a long.
|
|
*/
|
|
#define AdcLong(reg64, reg32) \
|
|
".ifc %0, %%" #reg64 "\n" \
|
|
" adcl $0, %%" #reg32 "\n" \
|
|
".endif \n"
|
|
|
|
asm ("sarq $48, %0 \n"
|
|
AdcLong(rax, eax)
|
|
AdcLong(rcx, ecx)
|
|
AdcLong(rdx, edx)
|
|
AdcLong(rbx, ebx)
|
|
AdcLong(rsp, esp)
|
|
AdcLong(rbp, ebp)
|
|
AdcLong(rsi, esi)
|
|
AdcLong(rdi, edi)
|
|
AdcLong(r8, r8d)
|
|
AdcLong(r9, r9d)
|
|
AdcLong(r10, r10d)
|
|
AdcLong(r11, r11d)
|
|
AdcLong(r12, r12d)
|
|
AdcLong(r13, r13d)
|
|
AdcLong(r14, r14d)
|
|
AdcLong(r15, r15d)
|
|
"setz %1" : "+r"(va), "=r"(out));
|
|
return out;
|
|
#undef AdcLong
|
|
}
|
|
#endif
|
|
|
|
static INLINE Bool
|
|
x86_IsCanonical(VA64 va)
|
|
{
|
|
#if defined(VM_X86_64) && defined(__GNUC__)
|
|
if (__builtin_constant_p(va)) {
|
|
return x86IsCanonicalC(va);
|
|
} else {
|
|
return x86IsCanonicalAsm(va);
|
|
}
|
|
#else
|
|
return x86IsCanonicalC(va);
|
|
#endif
|
|
}
|
|
|
|
static INLINE Bool
|
|
x86_IsCanonicalRange(VA64 va, unsigned size)
|
|
{
|
|
/*
|
|
* The check is simple as long as the size is less
|
|
* than the number of implemented bits.
|
|
*
|
|
* The only case we don't handle is one where the VA goes from a
|
|
* high canonical address and wraps to a non-canonical address
|
|
* (e.g. 0x00008000_00000000) or higher. Our test would falsely
|
|
* consider this canonical.
|
|
*/
|
|
ASSERT_ON_COMPILE(sizeof(size) * 8 < VA64_IMPL_BITS);
|
|
|
|
/*
|
|
* VA64_CANONICAL_MASK is the lowest canonical address with the
|
|
* upper bits all set.
|
|
*
|
|
* VA64_CANONICAL_HOLE_START is one higher than the highest valid
|
|
* canonical address with the upper bits all cleared. Note that we
|
|
* access up to (va + size - 1), not (va + size), so <= is correct.
|
|
*/
|
|
return va >= VA64_CANONICAL_MASK ||
|
|
va + size <= VA64_CANONICAL_HOLE_START;
|
|
}
|
|
|
|
#endif /* _X86PAGING_64_H_ */
|