add XSETBV, CLAC, STAC, initrd
All checks were successful
Check / Build ISO (nightly-2025-04-27) (push) Successful in 43s

This commit is contained in:
Masato Imai
2025-08-22 14:32:21 +00:00
parent df56e251e4
commit 2d0db85574
8 changed files with 315 additions and 26 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ nel_os_bootloader/fat.img
nel_os_bootloader/myOSimage.img
nel_os_bootloader/iso/
nel_os_bootloader/nel_os.iso
nel_os_bootloader/vmlinux

View File

@ -6,7 +6,7 @@ EFI_BINARY="$1"
./create-iso.sh "$EFI_BINARY"
qemu-system-x86_64 -enable-kvm \
-m 512M \
-m 2G \
-serial mon:stdio \
-nographic \
-no-reboot \

View File

@ -10,6 +10,12 @@ pub fn load_kernel(vcpu: &mut dyn VCpu) -> Result<(), &'static str> {
let kernel =
unsafe { core::slice::from_raw_parts(*kernel_addr as *const u8, *kernel_size as usize) };
let initrd_addr = crate::ROOTFS_ADDR.get().unwrap();
let initrd_size = crate::ROOTFS_SIZE.get().unwrap();
let initrd =
unsafe { core::slice::from_raw_parts(*initrd_addr as *const u8, *initrd_size as usize) };
info!("Creating boot parameters");
let guest_mem_size = vcpu.get_guest_memory_size();
let mut bp = BootParams::from_bytes(kernel)?;
@ -23,6 +29,8 @@ pub fn load_kernel(vcpu: &mut dyn VCpu) -> Result<(), &'static str> {
bp.hdr.loadflags.set_keep_segments(true);
bp.hdr.cmd_line_ptr = LAYOUT_CMDLINE as u32;
bp.hdr.vid_mode = 0xFFFF;
bp.hdr.ramdisk_image = LAYOUT_INITRD as u32;
bp.hdr.ramdisk_size = initrd.len() as u32;
bp.add_e820_entry(0, LAYOUT_KERNEL_BASE, E820Type::Ram);
bp.add_e820_entry(
@ -65,6 +73,9 @@ pub fn load_kernel(vcpu: &mut dyn VCpu) -> Result<(), &'static str> {
LAYOUT_KERNEL_BASE as usize,
)?;
info!("Loading initrd image into guest memory");
load_image(vcpu, initrd, LAYOUT_INITRD as usize)?;
Ok(())
}

View File

@ -109,13 +109,7 @@ pub fn setup_exit_controls() -> Result<(), &'static str> {
exit_ctrl.write()?;
/*vmwrite(
0x4004,
1u64 << x86::irq::DOUBLE_FAULT_VECTOR
| 1u64 << x86::irq::GENERAL_PROTECTION_FAULT_VECTOR
| 1u64 << x86::irq::PAGE_FAULT_VECTOR
| 1u64 << x86::irq::X87_FPU_VECTOR,
)?;*/
vmwrite(0x4004, 1u64 << x86::irq::INVALID_OPCODE_VECTOR)?;
Ok(())
}

View File

@ -0,0 +1,67 @@
use modular_bitfield::{bitfield, prelude::B44};
use crate::vmm::x86_64::intel::vcpu::IntelVCpu;
/// Bit-level view of the 64-bit XCR0 extended control register value.
///
/// Each `bool` field occupies one bit, declared from bit 0 upward, followed by
/// 44 reserved bits so the whole struct is exactly 64 bits wide (`repr(u64)`).
/// Field names appear to follow the Intel SDM state-component names — verify
/// against the SDM before relying on the less common ones.
#[bitfield]
#[repr(u64)]
#[derive(Debug, Clone, Copy)]
pub struct XCR0 {
/// Bit 0: x87 FPU state.
pub x87: bool,
/// Bit 1: SSE state.
pub sse: bool,
/// Bit 2: AVX state.
pub avx: bool,
/// Bit 3: MPX bound registers (BNDREGS).
pub bndreg: bool,
/// Bit 4: MPX bound config/status (BNDCSR).
pub bndcsr: bool,
/// Bit 5: AVX-512 opmask state.
pub opmask: bool,
/// Bit 6: AVX-512 ZMM_Hi256 state.
pub zmm_hi256: bool,
/// Bit 7: AVX-512 Hi16_ZMM state.
pub hi16_zmm: bool,
/// Bit 8.
pub pt: bool,
/// Bit 9.
pub pkru: bool,
/// Bit 10.
pub pasid: bool,
/// Bit 11.
pub cet_u: bool,
/// Bit 12.
pub cet_s: bool,
/// Bit 13.
pub hdc: bool,
/// Bit 14.
pub intr: bool,
/// Bit 15.
pub lbr: bool,
/// Bit 16.
pub hwp: bool,
/// Bit 17: tile configuration state (one half of the "xtile" pair).
pub xtilecfg: bool,
/// Bit 18: tile data state (other half of the "xtile" pair).
pub xtiledata: bool,
/// Bit 19.
pub apx: bool,
// Bits 20..=63: padding so the struct fills a u64; no accessors are generated.
#[skip]
_reserved: B44,
}
/// Validates a guest `XSETBV` request and records the new guest XCR0 value.
///
/// Only XCR index 0 is supported. The value is rejected when it describes a
/// feature combination that the checks below consider invalid:
///
/// * x87 (bit 0) must always be set;
/// * AVX (bit 2) requires SSE (bit 1);
/// * BNDREGS (bit 3) and BNDCSR (bit 4) must be set or cleared together;
/// * the AVX-512 bits (5..=7) must be set or cleared together and require AVX;
/// * the tile bits (17 and 18) must be set or cleared together.
///
/// On success the value is stored in `vcpu.guest_xcr0`; the hardware register
/// itself is not touched here.
///
/// # Errors
///
/// Returns a static message describing the first violated rule.
///
/// NOTE(review): the value is not checked against the features the host
/// actually supports, nor against reserved bits — confirm whether the caller
/// guarantees that.
pub fn set_xcr(vcpu: &mut IntelVCpu, index: u32, xcr: u64) -> Result<(), &'static str> {
    const X87: u64 = 1 << 0;
    const SSE: u64 = 1 << 1;
    const AVX: u64 = 1 << 2;
    const BNDREGS: u64 = 1 << 3;
    const BNDCSR: u64 = 1 << 4;
    const AVX512: u64 = 0b111 << 5;
    const XTILE: u64 = 0b11 << 17;

    if index != 0 {
        return Err("Invalid XCR index");
    }

    if (xcr & X87) == 0 {
        return Err("X87 is not enabled");
    }

    if (xcr & AVX) != 0 && (xcr & SSE) == 0 {
        return Err("SSE is not enabled");
    }

    // Compare the two bits as booleans. Comparing the masked integers (or
    // their bitwise complements) never matches when both bits are set,
    // because the two masks select different bit positions.
    if ((xcr & BNDREGS) != 0) != ((xcr & BNDCSR) != 0) {
        return Err("BNDREGS and BNDCSR are not both enabled");
    }

    let avx512 = xcr & AVX512;
    if avx512 != 0 {
        if (xcr & AVX) == 0 {
            return Err("YMM bits are not enabled");
        }
        if avx512 != AVX512 {
            return Err("Invalid bits set in XCR0");
        }
    }

    // Tile config (bit 17) and tile data (bit 18) are an all-or-nothing pair.
    let xtile = xcr & XTILE;
    if xtile != 0 && xtile != XTILE {
        return Err("xtile bits are not both enabled");
    }

    vcpu.guest_xcr0 = XCR0::from(xcr);

    Ok(())
}

View File

@ -155,6 +155,35 @@ impl PIC {
Ok(false)
}
/// Programs the VM-entry interruption fields so that `vector` is delivered
/// to the guest as an exception on the next VM entry.
///
/// Whether an error code accompanies the event is decided purely by the
/// vector number (8, 10..=14, 17 and 21 carry one). For those vectors the
/// supplied `error_code` is written, falling back to `0` when it is `None`;
/// for all other vectors `error_code` is ignored.
///
/// # Errors
///
/// Propagates any failure reported by `vmwrite`.
pub fn inject_exception(
    &mut self,
    vector: u32,
    error_code: Option<u32>,
) -> Result<(), &'static str> {
    let delivers_error_code = matches!(vector, 8 | 10..=14 | 17 | 21);

    // Interruption type 3 is used for every injected event here.
    let entry_info = EntryIntrInfo::new()
        .with_vector(vector as u8)
        .with_typ(3)
        .with_ec_available(delivers_error_code)
        .with_valid(true);
    let raw_info = u32::from(entry_info) as u64;

    vmwrite(vmx::vmcs::control::VMENTRY_INTERRUPTION_INFO_FIELD, raw_info)?;

    if !delivers_error_code {
        return Ok(());
    }

    let code = error_code.unwrap_or(0) as u64;
    vmwrite(vmx::vmcs::control::VMENTRY_EXCEPTION_ERR_CODE, code)?;

    Ok(())
}
fn handle_io_in(&self, regs: &mut GuestRegisters, qual: QualIo) {
match qual.port() {
0x0CF8..=0x0CFF => regs.rax = 0,

View File

@ -4,6 +4,7 @@ mod controls;
mod cpuid;
mod cr;
mod ept;
mod fpu;
mod io;
mod msr;
mod qual;

View File

@ -1,6 +1,10 @@
use core::arch::asm;
use core::arch::{
asm,
x86_64::{_xgetbv, _xsetbv},
};
use raw_cpuid::cpuid;
use x86::controlregs::cr4;
use x86_64::{
registers::control::Cr4Flags,
structures::paging::{FrameAllocator, Size4KiB},
@ -14,6 +18,7 @@ use crate::{
common::{self, read_msr},
intel::{
auditor, controls, cpuid, ept,
fpu::{self, XCR0},
io::{vmm_interrupt_subscriber, IOBitmap},
msr::{self, ShadowMsr},
qual::{QualCr, QualIo},
@ -50,6 +55,8 @@ pub struct IntelVCpu {
pic: super::io::PIC,
io_bitmap: IOBitmap,
pub pending_irq: u16,
pub host_xcr0: u64,
pub guest_xcr0: XCR0,
}
impl IntelVCpu {
@ -118,6 +125,15 @@ impl IntelVCpu {
self.step_next_inst()?;
}
VmxExitReason::XSETBV => {
fpu::set_xcr(
self,
self.guest_registers.rcx as u32,
self.guest_registers.rax,
)?;
self.step_next_inst()?;
}
VmxExitReason::IO_INSTRUCTION => {
let qual = vmread(vmcs::ro::EXIT_QUALIFICATION)?;
let qual_io = QualIo::from(qual);
@ -147,26 +163,77 @@ impl IntelVCpu {
return Err("Triple fault");
}
VmxExitReason::EXCEPTION => {
let vmexit_intr_info = vmread(vmcs::ro::VMEXIT_INTERRUPTION_INFO)?;
let vector = (vmexit_intr_info & 0xFF) as u8;
let error_code = (vmexit_intr_info >> 8) & 0b111;
let error_code_valid = (vmexit_intr_info >> 11) & 0b1 != 0;
let vmexit_intr_info = vmread(vmcs::ro::VMEXIT_INTERRUPTION_INFO).unwrap();
let vector = (vmexit_intr_info & 0xFF) as u32;
let has_error_code = (vmexit_intr_info & (1 << 11)) != 0;
let idt_vectoring_info = vmread(vmcs::ro::IDT_VECTORING_INFO)?;
info!("idt valid: {}", idt_vectoring_info >> 31 & 0b1 != 0);
let rip = vmread(vmcs::guest::RIP)?;
let hpa = self.ept.get_phys_addr(rip).unwrap();
if error_code_valid {
info!(
"VM exit due to exception: vector {}, error code {}, at RIP {:#x} (hpa: {:#x})",
vector, error_code, rip, hpa
);
let error_code = if has_error_code {
Some(vmread(vmcs::ro::VMEXIT_INTERRUPTION_ERR_CODE).unwrap() as u32)
} else {
info!("VM exit due to exception: vector {}", vector);
None
};
let rip = vmread(vmcs::guest::RIP).unwrap();
let mut instruction_bytes = [0u8; 16];
let mut valid_bytes = 0;
match self.translate_guest_address(rip) {
Ok(guest_phys_addr) => {
for i in 0..16 {
match self.ept.get(guest_phys_addr + i) {
Ok(byte) => {
instruction_bytes[i as usize] = byte;
valid_bytes = i + 1;
}
Err(_) => break,
}
}
}
Err(e) => {
info!(
"Failed to get physical address for RIP: {:#x}, {:?}",
rip, e
);
return Err("Failed to get physical address for RIP");
}
}
if valid_bytes > 0 {
match instruction_bytes[0] {
0x0F => {
if valid_bytes > 1 {
match instruction_bytes[1] {
0x01 => match instruction_bytes[2] {
0xCA => {
let rflags = vmread(vmcs::guest::RFLAGS).unwrap();
vmwrite(vmcs::guest::RFLAGS, rflags & !(1 << 18))
.unwrap();
self.step_next_inst().unwrap();
}
0xCB => {
let rflags = vmread(vmcs::guest::RFLAGS).unwrap();
vmwrite(vmcs::guest::RFLAGS, rflags | (1 << 18))
.unwrap();
self.step_next_inst().unwrap();
}
_ => {
self.pic
.inject_exception(vector, error_code)
.unwrap();
}
},
_ => {
self.pic.inject_exception(vector, error_code).unwrap();
}
}
}
}
_ => {
self.pic.inject_exception(vector, error_code).unwrap();
}
}
}
return Err("VM exit due to exception");
}
_ => {
info!("VM exit reason: {:?}", exit_reason);
@ -178,6 +245,48 @@ impl IntelVCpu {
Ok(())
}
/// Installs the guest's XCR0 on the CPU ahead of a VM entry.
///
/// Does nothing when the host has not enabled `CR4.OSXSAVE`, when the guest
/// has not enabled `CR4.OSXSAVE`, or when the guest value already equals the
/// host value. The host XCR0 is sampled lazily on the first call and cached
/// in `self.host_xcr0` so it can be restored after the VM exit.
///
/// # Errors
///
/// Propagates a failure to read the guest CR4 from the VMCS.
fn load_guest_xcr0(&mut self) -> Result<(), &'static str> {
    // XCR0 bit 0 (x87) must be 1 in every value written with XSETBV.
    const XCR0_X87: u64 = 1;

    let host_cr4 = unsafe { cr4() };
    if (host_cr4.bits() & Cr4Flags::OSXSAVE.bits() as usize) == 0 {
        return Ok(());
    }

    if self.host_xcr0 == 0 {
        self.host_xcr0 = unsafe { _xgetbv(0) };
    }

    let guest_cr4 = vmread(x86::vmx::vmcs::guest::CR4)?;
    if guest_cr4 & Cr4Flags::OSXSAVE.bits() == 0 {
        return Ok(());
    }

    // `guest_xcr0` is all-zero until the guest executes its first XSETBV.
    // Writing a value with bit 0 clear would raise #GP on the host, so force
    // the x87 bit; values accepted through XSETBV emulation already have it.
    let guest_xcr0 = u64::from(self.guest_xcr0) | XCR0_X87;
    if guest_xcr0 != self.host_xcr0 {
        unsafe {
            _xsetbv(0, guest_xcr0);
        }
    }

    Ok(())
}
/// Puts the cached host XCR0 back on the CPU after a VM exit.
///
/// The register is only rewritten when both host and guest have
/// `CR4.OSXSAVE` enabled and the value currently in XCR0 differs from
/// `self.host_xcr0`.
///
/// # Errors
///
/// Propagates a failure to read the guest CR4 from the VMCS.
fn load_host_xcr0(&mut self) -> Result<(), &'static str> {
    let osxsave = Cr4Flags::OSXSAVE.bits();

    let host_cr4 = unsafe { cr4() };
    if (host_cr4.bits() & (osxsave as usize)) == 0 {
        return Ok(());
    }

    let guest_cr4 = vmread(x86::vmx::vmcs::guest::CR4)?;
    if guest_cr4 & osxsave == 0 {
        return Ok(());
    }

    let live_xcr0 = unsafe { _xgetbv(0) };
    if live_xcr0 == self.host_xcr0 {
        return Ok(());
    }

    unsafe {
        _xsetbv(0, self.host_xcr0);
    }
    Ok(())
}
fn step_next_inst(&mut self) -> Result<(), &'static str> {
use x86::vmx::vmcs;
let rip = vmread(vmcs::guest::RIP)?;
@ -193,9 +302,13 @@ impl IntelVCpu {
let success = {
let result: u16;
self.load_guest_xcr0().unwrap();
unsafe {
result = crate::vmm::x86_64::intel::asm::asm_vm_entry(self as *mut _);
};
self.load_host_xcr0().unwrap();
result == 0
};
@ -424,6 +537,77 @@ impl IntelVCpu {
Ok(())
}
/// Walks the guest's 4-level page tables to turn `vaddr` into the address
/// stored in the final page-table entry plus the in-page offset.
///
/// When the guest EFER (read from the VMCS, treated as 0 if the read fails)
/// does not have bit 8 set, no walk is performed and the low 32 bits of
/// `vaddr` are returned unchanged. Otherwise the walk starts at the guest CR3
/// and honours the large-page bit (bit 7) at the PDPT and PD levels.
///
/// # Errors
///
/// Fails if CR3 cannot be read, if any table entry cannot be fetched, or if
/// an entry on the path has its present bit clear.
///
/// NOTE(review): the mode test uses EFER bit 8 (LME) and ignores whether
/// paging is actually active — confirm this is the intended condition.
fn translate_guest_address(&mut self, vaddr: u64) -> Result<u64, &'static str> {
    const PRESENT: u64 = 1;
    const LARGE_PAGE: u64 = 1 << 7;
    const EFER_BIT8: u64 = 1 << 8;
    const ENTRY_ADDR_MASK: u64 = 0x000FFFFFFFFFF000;

    let cr3 = vmread(x86::vmx::vmcs::guest::CR3).map_err(|_| "Failed to read guest CR3")?;
    // Drop the low 12 bits to obtain the top-level table base.
    let root_table = cr3 & !0xFFF;

    let efer = vmread(x86::vmx::vmcs::guest::IA32_EFER_FULL).unwrap_or(0);
    if efer & EFER_BIT8 == 0 {
        return Ok(vaddr & 0xFFFFFFFF);
    }

    // Address of the 8-byte entry selected by the 9-bit index at `shift`.
    let slot = |table: u64, shift: u32| table + ((vaddr >> shift) & 0x1FF) * 8;

    // Level 4: PML4.
    let l4 = self.read_guest_phys_u64(slot(root_table, 39))?;
    if l4 & PRESENT == 0 {
        return Err("PML4 entry not present");
    }

    // Level 3: PDPT, possibly mapping a 1 GiB page.
    let l3 = self.read_guest_phys_u64(slot(l4 & ENTRY_ADDR_MASK, 30))?;
    if l3 & PRESENT == 0 {
        return Err("PDPT entry not present");
    }
    if l3 & LARGE_PAGE != 0 {
        let frame = l3 & 0x000FFFFFC0000000;
        return Ok(frame | (vaddr & 0x3FFFFFFF));
    }

    // Level 2: PD, possibly mapping a 2 MiB page.
    let l2 = self.read_guest_phys_u64(slot(l3 & ENTRY_ADDR_MASK, 21))?;
    if l2 & PRESENT == 0 {
        return Err("PD entry not present");
    }
    if l2 & LARGE_PAGE != 0 {
        let frame = l2 & 0x000FFFFFFFE00000;
        return Ok(frame | (vaddr & 0x1FFFFF));
    }

    // Level 1: PT, mapping a 4 KiB page.
    let l1 = self.read_guest_phys_u64(slot(l2 & ENTRY_ADDR_MASK, 12))?;
    if l1 & PRESENT == 0 {
        return Err("PT entry not present");
    }

    Ok((l1 & ENTRY_ADDR_MASK) | (vaddr & 0xFFF))
}
/// Reads eight consecutive bytes starting at `gpa` through `self.ept` and
/// assembles them into a little-endian `u64`.
///
/// # Errors
///
/// Returns `"Failed to read from EPT"` as soon as any single byte lookup
/// fails.
fn read_guest_phys_u64(&mut self, gpa: u64) -> Result<u64, &'static str> {
    let mut raw = [0u8; 8];

    for (offset, slot) in (0u64..).zip(raw.iter_mut()) {
        *slot = self
            .ept
            .get(gpa + offset)
            .map_err(|_| "Failed to read from EPT")?;
    }

    Ok(u64::from_le_bytes(raw))
}
fn dump_vmcs_settings(&self) -> Result<(), &'static str> {
info!("=== VMCS Control Fields ===");
@ -725,6 +909,8 @@ impl VCpu for IntelVCpu {
pic: super::io::PIC::new(),
io_bitmap: IOBitmap::new(frame_allocator),
pending_irq: 0,
host_xcr0: 0,
guest_xcr0: XCR0::new(),
})
}