diff --git a/Cargo.lock b/Cargo.lock index c066dd4eb..68ddc6c92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -846,6 +846,14 @@ dependencies = [ "zstd", ] +[[package]] +name = "krun-whp" +version = "0.1.0-1.18.0" +dependencies = [ + "log", + "windows-sys", +] + [[package]] name = "kvm-bindings" version = "0.12.1" diff --git a/Cargo.toml b/Cargo.toml index 83db53c57..00b06aa00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "src/vmm", "src/aws_nitro", "src/rutabaga_gfx", + "src/whp", ] exclude = ["examples/gtk_display"] resolver = "2" diff --git a/src/whp/Cargo.toml b/src/whp/Cargo.toml new file mode 100644 index 000000000..d2e731486 --- /dev/null +++ b/src/whp/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "krun-whp" +version = "0.1.0-1.18.0" +authors = ["The libkrun Authors"] +edition = "2021" +description = "Windows Hypervisor Platform backend for libkrun" +license = "Apache-2.0" +repository = "https://github.com/containers/libkrun" + +[dependencies] +log = "0.4.0" + +[target.'cfg(target_os = "windows")'.dependencies] +windows-sys = { version = "0.61.2", features = [ + "Win32_Foundation", + "Win32_System_Hypervisor", + "Win32_System_Performance", +] } diff --git a/src/whp/src/lib.rs b/src/whp/src/lib.rs new file mode 100644 index 000000000..532fac467 --- /dev/null +++ b/src/whp/src/lib.rs @@ -0,0 +1,927 @@ +// Copyright 2026 Red Hat, Inc. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(target_os = "windows")] + +use std::ffi::c_void; +use std::fmt::{Display, Formatter}; +use std::mem::{self, MaybeUninit}; +use std::sync::Arc; + +use log::{debug, error}; +use windows_sys::Win32::Foundation::S_OK; +use windows_sys::Win32::System::Hypervisor::{ + WHvCancelRunVirtualProcessor, WHvCapabilityCodeHypervisorPresent, WHvCreatePartition, + WHvCreateVirtualProcessor, WHvDeletePartition, WHvDeleteVirtualProcessor, + WHvEmulatorCreateEmulator, WHvEmulatorDestroyEmulator, WHvEmulatorTryIoEmulation, + WHvEmulatorTryMmioEmulation, WHvGetCapability, WHvGetVirtualProcessorRegisters, WHvMapGpaRange, + WHvMapGpaRangeFlagExecute, WHvMapGpaRangeFlagRead, WHvMapGpaRangeFlagWrite, + WHvPartitionPropertyCodeCpuidResultList, WHvPartitionPropertyCodeExtendedVmExits, + WHvPartitionPropertyCodeLocalApicEmulationMode, WHvPartitionPropertyCodeProcessorCount, + WHvRegisterInternalActivityState, WHvRequestInterrupt, WHvRunVirtualProcessor, + WHvRunVpExitReasonCanceled, WHvRunVpExitReasonInvalidVpRegisterValue, + WHvRunVpExitReasonMemoryAccess, WHvRunVpExitReasonUnrecoverableException, + WHvRunVpExitReasonUnsupportedFeature, WHvRunVpExitReasonX64Cpuid, WHvRunVpExitReasonX64Halt, + WHvRunVpExitReasonX64InterruptWindow, WHvRunVpExitReasonX64IoPortAccess, + WHvRunVpExitReasonX64MsrAccess, WHvSetPartitionProperty, WHvSetVirtualProcessorRegisters, + WHvSetupPartition, WHvX64LocalApicEmulationModeXApic, + WHvX64RegisterDeliverabilityNotifications, WHvX64RegisterRax, WHvX64RegisterRbx, + WHvX64RegisterRcx, WHvX64RegisterRdx, WHvX64RegisterRip, WHV_CAPABILITY, + WHV_EMULATOR_CALLBACKS, WHV_EMULATOR_STATUS, WHV_MEMORY_ACCESS_CONTEXT, WHV_PARTITION_HANDLE, + WHV_PARTITION_PROPERTY, WHV_PARTITION_PROPERTY_CODE, WHV_REGISTER_NAME, WHV_REGISTER_VALUE, + WHV_RUN_VP_EXIT_CONTEXT, WHV_VP_EXIT_CONTEXT, WHV_X64_CPUID_RESULT, + WHV_X64_IO_PORT_ACCESS_CONTEXT, +}; +use windows_sys::Win32::System::Performance::{QueryPerformanceCounter, QueryPerformanceFrequency}; + +#[derive(Debug)] +pub enum Error { + CheckCapability(i32), + HypervisorNotPresent, + CreatePartition(i32), + SetPartitionProperty(i32), + SetupPartition(i32), + DeletePartition(i32), + MapGpaRange(i32), + RequestInterrupt(i32), + CreateVirtualProcessor(i32), + DeleteVirtualProcessor(i32), + RunVirtualProcessor(i32), + GetRegisters(i32), + SetRegisters(i32), + MemoryAlignment, + CreateEmulator(i32), + DestroyEmulator(i32), + IoEmulation(i32), + MmioEmulation(i32), + EmulationFailed(u32), +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + use Error::*; + match self { + CheckCapability(hr) => write!(f, "WHvGetCapability failed: HRESULT 0x{hr:08x}"), + HypervisorNotPresent => write!(f, "WHP hypervisor is not present on this system"), + CreatePartition(hr) => write!(f, "WHvCreatePartition failed: HRESULT 0x{hr:08x}"), + SetPartitionProperty(hr) => { + write!(f, "WHvSetPartitionProperty failed: HRESULT 0x{hr:08x}") + } + SetupPartition(hr) => write!(f, "WHvSetupPartition failed: HRESULT 0x{hr:08x}"), + DeletePartition(hr) => write!(f, "WHvDeletePartition failed: HRESULT 0x{hr:08x}"), + MapGpaRange(hr) => write!(f, "WHvMapGpaRange failed: HRESULT 0x{hr:08x}"), + RequestInterrupt(hr) => write!(f, "WHvRequestInterrupt failed: HRESULT 0x{hr:08x}"), + CreateVirtualProcessor(hr) => { + write!(f, "WHvCreateVirtualProcessor failed: HRESULT 0x{hr:08x}") + } + DeleteVirtualProcessor(hr) => { + write!(f, "WHvDeleteVirtualProcessor failed: HRESULT 0x{hr:08x}") + } + RunVirtualProcessor(hr) => { + write!(f, "WHvRunVirtualProcessor failed: HRESULT 0x{hr:08x}") + } + GetRegisters(hr) => { + write!( + f, + "WHvGetVirtualProcessorRegisters failed: HRESULT 0x{hr:08x}" + ) + } + SetRegisters(hr) => { + write!( + f, + "WHvSetVirtualProcessorRegisters failed: HRESULT 0x{hr:08x}" + ) + } + MemoryAlignment => write!(f, "WHP memory mapping must be 4KB aligned"), + CreateEmulator(hr) => { + write!(f, "WHvEmulatorCreateEmulator failed: HRESULT 0x{hr:08x}") + } + DestroyEmulator(hr) => { + write!(f, "WHvEmulatorDestroyEmulator failed: HRESULT 0x{hr:08x}") + } + IoEmulation(hr) => { + write!(f, "WHvEmulatorTryIoEmulation failed: HRESULT 0x{hr:08x}") + } + MmioEmulation(hr) => { + write!(f, "WHvEmulatorTryMmioEmulation failed: HRESULT 0x{hr:08x}") + } + EmulationFailed(status) => { + let reason = match *status { + s if s & (1 << 1) != 0 => "internal emulation failure", + s if s & (1 << 2) != 0 => "I/O port callback failed", + s if s & (1 << 3) != 0 => "memory callback failed", + s if s & (1 << 4) != 0 => "translate GVA page callback failed", + s if s & (1 << 5) != 0 => "translated GPA page is not aligned", + s if s & (1 << 6) != 0 => "get VP registers callback failed", + s if s & (1 << 7) != 0 => "set VP registers callback failed", + s if s & (1 << 8) != 0 => "interrupt caused intercept", + s if s & (1 << 9) != 0 => "guest cannot be faulted", + _ => "unknown", + }; + write!( + f, + "Instruction emulation failed: {reason} (status 0x{status:08x})" + ) + } + } + } +} + +/// Verifies that the Windows Hypervisor Platform is available. +pub fn check_hypervisor() -> Result<(), Error> { + let mut capability = MaybeUninit::::uninit(); + let mut written_size: u32 = 0; + + unsafe { + let hr = WHvGetCapability( + WHvCapabilityCodeHypervisorPresent, + capability.as_mut_ptr().cast(), + mem::size_of::() as u32, + &mut written_size, + ); + + if hr != S_OK { + return Err(Error::CheckCapability(hr)); + } + + if capability.assume_init().HypervisorPresent == 0 { + return Err(Error::HypervisorNotPresent); + } + } + + debug!("WHP hypervisor is present"); + Ok(()) +} + +/// Parsed CPUID exit context returned by [`WhpVcpu::cpuid_exit_info`]. +#[derive(Debug, Clone)] +pub struct CpuidExitInfo { + pub leaf: u64, + pub subleaf: u64, + pub default_eax: u64, + pub default_ebx: u64, + pub default_ecx: u64, + pub default_edx: u64, +} + +/// Parsed MSR exit context returned by [`WhpVcpu::msr_exit_info`]. +#[derive(Debug, Clone)] +pub struct MsrExitInfo { + pub msr_number: u32, + pub is_write: bool, + pub rax: u64, + pub rdx: u64, +} + +pub struct WhpVm { + handle: WHV_PARTITION_HANDLE, +} + +#[repr(C)] +struct WhvInterruptControl { + type_and_flags: u64, + destination: u32, + vector: u32, +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy)] +pub enum InterruptType { + Fixed = 0, + LowestPriority = 1, +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy)] +pub enum InterruptDestinationMode { + Physical = 0, + Logical = 1, +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy)] +pub enum InterruptTriggerMode { + Edge = 0, + Level = 1, +} + +#[derive(Debug, Clone)] +pub struct InterruptRequest { + pub interrupt_type: InterruptType, + pub destination_mode: InterruptDestinationMode, + pub trigger_mode: InterruptTriggerMode, + pub destination: u32, + pub vector: u32, +} + +impl WhpVm { + /// Creates a new WHP partition. + /// WHP has a create → configure → finalize model + /// WHvCreatePartition — allocates the partition object but it's not yet usable. + /// WHvSetPartitionProperty — sets properties like processor count, APIC emulation mode, etc. + /// These properties can only be set before finalization. + /// WHvSetupPartition — finalizes the partition. After this call, configuration is locked and you can start creating vCPUs. + /// You cannot change the config (like processor count) after this call. + pub fn new(vcpu_count: u32) -> Result { + let handle = unsafe { + let mut h: WHV_PARTITION_HANDLE = 0; + let hr = WHvCreatePartition(&mut h); + if hr != S_OK { + return Err(Error::CreatePartition(hr)); + } + h + }; + + let tsc_freq_hz = Self::detect_tsc_frequency(); + + if let Err(e) = Self::configure_partition(handle, vcpu_count, tsc_freq_hz) { + let _ = unsafe { WHvDeletePartition(handle) }; + return Err(e); + } + + debug!("WHP partition created with {vcpu_count} vCPU(s)"); + Ok(WhpVm { handle }) + } + + fn configure_partition( + handle: WHV_PARTITION_HANDLE, + vcpu_count: u32, + tsc_freq_hz: u64, + ) -> Result<(), Error> { + Self::set_property(handle, WHvPartitionPropertyCodeProcessorCount, |p| { + p.ProcessorCount = vcpu_count; + })?; + + Self::set_property( + handle, + WHvPartitionPropertyCodeLocalApicEmulationMode, + |p| { + p.LocalApicEmulationMode = WHvX64LocalApicEmulationModeXApic; + }, + )?; + + // Enable CPUID exits (bit 0) and MSR exits (bit 1) + // https://github.com/google/crosvm/blob/main/hypervisor/src/whpx/whpx_sys/WinHvPlatformDefs.h#L74 + Self::set_property(handle, WHvPartitionPropertyCodeExtendedVmExits, |p| { + p.ExtendedVmExits.AsUINT64 = 0b11; // bit 0 = X64CpuidExit, bit 1 = X64MsrExit + })?; + + let mut cpuid_results: Vec = Vec::new(); + + // Standard Intel CPUID leaves (Intel's SDM Vol. 2A) + if tsc_freq_hz > 0 { + debug!("Providing TSC frequency to guest: {} Hz", tsc_freq_hz); + + // CPUID 0x15 — TSC / Core Crystal Clock (Intel SDM) + // TSC frequency in Hz = ECX * (EBX / EAX). + // We use a 1 Hz crystal with EBX = tsc_freq_hz to avoid rounding. + cpuid_results.push(WHV_X64_CPUID_RESULT { + Function: 0x15, + Reserved: [0; 3], + Eax: 1, + Ebx: tsc_freq_hz as u32, + Ecx: 1, + Edx: 0, + }); + } + + if !cpuid_results.is_empty() { + let hr = unsafe { + WHvSetPartitionProperty( + handle, + WHvPartitionPropertyCodeCpuidResultList, + cpuid_results.as_ptr() as *const _, + (cpuid_results.len() * mem::size_of::()) as u32, + ) + }; + if hr != S_OK { + return Err(Error::SetPartitionProperty(hr)); + } + debug!("CpuidResultList set ({} entries)", cpuid_results.len()); + } + + let hr = unsafe { WHvSetupPartition(handle) }; + if hr != S_OK { + Err(Error::SetupPartition(hr)) + } else { + Ok(()) + } + } + + fn set_property( + handle: WHV_PARTITION_HANDLE, + code: WHV_PARTITION_PROPERTY_CODE, + configure: impl FnOnce(&mut WHV_PARTITION_PROPERTY), + ) -> Result<(), Error> { + let mut prop = unsafe { MaybeUninit::::zeroed().assume_init() }; + configure(&mut prop); + let hr = unsafe { + WHvSetPartitionProperty( + handle, + code, + &prop as *const _ as *const _, + mem::size_of::() as u32, + ) + }; + if hr != S_OK { + Err(Error::SetPartitionProperty(hr)) + } else { + Ok(()) + } + } + + /// Detect the host TSC frequency in Hz. + /// Tries CPUID 0x15, then 0x16 (Intel), then falls back to measuring + /// via RDTSC over a short sleep (works on AMD and all other x86_64). + fn detect_tsc_frequency() -> u64 { + unsafe { + // Leaf 0 returns the maximum supported standard leaf in EAX. + // leaves 0x15 and 0x16 may not be supported (like in non-Intel hardware (like AMD) + // or when using nested virtualization), so it's better to check first. + let max_leaf = core::arch::x86_64::__cpuid(0x0).eax; + + if max_leaf >= 0x15 { + let cpuid15 = core::arch::x86_64::__cpuid(0x15); + if cpuid15.eax != 0 && cpuid15.ebx != 0 && cpuid15.ecx != 0 { + let freq = (cpuid15.ecx as u64 * cpuid15.ebx as u64) / cpuid15.eax as u64; + debug!("TSC frequency from CPUID 0x15: {} Hz", freq); + return freq; + } + } + + if max_leaf >= 0x16 { + let cpuid16 = core::arch::x86_64::__cpuid(0x16); + if cpuid16.eax != 0 { + let freq = cpuid16.eax as u64 * 1_000_000; + debug!("TSC frequency from CPUID 0x16: {} Hz", freq); + return freq; + } + } + } + + debug!("CPUID 0x15/0x16 unavailable, measuring TSC frequency via QPC"); + + // If CPUID 0x15/0x16 is unavailable, we try to measure the CPU's TSC frequency by using the + // Windows High-Resolution Performance Counter (QPC) + // We takes a snapshot of both the QPC and the TSC, waits for exactly 10 milliseconds, + // and then takes another snapshot of both. By comparing how many TSC ticks occurred during those 10 ms + // of QPC ticks, we can accurately calculate the TSC ticks per second (Hz). + + let mut qpc_freq = 0; + let mut start_qpc = 0; + let mut end_qpc = 0; + + unsafe { + // No need to check the result, it will always succeed + // https://learn.microsoft.com/en-us/windows/win32/api/profileapi/nf-profileapi-queryperformancefrequency#return-value + let _ = QueryPerformanceFrequency(&mut qpc_freq); + // Take a snapshot of the current QPC tick count + // No need to check the result, it will always succeed. + // https://learn.microsoft.com/en-us/windows/win32/api/profileapi/nf-profileapi-queryperformancecounter#return-value + let _ = QueryPerformanceCounter(&mut start_qpc); + } + + // Take a snapshot of the CPU's TSC + let start_tsc = unsafe { core::arch::x86_64::_rdtsc() }; + + // Calculate how many QPC ticks equal 10 milliseconds and loop over it. + // If we used `std::thread::sleep(10ms)` instead, the OS might put this thread + // to sleep for 15ms or 20ms, ruining our highly sensitive calibration interval. + let target_qpc = start_qpc + (qpc_freq / 100); + loop { + unsafe { + // No need to check the result, it will always succeed. + let _ = QueryPerformanceCounter(&mut end_qpc); + } + if end_qpc >= target_qpc { + break; + } + } + + // Take a snapshot of the CPU's TSC after 10ms + let end_tsc = unsafe { core::arch::x86_64::_rdtsc() }; + // Calculate the actual elapsed QPC ticks + let qpc_elapsed = end_qpc - start_qpc; + + if qpc_elapsed > 0 { + let tsc_elapsed = end_tsc.wrapping_sub(start_tsc); + // Calculate utilizing u128 to prevent overflow before dividing + let freq = (tsc_elapsed as u128 * qpc_freq as u128 / qpc_elapsed as u128) as u64; + debug!( + "TSC frequency measured: {} Hz ({} MHz)", + freq, + freq / 1_000_000 + ); + return freq; + } + + error!("Could not determine TSC frequency"); + 0 + } + + /// Maps a host memory region into the guest physical address space. + /// + /// # Safety + /// + /// `host_start_addr` must point to a valid, writable memory region of at least `size` bytes + /// that remains live for the lifetime of this partition. + pub unsafe fn map_memory( + &self, + host_start_addr: *mut c_void, + guest_start_addr: u64, + size: u64, + ) -> Result<(), Error> { + // WHP requires 4KB alignment + if (host_start_addr as usize | guest_start_addr as usize | size as usize) & 0xFFF != 0 { + return Err(Error::MemoryAlignment); + } + + let hr = unsafe { + WHvMapGpaRange( + self.handle, + host_start_addr as *const _, + guest_start_addr, + size, + WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute, + ) + }; + if hr != S_OK { + Err(Error::MapGpaRange(hr)) + } else { + Ok(()) + } + } + + /// Injects an interrupt into a virtual processor's local APIC. + /// http://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvrequestinterrupt + pub fn request_interrupt(&self, req: &InterruptRequest) -> Result<(), Error> { + let ctrl = WhvInterruptControl { + type_and_flags: (req.interrupt_type as u64) + | ((req.destination_mode as u64) << 8) + | ((req.trigger_mode as u64) << 12), + destination: req.destination, + vector: req.vector, + }; + + let hr = unsafe { + WHvRequestInterrupt( + self.handle, + &ctrl as *const _ as *const _, + mem::size_of::() as u32, + ) + }; + if hr != S_OK { + Err(Error::RequestInterrupt(hr)) + } else { + Ok(()) + } + } + + /// Fire a fixed, edge-triggered interrupt to APIC ID with the given vector. + pub fn request_fixed_interrupt(&self, vector: u32, apic_id: u32) { + let result = self.request_interrupt(&InterruptRequest { + interrupt_type: InterruptType::Fixed, + destination_mode: InterruptDestinationMode::Physical, + trigger_mode: InterruptTriggerMode::Edge, + destination: apic_id, + vector, + }); + if result.is_err() { + error!("inject_vector(0x{vector:02x}) failed: {result:?}"); + } + } + + /// Cancel a running `WHvRunVirtualProcessor` call so the vCPU thread + /// exits with `Canceled`. Required after `request_interrupt`/`request_fixed_interrupt` to wake a + /// vCPU that is blocked in HLT. + pub fn cancel_vcpu(&self, vp_index: u32) { + let hr = unsafe { WHvCancelRunVirtualProcessor(self.handle, vp_index, 0) }; + if hr != S_OK { + error!("WHvCancelRunVirtualProcessor({vp_index}) failed: HRESULT 0x{hr:08x}"); + } + } + + pub fn partition_handle(&self) -> WHV_PARTITION_HANDLE { + self.handle + } +} + +impl Drop for WhpVm { + fn drop(&mut self) { + let hr = unsafe { WHvDeletePartition(self.handle) }; + if hr != S_OK { + error!("WHvDeletePartition failed: HRESULT 0x{hr:08x}"); + } + } +} + +unsafe impl Send for WhpVm {} +unsafe impl Sync for WhpVm {} + +/// Wraps a `WHV_EMULATOR_HANDLE` (one per vCPU). +/// +/// The Windows Hypervisor Platform provides a built-in x86 instruction emulator. +/// When the guest OS tries to read or write to a virtual device (via MMIO or Port I/O), +/// the physical CPU exits to the hypervisor. However, the hypervisor only knows *where* +/// the guest tried to access, not *how* (e.g., was it a 1-byte read or a 4-byte write?). +/// +/// We pass the raw VM exit context into this emulator, and it decodes the x86 +/// instruction, calls our provided callback functions to perform the actual I/O, +/// updates the guest registers with the result, and advances the instruction pointer. +pub struct WhpEmulator { + handle: *mut c_void, +} + +impl WhpEmulator { + /// The `callbacks` struct contains function pointers to VMM's I/O + /// and memory handling routines. When the emulator decodes an instruction + /// that reads/writes memory, it will call these pointers to get or set the data. + pub fn new(callbacks: WHV_EMULATOR_CALLBACKS) -> Result { + let mut handle: *mut c_void = std::ptr::null_mut(); + let hr = unsafe { WHvEmulatorCreateEmulator(&callbacks, &mut handle) }; + if hr != S_OK { + Err(Error::CreateEmulator(hr)) + } else { + Ok(WhpEmulator { handle }) + } + } + + fn check_emulation_result( + hr: i32, + status: WHV_EMULATOR_STATUS, + hresult_err: fn(i32) -> Error, + ) -> Result<(), Error> { + if hr != S_OK { + return Err(hresult_err(hr)); + } + // Check the emulator status bitfield. + // According to Microsoft docs, Bit 0 (EmulationSuccessful) must be 1. + let bits = unsafe { status.AsUINT32 }; + if bits & 1 == 0 { + return Err(Error::EmulationFailed(bits)); + } + Ok(()) + } + + /// Attempts to emulate an x86 Port I/O (PIO) instruction (e.g., `IN`, `OUT`). + /// + /// It is called when `WHvRunVirtualProcessor` returns `WHvRunVpExitReasonX64IoPortAccess`. + /// + /// * `context`: An opaque pointer that WHP will pass directly into your callbacks. + /// This points to the Bus so the callback can route the I/O to the correct device. + /// * `vp_context`: The state of the vCPU's registers at the time of the exit. + /// * `io_context`: The details of the I/O port exit (port number, access size, etc.). + /// + /// # Safety + /// + /// All three pointers must be valid and non-null for the duration of the call. + // https://learn.microsoft.com/en-us/virtualization/api/hypervisor-instruction-emulator/funcs/whvemulatortryemulation + pub unsafe fn try_io_emulation( + &self, + context: *const c_void, + vp_context: *const WHV_VP_EXIT_CONTEXT, + io_context: *const WHV_X64_IO_PORT_ACCESS_CONTEXT, + ) -> Result<(), Error> { + let mut status: WHV_EMULATOR_STATUS = mem::zeroed(); + let hr = + WHvEmulatorTryIoEmulation(self.handle, context, vp_context, io_context, &mut status); + Self::check_emulation_result(hr, status, Error::IoEmulation) + } + + /// Attempts to emulate an x86 Memory-Mapped I/O (MMIO) instruction (e.g., `MOV eax, [mem]`). + /// + /// It is called when `WHvRunVirtualProcessor` returns `WHvRunVpExitReasonMemoryAccess` + /// *and* the memory address belongs to a virtual device. + /// + /// # Safety + /// + /// All three pointers must be valid and non-null for the duration of the call. + pub unsafe fn try_mmio_emulation( + &self, + context: *const c_void, + vp_context: *const WHV_VP_EXIT_CONTEXT, + mmio_context: *const WHV_MEMORY_ACCESS_CONTEXT, + ) -> Result<(), Error> { + let mut status: WHV_EMULATOR_STATUS = mem::zeroed(); + let hr = WHvEmulatorTryMmioEmulation( + self.handle, + context, + vp_context, + mmio_context, + &mut status, + ); + Self::check_emulation_result(hr, status, Error::MmioEmulation) + } +} + +impl Drop for WhpEmulator { + fn drop(&mut self) { + let hr = unsafe { WHvEmulatorDestroyEmulator(self.handle) }; + if hr != S_OK { + error!("WHvEmulatorDestroyEmulator failed: HRESULT 0x{hr:08x}"); + } + } +} + +unsafe impl Send for WhpEmulator {} + +#[derive(Debug)] +pub enum VcpuExitReason { + IoPortAccess, + MemoryAccess, + Halt, + Canceled, + CpuidAccess, + MsrAccess, + InterruptWindow, + UnrecoverableException, + InvalidVpRegisterValue, + UnsupportedFeature, + Unknown(u32), +} + +pub struct WhpVcpu { + vm: Arc, + index: u32, + exit_context: WHV_RUN_VP_EXIT_CONTEXT, +} + +impl WhpVcpu { + /// Creates a new virtual processor within the given partition. + pub fn new(vm: Arc, index: u32) -> Result { + let hr = unsafe { WHvCreateVirtualProcessor(vm.partition_handle(), index, 0) }; + if hr != S_OK { + return Err(Error::CreateVirtualProcessor(hr)); + } + + debug!("Created WHP vCPU {index}"); + Ok(WhpVcpu { + vm, + index, + exit_context: unsafe { mem::zeroed() }, + }) + } + + /// Runs the virtual processor until a VM exit occurs. + /// + /// The raw exit context is stored internally and can be accessed via + /// [`vp_exit_context`], [`io_port_access_context`], and + /// [`memory_access_context`] for passing to the instruction emulator. + pub fn run(&mut self) -> Result { + let hr = unsafe { + WHvRunVirtualProcessor( + self.vm.partition_handle(), + self.index, + &mut self.exit_context as *mut _ as *mut _, + mem::size_of::() as u32, + ) + }; + if hr != S_OK { + return Err(Error::RunVirtualProcessor(hr)); + } + + Ok(Self::decode_reason(&self.exit_context)) + } + + pub fn vp_exit_context(&self) -> *const WHV_VP_EXIT_CONTEXT { + &self.exit_context.VpContext + } + + pub fn io_port_access_context(&self) -> *const WHV_X64_IO_PORT_ACCESS_CONTEXT { + unsafe { &self.exit_context.Anonymous.IoPortAccess } + } + + pub fn memory_access_context(&self) -> *const WHV_MEMORY_ACCESS_CONTEXT { + unsafe { &self.exit_context.Anonymous.MemoryAccess } + } + + /// Returns parsed CPUID exit info. Only valid after a `CpuidAccess` exit. + pub fn cpuid_exit_info(&self) -> CpuidExitInfo { + let ctx = unsafe { &self.exit_context.Anonymous.CpuidAccess }; + CpuidExitInfo { + leaf: ctx.Rax, + subleaf: ctx.Rcx, + default_eax: ctx.DefaultResultRax, + default_ebx: ctx.DefaultResultRbx, + default_ecx: ctx.DefaultResultRcx, + default_edx: ctx.DefaultResultRdx, + } + } + + /// Returns parsed MSR exit info. Only valid after an `MsrAccess` exit. + pub fn msr_exit_info(&self) -> MsrExitInfo { + let ctx = unsafe { &self.exit_context.Anonymous.MsrAccess }; + MsrExitInfo { + msr_number: ctx.MsrNumber, + is_write: unsafe { ctx.AccessInfo.Anonymous._bitfield } & 1 != 0, + rax: ctx.Rax, + rdx: ctx.Rdx, + } + } + + /// RIP at the time of the VM exit. + pub fn exit_rip(&self) -> u64 { + self.exit_context.VpContext.Rip + } + + /// Instruction length from the exit context (lower 4 bits of the packed byte). + pub fn instruction_length(&self) -> u8 { + self.exit_context.VpContext._bitfield & 0x0F + } + + /// Advances RIP past the faulting instruction using the instruction length + /// from the exit context. + pub fn advance_rip(&self) -> Result<(), Error> { + let new_rip = self.exit_context.VpContext.Rip + self.instruction_length() as u64; + self.set_registers64([(WHvX64RegisterRip, new_rip)]) + } + + /// Sets RAX, RBX, RCX, RDX and advances RIP in a single register write. + /// Used by CPUID exit handling. + pub fn complete_cpuid(&self, eax: u64, ebx: u64, ecx: u64, edx: u64) -> Result<(), Error> { + let new_rip = self.exit_context.VpContext.Rip + self.instruction_length() as u64; + + self.set_registers64([ + (WHvX64RegisterRax, eax), + (WHvX64RegisterRbx, ebx), + (WHvX64RegisterRcx, ecx), + (WHvX64RegisterRdx, edx), + (WHvX64RegisterRip, new_rip), + ]) + } + + /// Sets RAX and RDX (MSR read result) then advances RIP. + pub fn complete_msr_read(&self, rax: u64, rdx: u64) -> Result<(), Error> { + let new_rip = self.exit_context.VpContext.Rip + self.instruction_length() as u64; + + self.set_registers64([ + (WHvX64RegisterRax, rax), + (WHvX64RegisterRdx, rdx), + (WHvX64RegisterRip, new_rip), + ]) + } + + /// Helper to get multiple registers at once. + /// Returns a stack-allocated array of results. + pub fn get_registers( + &self, + names: [WHV_REGISTER_NAME; N], + ) -> Result<[WHV_REGISTER_VALUE; N], Error> { + // Create a zeroed array on the stack to hold the results + let mut values: [WHV_REGISTER_VALUE; N] = unsafe { mem::zeroed() }; + + let hr = unsafe { + WHvGetVirtualProcessorRegisters( + self.vm.partition_handle(), + self.index, + names.as_ptr(), + N as u32, + values.as_mut_ptr(), + ) + }; + + if hr != S_OK { + Err(Error::GetRegisters(hr)) + } else { + // Return the array directly! + Ok(values) + } + } + + pub fn get_registers64( + &self, + names: [WHV_REGISTER_NAME; N], + ) -> Result<[u64; N], Error> { + let values = self.get_registers(names)?; + Ok(values.map(|v| unsafe { v.Reg64 })) + } + + fn set_whp_registers( + &self, + names: &[WHV_REGISTER_NAME], + values: &[WHV_REGISTER_VALUE], + ) -> Result<(), Error> { + assert_eq!(names.len(), values.len()); + let count = names.len() as u32; + + let hr = unsafe { + WHvSetVirtualProcessorRegisters( + self.vm.partition_handle(), + self.index, + names.as_ptr(), + count, + values.as_ptr(), + ) + }; + if hr != S_OK { + Err(Error::SetRegisters(hr)) + } else { + Ok(()) + } + } + + pub fn set_registers( + &self, + pairs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); N], + ) -> Result<(), Error> { + let mut names: [WHV_REGISTER_NAME; N] = unsafe { mem::zeroed() }; + let mut values: [WHV_REGISTER_VALUE; N] = unsafe { mem::zeroed() }; + + for i in 0..N { + names[i] = pairs[i].0; + values[i] = pairs[i].1; + } + + self.set_whp_registers(&names, &values) + } + + pub fn set_registers64( + &self, + pairs: [(WHV_REGISTER_NAME, u64); N], + ) -> Result<(), Error> { + let mut names: [WHV_REGISTER_NAME; N] = unsafe { mem::zeroed() }; + let mut values: [WHV_REGISTER_VALUE; N] = unsafe { mem::zeroed() }; + + for i in 0..N { + names[i] = pairs[i].0; + values[i] = WHV_REGISTER_VALUE { Reg64: pairs[i].1 }; + } + + self.set_whp_registers(&names, &values) + } + + pub fn vm(&self) -> &Arc { + &self.vm + } + + pub fn partition_handle(&self) -> WHV_PARTITION_HANDLE { + self.vm.partition_handle() + } + + pub fn index(&self) -> u32 { + self.index + } + + /// Clear the `HaltSuspend` flag so a halted vCPU can process a pending + /// interrupt. + /// + /// The Windows Hypervisor Platform (WHP) fails to automatically wake a vCPU from a HLT (Halt) state + /// when a virtual interrupt is injected via WHvRequestInterrupt. + /// While the interrupt is queued in the virtual Local APIC, the hypervisor's internal "HaltSuspend" flag remains set, + /// causing WHvRunVirtualProcessor to return immediately without executing the guest. + /// This function introduces a workaround by manually clear bit 1 of internal registry + /// `WHvRegisterInternalActivityState`, which represents the HaltSuspend state. + /// https://github.com/qemu/qemu/blob/master/target/i386/whpx/whpx-all.c#L2045 + /// https://github.com/google/crosvm/blob/main/hypervisor/src/whpx/whpx_sys/WinHvPlatformDefs.h#L833 + pub fn clear_halt_suspend(&self) -> Result<(), Error> { + let [activity] = self.get_registers64([WHvRegisterInternalActivityState])?; + if activity & 0x2 != 0 { + self.set_registers64([(WHvRegisterInternalActivityState, activity & !0x2)])?; + } + Ok(()) + } + + /// Clear the deliverability-notification request so WHP stops exiting + /// on every STI instruction. Called after an InterruptWindow exit. + /// https://github.com/google/crosvm/blob/main/hypervisor/src/whpx/whpx_sys/WinHvPlatformDefs.h#L773 + pub fn clear_interrupt_window(&self) -> Result<(), Error> { + let [notifications] = self.get_registers64([WHvX64RegisterDeliverabilityNotifications])?; + if notifications & 0x2 != 0 { + // Clear Bit 1 (InterruptNotification) while preserving NMI and Priority bits + self.set_registers64([( + WHvX64RegisterDeliverabilityNotifications, + notifications & !0x2, + )])?; + } + Ok(()) + } + + #[allow(non_upper_case_globals)] + fn decode_reason(ctx: &WHV_RUN_VP_EXIT_CONTEXT) -> VcpuExitReason { + match ctx.ExitReason { + WHvRunVpExitReasonX64IoPortAccess => VcpuExitReason::IoPortAccess, + WHvRunVpExitReasonMemoryAccess => VcpuExitReason::MemoryAccess, + WHvRunVpExitReasonX64Halt => VcpuExitReason::Halt, + WHvRunVpExitReasonCanceled => VcpuExitReason::Canceled, + WHvRunVpExitReasonX64Cpuid => VcpuExitReason::CpuidAccess, + WHvRunVpExitReasonX64MsrAccess => VcpuExitReason::MsrAccess, + WHvRunVpExitReasonX64InterruptWindow => VcpuExitReason::InterruptWindow, + WHvRunVpExitReasonUnrecoverableException => VcpuExitReason::UnrecoverableException, + WHvRunVpExitReasonInvalidVpRegisterValue => VcpuExitReason::InvalidVpRegisterValue, + WHvRunVpExitReasonUnsupportedFeature => VcpuExitReason::UnsupportedFeature, + _ => VcpuExitReason::Unknown(ctx.ExitReason as u32), + } + } +} + +impl Drop for WhpVcpu { + fn drop(&mut self) { + let hr = unsafe { WHvDeleteVirtualProcessor(self.vm.partition_handle(), self.index) }; + if hr != S_OK { + error!( + "WHvDeleteVirtualProcessor({}) failed: HRESULT 0x{hr:08x}", + self.index + ); + } + } +}