use crate::{
    arch::kvm::vmx::ept::EptMapper,
    kdebug,
    libs::mutex::Mutex,
    mm::{page::PageFlags, syscall::ProtFlags},
    virt::kvm::host_mem::{__gfn_to_pfn, kvm_vcpu_gfn_to_memslot, PAGE_MASK, PAGE_SHIFT},
};
use bitfield_struct::bitfield;
use system_error::SystemError;

use super::{
    ept::check_ept_features,
    vcpu::VmxVcpu,
    vmcs::VmcsFields,
    vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite},
};
use crate::arch::kvm::vmx::mmu::VmcsFields::CTRL_EPTP_PTR;

// pub const PT64_ROOT_LEVEL: u32 = 4;
// pub const PT32_ROOT_LEVEL: u32 = 2;
// pub const PT32E_ROOT_LEVEL: u32 = 3;

// pub struct KvmMmuPage {
//     gfn: u64, // gfn of the first page in the address range managed by this page
//     role: KvmMmuPageRole, // basic info, including hardware features and the level it belongs to
//     // spt: *mut u64, // spt: shadow page table, the address of the struct page holding all page-table entries (ptes); page->private points back to this kvm_mmu_page
// }

#[bitfield(u32)]
pub struct KvmMmuPageRole {
    #[bits(4)]
    level: usize, // level of this page in the paging hierarchy
    cr4_pae: bool, // cr4.pae; 1 means 64-bit gptes are in use
    #[bits(2)]
    quadrant: usize, // if cr4.pae=0 a gpte is 32-bit but an spte is 64-bit, so several sptes are needed per gpte; this field records which part of the gpte this is
    direct: bool,
    #[bits(3)]
    access: usize, // access permissions
    invalid: bool, // invalid; destroyed as soon as it is unpinned
    nxe: bool, // efer.nxe, no-execute
    cr0_wp: bool, // cr0.wp, write protect
    smep_andnot_wp: bool, // smep && !cr0.wp; with SMEP enabled, user-mode code cannot execute instructions located in kernel address space
    smap_andnot_wp: bool, // smap && !cr0.wp
    #[bits(8)]
    unused: usize,
    #[bits(8)]
    smm: usize, // 1 means the vcpu is in system management mode, 0 means it is not
}

// We don't want allocation failures within the mmu code, so we preallocate
// enough memory for a single page fault in a cache.
// pub struct KvmMmuMemoryCache {
//     num_objs: u32,
//     objs: [*mut u8; KVM_NR_MEM_OBJS as usize],
// }
pub type KvmMmuPageFaultHandler =
    fn(vcpu: &mut VmxVcpu, gpa: u64, error_code: u32, prefault: bool) -> Result<(), SystemError>;

#[derive(Default)]
pub struct KvmMmu {
    pub root_hpa: u64,
    pub root_level: u32,
    pub base_role: KvmMmuPageRole,
    // ... other fields whose purpose is not yet clear
    pub get_cr3: Option<fn(&VmxVcpu) -> u64>,
    pub set_eptp: Option<fn(u64) -> Result<(), SystemError>>,
    pub page_fault: Option<KvmMmuPageFaultHandler>,
    // get_pdptr: Option<fn(&VmxVcpu, index: u32) -> u64>, // Page Directory Pointer Table Register? Not yet clear how it differs from CR3
    // inject_page_fault: Option<fn(&mut VmxVcpu, fault: &X86Exception)>,
    // gva_to_gpa: Option<fn(&mut VmxVcpu, gva: u64, access: u32, exception: &X86Exception) -> u64>,
    // translate_gpa: Option<fn(&mut VmxVcpu, gpa: u64, access: u32, exception: &X86Exception) -> u64>,
    // sync_page: Option<fn(&mut VmxVcpu, &mut KvmMmuPage)>,
    // invlpg: Option<fn(&mut VmxVcpu, gva: u64)>, // invalidate entry
    // update_pte: Option<fn(&mut VmxVcpu, sp: &KvmMmuPage, spte: u64, pte: u64)>,
}

impl core::fmt::Debug for KvmMmu {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("KvmMmu")
            .field("root_hpa", &self.root_hpa)
            .field("root_level", &self.root_level)
            .field("base_role", &self.base_role)
            .finish()
    }
}

fn tdp_get_cr3(_vcpu: &VmxVcpu) -> u64 {
    let guest_cr3 = vmx_vmread(VmcsFields::GUEST_CR3 as u32).expect("Failed to read guest CR3");
    return guest_cr3;
}

fn tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError> {
    // Set the attribute bits; for now they are hard-coded (readable, writable and executable).
    // EPT paging-structure memory type: Uncacheable
    let mut eptp = 0x0_u64;
    // This value is 1 less than the EPT page-walk length. 3 means 4-level paging.
    eptp |= 0x3 << 3;
    eptp |= root_hpa & (PAGE_MASK as u64);
    vmx_vmwrite(CTRL_EPTP_PTR as u32, eptp)?;
    Ok(())
}
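
// Illustrative sketch only (not used by `tdp_set_eptp` above): the EPTP layout
// that the hard-coded values assume. Bits 2:0 select the EPT paging-structure
// memory type (0 = uncacheable here), bits 5:3 hold the page-walk length minus
// one (3 => 4-level paging), and the page-aligned root HPA fills the upper
// bits. The helper name `make_eptp` is hypothetical.
#[allow(dead_code)]
fn make_eptp(root_hpa: u64) -> u64 {
    let memory_type = 0x0_u64; // EPT paging-structure memory type: uncacheable
    let walk_length_minus_one = 0x3_u64; // 4-level EPT page walk
    (root_hpa & (PAGE_MASK as u64)) | (walk_length_minus_one << 3) | memory_type
}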

fn tdp_page_fault(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    error_code: u32,
    prefault: bool,
) -> Result<(), SystemError> {
    kdebug!("tdp_page_fault");
    let gfn = gpa >> PAGE_SHIFT; // shift the guest-physical address right by 12 bits to get its page frame number (from the guest's point of view)
    // Top up the cache pool: enough space is allocated/filled in advance to avoid allocation failures at run time
    mmu_topup_memory_caches(vcpu)?;
    // TODO: determine the level used for this gfn and handle huge pages
    let level = 1; // 4KB page
    // TODO: fast path for violations caused by read/write accesses, i.e. non-mmio page faults where the page is present and writable
    // fast_page_fault(vcpu, gpa, level, error_code)
    // gfn -> pfn
    let mut map_writable = false;
    let write = error_code & ((1_u32) << 1);
    let pfn = mmu_gfn_to_pfn_fast(vcpu, gpa, prefault, gfn, write == 0, &mut map_writable)?;
    // direct map: install the gpa -> hpa mapping in the EPT page tables
    __direct_map(vcpu, gpa, write, map_writable, level, gfn, pfn, prefault)?;
    Ok(())
}

/*
 * Calculate mmu pages needed for kvm.
 */
// pub fn kvm_mmu_calculate_mmu_pages() -> u32 {
//     let mut nr_mmu_pages: u32;
//     let mut nr_pages = 0;

//     let kvm = vm(0).unwrap();
//     for as_id in 0..KVM_ADDRESS_SPACE_NUM {
//         let slots = kvm.memslots[as_id];
//         for i in 0..KVM_MEM_SLOTS_NUM {
//             let memslot = slots.memslots[i as usize];
//             nr_pages += memslot.npages;
//         }
//     }

//     nr_mmu_pages = (nr_pages as u32) * KVM_PERMILLE_MMU_PAGES / 1000;
//     nr_mmu_pages = nr_mmu_pages.max(KVM_MIN_ALLOC_MMU_PAGES);
//     return nr_mmu_pages;
// }

// pub fn kvm_mmu_change_mmu_pages(mut goal_nr_mmu_pages: u32) {
//     let kvm = KVM();
//     // free the surplus mmu pages
//     if kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
//         while kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
//             if !prepare_zap_oldest_mmu_page() {
//                 break;
//             }
//         }
//         // kvm_mmu_commit_zap_page();
//         goal_nr_mmu_pages = kvm.lock().arch.n_used_mmu_pages;
//     }
//     kvm.lock().arch.n_max_mmu_pages = goal_nr_mmu_pages;
// }

// pub fn prepare_zap_oldest_mmu_page() -> bool {
//     return false;
// }

pub fn kvm_mmu_setup(vcpu: &Mutex<VmxVcpu>) {
    // TODO: init_kvm_softmmu(vcpu), init_kvm_nested_mmu(vcpu)
    init_kvm_tdp_mmu(vcpu);
}

pub fn kvm_vcpu_mtrr_init(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
    check_ept_features()?;
    Ok(())
}

pub fn init_kvm_tdp_mmu(vcpu: &Mutex<VmxVcpu>) {
    let context = &mut vcpu.lock().mmu;
    context.page_fault = Some(tdp_page_fault);
    context.get_cr3 = Some(tdp_get_cr3);
    context.set_eptp = Some(tdp_set_eptp);
    // context.inject_page_fault = kvm_inject_page_fault; TODO: inject_page_fault
    // context.invlpg = nonpaging_invlpg;
    // context.sync_page = nonpaging_sync_page;
    // context.update_pte = nonpaging_update_pte;

    // TODO: gva to gpa in kvm
    // if !is_paging(vcpu) { // the vcpu is not using paging
    //     context.gva_to_gpa = nonpaging_gva_to_gpa;
    //     context.root_level = 0;
    // } else if is_long_mode(vcpu) {
    //     context.gva_to_gpa = paging64_gva_to_gpa;
    //     context.root_level = PT64_ROOT_LEVEL;
    //     // TODO: different paging strategies
    // } else if is_pae(vcpu) {
    //     context.gva_to_gpa = paging64_gva_to_gpa;
    //     context.root_level = PT32E_ROOT_LEVEL;
    // } else {
    //     context.gva_to_gpa = paging32_gva_to_gpa;
    //     context.root_level = PT32_ROOT_LEVEL;
    // }
}
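
// Illustrative sketch only (not wired into the real VM-exit path): how the
// callbacks installed by `init_kvm_tdp_mmu` are meant to be invoked. On an
// EPT-violation VM exit, a handler would forward the faulting guest-physical
// address and the exit error code to `mmu.page_fault`. The function name and
// the way the caller obtains `gpa`/`error_code` are assumptions made for this
// example only.
#[allow(dead_code)]
fn handle_ept_violation_sketch(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    error_code: u32,
) -> Result<(), SystemError> {
    // `Option<fn(..)>` is `Copy`, so copying the handler out does not keep the vcpu borrowed.
    let page_fault = vcpu
        .mmu
        .page_fault
        .ok_or(SystemError::KVM_HVA_ERR_BAD)?; // placeholder error for "no handler installed"
    // prefault = false: the guest really touched this address.
    page_fault(vcpu, gpa, error_code, false)
}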

#[allow(clippy::too_many_arguments)]
pub fn __direct_map(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    _write: u32,
    _map_writable: bool,
    _level: i32,
    _gfn: u64,
    pfn: u64,
    _prefault: bool,
) -> Result<u32, SystemError> {
    kdebug!("gpa={}, pfn={}, root_hpa={:x}", gpa, pfn, vcpu.mmu.root_hpa);
    // Check whether vcpu.mmu.root_hpa is valid
    if vcpu.mmu.root_hpa == 0 {
        return Err(SystemError::KVM_HVA_ERR_BAD);
    }
    // Map the gpa to the hpa
    let mut ept_mapper = EptMapper::lock();
    let page_flags = PageFlags::from_prot_flags(ProtFlags::from_bits_truncate(0x7_u64), false);
    unsafe {
        assert!(ept_mapper.walk(gpa, pfn << PAGE_SHIFT, page_flags).is_ok());
    }
    return Ok(0);
}

pub fn mmu_gfn_to_pfn_fast(
    vcpu: &mut VmxVcpu,
    _gpa: u64,
    _prefault: bool,
    gfn: u64,
    write: bool,
    writable: &mut bool,
) -> Result<u64, SystemError> {
    let slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
    let pfn = __gfn_to_pfn(slot, gfn, false, write, writable)?;
    Ok(pfn)
}

// TODO: add a cache
pub fn mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError> {
    // If vcpu->arch.mmu_page_header_cache runs low, allocate from mmu_page_header_cache.
    // The two global slab caches, pte_list_desc_cache and mmu_page_header_cache, are created in kvm_mmu_module_init.
    // mmu_topup_memory_cache(vcpu.mmu_page_header_cache,
    //                        mmu_page_header_cache, 4);
    Ok(())
}
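
// Illustrative sketch only of how the pieces in this module fit together:
// verify the EPT feature requirements, install the TDP callbacks, then load
// the EPT root into the VMCS through the `set_eptp` callback. How `root_hpa`
// is allocated, and the function name itself, are assumptions made for this
// example only.
#[allow(dead_code)]
fn mmu_bringup_sketch(vcpu: &Mutex<VmxVcpu>, root_hpa: u64) -> Result<(), SystemError> {
    kvm_vcpu_mtrr_init(vcpu)?; // checks the required EPT features
    kvm_mmu_setup(vcpu); // installs tdp_page_fault / tdp_get_cr3 / tdp_set_eptp
    let mut guard = vcpu.lock();
    guard.mmu.root_hpa = root_hpa; // __direct_map refuses to map while root_hpa is 0
    if let Some(set_eptp) = guard.mmu.set_eptp {
        set_eptp(root_hpa)?;
    }
    Ok(())
}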