xref: /DragonOS/kernel/src/arch/x86_64/kvm/vmx/mmu.rs (revision cf7f801e1d50ee5b04cb728e4251a57f4183bfbc)
1 use crate::{
2     arch::kvm::vmx::ept::EptMapper,
3     libs::mutex::Mutex,
4     mm::{page::EntryFlags, syscall::ProtFlags},
5     virt::kvm::host_mem::{__gfn_to_pfn, kvm_vcpu_gfn_to_memslot, PAGE_MASK, PAGE_SHIFT},
6 };
7 use bitfield_struct::bitfield;
8 use log::debug;
9 use system_error::SystemError;
10 
11 use super::{
12     ept::check_ept_features,
13     vcpu::VmxVcpu,
14     vmcs::VmcsFields,
15     vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite},
16 };
17 use crate::arch::kvm::vmx::mmu::VmcsFields::CTRL_EPTP_PTR;
18 
19 // pub const PT64_ROOT_LEVEL: u32 = 4;
20 // pub const PT32_ROOT_LEVEL: u32 = 2;
21 // pub const PT32E_ROOT_LEVEL: u32 = 3;
22 
23 // pub struct KvmMmuPage{
24 //     gfn: u64, // 管理地址范围的起始地址对应的 gfn
25 //     role: KvmMmuPageRole, // 基本信息,包括硬件特性和所属层级等
26 //     // spt: *mut u64, // spt: shadow page table,指向 struct page 的地址,其包含了所有页表项 (pte)。同时 page->private 会指向该 kvm_mmu_page
27 // }
28 
29 #[bitfield(u32)]
30 pub struct KvmMmuPageRole {
31     #[bits(4)]
32     level: usize, // 页所处的层级
33     cr4_pae: bool, // cr4.pae,1 表示使用 64bit gpte
34     #[bits(2)]
35     quadrant: usize, // 如果 cr4.pae=0,则 gpte 为 32bit,但 spte 为 64bit,因此需要用多个 spte 来表示一个 gpte,该字段指示是 gpte 的第几块
36     direct: bool,
37     #[bits(3)]
38     access: usize, // 访问权限
39     invalid: bool,        // 失效,一旦 unpin 就会被销毁
40     nxe: bool,            // efer.nxe,不可执行
41     cr0_wp: bool,         // cr0.wp, 写保护
42     smep_andnot_wp: bool, // smep && !cr0.wp,SMEP启用,用户模式代码将无法执行位于内核地址空间中的指令。
43     smap_andnot_wp: bool, // smap && !cr0.wp
44     #[bits(8)]
45     unused: usize,
46     #[bits(8)]
47     smm: usize, // 1 表示处于 system management mode, 0 表示非 SMM
48 }
49 
50 //  We don't want allocation failures within the mmu code, so we preallocate
51 // enough memory for a single page fault in a cache.
52 // pub struct KvmMmuMemoryCache {
53 //     num_objs: u32,
54 //     objs: [*mut u8; KVM_NR_MEM_OBJS as usize],
55 // }
56 pub type KvmMmuPageFaultHandler =
57     fn(vcpu: &mut VmxVcpu, gpa: u64, error_code: u32, prefault: bool) -> Result<(), SystemError>;
58 #[derive(Default)]
59 pub struct KvmMmu {
60     pub root_hpa: u64,
61     pub root_level: u32,
62     pub base_role: KvmMmuPageRole,
63     // ...还有一些变量不知道用来做什么
64     pub get_cr3: Option<fn(&VmxVcpu) -> u64>,
65     pub set_eptp: Option<fn(u64) -> Result<(), SystemError>>,
66     pub page_fault: Option<KvmMmuPageFaultHandler>,
67     // get_pdptr: Option<fn(& VmxVcpu, index:u32) -> u64>, // Page Directory Pointer Table Register?暂时不知道和CR3的区别是什么
68     // inject_page_fault: Option<fn(&mut VmxVcpu, fault: &X86Exception)>,
69     // gva_to_gpa: Option<fn(&mut VmxVcpu, gva: u64, access: u32, exception: &X86Exception) -> u64>,
70     // translate_gpa: Option<fn(&mut VmxVcpu, gpa: u64, access: u32, exception: &X86Exception) -> u64>,
71     // sync_page: Option<fn(&mut VmxVcpu, &mut KvmMmuPage)>,
72     // invlpg: Option<fn(&mut VmxVcpu, gva: u64)>, // invalid entry
73     // update_pte: Option<fn(&mut VmxVcpu, sp: &KvmMmuPage, spte: u64, pte: u64)>,
74 }
75 
76 impl core::fmt::Debug for KvmMmu {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result77     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
78         f.debug_struct("KvmMmu")
79             .field("root_hpa", &self.root_hpa)
80             .field("root_level", &self.root_level)
81             .field("base_role", &self.base_role)
82             .finish()
83     }
84 }
85 
tdp_get_cr3(_vcpu: &VmxVcpu) -> u6486 fn tdp_get_cr3(_vcpu: &VmxVcpu) -> u64 {
87     let guest_cr3 = vmx_vmread(VmcsFields::GUEST_CR3 as u32).expect("Failed to read eptp");
88     return guest_cr3;
89 }
90 
tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError>91 fn tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError> {
92     // 设置权限位,目前是写死的,可读可写可执行
93     //  EPT paging-structure memory type: Uncacheable
94     let mut eptp = 0x0_u64;
95     // This value is 1 less than the EPT page-walk length.  3 means 4-level paging.
96     eptp |= 0x3 << 3;
97     eptp |= root_hpa & (PAGE_MASK as u64);
98     vmx_vmwrite(CTRL_EPTP_PTR as u32, eptp)?;
99     Ok(())
100 }
101 
tdp_page_fault( vcpu: &mut VmxVcpu, gpa: u64, error_code: u32, prefault: bool, ) -> Result<(), SystemError>102 fn tdp_page_fault(
103     vcpu: &mut VmxVcpu,
104     gpa: u64,
105     error_code: u32,
106     prefault: bool,
107 ) -> Result<(), SystemError> {
108     debug!("tdp_page_fault");
109     let gfn = gpa >> PAGE_SHIFT; // 物理地址右移12位得到物理页框号(相对于虚拟机而言)
110                                  // 分配缓存池,为了避免在运行时分配空间失败,这里提前分配/填充足额的空间
111     mmu_topup_memory_caches(vcpu)?;
112     // TODO:获取gfn使用的level,处理hugepage的问题
113     let level = 1; // 4KB page
114                    // TODO: 快速处理由读写操作引起violation,即present同时有写权限的非mmio page fault
115                    // fast_page_fault(vcpu, gpa, level, error_code)
116                    // gfn->pfn
117     let mut map_writable = false;
118     let write = error_code & ((1_u32) << 1);
119     let pfn = mmu_gfn_to_pfn_fast(vcpu, gpa, prefault, gfn, write == 0, &mut map_writable)?;
120     // direct map就是映射ept页表的过程
121     __direct_map(vcpu, gpa, write, map_writable, level, gfn, pfn, prefault)?;
122     Ok(())
123 }
124 
/*
 * Calculate mmu pages needed for kvm.
 */
128 // pub fn kvm_mmu_calculate_mmu_pages() -> u32 {
129 // 	let mut nr_mmu_pages:u32;
130 //     let mut nr_pages = 0;
131 
132 //     let kvm = vm(0).unwrap();
133 //     for as_id in 0..KVM_ADDRESS_SPACE_NUM {
134 //         let slots = kvm.memslots[as_id];
135 //         for i in 0..KVM_MEM_SLOTS_NUM {
136 //             let memslot = slots.memslots[i as usize];
137 //             nr_pages += memslot.npages;
138 //         }
139 //     }
140 
141 // 	nr_mmu_pages = (nr_pages as u32)* KVM_PERMILLE_MMU_PAGES / 1000;
142 // 	nr_mmu_pages = nr_mmu_pages.max(KVM_MIN_ALLOC_MMU_PAGES);
143 // 	return nr_mmu_pages;
144 // }
145 
146 // pub fn kvm_mmu_change_mmu_pages(mut goal_nr_mmu_pages: u32){
147 //     let kvm = KVM();
148 //     // 释放多余的mmu page
149 //     if kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
150 //         while kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
151 //             if !prepare_zap_oldest_mmu_page() {
152 //                 break;
153 //             }
154 //         }
155 //         // kvm_mmu_commit_zap_page();
156 //         goal_nr_mmu_pages = kvm.lock().arch.n_used_mmu_pages;
157 
158 //     }
159 //     kvm.lock().arch.n_max_mmu_pages = goal_nr_mmu_pages;
160 // }
161 
162 // pub fn prepare_zap_oldest_mmu_page() -> bool {
163 //     return false;
164 // }
165 
kvm_mmu_setup(vcpu: &Mutex<VmxVcpu>)166 pub fn kvm_mmu_setup(vcpu: &Mutex<VmxVcpu>) {
167     // TODO: init_kvm_softmmu(vcpu), init_kvm_nested_mmu(vcpu)
168     init_kvm_tdp_mmu(vcpu);
169 }
170 
kvm_vcpu_mtrr_init(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError>171 pub fn kvm_vcpu_mtrr_init(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
172     check_ept_features()?;
173     Ok(())
174 }
175 
init_kvm_tdp_mmu(vcpu: &Mutex<VmxVcpu>)176 pub fn init_kvm_tdp_mmu(vcpu: &Mutex<VmxVcpu>) {
177     let context = &mut vcpu.lock().mmu;
178     context.page_fault = Some(tdp_page_fault);
179     context.get_cr3 = Some(tdp_get_cr3);
180     context.set_eptp = Some(tdp_set_eptp);
181     // context.inject_page_fault = kvm_inject_page_fault; TODO: inject_page_fault
182     // context.invlpg = nonpaging_invlpg;
183     // context.sync_page = nonpaging_sync_page;
184     // context.update_pte = nonpaging_update_pte;
185 
186     // TODO: gva to gpa in kvm
187     // if !is_paging(vcpu) { // vcpu不分页
188     //     context.gva_to_gpa = nonpaging_gva_to_gpa;
189     // 	context.root_level = 0;
190     // } else if (is_long_mode(vcpu)) {
191     // 	context.gva_to_gpa = paging64_gva_to_gpa;
192     // 	context.root_level = PT64_ROOT_LEVEL;
193     // TODO:: different paging strategy
194     // } else if (is_pae(vcpu)) {
195     //     context.gva_to_gpa = paging64_gva_to_gpa;
196     //     context.root_level = PT32E_ROOT_LEVEL;
197     // } else {
198     //     context.gva_to_gpa = paging32_gva_to_gpa;
199     //     context.root_level = PT32_ROOT_LEVEL;
200     // }
201 }
202 
203 #[allow(clippy::too_many_arguments)]
__direct_map( vcpu: &mut VmxVcpu, gpa: u64, _write: u32, _map_writable: bool, _level: i32, _gfn: u64, pfn: u64, _prefault: bool, ) -> Result<u32, SystemError>204 pub fn __direct_map(
205     vcpu: &mut VmxVcpu,
206     gpa: u64,
207     _write: u32,
208     _map_writable: bool,
209     _level: i32,
210     _gfn: u64,
211     pfn: u64,
212     _prefault: bool,
213 ) -> Result<u32, SystemError> {
214     debug!("gpa={}, pfn={}, root_hpa={:x}", gpa, pfn, vcpu.mmu.root_hpa);
215     // 判断vcpu.mmu.root_hpa是否有效
216     if vcpu.mmu.root_hpa == 0 {
217         return Err(SystemError::KVM_HVA_ERR_BAD);
218     }
219     // 把gpa映射到hpa
220     let mut ept_mapper = EptMapper::lock();
221     let page_flags = EntryFlags::from_prot_flags(ProtFlags::from_bits_truncate(0x7_u64), false);
222     unsafe {
223         assert!(ept_mapper.walk(gpa, pfn << PAGE_SHIFT, page_flags).is_ok());
224     }
225     return Ok(0);
226 }
227 
mmu_gfn_to_pfn_fast( vcpu: &mut VmxVcpu, _gpa: u64, _prefault: bool, gfn: u64, write: bool, writable: &mut bool, ) -> Result<u64, SystemError>228 pub fn mmu_gfn_to_pfn_fast(
229     vcpu: &mut VmxVcpu,
230     _gpa: u64,
231     _prefault: bool,
232     gfn: u64,
233     write: bool,
234     writable: &mut bool,
235 ) -> Result<u64, SystemError> {
236     let slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
237     let pfn = __gfn_to_pfn(slot, gfn, false, write, writable)?;
238     Ok(pfn)
239 }
240 
241 // TODO: 添加cache
mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError>242 pub fn mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError> {
243     // 如果 vcpu->arch.mmu_page_header_cache 不足,从 mmu_page_header_cache 中分配
244     // pte_list_desc_cache 和 mmu_page_header_cache 两块全局 slab cache 在 kvm_mmu_module_init 中被创建
245     // mmu_topup_memory_cache(vcpu.mmu_page_header_cache,
246     //     mmu_page_header_cache, 4);
247     Ok(())
248 }
249