1 use core::{ 2 arch::asm, 3 intrinsics::unlikely, 4 mem::ManuallyDrop, 5 sync::atomic::{compiler_fence, Ordering}, 6 }; 7 8 use alloc::{string::String, sync::Arc, vec::Vec}; 9 10 use memoffset::offset_of; 11 use x86::{controlregs::Cr4, segmentation::SegmentSelector}; 12 13 use crate::{ 14 arch::process::table::TSSManager, 15 exception::InterruptArch, 16 libs::spinlock::SpinLockGuard, 17 mm::{ 18 percpu::{PerCpu, PerCpuVar}, 19 VirtAddr, 20 }, 21 process::{ 22 fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, 23 SwitchResult, SWITCH_RESULT, 24 }, 25 syscall::{Syscall, SystemError}, 26 }; 27 28 use self::{ 29 kthread::kernel_thread_bootstrap_stage1, 30 table::{switch_fs_and_gs, KERNEL_DS, USER_DS}, 31 }; 32 33 use super::{fpu::FpState, interrupt::TrapFrame, CurrentIrqArch}; 34 35 mod c_adapter; 36 pub mod kthread; 37 pub mod syscall; 38 pub mod table; 39 40 extern "C" { 41 /// 从中断返回 42 fn ret_from_intr(); 43 } 44 45 #[allow(dead_code)] 46 #[repr(align(32768))] 47 union InitProcUnion { 48 /// 用于存放idle进程的内核栈 49 idle_stack: [u8; 32768], 50 } 51 52 #[link_section = ".data.init_proc_union"] 53 #[no_mangle] 54 static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion { 55 idle_stack: [0; 32768], 56 }; 57 58 /// PCB中与架构相关的信息 59 #[derive(Debug, Clone)] 60 #[allow(dead_code)] 61 pub struct ArchPCBInfo { 62 rflags: usize, 63 rbx: usize, 64 r12: usize, 65 r13: usize, 66 r14: usize, 67 r15: usize, 68 rbp: usize, 69 rsp: usize, 70 rip: usize, 71 cr2: usize, 72 fsbase: usize, 73 gsbase: usize, 74 fs: u16, 75 gs: u16, 76 77 /// 浮点寄存器的状态 78 fp_state: Option<FpState>, 79 } 80 81 #[allow(dead_code)] 82 impl ArchPCBInfo { 83 /// 创建一个新的ArchPCBInfo 84 /// 85 /// ## 参数 86 /// 87 /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。 88 /// 89 /// ## 返回值 90 /// 91 /// 返回一个新的ArchPCBInfo 92 pub fn new(kstack: Option<&KernelStack>) -> Self { 93 let mut r = Self { 94 rflags: 0, 95 rbx: 0, 96 r12: 0, 97 r13: 0, 98 r14: 0, 99 r15: 0, 100 rbp: 0, 101 rsp: 0, 102 rip: 0, 103 cr2: 0, 104 fsbase: 0, 105 gsbase: 0, 106 fs: KERNEL_DS.bits(), 107 gs: KERNEL_DS.bits(), 108 fp_state: None, 109 }; 110 111 if kstack.is_some() { 112 let kstack = kstack.unwrap(); 113 r.rsp = kstack.stack_max_address().data(); 114 r.rbp = kstack.stack_max_address().data(); 115 } 116 117 return r; 118 } 119 120 pub fn set_stack(&mut self, stack: VirtAddr) { 121 self.rsp = stack.data(); 122 } 123 124 pub fn set_stack_base(&mut self, stack_base: VirtAddr) { 125 self.rbp = stack_base.data(); 126 } 127 128 pub fn rbp(&self) -> usize { 129 self.rbp 130 } 131 132 pub unsafe fn push_to_stack(&mut self, value: usize) { 133 self.rsp -= core::mem::size_of::<usize>(); 134 *(self.rsp as *mut usize) = value; 135 } 136 137 pub unsafe fn pop_from_stack(&mut self) -> usize { 138 let value = *(self.rsp as *const usize); 139 self.rsp += core::mem::size_of::<usize>(); 140 value 141 } 142 143 pub fn save_fp_state(&mut self) { 144 if self.fp_state.is_none() { 145 self.fp_state = Some(FpState::new()); 146 } 147 148 self.fp_state.as_mut().unwrap().save(); 149 } 150 151 pub fn restore_fp_state(&mut self) { 152 if unlikely(self.fp_state.is_none()) { 153 return; 154 } 155 156 self.fp_state.as_mut().unwrap().restore(); 157 } 158 159 pub unsafe fn save_fsbase(&mut self) { 160 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 161 self.fsbase = x86::current::segmentation::rdfsbase() as usize; 162 } else { 163 self.fsbase = 0; 164 } 165 } 166 167 pub unsafe fn save_gsbase(&mut self) { 168 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 169 self.gsbase = x86::current::segmentation::rdgsbase() as usize; 170 } else { 171 self.gsbase = 0; 172 } 173 } 174 175 pub unsafe fn restore_fsbase(&mut self) { 176 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 177 x86::current::segmentation::wrfsbase(self.fsbase as u64); 178 } 179 } 180 181 pub unsafe fn restore_gsbase(&mut self) { 182 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 183 x86::current::segmentation::wrgsbase(self.gsbase as u64); 184 } 185 } 186 187 pub fn fsbase(&self) -> usize { 188 self.fsbase 189 } 190 191 pub fn gsbase(&self) -> usize { 192 self.gsbase 193 } 194 } 195 196 impl ProcessControlBlock { 197 /// 获取当前进程的pcb 198 pub fn arch_current_pcb() -> Arc<Self> { 199 // 获取栈指针 200 let ptr = VirtAddr::new(x86::current::registers::rsp() as usize); 201 let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1))); 202 // 从内核栈的最低地址处取出pcb的地址 203 let p = stack_base.data() as *const *const ProcessControlBlock; 204 if unlikely((unsafe { *p }).is_null()) { 205 panic!("current_pcb is null"); 206 } 207 unsafe { 208 // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用 209 let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> = 210 ManuallyDrop::new(Arc::from_raw(*p)); 211 212 let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper); 213 return new_arc; 214 } 215 } 216 } 217 218 impl ProcessManager { 219 pub fn arch_init() { 220 { 221 // 初始化进程切换结果 per cpu变量 222 let mut switch_res_vec: Vec<SwitchResult> = Vec::new(); 223 for _ in 0..PerCpu::MAX_CPU_NUM { 224 switch_res_vec.push(SwitchResult::new()); 225 } 226 unsafe { 227 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap()); 228 } 229 } 230 } 231 /// fork的过程中复制线程 232 /// 233 /// 由于这个过程与具体的架构相关,所以放在这里 234 pub fn copy_thread( 235 _clone_flags: &CloneFlags, 236 current_pcb: &Arc<ProcessControlBlock>, 237 new_pcb: &Arc<ProcessControlBlock>, 238 current_trapframe: &TrapFrame, 239 ) -> Result<(), SystemError> { 240 let mut child_trapframe = current_trapframe.clone(); 241 242 // 子进程的返回值为0 243 child_trapframe.set_return_value(0); 244 245 // 设置子进程的栈基址(开始执行中断返回流程时的栈基址) 246 let mut new_arch_guard = new_pcb.arch_info(); 247 let kernel_stack_guard = new_pcb.kernel_stack(); 248 249 // 设置子进程在内核态开始执行时的rsp、rbp 250 new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address()); 251 252 let trap_frame_vaddr: VirtAddr = 253 kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>(); 254 new_arch_guard.set_stack(trap_frame_vaddr); 255 256 // 拷贝栈帧 257 unsafe { 258 let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame; 259 *trap_frame_ptr = child_trapframe; 260 } 261 262 let current_arch_guard = current_pcb.arch_info_irqsave(); 263 new_arch_guard.fsbase = current_arch_guard.fsbase; 264 new_arch_guard.gsbase = current_arch_guard.gsbase; 265 new_arch_guard.fs = current_arch_guard.fs; 266 new_arch_guard.gs = current_arch_guard.gs; 267 new_arch_guard.fp_state = current_arch_guard.fp_state.clone(); 268 269 // 拷贝浮点寄存器的状态 270 if let Some(fp_state) = current_arch_guard.fp_state.as_ref() { 271 new_arch_guard.fp_state = Some(*fp_state); 272 } 273 drop(current_arch_guard); 274 275 // 设置返回地址(子进程开始执行的指令地址) 276 277 if new_pcb.flags().contains(ProcessFlags::KTHREAD) { 278 let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize; 279 280 new_arch_guard.rip = kthread_bootstrap_stage1_func_addr; 281 } else { 282 new_arch_guard.rip = ret_from_intr as usize; 283 } 284 285 return Ok(()); 286 } 287 288 /// 切换进程 289 /// 290 /// ## 参数 291 /// 292 /// - `prev`:上一个进程的pcb 293 /// - `next`:下一个进程的pcb 294 pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) { 295 assert!(CurrentIrqArch::is_irq_enabled() == false); 296 297 // 保存浮点寄存器 298 prev.arch_info().save_fp_state(); 299 // 切换浮点寄存器 300 next.arch_info().restore_fp_state(); 301 302 // 切换fsbase 303 prev.arch_info().save_fsbase(); 304 next.arch_info().restore_fsbase(); 305 306 // 切换gsbase 307 prev.arch_info().save_gsbase(); 308 next.arch_info().restore_gsbase(); 309 310 // 切换地址空间 311 let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone(); 312 compiler_fence(Ordering::SeqCst); 313 314 next_addr_space.read().user_mapper.utable.make_current(); 315 compiler_fence(Ordering::SeqCst); 316 // 切换内核栈 317 318 // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁) 319 let next_arch = SpinLockGuard::leak(next.arch_info()); 320 let prev_arch = SpinLockGuard::leak(prev.arch_info()); 321 322 prev_arch.rip = switch_back as usize; 323 324 // 恢复当前的 preempt count*2 325 ProcessManager::current_pcb().preempt_enable(); 326 ProcessManager::current_pcb().preempt_enable(); 327 SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone()); 328 SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone()); 329 330 // 切换tss 331 TSSManager::current_tss().set_rsp( 332 x86::Ring::Ring0, 333 next.kernel_stack().stack_max_address().data() as u64, 334 ); 335 // kdebug!("switch tss ok"); 336 337 compiler_fence(Ordering::SeqCst); 338 // 正式切换上下文 339 switch_to_inner(prev_arch, next_arch); 340 } 341 } 342 343 /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数 344 #[naked] 345 unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) { 346 asm!( 347 // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): 348 // 349 // - the current parameters are passed in the registers `rdi`, `rsi`, 350 // - we can modify scratch registers, e.g. rax 351 // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we 352 // store them here in the first place. 353 concat!(" 354 // Save old registers, and load new ones 355 mov [rdi + {off_rbx}], rbx 356 mov rbx, [rsi + {off_rbx}] 357 358 mov [rdi + {off_r12}], r12 359 mov r12, [rsi + {off_r12}] 360 361 mov [rdi + {off_r13}], r13 362 mov r13, [rsi + {off_r13}] 363 364 mov [rdi + {off_r14}], r14 365 mov r14, [rsi + {off_r14}] 366 367 mov [rdi + {off_r15}], r15 368 mov r15, [rsi + {off_r15}] 369 370 // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换) 371 mov [rdi + {off_fs}], fs 372 mov [rdi + {off_gs}], gs 373 374 push rbp 375 push rax 376 377 mov [rdi + {off_rbp}], rbp 378 mov rbp, [rsi + {off_rbp}] 379 380 mov [rdi + {off_rsp}], rsp 381 mov rsp, [rsi + {off_rsp}] 382 383 // // push RFLAGS (can only be modified via stack) 384 pushfq 385 // // pop RFLAGS into `self.rflags` 386 pop QWORD PTR [rdi + {off_rflags}] 387 388 // // push `next.rflags` 389 push QWORD PTR [rsi + {off_rflags}] 390 // // pop into RFLAGS 391 popfq 392 393 // push next rip to stack 394 push QWORD PTR [rsi + {off_rip}] 395 396 397 // When we return, we cannot even guarantee that the return address on the stack, points to 398 // the calling function. Thus, we have to execute this Rust hook by 399 // ourselves, which will unlock the contexts before the later switch. 400 401 // Note that switch_finish_hook will be responsible for executing `ret`. 402 jmp {switch_hook} 403 "), 404 405 off_rflags = const(offset_of!(ArchPCBInfo, rflags)), 406 407 off_rbx = const(offset_of!(ArchPCBInfo, rbx)), 408 off_r12 = const(offset_of!(ArchPCBInfo, r12)), 409 off_r13 = const(offset_of!(ArchPCBInfo, r13)), 410 off_r14 = const(offset_of!(ArchPCBInfo, r14)), 411 off_rbp = const(offset_of!(ArchPCBInfo, rbp)), 412 off_rsp = const(offset_of!(ArchPCBInfo, rsp)), 413 off_r15 = const(offset_of!(ArchPCBInfo, r15)), 414 off_rip = const(offset_of!(ArchPCBInfo, rip)), 415 off_fs = const(offset_of!(ArchPCBInfo, fs)), 416 off_gs = const(offset_of!(ArchPCBInfo, gs)), 417 418 switch_hook = sym crate::process::switch_finish_hook, 419 options(noreturn), 420 ); 421 } 422 423 /// 从`switch_to_inner`返回后,执行这个函数 424 /// 425 /// 也就是说,当进程再次被调度时,会从这里开始执行 426 #[inline(never)] 427 unsafe extern "sysv64" fn switch_back() { 428 asm!(concat!( 429 " 430 pop rax 431 pop rbp 432 " 433 )) 434 } 435 436 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! { 437 // 以下代码不能发生中断 438 CurrentIrqArch::interrupt_disable(); 439 440 let current_pcb = ProcessManager::current_pcb(); 441 let trap_frame_vaddr = VirtAddr::new( 442 current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(), 443 ); 444 // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr); 445 let new_rip = VirtAddr::new(ret_from_intr as usize); 446 447 assert!( 448 (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(), 449 "arch_switch_to_user(): current_rsp >= fake trap 450 frame vaddr, this may cause some illegal access to memory! 451 rsp: {:#x}, trap_frame_vaddr: {:#x}", 452 x86::current::registers::rsp() as usize, 453 trap_frame_vaddr.data() 454 ); 455 456 let mut arch_guard = current_pcb.arch_info_irqsave(); 457 arch_guard.rsp = trap_frame_vaddr.data(); 458 459 arch_guard.fs = USER_DS.bits(); 460 arch_guard.gs = USER_DS.bits(); 461 462 switch_fs_and_gs( 463 SegmentSelector::from_bits_truncate(arch_guard.fs), 464 SegmentSelector::from_bits_truncate(arch_guard.gs), 465 ); 466 arch_guard.rip = new_rip.data(); 467 468 drop(arch_guard); 469 470 // 删除kthread的标志 471 current_pcb.flags().remove(ProcessFlags::KTHREAD); 472 current_pcb.worker_private().take(); 473 474 let mut trap_frame = TrapFrame::new(); 475 476 compiler_fence(Ordering::SeqCst); 477 Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| { 478 panic!( 479 "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}", 480 pid = current_pcb.pid(), 481 e = e 482 ); 483 }); 484 compiler_fence(Ordering::SeqCst); 485 486 // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题! 487 488 drop(current_pcb); 489 490 compiler_fence(Ordering::SeqCst); 491 ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data()); 492 } 493 494 /// 由于需要依赖ret来切换到用户态,所以不能inline 495 #[inline(never)] 496 unsafe extern "sysv64" fn ready_to_switch_to_user( 497 trap_frame: TrapFrame, 498 trapframe_vaddr: usize, 499 new_rip: usize, 500 ) -> ! { 501 *(trapframe_vaddr as *mut TrapFrame) = trap_frame; 502 asm!( 503 "mov rsp, {trapframe_vaddr}", 504 "push {new_rip}", 505 "ret", 506 trapframe_vaddr = in(reg) trapframe_vaddr, 507 new_rip = in(reg) new_rip 508 ); 509 unreachable!() 510 } 511