1 use core::{ 2 arch::asm, 3 intrinsics::unlikely, 4 mem::ManuallyDrop, 5 sync::atomic::{compiler_fence, Ordering}, 6 }; 7 8 use alloc::{ 9 string::String, 10 sync::{Arc, Weak}, 11 vec::Vec, 12 }; 13 14 use kdepends::memoffset::offset_of; 15 use system_error::SystemError; 16 use x86::{controlregs::Cr4, segmentation::SegmentSelector}; 17 18 use crate::{ 19 arch::process::table::TSSManager, 20 exception::InterruptArch, 21 kerror, kwarn, 22 libs::spinlock::SpinLockGuard, 23 mm::{ 24 percpu::{PerCpu, PerCpuVar}, 25 VirtAddr, 26 }, 27 process::{ 28 fork::{CloneFlags, KernelCloneArgs}, 29 KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult, 30 SWITCH_RESULT, 31 }, 32 syscall::Syscall, 33 }; 34 35 use self::{ 36 kthread::kernel_thread_bootstrap_stage1, 37 syscall::ARCH_SET_FS, 38 table::{switch_fs_and_gs, KERNEL_DS, USER_DS}, 39 }; 40 41 use super::{fpu::FpState, interrupt::TrapFrame, syscall::X86_64GSData, CurrentIrqArch}; 42 43 pub mod idle; 44 pub mod kthread; 45 pub mod syscall; 46 pub mod table; 47 48 extern "C" { 49 /// 从中断返回 50 fn ret_from_intr(); 51 } 52 53 #[allow(dead_code)] 54 #[repr(align(32768))] 55 union InitProcUnion { 56 /// 用于存放idle进程的内核栈 57 idle_stack: [u8; 32768], 58 } 59 60 #[link_section = ".data.init_proc_union"] 61 #[no_mangle] 62 static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion { 63 idle_stack: [0; 32768], 64 }; 65 66 /// PCB中与架构相关的信息 67 #[derive(Debug)] 68 #[allow(dead_code)] 69 pub struct ArchPCBInfo { 70 rflags: usize, 71 rbx: usize, 72 r12: usize, 73 r13: usize, 74 r14: usize, 75 r15: usize, 76 rbp: usize, 77 rsp: usize, 78 rip: usize, 79 cr2: usize, 80 fsbase: usize, 81 gsbase: usize, 82 fs: SegmentSelector, 83 gs: SegmentSelector, 84 /// 存储PCB系统调用栈以及在syscall过程中暂存用户态rsp的结构体 85 gsdata: X86_64GSData, 86 /// 浮点寄存器的状态 87 fp_state: Option<FpState>, 88 } 89 90 #[allow(dead_code)] 91 impl ArchPCBInfo { 92 /// 创建一个新的ArchPCBInfo 93 /// 94 /// ## 参数 95 /// 96 /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。 97 /// 98 /// ## 返回值 99 /// 100 /// 返回一个新的ArchPCBInfo 101 #[inline(never)] 102 pub fn new(kstack: &KernelStack) -> Self { 103 let mut r = Self { 104 rflags: 0, 105 rbx: 0, 106 r12: 0, 107 r13: 0, 108 r14: 0, 109 r15: 0, 110 rbp: 0, 111 rsp: 0, 112 rip: 0, 113 cr2: 0, 114 fsbase: 0, 115 gsbase: 0, 116 gsdata: X86_64GSData { 117 kaddr: VirtAddr::new(0), 118 uaddr: VirtAddr::new(0), 119 }, 120 fs: KERNEL_DS, 121 gs: KERNEL_DS, 122 fp_state: None, 123 }; 124 125 r.rsp = kstack.stack_max_address().data() - 8; 126 r.rbp = kstack.stack_max_address().data(); 127 128 return r; 129 } 130 131 pub fn set_stack(&mut self, stack: VirtAddr) { 132 self.rsp = stack.data(); 133 } 134 135 pub fn set_stack_base(&mut self, stack_base: VirtAddr) { 136 self.rbp = stack_base.data(); 137 } 138 139 pub fn rbp(&self) -> usize { 140 self.rbp 141 } 142 143 pub unsafe fn push_to_stack(&mut self, value: usize) { 144 self.rsp -= core::mem::size_of::<usize>(); 145 *(self.rsp as *mut usize) = value; 146 } 147 148 pub unsafe fn pop_from_stack(&mut self) -> usize { 149 let value = *(self.rsp as *const usize); 150 self.rsp += core::mem::size_of::<usize>(); 151 value 152 } 153 154 pub fn save_fp_state(&mut self) { 155 if self.fp_state.is_none() { 156 self.fp_state = Some(FpState::new()); 157 } 158 159 self.fp_state.as_mut().unwrap().save(); 160 } 161 162 pub fn restore_fp_state(&mut self) { 163 if unlikely(self.fp_state.is_none()) { 164 return; 165 } 166 167 self.fp_state.as_mut().unwrap().restore(); 168 } 169 170 /// 返回浮点寄存器结构体的副本 171 pub fn fp_state(&self) -> &Option<FpState> { 172 &self.fp_state 173 } 174 175 // 清空浮点寄存器 176 pub fn clear_fp_state(&mut self) { 177 if unlikely(self.fp_state.is_none()) { 178 kwarn!("fp_state is none"); 179 return; 180 } 181 182 self.fp_state.as_mut().unwrap().clear(); 183 } 184 pub unsafe fn save_fsbase(&mut self) { 185 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 186 self.fsbase = x86::current::segmentation::rdfsbase() as usize; 187 } else { 188 self.fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE) as usize; 189 } 190 } 191 192 pub unsafe fn save_gsbase(&mut self) { 193 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 194 self.gsbase = x86::current::segmentation::rdgsbase() as usize; 195 } else { 196 self.gsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE) as usize; 197 } 198 } 199 200 pub unsafe fn restore_fsbase(&mut self) { 201 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 202 x86::current::segmentation::wrfsbase(self.fsbase as u64); 203 } else { 204 x86::msr::wrmsr(x86::msr::IA32_FS_BASE, self.fsbase as u64); 205 } 206 } 207 208 pub unsafe fn restore_gsbase(&mut self) { 209 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 210 x86::current::segmentation::wrgsbase(self.gsbase as u64); 211 } else { 212 x86::msr::wrmsr(x86::msr::IA32_GS_BASE, self.gsbase as u64); 213 } 214 } 215 216 /// 将gsdata写入KernelGsbase寄存器 217 pub unsafe fn store_kernel_gsbase(&self) { 218 x86::msr::wrmsr( 219 x86::msr::IA32_KERNEL_GSBASE, 220 &self.gsdata as *const X86_64GSData as u64, 221 ); 222 } 223 224 /// ### 初始化系统调用栈,不得与PCB内核栈冲突(即传入的应该是一个新的栈,避免栈损坏) 225 pub fn init_syscall_stack(&mut self, stack: &KernelStack) { 226 self.gsdata.set_kstack(stack.stack_max_address() - 8); 227 } 228 229 pub fn fsbase(&self) -> usize { 230 self.fsbase 231 } 232 233 pub fn gsbase(&self) -> usize { 234 self.gsbase 235 } 236 237 pub fn cr2_mut(&mut self) -> &mut usize { 238 &mut self.cr2 239 } 240 241 pub fn fp_state_mut(&mut self) -> &mut Option<FpState> { 242 &mut self.fp_state 243 } 244 245 /// ### 克隆ArchPCBInfo,需要注意gsdata也是对应clone的 246 pub fn clone_all(&self) -> Self { 247 Self { 248 rflags: self.rflags, 249 rbx: self.rbx, 250 r12: self.r12, 251 r13: self.r13, 252 r14: self.r14, 253 r15: self.r15, 254 rbp: self.rbp, 255 rsp: self.rsp, 256 rip: self.rip, 257 cr2: self.cr2, 258 fsbase: self.fsbase, 259 gsbase: self.gsbase, 260 fs: self.fs.clone(), 261 gs: self.gs.clone(), 262 gsdata: self.gsdata.clone(), 263 fp_state: self.fp_state, 264 } 265 } 266 267 // ### 从另一个ArchPCBInfo处clone,gsdata会被保留 268 pub fn clone_from(&mut self, from: &Self) { 269 let gsdata = self.gsdata.clone(); 270 *self = from.clone_all(); 271 self.gsdata = gsdata; 272 } 273 } 274 275 impl ProcessControlBlock { 276 /// 获取当前进程的pcb 277 pub fn arch_current_pcb() -> Arc<Self> { 278 // 获取栈指针 279 let ptr = VirtAddr::new(x86::current::registers::rsp() as usize); 280 281 let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1))); 282 283 // 从内核栈的最低地址处取出pcb的地址 284 let p = stack_base.data() as *const *const ProcessControlBlock; 285 if unlikely((unsafe { *p }).is_null()) { 286 kerror!("p={:p}", p); 287 panic!("current_pcb is null"); 288 } 289 unsafe { 290 // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下 291 let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> = 292 ManuallyDrop::new(Weak::from_raw(*p)); 293 294 let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap(); 295 return new_arc; 296 } 297 } 298 } 299 300 impl ProcessManager { 301 pub fn arch_init() { 302 { 303 // 初始化进程切换结果 per cpu变量 304 let mut switch_res_vec: Vec<SwitchResult> = Vec::new(); 305 for _ in 0..PerCpu::MAX_CPU_NUM { 306 switch_res_vec.push(SwitchResult::new()); 307 } 308 unsafe { 309 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap()); 310 } 311 } 312 } 313 /// fork的过程中复制线程 314 /// 315 /// 由于这个过程与具体的架构相关,所以放在这里 316 pub fn copy_thread( 317 current_pcb: &Arc<ProcessControlBlock>, 318 new_pcb: &Arc<ProcessControlBlock>, 319 clone_args: KernelCloneArgs, 320 current_trapframe: &TrapFrame, 321 ) -> Result<(), SystemError> { 322 let clone_flags = clone_args.flags; 323 let mut child_trapframe = current_trapframe.clone(); 324 325 // 子进程的返回值为0 326 child_trapframe.set_return_value(0); 327 328 // 设置子进程的栈基址(开始执行中断返回流程时的栈基址) 329 let mut new_arch_guard = unsafe { new_pcb.arch_info() }; 330 let kernel_stack_guard = new_pcb.kernel_stack(); 331 332 // 设置子进程在内核态开始执行时的rsp、rbp 333 new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address()); 334 335 let trap_frame_vaddr: VirtAddr = 336 kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>(); 337 new_arch_guard.set_stack(trap_frame_vaddr); 338 339 // 拷贝栈帧 340 unsafe { 341 let usp = clone_args.stack; 342 if usp != 0 { 343 child_trapframe.rsp = usp as u64; 344 } 345 let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame; 346 *trap_frame_ptr = child_trapframe; 347 } 348 349 let current_arch_guard = current_pcb.arch_info_irqsave(); 350 new_arch_guard.fsbase = current_arch_guard.fsbase; 351 new_arch_guard.gsbase = current_arch_guard.gsbase; 352 new_arch_guard.fs = current_arch_guard.fs; 353 new_arch_guard.gs = current_arch_guard.gs; 354 new_arch_guard.fp_state = current_arch_guard.fp_state.clone(); 355 356 // 拷贝浮点寄存器的状态 357 if let Some(fp_state) = current_arch_guard.fp_state.as_ref() { 358 new_arch_guard.fp_state = Some(*fp_state); 359 } 360 drop(current_arch_guard); 361 362 // 设置返回地址(子进程开始执行的指令地址) 363 if new_pcb.flags().contains(ProcessFlags::KTHREAD) { 364 let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize; 365 new_arch_guard.rip = kthread_bootstrap_stage1_func_addr; 366 } else { 367 new_arch_guard.rip = ret_from_intr as usize; 368 } 369 370 // 设置tls 371 if clone_flags.contains(CloneFlags::CLONE_SETTLS) { 372 drop(new_arch_guard); 373 Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?; 374 } 375 376 return Ok(()); 377 } 378 379 /// 切换进程 380 /// 381 /// ## 参数 382 /// 383 /// - `prev`:上一个进程的pcb 384 /// - `next`:下一个进程的pcb 385 pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) { 386 assert!(CurrentIrqArch::is_irq_enabled() == false); 387 388 // 保存浮点寄存器 389 prev.arch_info_irqsave().save_fp_state(); 390 // 切换浮点寄存器 391 next.arch_info_irqsave().restore_fp_state(); 392 393 // 切换fsbase 394 prev.arch_info_irqsave().save_fsbase(); 395 next.arch_info_irqsave().restore_fsbase(); 396 397 // 切换gsbase 398 Self::switch_gsbase(&prev, &next); 399 400 // 切换地址空间 401 let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone(); 402 compiler_fence(Ordering::SeqCst); 403 404 next_addr_space.read().user_mapper.utable.make_current(); 405 drop(next_addr_space); 406 compiler_fence(Ordering::SeqCst); 407 // 切换内核栈 408 409 // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁) 410 let next_arch = SpinLockGuard::leak(next.arch_info_irqsave()) as *mut ArchPCBInfo; 411 let prev_arch = SpinLockGuard::leak(prev.arch_info_irqsave()) as *mut ArchPCBInfo; 412 413 (*prev_arch).rip = switch_back as usize; 414 415 // 恢复当前的 preempt count*2 416 ProcessManager::current_pcb().preempt_enable(); 417 ProcessManager::current_pcb().preempt_enable(); 418 419 // 切换tss 420 TSSManager::current_tss().set_rsp( 421 x86::Ring::Ring0, 422 next.kernel_stack().stack_max_address().data() as u64, 423 ); 424 SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev); 425 SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next); 426 // kdebug!("switch tss ok"); 427 compiler_fence(Ordering::SeqCst); 428 // 正式切换上下文 429 switch_to_inner(prev_arch, next_arch); 430 } 431 432 unsafe fn switch_gsbase(prev: &Arc<ProcessControlBlock>, next: &Arc<ProcessControlBlock>) { 433 asm!("swapgs", options(nostack, preserves_flags)); 434 prev.arch_info_irqsave().save_gsbase(); 435 next.arch_info_irqsave().restore_gsbase(); 436 // 将下一个进程的kstack写入kernel_gsbase 437 next.arch_info_irqsave().store_kernel_gsbase(); 438 asm!("swapgs", options(nostack, preserves_flags)); 439 } 440 } 441 442 /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数 443 #[naked] 444 unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) { 445 asm!( 446 // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): 447 // 448 // - the current parameters are passed in the registers `rdi`, `rsi`, 449 // - we can modify scratch registers, e.g. rax 450 // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we 451 // store them here in the first place. 452 concat!(" 453 // Save old registers, and load new ones 454 mov [rdi + {off_rbx}], rbx 455 mov rbx, [rsi + {off_rbx}] 456 457 mov [rdi + {off_r12}], r12 458 mov r12, [rsi + {off_r12}] 459 460 mov [rdi + {off_r13}], r13 461 mov r13, [rsi + {off_r13}] 462 463 mov [rdi + {off_r14}], r14 464 mov r14, [rsi + {off_r14}] 465 466 mov [rdi + {off_r15}], r15 467 mov r15, [rsi + {off_r15}] 468 469 // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换) 470 mov [rdi + {off_fs}], fs 471 mov [rdi + {off_gs}], gs 472 473 // mov fs, [rsi + {off_fs}] 474 // mov gs, [rsi + {off_gs}] 475 476 push rbp 477 push rax 478 479 mov [rdi + {off_rbp}], rbp 480 mov rbp, [rsi + {off_rbp}] 481 482 mov [rdi + {off_rsp}], rsp 483 mov rsp, [rsi + {off_rsp}] 484 485 // // push RFLAGS (can only be modified via stack) 486 pushfq 487 // // pop RFLAGS into `self.rflags` 488 pop QWORD PTR [rdi + {off_rflags}] 489 490 // // push `next.rflags` 491 push QWORD PTR [rsi + {off_rflags}] 492 // // pop into RFLAGS 493 popfq 494 495 // push next rip to stack 496 push QWORD PTR [rsi + {off_rip}] 497 498 499 // When we return, we cannot even guarantee that the return address on the stack, points to 500 // the calling function. Thus, we have to execute this Rust hook by 501 // ourselves, which will unlock the contexts before the later switch. 502 503 // Note that switch_finish_hook will be responsible for executing `ret`. 504 jmp {switch_hook} 505 "), 506 507 off_rflags = const(offset_of!(ArchPCBInfo, rflags)), 508 509 off_rbx = const(offset_of!(ArchPCBInfo, rbx)), 510 off_r12 = const(offset_of!(ArchPCBInfo, r12)), 511 off_r13 = const(offset_of!(ArchPCBInfo, r13)), 512 off_r14 = const(offset_of!(ArchPCBInfo, r14)), 513 off_rbp = const(offset_of!(ArchPCBInfo, rbp)), 514 off_rsp = const(offset_of!(ArchPCBInfo, rsp)), 515 off_r15 = const(offset_of!(ArchPCBInfo, r15)), 516 off_rip = const(offset_of!(ArchPCBInfo, rip)), 517 off_fs = const(offset_of!(ArchPCBInfo, fs)), 518 off_gs = const(offset_of!(ArchPCBInfo, gs)), 519 520 switch_hook = sym crate::process::switch_finish_hook, 521 options(noreturn), 522 ); 523 } 524 525 /// 从`switch_to_inner`返回后,执行这个函数 526 /// 527 /// 也就是说,当进程再次被调度时,会从这里开始执行 528 #[inline(never)] 529 unsafe extern "sysv64" fn switch_back() { 530 asm!(concat!( 531 " 532 pop rax 533 pop rbp 534 " 535 )) 536 } 537 538 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! { 539 // 以下代码不能发生中断 540 CurrentIrqArch::interrupt_disable(); 541 542 let current_pcb = ProcessManager::current_pcb(); 543 let trap_frame_vaddr = VirtAddr::new( 544 current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(), 545 ); 546 // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr); 547 let new_rip = VirtAddr::new(ret_from_intr as usize); 548 549 assert!( 550 (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(), 551 "arch_switch_to_user(): current_rsp >= fake trap 552 frame vaddr, this may cause some illegal access to memory! 553 rsp: {:#x}, trap_frame_vaddr: {:#x}", 554 x86::current::registers::rsp() as usize, 555 trap_frame_vaddr.data() 556 ); 557 558 let mut arch_guard = current_pcb.arch_info_irqsave(); 559 arch_guard.rsp = trap_frame_vaddr.data(); 560 561 arch_guard.fs = USER_DS; 562 arch_guard.gs = USER_DS; 563 564 // 将内核gs数据压进cpu 565 arch_guard.store_kernel_gsbase(); 566 567 switch_fs_and_gs( 568 SegmentSelector::from_bits_truncate(arch_guard.fs.bits()), 569 SegmentSelector::from_bits_truncate(arch_guard.gs.bits()), 570 ); 571 arch_guard.rip = new_rip.data(); 572 573 drop(arch_guard); 574 575 // 删除kthread的标志 576 current_pcb.flags().remove(ProcessFlags::KTHREAD); 577 current_pcb.worker_private().take(); 578 579 let mut trap_frame = TrapFrame::new(); 580 581 compiler_fence(Ordering::SeqCst); 582 Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| { 583 panic!( 584 "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}", 585 pid = current_pcb.pid(), 586 e = e 587 ); 588 }); 589 compiler_fence(Ordering::SeqCst); 590 591 // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题! 592 593 drop(current_pcb); 594 595 compiler_fence(Ordering::SeqCst); 596 ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data()); 597 } 598 599 /// 由于需要依赖ret来切换到用户态,所以不能inline 600 #[inline(never)] 601 unsafe extern "sysv64" fn ready_to_switch_to_user( 602 trap_frame: TrapFrame, 603 trapframe_vaddr: usize, 604 new_rip: usize, 605 ) -> ! { 606 *(trapframe_vaddr as *mut TrapFrame) = trap_frame; 607 asm!( 608 "swapgs", 609 "mov rsp, {trapframe_vaddr}", 610 "push {new_rip}", 611 "ret", 612 trapframe_vaddr = in(reg) trapframe_vaddr, 613 new_rip = in(reg) new_rip 614 ); 615 unreachable!() 616 } 617