1 use core::{ 2 arch::asm, 3 intrinsics::unlikely, 4 mem::ManuallyDrop, 5 sync::atomic::{compiler_fence, Ordering}, 6 }; 7 8 use alloc::{ 9 string::String, 10 sync::{Arc, Weak}, 11 vec::Vec, 12 }; 13 14 use kdepends::memoffset::offset_of; 15 use x86::{controlregs::Cr4, segmentation::SegmentSelector}; 16 17 use crate::{ 18 arch::process::table::TSSManager, 19 exception::InterruptArch, 20 kerror, kwarn, 21 libs::spinlock::SpinLockGuard, 22 mm::{ 23 percpu::{PerCpu, PerCpuVar}, 24 VirtAddr, 25 }, 26 process::{ 27 fork::{CloneFlags, KernelCloneArgs}, 28 KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult, 29 SWITCH_RESULT, 30 }, 31 syscall::{Syscall, SystemError}, 32 }; 33 34 use self::{ 35 kthread::kernel_thread_bootstrap_stage1, 36 syscall::ARCH_SET_FS, 37 table::{switch_fs_and_gs, KERNEL_DS, USER_DS}, 38 }; 39 40 use super::{fpu::FpState, interrupt::TrapFrame, syscall::X86_64GSData, CurrentIrqArch}; 41 42 mod c_adapter; 43 pub mod kthread; 44 pub mod syscall; 45 pub mod table; 46 47 extern "C" { 48 /// 从中断返回 49 fn ret_from_intr(); 50 } 51 52 #[allow(dead_code)] 53 #[repr(align(32768))] 54 union InitProcUnion { 55 /// 用于存放idle进程的内核栈 56 idle_stack: [u8; 32768], 57 } 58 59 #[link_section = ".data.init_proc_union"] 60 #[no_mangle] 61 static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion { 62 idle_stack: [0; 32768], 63 }; 64 65 /// PCB中与架构相关的信息 66 #[derive(Debug)] 67 #[allow(dead_code)] 68 pub struct ArchPCBInfo { 69 rflags: usize, 70 rbx: usize, 71 r12: usize, 72 r13: usize, 73 r14: usize, 74 r15: usize, 75 rbp: usize, 76 rsp: usize, 77 rip: usize, 78 cr2: usize, 79 fsbase: usize, 80 gsbase: usize, 81 fs: SegmentSelector, 82 gs: SegmentSelector, 83 /// 存储PCB系统调用栈以及在syscall过程中暂存用户态rsp的结构体 84 gsdata: X86_64GSData, 85 /// 浮点寄存器的状态 86 fp_state: Option<FpState>, 87 } 88 89 #[allow(dead_code)] 90 impl ArchPCBInfo { 91 /// 创建一个新的ArchPCBInfo 92 /// 93 /// ## 参数 94 /// 95 /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。 96 /// 97 /// ## 返回值 98 /// 99 /// 返回一个新的ArchPCBInfo 100 pub fn new(kstack: &KernelStack) -> Self { 101 let mut r = Self { 102 rflags: 0, 103 rbx: 0, 104 r12: 0, 105 r13: 0, 106 r14: 0, 107 r15: 0, 108 rbp: 0, 109 rsp: 0, 110 rip: 0, 111 cr2: 0, 112 fsbase: 0, 113 gsbase: 0, 114 gsdata: X86_64GSData { 115 kaddr: VirtAddr::new(0), 116 uaddr: VirtAddr::new(0), 117 }, 118 fs: KERNEL_DS, 119 gs: KERNEL_DS, 120 fp_state: None, 121 }; 122 123 r.rsp = kstack.stack_max_address().data() - 8; 124 r.rbp = kstack.stack_max_address().data(); 125 126 return r; 127 } 128 129 pub fn set_stack(&mut self, stack: VirtAddr) { 130 self.rsp = stack.data(); 131 } 132 133 pub fn set_stack_base(&mut self, stack_base: VirtAddr) { 134 self.rbp = stack_base.data(); 135 } 136 137 pub fn rbp(&self) -> usize { 138 self.rbp 139 } 140 141 pub unsafe fn push_to_stack(&mut self, value: usize) { 142 self.rsp -= core::mem::size_of::<usize>(); 143 *(self.rsp as *mut usize) = value; 144 } 145 146 pub unsafe fn pop_from_stack(&mut self) -> usize { 147 let value = *(self.rsp as *const usize); 148 self.rsp += core::mem::size_of::<usize>(); 149 value 150 } 151 152 pub fn save_fp_state(&mut self) { 153 if self.fp_state.is_none() { 154 self.fp_state = Some(FpState::new()); 155 } 156 157 self.fp_state.as_mut().unwrap().save(); 158 } 159 160 pub fn restore_fp_state(&mut self) { 161 if unlikely(self.fp_state.is_none()) { 162 return; 163 } 164 165 self.fp_state.as_mut().unwrap().restore(); 166 } 167 168 /// 返回浮点寄存器结构体的副本 169 pub fn fp_state(&self) -> &Option<FpState> { 170 &self.fp_state 171 } 172 173 // 清空浮点寄存器 174 pub fn clear_fp_state(&mut self) { 175 if unlikely(self.fp_state.is_none()) { 176 kwarn!("fp_state is none"); 177 return; 178 } 179 180 self.fp_state.as_mut().unwrap().clear(); 181 } 182 pub unsafe fn save_fsbase(&mut self) { 183 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 184 self.fsbase = x86::current::segmentation::rdfsbase() as usize; 185 } else { 186 self.fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE) as usize; 187 } 188 } 189 190 pub unsafe fn save_gsbase(&mut self) { 191 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 192 self.gsbase = x86::current::segmentation::rdgsbase() as usize; 193 } else { 194 self.gsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE) as usize; 195 } 196 } 197 198 pub unsafe fn restore_fsbase(&mut self) { 199 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 200 x86::current::segmentation::wrfsbase(self.fsbase as u64); 201 } else { 202 x86::msr::wrmsr(x86::msr::IA32_FS_BASE, self.fsbase as u64); 203 } 204 } 205 206 pub unsafe fn restore_gsbase(&mut self) { 207 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 208 x86::current::segmentation::wrgsbase(self.gsbase as u64); 209 } else { 210 x86::msr::wrmsr(x86::msr::IA32_GS_BASE, self.gsbase as u64); 211 } 212 } 213 214 /// 将gsdata写入KernelGsbase寄存器 215 pub unsafe fn store_kernel_gsbase(&self) { 216 x86::msr::wrmsr( 217 x86::msr::IA32_KERNEL_GSBASE, 218 &self.gsdata as *const X86_64GSData as u64, 219 ); 220 } 221 222 /// ### 初始化系统调用栈,不得与PCB内核栈冲突(即传入的应该是一个新的栈,避免栈损坏) 223 pub fn init_syscall_stack(&mut self, stack: &KernelStack) { 224 self.gsdata.set_kstack(stack.stack_max_address() - 8); 225 } 226 227 pub fn fsbase(&self) -> usize { 228 self.fsbase 229 } 230 231 pub fn gsbase(&self) -> usize { 232 self.gsbase 233 } 234 235 pub fn cr2_mut(&mut self) -> &mut usize { 236 &mut self.cr2 237 } 238 239 pub fn fp_state_mut(&mut self) -> &mut Option<FpState> { 240 &mut self.fp_state 241 } 242 243 /// ### 克隆ArchPCBInfo,需要注意gsdata也是对应clone的 244 pub fn clone_all(&self) -> Self { 245 Self { 246 rflags: self.rflags, 247 rbx: self.rbx, 248 r12: self.r12, 249 r13: self.r13, 250 r14: self.r14, 251 r15: self.r15, 252 rbp: self.rbp, 253 rsp: self.rsp, 254 rip: self.rip, 255 cr2: self.cr2, 256 fsbase: self.fsbase, 257 gsbase: self.gsbase, 258 fs: self.fs.clone(), 259 gs: self.gs.clone(), 260 gsdata: self.gsdata.clone(), 261 fp_state: self.fp_state, 262 } 263 } 264 265 // ### 从另一个ArchPCBInfo处clone,gsdata会被保留 266 pub fn clone_from(&mut self, from: &Self) { 267 let gsdata = self.gsdata.clone(); 268 *self = from.clone_all(); 269 self.gsdata = gsdata; 270 } 271 } 272 273 impl ProcessControlBlock { 274 /// 获取当前进程的pcb 275 pub fn arch_current_pcb() -> Arc<Self> { 276 // 获取栈指针 277 let ptr = VirtAddr::new(x86::current::registers::rsp() as usize); 278 279 let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1))); 280 281 // 从内核栈的最低地址处取出pcb的地址 282 let p = stack_base.data() as *const *const ProcessControlBlock; 283 if unlikely((unsafe { *p }).is_null()) { 284 kerror!("p={:p}", p); 285 panic!("current_pcb is null"); 286 } 287 unsafe { 288 // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下 289 let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> = 290 ManuallyDrop::new(Weak::from_raw(*p)); 291 292 let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap(); 293 return new_arc; 294 } 295 } 296 } 297 298 impl ProcessManager { 299 pub fn arch_init() { 300 { 301 // 初始化进程切换结果 per cpu变量 302 let mut switch_res_vec: Vec<SwitchResult> = Vec::new(); 303 for _ in 0..PerCpu::MAX_CPU_NUM { 304 switch_res_vec.push(SwitchResult::new()); 305 } 306 unsafe { 307 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap()); 308 } 309 } 310 } 311 /// fork的过程中复制线程 312 /// 313 /// 由于这个过程与具体的架构相关,所以放在这里 314 pub fn copy_thread( 315 current_pcb: &Arc<ProcessControlBlock>, 316 new_pcb: &Arc<ProcessControlBlock>, 317 clone_args: KernelCloneArgs, 318 current_trapframe: &TrapFrame, 319 ) -> Result<(), SystemError> { 320 let clone_flags = clone_args.flags; 321 let mut child_trapframe = current_trapframe.clone(); 322 323 // 子进程的返回值为0 324 child_trapframe.set_return_value(0); 325 326 // 设置子进程的栈基址(开始执行中断返回流程时的栈基址) 327 let mut new_arch_guard = new_pcb.arch_info(); 328 let kernel_stack_guard = new_pcb.kernel_stack(); 329 330 // 设置子进程在内核态开始执行时的rsp、rbp 331 new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address()); 332 333 let trap_frame_vaddr: VirtAddr = 334 kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>(); 335 new_arch_guard.set_stack(trap_frame_vaddr); 336 337 // 拷贝栈帧 338 unsafe { 339 let usp = clone_args.stack; 340 if usp != 0 { 341 child_trapframe.rsp = usp as u64; 342 } 343 let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame; 344 *trap_frame_ptr = child_trapframe; 345 } 346 347 let current_arch_guard = current_pcb.arch_info_irqsave(); 348 new_arch_guard.fsbase = current_arch_guard.fsbase; 349 new_arch_guard.gsbase = current_arch_guard.gsbase; 350 new_arch_guard.fs = current_arch_guard.fs; 351 new_arch_guard.gs = current_arch_guard.gs; 352 new_arch_guard.fp_state = current_arch_guard.fp_state.clone(); 353 354 // 拷贝浮点寄存器的状态 355 if let Some(fp_state) = current_arch_guard.fp_state.as_ref() { 356 new_arch_guard.fp_state = Some(*fp_state); 357 } 358 drop(current_arch_guard); 359 360 // 设置返回地址(子进程开始执行的指令地址) 361 if new_pcb.flags().contains(ProcessFlags::KTHREAD) { 362 let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize; 363 new_arch_guard.rip = kthread_bootstrap_stage1_func_addr; 364 } else { 365 new_arch_guard.rip = ret_from_intr as usize; 366 } 367 368 // 设置tls 369 if clone_flags.contains(CloneFlags::CLONE_SETTLS) { 370 drop(new_arch_guard); 371 Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?; 372 } 373 374 return Ok(()); 375 } 376 377 /// 切换进程 378 /// 379 /// ## 参数 380 /// 381 /// - `prev`:上一个进程的pcb 382 /// - `next`:下一个进程的pcb 383 pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) { 384 assert!(CurrentIrqArch::is_irq_enabled() == false); 385 386 // 保存浮点寄存器 387 prev.arch_info().save_fp_state(); 388 // 切换浮点寄存器 389 next.arch_info().restore_fp_state(); 390 391 // 切换fsbase 392 prev.arch_info().save_fsbase(); 393 next.arch_info().restore_fsbase(); 394 395 // 切换gsbase 396 Self::switch_gsbase(&prev, &next); 397 398 // 切换地址空间 399 let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone(); 400 compiler_fence(Ordering::SeqCst); 401 402 next_addr_space.read().user_mapper.utable.make_current(); 403 drop(next_addr_space); 404 compiler_fence(Ordering::SeqCst); 405 // 切换内核栈 406 407 // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁) 408 let next_arch = SpinLockGuard::leak(next.arch_info()) as *mut ArchPCBInfo; 409 let prev_arch = SpinLockGuard::leak(prev.arch_info()) as *mut ArchPCBInfo; 410 411 (*prev_arch).rip = switch_back as usize; 412 413 // 恢复当前的 preempt count*2 414 ProcessManager::current_pcb().preempt_enable(); 415 ProcessManager::current_pcb().preempt_enable(); 416 417 // 切换tss 418 TSSManager::current_tss().set_rsp( 419 x86::Ring::Ring0, 420 next.kernel_stack().stack_max_address().data() as u64, 421 ); 422 SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev); 423 SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next); 424 // kdebug!("switch tss ok"); 425 compiler_fence(Ordering::SeqCst); 426 // 正式切换上下文 427 switch_to_inner(prev_arch, next_arch); 428 } 429 430 unsafe fn switch_gsbase(prev: &Arc<ProcessControlBlock>, next: &Arc<ProcessControlBlock>) { 431 asm!("swapgs", options(nostack, preserves_flags)); 432 prev.arch_info().save_gsbase(); 433 next.arch_info().restore_gsbase(); 434 // 将下一个进程的kstack写入kernel_gsbase 435 next.arch_info().store_kernel_gsbase(); 436 asm!("swapgs", options(nostack, preserves_flags)); 437 } 438 } 439 440 /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数 441 #[naked] 442 unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) { 443 asm!( 444 // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): 445 // 446 // - the current parameters are passed in the registers `rdi`, `rsi`, 447 // - we can modify scratch registers, e.g. rax 448 // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we 449 // store them here in the first place. 450 concat!(" 451 // Save old registers, and load new ones 452 mov [rdi + {off_rbx}], rbx 453 mov rbx, [rsi + {off_rbx}] 454 455 mov [rdi + {off_r12}], r12 456 mov r12, [rsi + {off_r12}] 457 458 mov [rdi + {off_r13}], r13 459 mov r13, [rsi + {off_r13}] 460 461 mov [rdi + {off_r14}], r14 462 mov r14, [rsi + {off_r14}] 463 464 mov [rdi + {off_r15}], r15 465 mov r15, [rsi + {off_r15}] 466 467 // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换) 468 mov [rdi + {off_fs}], fs 469 mov [rdi + {off_gs}], gs 470 471 // mov fs, [rsi + {off_fs}] 472 // mov gs, [rsi + {off_gs}] 473 474 push rbp 475 push rax 476 477 mov [rdi + {off_rbp}], rbp 478 mov rbp, [rsi + {off_rbp}] 479 480 mov [rdi + {off_rsp}], rsp 481 mov rsp, [rsi + {off_rsp}] 482 483 // // push RFLAGS (can only be modified via stack) 484 pushfq 485 // // pop RFLAGS into `self.rflags` 486 pop QWORD PTR [rdi + {off_rflags}] 487 488 // // push `next.rflags` 489 push QWORD PTR [rsi + {off_rflags}] 490 // // pop into RFLAGS 491 popfq 492 493 // push next rip to stack 494 push QWORD PTR [rsi + {off_rip}] 495 496 497 // When we return, we cannot even guarantee that the return address on the stack, points to 498 // the calling function. Thus, we have to execute this Rust hook by 499 // ourselves, which will unlock the contexts before the later switch. 500 501 // Note that switch_finish_hook will be responsible for executing `ret`. 502 jmp {switch_hook} 503 "), 504 505 off_rflags = const(offset_of!(ArchPCBInfo, rflags)), 506 507 off_rbx = const(offset_of!(ArchPCBInfo, rbx)), 508 off_r12 = const(offset_of!(ArchPCBInfo, r12)), 509 off_r13 = const(offset_of!(ArchPCBInfo, r13)), 510 off_r14 = const(offset_of!(ArchPCBInfo, r14)), 511 off_rbp = const(offset_of!(ArchPCBInfo, rbp)), 512 off_rsp = const(offset_of!(ArchPCBInfo, rsp)), 513 off_r15 = const(offset_of!(ArchPCBInfo, r15)), 514 off_rip = const(offset_of!(ArchPCBInfo, rip)), 515 off_fs = const(offset_of!(ArchPCBInfo, fs)), 516 off_gs = const(offset_of!(ArchPCBInfo, gs)), 517 518 switch_hook = sym crate::process::switch_finish_hook, 519 options(noreturn), 520 ); 521 } 522 523 /// 从`switch_to_inner`返回后,执行这个函数 524 /// 525 /// 也就是说,当进程再次被调度时,会从这里开始执行 526 #[inline(never)] 527 unsafe extern "sysv64" fn switch_back() { 528 asm!(concat!( 529 " 530 pop rax 531 pop rbp 532 " 533 )) 534 } 535 536 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! { 537 // 以下代码不能发生中断 538 CurrentIrqArch::interrupt_disable(); 539 540 let current_pcb = ProcessManager::current_pcb(); 541 let trap_frame_vaddr = VirtAddr::new( 542 current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(), 543 ); 544 // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr); 545 let new_rip = VirtAddr::new(ret_from_intr as usize); 546 547 assert!( 548 (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(), 549 "arch_switch_to_user(): current_rsp >= fake trap 550 frame vaddr, this may cause some illegal access to memory! 551 rsp: {:#x}, trap_frame_vaddr: {:#x}", 552 x86::current::registers::rsp() as usize, 553 trap_frame_vaddr.data() 554 ); 555 556 let mut arch_guard = current_pcb.arch_info_irqsave(); 557 arch_guard.rsp = trap_frame_vaddr.data(); 558 559 arch_guard.fs = USER_DS; 560 arch_guard.gs = USER_DS; 561 562 // 将内核gs数据压进cpu 563 arch_guard.store_kernel_gsbase(); 564 565 switch_fs_and_gs( 566 SegmentSelector::from_bits_truncate(arch_guard.fs.bits()), 567 SegmentSelector::from_bits_truncate(arch_guard.gs.bits()), 568 ); 569 arch_guard.rip = new_rip.data(); 570 571 drop(arch_guard); 572 573 // 删除kthread的标志 574 current_pcb.flags().remove(ProcessFlags::KTHREAD); 575 current_pcb.worker_private().take(); 576 577 let mut trap_frame = TrapFrame::new(); 578 579 compiler_fence(Ordering::SeqCst); 580 Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| { 581 panic!( 582 "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}", 583 pid = current_pcb.pid(), 584 e = e 585 ); 586 }); 587 compiler_fence(Ordering::SeqCst); 588 589 // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题! 590 591 drop(current_pcb); 592 593 compiler_fence(Ordering::SeqCst); 594 ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data()); 595 } 596 597 /// 由于需要依赖ret来切换到用户态,所以不能inline 598 #[inline(never)] 599 unsafe extern "sysv64" fn ready_to_switch_to_user( 600 trap_frame: TrapFrame, 601 trapframe_vaddr: usize, 602 new_rip: usize, 603 ) -> ! { 604 *(trapframe_vaddr as *mut TrapFrame) = trap_frame; 605 asm!( 606 "swapgs", 607 "mov rsp, {trapframe_vaddr}", 608 "push {new_rip}", 609 "ret", 610 trapframe_vaddr = in(reg) trapframe_vaddr, 611 new_rip = in(reg) new_rip 612 ); 613 unreachable!() 614 } 615