1 use core::{ 2 arch::asm, 3 intrinsics::unlikely, 4 mem::ManuallyDrop, 5 sync::atomic::{compiler_fence, Ordering}, 6 }; 7 8 use alloc::{ 9 string::String, 10 sync::{Arc, Weak}, 11 vec::Vec, 12 }; 13 14 use memoffset::offset_of; 15 use x86::{controlregs::Cr4, segmentation::SegmentSelector}; 16 17 use crate::{ 18 arch::process::table::TSSManager, 19 exception::InterruptArch, 20 kwarn, 21 libs::spinlock::SpinLockGuard, 22 mm::{ 23 percpu::{PerCpu, PerCpuVar}, 24 VirtAddr, 25 }, 26 process::{ 27 fork::{CloneFlags, KernelCloneArgs}, 28 KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult, 29 SWITCH_RESULT, 30 }, 31 syscall::{Syscall, SystemError}, 32 }; 33 34 use self::{ 35 kthread::kernel_thread_bootstrap_stage1, 36 syscall::ARCH_SET_FS, 37 table::{switch_fs_and_gs, KERNEL_DS, USER_DS}, 38 }; 39 40 use super::{fpu::FpState, interrupt::TrapFrame, CurrentIrqArch}; 41 42 mod c_adapter; 43 pub mod kthread; 44 pub mod syscall; 45 pub mod table; 46 47 pub const IA32_FS_BASE: u32 = 0xC000_0100; 48 pub const IA32_GS_BASE: u32 = 0xC000_0101; 49 50 extern "C" { 51 /// 从中断返回 52 fn ret_from_intr(); 53 } 54 55 #[allow(dead_code)] 56 #[repr(align(32768))] 57 union InitProcUnion { 58 /// 用于存放idle进程的内核栈 59 idle_stack: [u8; 32768], 60 } 61 62 #[link_section = ".data.init_proc_union"] 63 #[no_mangle] 64 static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion { 65 idle_stack: [0; 32768], 66 }; 67 68 /// PCB中与架构相关的信息 69 #[derive(Debug, Clone)] 70 #[allow(dead_code)] 71 pub struct ArchPCBInfo { 72 rflags: usize, 73 rbx: usize, 74 r12: usize, 75 r13: usize, 76 r14: usize, 77 r15: usize, 78 rbp: usize, 79 rsp: usize, 80 rip: usize, 81 cr2: usize, 82 fsbase: usize, 83 gsbase: usize, 84 fs: u16, 85 gs: u16, 86 87 /// 浮点寄存器的状态 88 fp_state: Option<FpState>, 89 } 90 91 #[allow(dead_code)] 92 impl ArchPCBInfo { 93 /// 创建一个新的ArchPCBInfo 94 /// 95 /// ## 参数 96 /// 97 /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。 98 /// 99 /// ## 返回值 100 /// 101 /// 返回一个新的ArchPCBInfo 102 pub fn new(kstack: Option<&KernelStack>) -> Self { 103 let mut r = Self { 104 rflags: 0, 105 rbx: 0, 106 r12: 0, 107 r13: 0, 108 r14: 0, 109 r15: 0, 110 rbp: 0, 111 rsp: 0, 112 rip: 0, 113 cr2: 0, 114 fsbase: 0, 115 gsbase: 0, 116 fs: KERNEL_DS.bits(), 117 gs: KERNEL_DS.bits(), 118 fp_state: None, 119 }; 120 121 if kstack.is_some() { 122 let kstack = kstack.unwrap(); 123 r.rsp = kstack.stack_max_address().data(); 124 r.rbp = kstack.stack_max_address().data(); 125 } 126 127 return r; 128 } 129 130 pub fn set_stack(&mut self, stack: VirtAddr) { 131 self.rsp = stack.data(); 132 } 133 134 pub fn set_stack_base(&mut self, stack_base: VirtAddr) { 135 self.rbp = stack_base.data(); 136 } 137 138 pub fn rbp(&self) -> usize { 139 self.rbp 140 } 141 142 pub unsafe fn push_to_stack(&mut self, value: usize) { 143 self.rsp -= core::mem::size_of::<usize>(); 144 *(self.rsp as *mut usize) = value; 145 } 146 147 pub unsafe fn pop_from_stack(&mut self) -> usize { 148 let value = *(self.rsp as *const usize); 149 self.rsp += core::mem::size_of::<usize>(); 150 value 151 } 152 153 pub fn save_fp_state(&mut self) { 154 if self.fp_state.is_none() { 155 self.fp_state = Some(FpState::new()); 156 } 157 158 self.fp_state.as_mut().unwrap().save(); 159 } 160 161 pub fn restore_fp_state(&mut self) { 162 if unlikely(self.fp_state.is_none()) { 163 return; 164 } 165 166 self.fp_state.as_mut().unwrap().restore(); 167 } 168 169 /// 返回浮点寄存器结构体的副本 170 pub fn fp_state(&self) -> &Option<FpState> { 171 &self.fp_state 172 } 173 174 // 清空浮点寄存器 175 pub fn clear_fp_state(&mut self) { 176 if unlikely(self.fp_state.is_none()) { 177 kwarn!("fp_state is none"); 178 return; 179 } 180 181 self.fp_state.as_mut().unwrap().clear(); 182 } 183 pub unsafe fn save_fsbase(&mut self) { 184 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 185 self.fsbase = x86::current::segmentation::rdfsbase() as usize; 186 } else { 187 self.fsbase = x86::msr::rdmsr(IA32_FS_BASE) as usize; 188 } 189 } 190 191 pub unsafe fn save_gsbase(&mut self) { 192 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 193 self.gsbase = x86::current::segmentation::rdgsbase() as usize; 194 } else { 195 self.gsbase = x86::msr::rdmsr(IA32_GS_BASE) as usize; 196 } 197 } 198 199 pub unsafe fn restore_fsbase(&mut self) { 200 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 201 x86::current::segmentation::wrfsbase(self.fsbase as u64); 202 } else { 203 x86::msr::wrmsr(IA32_FS_BASE, self.fsbase as u64); 204 } 205 } 206 207 pub unsafe fn restore_gsbase(&mut self) { 208 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 209 x86::current::segmentation::wrgsbase(self.gsbase as u64); 210 } else { 211 x86::msr::wrmsr(IA32_GS_BASE, self.gsbase as u64); 212 } 213 } 214 215 pub fn fsbase(&self) -> usize { 216 self.fsbase 217 } 218 219 pub fn gsbase(&self) -> usize { 220 self.gsbase 221 } 222 223 pub fn cr2_mut(&mut self) -> &mut usize { 224 &mut self.cr2 225 } 226 227 pub fn fp_state_mut(&mut self) -> &mut Option<FpState> { 228 &mut self.fp_state 229 } 230 } 231 232 impl ProcessControlBlock { 233 /// 获取当前进程的pcb 234 pub fn arch_current_pcb() -> Arc<Self> { 235 // 获取栈指针 236 let ptr = VirtAddr::new(x86::current::registers::rsp() as usize); 237 let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1))); 238 // 从内核栈的最低地址处取出pcb的地址 239 let p = stack_base.data() as *const *const ProcessControlBlock; 240 if unlikely((unsafe { *p }).is_null()) { 241 panic!("current_pcb is null"); 242 } 243 unsafe { 244 // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下 245 let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> = 246 ManuallyDrop::new(Weak::from_raw(*p)); 247 248 let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap(); 249 return new_arc; 250 } 251 } 252 } 253 254 impl ProcessManager { 255 pub fn arch_init() { 256 { 257 // 初始化进程切换结果 per cpu变量 258 let mut switch_res_vec: Vec<SwitchResult> = Vec::new(); 259 for _ in 0..PerCpu::MAX_CPU_NUM { 260 switch_res_vec.push(SwitchResult::new()); 261 } 262 unsafe { 263 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap()); 264 } 265 } 266 } 267 /// fork的过程中复制线程 268 /// 269 /// 由于这个过程与具体的架构相关,所以放在这里 270 pub fn copy_thread( 271 current_pcb: &Arc<ProcessControlBlock>, 272 new_pcb: &Arc<ProcessControlBlock>, 273 clone_args: KernelCloneArgs, 274 current_trapframe: &TrapFrame, 275 ) -> Result<(), SystemError> { 276 let clone_flags = clone_args.flags; 277 let mut child_trapframe = current_trapframe.clone(); 278 279 // 子进程的返回值为0 280 child_trapframe.set_return_value(0); 281 282 // 设置子进程的栈基址(开始执行中断返回流程时的栈基址) 283 let mut new_arch_guard = new_pcb.arch_info(); 284 let kernel_stack_guard = new_pcb.kernel_stack(); 285 286 // 设置子进程在内核态开始执行时的rsp、rbp 287 new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address()); 288 289 let trap_frame_vaddr: VirtAddr = 290 kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>(); 291 new_arch_guard.set_stack(trap_frame_vaddr); 292 293 // 拷贝栈帧 294 unsafe { 295 let usp = clone_args.stack; 296 if usp != 0 { 297 child_trapframe.rsp = usp as u64; 298 } 299 let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame; 300 *trap_frame_ptr = child_trapframe; 301 } 302 303 let current_arch_guard = current_pcb.arch_info_irqsave(); 304 new_arch_guard.fsbase = current_arch_guard.fsbase; 305 new_arch_guard.gsbase = current_arch_guard.gsbase; 306 new_arch_guard.fs = current_arch_guard.fs; 307 new_arch_guard.gs = current_arch_guard.gs; 308 new_arch_guard.fp_state = current_arch_guard.fp_state.clone(); 309 310 // 拷贝浮点寄存器的状态 311 if let Some(fp_state) = current_arch_guard.fp_state.as_ref() { 312 new_arch_guard.fp_state = Some(*fp_state); 313 } 314 drop(current_arch_guard); 315 316 // 设置返回地址(子进程开始执行的指令地址) 317 if new_pcb.flags().contains(ProcessFlags::KTHREAD) { 318 let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize; 319 new_arch_guard.rip = kthread_bootstrap_stage1_func_addr; 320 } else { 321 new_arch_guard.rip = ret_from_intr as usize; 322 } 323 324 // 设置tls 325 if clone_flags.contains(CloneFlags::CLONE_SETTLS) { 326 drop(new_arch_guard); 327 Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?; 328 } 329 330 return Ok(()); 331 } 332 333 /// 切换进程 334 /// 335 /// ## 参数 336 /// 337 /// - `prev`:上一个进程的pcb 338 /// - `next`:下一个进程的pcb 339 pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) { 340 assert!(CurrentIrqArch::is_irq_enabled() == false); 341 342 // 保存浮点寄存器 343 prev.arch_info().save_fp_state(); 344 // 切换浮点寄存器 345 next.arch_info().restore_fp_state(); 346 347 // 切换fsbase 348 prev.arch_info().save_fsbase(); 349 next.arch_info().restore_fsbase(); 350 351 // 切换gsbase 352 prev.arch_info().save_gsbase(); 353 next.arch_info().restore_gsbase(); 354 355 // 切换地址空间 356 let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone(); 357 compiler_fence(Ordering::SeqCst); 358 359 next_addr_space.read().user_mapper.utable.make_current(); 360 drop(next_addr_space); 361 compiler_fence(Ordering::SeqCst); 362 // 切换内核栈 363 364 // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁) 365 let next_arch = SpinLockGuard::leak(next.arch_info()) as *mut ArchPCBInfo; 366 let prev_arch = SpinLockGuard::leak(prev.arch_info()) as *mut ArchPCBInfo; 367 368 (*prev_arch).rip = switch_back as usize; 369 370 // 恢复当前的 preempt count*2 371 ProcessManager::current_pcb().preempt_enable(); 372 ProcessManager::current_pcb().preempt_enable(); 373 374 // 切换tss 375 TSSManager::current_tss().set_rsp( 376 x86::Ring::Ring0, 377 next.kernel_stack().stack_max_address().data() as u64, 378 ); 379 SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev); 380 SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next); 381 // kdebug!("switch tss ok"); 382 compiler_fence(Ordering::SeqCst); 383 // 正式切换上下文 384 switch_to_inner(prev_arch, next_arch); 385 } 386 } 387 388 /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数 389 #[naked] 390 unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) { 391 asm!( 392 // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): 393 // 394 // - the current parameters are passed in the registers `rdi`, `rsi`, 395 // - we can modify scratch registers, e.g. rax 396 // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we 397 // store them here in the first place. 398 concat!(" 399 // Save old registers, and load new ones 400 mov [rdi + {off_rbx}], rbx 401 mov rbx, [rsi + {off_rbx}] 402 403 mov [rdi + {off_r12}], r12 404 mov r12, [rsi + {off_r12}] 405 406 mov [rdi + {off_r13}], r13 407 mov r13, [rsi + {off_r13}] 408 409 mov [rdi + {off_r14}], r14 410 mov r14, [rsi + {off_r14}] 411 412 mov [rdi + {off_r15}], r15 413 mov r15, [rsi + {off_r15}] 414 415 // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换) 416 mov [rdi + {off_fs}], fs 417 mov [rdi + {off_gs}], gs 418 419 // mov fs, [rsi + {off_fs}] 420 // mov gs, [rsi + {off_gs}] 421 422 push rbp 423 push rax 424 425 mov [rdi + {off_rbp}], rbp 426 mov rbp, [rsi + {off_rbp}] 427 428 mov [rdi + {off_rsp}], rsp 429 mov rsp, [rsi + {off_rsp}] 430 431 // // push RFLAGS (can only be modified via stack) 432 pushfq 433 // // pop RFLAGS into `self.rflags` 434 pop QWORD PTR [rdi + {off_rflags}] 435 436 // // push `next.rflags` 437 push QWORD PTR [rsi + {off_rflags}] 438 // // pop into RFLAGS 439 popfq 440 441 // push next rip to stack 442 push QWORD PTR [rsi + {off_rip}] 443 444 445 // When we return, we cannot even guarantee that the return address on the stack, points to 446 // the calling function. Thus, we have to execute this Rust hook by 447 // ourselves, which will unlock the contexts before the later switch. 448 449 // Note that switch_finish_hook will be responsible for executing `ret`. 450 jmp {switch_hook} 451 "), 452 453 off_rflags = const(offset_of!(ArchPCBInfo, rflags)), 454 455 off_rbx = const(offset_of!(ArchPCBInfo, rbx)), 456 off_r12 = const(offset_of!(ArchPCBInfo, r12)), 457 off_r13 = const(offset_of!(ArchPCBInfo, r13)), 458 off_r14 = const(offset_of!(ArchPCBInfo, r14)), 459 off_rbp = const(offset_of!(ArchPCBInfo, rbp)), 460 off_rsp = const(offset_of!(ArchPCBInfo, rsp)), 461 off_r15 = const(offset_of!(ArchPCBInfo, r15)), 462 off_rip = const(offset_of!(ArchPCBInfo, rip)), 463 off_fs = const(offset_of!(ArchPCBInfo, fs)), 464 off_gs = const(offset_of!(ArchPCBInfo, gs)), 465 466 switch_hook = sym crate::process::switch_finish_hook, 467 options(noreturn), 468 ); 469 } 470 471 /// 从`switch_to_inner`返回后,执行这个函数 472 /// 473 /// 也就是说,当进程再次被调度时,会从这里开始执行 474 #[inline(never)] 475 unsafe extern "sysv64" fn switch_back() { 476 asm!(concat!( 477 " 478 pop rax 479 pop rbp 480 " 481 )) 482 } 483 484 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! { 485 // 以下代码不能发生中断 486 CurrentIrqArch::interrupt_disable(); 487 488 let current_pcb = ProcessManager::current_pcb(); 489 let trap_frame_vaddr = VirtAddr::new( 490 current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(), 491 ); 492 // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr); 493 let new_rip = VirtAddr::new(ret_from_intr as usize); 494 495 assert!( 496 (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(), 497 "arch_switch_to_user(): current_rsp >= fake trap 498 frame vaddr, this may cause some illegal access to memory! 499 rsp: {:#x}, trap_frame_vaddr: {:#x}", 500 x86::current::registers::rsp() as usize, 501 trap_frame_vaddr.data() 502 ); 503 504 let mut arch_guard = current_pcb.arch_info_irqsave(); 505 arch_guard.rsp = trap_frame_vaddr.data(); 506 507 arch_guard.fs = USER_DS.bits(); 508 arch_guard.gs = USER_DS.bits(); 509 510 switch_fs_and_gs( 511 SegmentSelector::from_bits_truncate(arch_guard.fs), 512 SegmentSelector::from_bits_truncate(arch_guard.gs), 513 ); 514 arch_guard.rip = new_rip.data(); 515 516 drop(arch_guard); 517 518 // 删除kthread的标志 519 current_pcb.flags().remove(ProcessFlags::KTHREAD); 520 current_pcb.worker_private().take(); 521 522 let mut trap_frame = TrapFrame::new(); 523 524 compiler_fence(Ordering::SeqCst); 525 Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| { 526 panic!( 527 "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}", 528 pid = current_pcb.pid(), 529 e = e 530 ); 531 }); 532 compiler_fence(Ordering::SeqCst); 533 534 // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题! 535 536 drop(current_pcb); 537 538 compiler_fence(Ordering::SeqCst); 539 ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data()); 540 } 541 542 /// 由于需要依赖ret来切换到用户态,所以不能inline 543 #[inline(never)] 544 unsafe extern "sysv64" fn ready_to_switch_to_user( 545 trap_frame: TrapFrame, 546 trapframe_vaddr: usize, 547 new_rip: usize, 548 ) -> ! { 549 *(trapframe_vaddr as *mut TrapFrame) = trap_frame; 550 asm!( 551 "mov rsp, {trapframe_vaddr}", 552 "push {new_rip}", 553 "ret", 554 trapframe_vaddr = in(reg) trapframe_vaddr, 555 new_rip = in(reg) new_rip 556 ); 557 unreachable!() 558 } 559