11496ba7bSLoGin use core::{ 21496ba7bSLoGin arch::asm, 31496ba7bSLoGin intrinsics::unlikely, 41496ba7bSLoGin mem::ManuallyDrop, 51496ba7bSLoGin sync::atomic::{compiler_fence, Ordering}, 61496ba7bSLoGin }; 71496ba7bSLoGin 8971462beSGnoCiYeH use alloc::{ 9971462beSGnoCiYeH string::String, 10971462beSGnoCiYeH sync::{Arc, Weak}, 11971462beSGnoCiYeH vec::Vec, 12971462beSGnoCiYeH }; 131496ba7bSLoGin 147b32f508SLoGin use kdepends::memoffset::offset_of; 1591e9d4abSLoGin use system_error::SystemError; 161496ba7bSLoGin use x86::{controlregs::Cr4, segmentation::SegmentSelector}; 171496ba7bSLoGin 181496ba7bSLoGin use crate::{ 191496ba7bSLoGin arch::process::table::TSSManager, 201496ba7bSLoGin exception::InterruptArch, 214fda81ceSLoGin kerror, kwarn, 221496ba7bSLoGin libs::spinlock::SpinLockGuard, 2340169973SLoGin mm::VirtAddr, 241496ba7bSLoGin process::{ 25971462beSGnoCiYeH fork::{CloneFlags, KernelCloneArgs}, 2640169973SLoGin KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, PROCESS_SWITCH_RESULT, 271496ba7bSLoGin }, 2891e9d4abSLoGin syscall::Syscall, 291496ba7bSLoGin }; 301496ba7bSLoGin 311496ba7bSLoGin use self::{ 321496ba7bSLoGin kthread::kernel_thread_bootstrap_stage1, 33971462beSGnoCiYeH syscall::ARCH_SET_FS, 341496ba7bSLoGin table::{switch_fs_and_gs, KERNEL_DS, USER_DS}, 351496ba7bSLoGin }; 361496ba7bSLoGin 3716033951SGnoCiYeH use super::{fpu::FpState, interrupt::TrapFrame, syscall::X86_64GSData, CurrentIrqArch}; 381496ba7bSLoGin 395b59005fSLoGin pub mod idle; 401496ba7bSLoGin pub mod kthread; 411496ba7bSLoGin pub mod syscall; 421496ba7bSLoGin pub mod table; 431496ba7bSLoGin 441496ba7bSLoGin extern "C" { 451496ba7bSLoGin /// 从中断返回 461496ba7bSLoGin fn ret_from_intr(); 471496ba7bSLoGin } 481496ba7bSLoGin 49de71ec25SLoGin #[allow(dead_code)] 50de71ec25SLoGin #[repr(align(32768))] 51de71ec25SLoGin union InitProcUnion { 52de71ec25SLoGin /// 用于存放idle进程的内核栈 53de71ec25SLoGin idle_stack: [u8; 32768], 54de71ec25SLoGin } 55de71ec25SLoGin 56de71ec25SLoGin #[link_section = ".data.init_proc_union"] 57de71ec25SLoGin #[no_mangle] 58de71ec25SLoGin static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion { 59de71ec25SLoGin idle_stack: [0; 32768], 60de71ec25SLoGin }; 61de71ec25SLoGin 621496ba7bSLoGin /// PCB中与架构相关的信息 6316033951SGnoCiYeH #[derive(Debug)] 641496ba7bSLoGin #[allow(dead_code)] 651496ba7bSLoGin pub struct ArchPCBInfo { 661496ba7bSLoGin rflags: usize, 671496ba7bSLoGin rbx: usize, 681496ba7bSLoGin r12: usize, 691496ba7bSLoGin r13: usize, 701496ba7bSLoGin r14: usize, 711496ba7bSLoGin r15: usize, 721496ba7bSLoGin rbp: usize, 731496ba7bSLoGin rsp: usize, 741496ba7bSLoGin rip: usize, 751496ba7bSLoGin cr2: usize, 761496ba7bSLoGin fsbase: usize, 771496ba7bSLoGin gsbase: usize, 7816033951SGnoCiYeH fs: SegmentSelector, 7916033951SGnoCiYeH gs: SegmentSelector, 8016033951SGnoCiYeH /// 存储PCB系统调用栈以及在syscall过程中暂存用户态rsp的结构体 8116033951SGnoCiYeH gsdata: X86_64GSData, 821496ba7bSLoGin /// 浮点寄存器的状态 831496ba7bSLoGin fp_state: Option<FpState>, 841496ba7bSLoGin } 851496ba7bSLoGin 861496ba7bSLoGin #[allow(dead_code)] 871496ba7bSLoGin impl ArchPCBInfo { 881496ba7bSLoGin /// 创建一个新的ArchPCBInfo 891496ba7bSLoGin /// 901496ba7bSLoGin /// ## 参数 911496ba7bSLoGin /// 921496ba7bSLoGin /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。 931496ba7bSLoGin /// 941496ba7bSLoGin /// ## 返回值 951496ba7bSLoGin /// 961496ba7bSLoGin /// 返回一个新的ArchPCBInfo 970d6cf65aSLoGin #[inline(never)] 9816033951SGnoCiYeH pub fn new(kstack: &KernelStack) -> Self { 991496ba7bSLoGin let mut r = Self { 1001496ba7bSLoGin rflags: 0, 1011496ba7bSLoGin rbx: 0, 1021496ba7bSLoGin r12: 0, 1031496ba7bSLoGin r13: 0, 1041496ba7bSLoGin r14: 0, 1051496ba7bSLoGin r15: 0, 1061496ba7bSLoGin rbp: 0, 1071496ba7bSLoGin rsp: 0, 1081496ba7bSLoGin rip: 0, 1091496ba7bSLoGin cr2: 0, 1101496ba7bSLoGin fsbase: 0, 1111496ba7bSLoGin gsbase: 0, 11216033951SGnoCiYeH gsdata: X86_64GSData { 11316033951SGnoCiYeH kaddr: VirtAddr::new(0), 11416033951SGnoCiYeH uaddr: VirtAddr::new(0), 11516033951SGnoCiYeH }, 11616033951SGnoCiYeH fs: KERNEL_DS, 11716033951SGnoCiYeH gs: KERNEL_DS, 1181496ba7bSLoGin fp_state: None, 1191496ba7bSLoGin }; 1201496ba7bSLoGin 12116033951SGnoCiYeH r.rsp = kstack.stack_max_address().data() - 8; 1221496ba7bSLoGin r.rbp = kstack.stack_max_address().data(); 1231496ba7bSLoGin 1241496ba7bSLoGin return r; 1251496ba7bSLoGin } 1261496ba7bSLoGin 1271496ba7bSLoGin pub fn set_stack(&mut self, stack: VirtAddr) { 1281496ba7bSLoGin self.rsp = stack.data(); 1291496ba7bSLoGin } 1301496ba7bSLoGin 1311496ba7bSLoGin pub fn set_stack_base(&mut self, stack_base: VirtAddr) { 1321496ba7bSLoGin self.rbp = stack_base.data(); 1331496ba7bSLoGin } 1341496ba7bSLoGin 1351496ba7bSLoGin pub fn rbp(&self) -> usize { 1361496ba7bSLoGin self.rbp 1371496ba7bSLoGin } 1381496ba7bSLoGin 1391496ba7bSLoGin pub unsafe fn push_to_stack(&mut self, value: usize) { 1401496ba7bSLoGin self.rsp -= core::mem::size_of::<usize>(); 1411496ba7bSLoGin *(self.rsp as *mut usize) = value; 1421496ba7bSLoGin } 1431496ba7bSLoGin 1441496ba7bSLoGin pub unsafe fn pop_from_stack(&mut self) -> usize { 1451496ba7bSLoGin let value = *(self.rsp as *const usize); 1461496ba7bSLoGin self.rsp += core::mem::size_of::<usize>(); 1471496ba7bSLoGin value 1481496ba7bSLoGin } 1491496ba7bSLoGin 1501496ba7bSLoGin pub fn save_fp_state(&mut self) { 1511496ba7bSLoGin if self.fp_state.is_none() { 1521496ba7bSLoGin self.fp_state = Some(FpState::new()); 1531496ba7bSLoGin } 1541496ba7bSLoGin 1551496ba7bSLoGin self.fp_state.as_mut().unwrap().save(); 1561496ba7bSLoGin } 1571496ba7bSLoGin 1581496ba7bSLoGin pub fn restore_fp_state(&mut self) { 1591496ba7bSLoGin if unlikely(self.fp_state.is_none()) { 1601496ba7bSLoGin return; 1611496ba7bSLoGin } 1621496ba7bSLoGin 1631496ba7bSLoGin self.fp_state.as_mut().unwrap().restore(); 1641496ba7bSLoGin } 1651496ba7bSLoGin 1663c82aa56SChiichen /// 返回浮点寄存器结构体的副本 1673c82aa56SChiichen pub fn fp_state(&self) -> &Option<FpState> { 1683c82aa56SChiichen &self.fp_state 1693c82aa56SChiichen } 1703c82aa56SChiichen 1713c82aa56SChiichen // 清空浮点寄存器 1723c82aa56SChiichen pub fn clear_fp_state(&mut self) { 1733c82aa56SChiichen if unlikely(self.fp_state.is_none()) { 1743c82aa56SChiichen kwarn!("fp_state is none"); 1753c82aa56SChiichen return; 1763c82aa56SChiichen } 1773c82aa56SChiichen 1783c82aa56SChiichen self.fp_state.as_mut().unwrap().clear(); 1793c82aa56SChiichen } 1801496ba7bSLoGin pub unsafe fn save_fsbase(&mut self) { 1811496ba7bSLoGin if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 1821496ba7bSLoGin self.fsbase = x86::current::segmentation::rdfsbase() as usize; 1831496ba7bSLoGin } else { 18416033951SGnoCiYeH self.fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE) as usize; 1851496ba7bSLoGin } 1861496ba7bSLoGin } 1871496ba7bSLoGin 1881496ba7bSLoGin pub unsafe fn save_gsbase(&mut self) { 1891496ba7bSLoGin if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 1901496ba7bSLoGin self.gsbase = x86::current::segmentation::rdgsbase() as usize; 1911496ba7bSLoGin } else { 19216033951SGnoCiYeH self.gsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE) as usize; 1931496ba7bSLoGin } 1941496ba7bSLoGin } 1951496ba7bSLoGin 1961496ba7bSLoGin pub unsafe fn restore_fsbase(&mut self) { 1971496ba7bSLoGin if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 1981496ba7bSLoGin x86::current::segmentation::wrfsbase(self.fsbase as u64); 199971462beSGnoCiYeH } else { 20016033951SGnoCiYeH x86::msr::wrmsr(x86::msr::IA32_FS_BASE, self.fsbase as u64); 2011496ba7bSLoGin } 2021496ba7bSLoGin } 2031496ba7bSLoGin 2041496ba7bSLoGin pub unsafe fn restore_gsbase(&mut self) { 2051496ba7bSLoGin if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { 2061496ba7bSLoGin x86::current::segmentation::wrgsbase(self.gsbase as u64); 207971462beSGnoCiYeH } else { 20816033951SGnoCiYeH x86::msr::wrmsr(x86::msr::IA32_GS_BASE, self.gsbase as u64); 2091496ba7bSLoGin } 2101496ba7bSLoGin } 2111496ba7bSLoGin 21216033951SGnoCiYeH /// 将gsdata写入KernelGsbase寄存器 21316033951SGnoCiYeH pub unsafe fn store_kernel_gsbase(&self) { 21416033951SGnoCiYeH x86::msr::wrmsr( 21516033951SGnoCiYeH x86::msr::IA32_KERNEL_GSBASE, 21616033951SGnoCiYeH &self.gsdata as *const X86_64GSData as u64, 21716033951SGnoCiYeH ); 21816033951SGnoCiYeH } 21916033951SGnoCiYeH 22016033951SGnoCiYeH /// ### 初始化系统调用栈,不得与PCB内核栈冲突(即传入的应该是一个新的栈,避免栈损坏) 22116033951SGnoCiYeH pub fn init_syscall_stack(&mut self, stack: &KernelStack) { 22216033951SGnoCiYeH self.gsdata.set_kstack(stack.stack_max_address() - 8); 22316033951SGnoCiYeH } 22416033951SGnoCiYeH 2251496ba7bSLoGin pub fn fsbase(&self) -> usize { 2261496ba7bSLoGin self.fsbase 2271496ba7bSLoGin } 2281496ba7bSLoGin 2291496ba7bSLoGin pub fn gsbase(&self) -> usize { 2301496ba7bSLoGin self.gsbase 2311496ba7bSLoGin } 2323c82aa56SChiichen 2333c82aa56SChiichen pub fn cr2_mut(&mut self) -> &mut usize { 2343c82aa56SChiichen &mut self.cr2 2353c82aa56SChiichen } 2363c82aa56SChiichen 2373c82aa56SChiichen pub fn fp_state_mut(&mut self) -> &mut Option<FpState> { 2383c82aa56SChiichen &mut self.fp_state 2393c82aa56SChiichen } 24016033951SGnoCiYeH 24116033951SGnoCiYeH /// ### 克隆ArchPCBInfo,需要注意gsdata也是对应clone的 24216033951SGnoCiYeH pub fn clone_all(&self) -> Self { 24316033951SGnoCiYeH Self { 24416033951SGnoCiYeH rflags: self.rflags, 24516033951SGnoCiYeH rbx: self.rbx, 24616033951SGnoCiYeH r12: self.r12, 24716033951SGnoCiYeH r13: self.r13, 24816033951SGnoCiYeH r14: self.r14, 24916033951SGnoCiYeH r15: self.r15, 25016033951SGnoCiYeH rbp: self.rbp, 25116033951SGnoCiYeH rsp: self.rsp, 25216033951SGnoCiYeH rip: self.rip, 25316033951SGnoCiYeH cr2: self.cr2, 25416033951SGnoCiYeH fsbase: self.fsbase, 25516033951SGnoCiYeH gsbase: self.gsbase, 256b5b571e0SLoGin fs: self.fs, 257b5b571e0SLoGin gs: self.gs, 25816033951SGnoCiYeH gsdata: self.gsdata.clone(), 25916033951SGnoCiYeH fp_state: self.fp_state, 26016033951SGnoCiYeH } 26116033951SGnoCiYeH } 26216033951SGnoCiYeH 26316033951SGnoCiYeH // ### 从另一个ArchPCBInfo处clone,gsdata会被保留 26416033951SGnoCiYeH pub fn clone_from(&mut self, from: &Self) { 26516033951SGnoCiYeH let gsdata = self.gsdata.clone(); 26616033951SGnoCiYeH *self = from.clone_all(); 26716033951SGnoCiYeH self.gsdata = gsdata; 26816033951SGnoCiYeH } 2691496ba7bSLoGin } 2701496ba7bSLoGin 2711496ba7bSLoGin impl ProcessControlBlock { 2721496ba7bSLoGin /// 获取当前进程的pcb 2731496ba7bSLoGin pub fn arch_current_pcb() -> Arc<Self> { 2741496ba7bSLoGin // 获取栈指针 2751496ba7bSLoGin let ptr = VirtAddr::new(x86::current::registers::rsp() as usize); 2764fda81ceSLoGin 2771496ba7bSLoGin let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1))); 2784fda81ceSLoGin 2791496ba7bSLoGin // 从内核栈的最低地址处取出pcb的地址 2801496ba7bSLoGin let p = stack_base.data() as *const *const ProcessControlBlock; 2811496ba7bSLoGin if unlikely((unsafe { *p }).is_null()) { 2824fda81ceSLoGin kerror!("p={:p}", p); 2831496ba7bSLoGin panic!("current_pcb is null"); 2841496ba7bSLoGin } 2851496ba7bSLoGin unsafe { 286971462beSGnoCiYeH // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下 287971462beSGnoCiYeH let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> = 288971462beSGnoCiYeH ManuallyDrop::new(Weak::from_raw(*p)); 2891496ba7bSLoGin 290971462beSGnoCiYeH let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap(); 2911496ba7bSLoGin return new_arc; 2921496ba7bSLoGin } 2931496ba7bSLoGin } 2941496ba7bSLoGin } 2951496ba7bSLoGin 2961496ba7bSLoGin impl ProcessManager { 2971496ba7bSLoGin pub fn arch_init() { 29840169973SLoGin // do nothing 2991496ba7bSLoGin } 3001496ba7bSLoGin /// fork的过程中复制线程 3011496ba7bSLoGin /// 3021496ba7bSLoGin /// 由于这个过程与具体的架构相关,所以放在这里 3031496ba7bSLoGin pub fn copy_thread( 3041496ba7bSLoGin current_pcb: &Arc<ProcessControlBlock>, 3051496ba7bSLoGin new_pcb: &Arc<ProcessControlBlock>, 306971462beSGnoCiYeH clone_args: KernelCloneArgs, 3071496ba7bSLoGin current_trapframe: &TrapFrame, 3081496ba7bSLoGin ) -> Result<(), SystemError> { 309971462beSGnoCiYeH let clone_flags = clone_args.flags; 310b5b571e0SLoGin let mut child_trapframe = *current_trapframe; 3111496ba7bSLoGin 3121496ba7bSLoGin // 子进程的返回值为0 3131496ba7bSLoGin child_trapframe.set_return_value(0); 3141496ba7bSLoGin 3151496ba7bSLoGin // 设置子进程的栈基址(开始执行中断返回流程时的栈基址) 3160d6cf65aSLoGin let mut new_arch_guard = unsafe { new_pcb.arch_info() }; 3171496ba7bSLoGin let kernel_stack_guard = new_pcb.kernel_stack(); 3181496ba7bSLoGin 3191496ba7bSLoGin // 设置子进程在内核态开始执行时的rsp、rbp 3201496ba7bSLoGin new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address()); 3211496ba7bSLoGin 3221496ba7bSLoGin let trap_frame_vaddr: VirtAddr = 3231496ba7bSLoGin kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>(); 3241496ba7bSLoGin new_arch_guard.set_stack(trap_frame_vaddr); 3251496ba7bSLoGin 3261496ba7bSLoGin // 拷贝栈帧 3271496ba7bSLoGin unsafe { 328971462beSGnoCiYeH let usp = clone_args.stack; 329971462beSGnoCiYeH if usp != 0 { 330971462beSGnoCiYeH child_trapframe.rsp = usp as u64; 331971462beSGnoCiYeH } 3321496ba7bSLoGin let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame; 3331496ba7bSLoGin *trap_frame_ptr = child_trapframe; 3341496ba7bSLoGin } 3351496ba7bSLoGin 3361496ba7bSLoGin let current_arch_guard = current_pcb.arch_info_irqsave(); 3371496ba7bSLoGin new_arch_guard.fsbase = current_arch_guard.fsbase; 3381496ba7bSLoGin new_arch_guard.gsbase = current_arch_guard.gsbase; 3391496ba7bSLoGin new_arch_guard.fs = current_arch_guard.fs; 3401496ba7bSLoGin new_arch_guard.gs = current_arch_guard.gs; 341b5b571e0SLoGin new_arch_guard.fp_state = current_arch_guard.fp_state; 3421496ba7bSLoGin 3431496ba7bSLoGin // 拷贝浮点寄存器的状态 3441496ba7bSLoGin if let Some(fp_state) = current_arch_guard.fp_state.as_ref() { 3451496ba7bSLoGin new_arch_guard.fp_state = Some(*fp_state); 3461496ba7bSLoGin } 3471496ba7bSLoGin drop(current_arch_guard); 3481496ba7bSLoGin 3491496ba7bSLoGin // 设置返回地址(子进程开始执行的指令地址) 3501496ba7bSLoGin if new_pcb.flags().contains(ProcessFlags::KTHREAD) { 3511496ba7bSLoGin let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize; 3521496ba7bSLoGin new_arch_guard.rip = kthread_bootstrap_stage1_func_addr; 3531496ba7bSLoGin } else { 3541496ba7bSLoGin new_arch_guard.rip = ret_from_intr as usize; 3551496ba7bSLoGin } 3561496ba7bSLoGin 357971462beSGnoCiYeH // 设置tls 358971462beSGnoCiYeH if clone_flags.contains(CloneFlags::CLONE_SETTLS) { 359971462beSGnoCiYeH drop(new_arch_guard); 360971462beSGnoCiYeH Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?; 361971462beSGnoCiYeH } 362971462beSGnoCiYeH 3631496ba7bSLoGin return Ok(()); 3641496ba7bSLoGin } 3651496ba7bSLoGin 3661496ba7bSLoGin /// 切换进程 3671496ba7bSLoGin /// 3681496ba7bSLoGin /// ## 参数 3691496ba7bSLoGin /// 3701496ba7bSLoGin /// - `prev`:上一个进程的pcb 3711496ba7bSLoGin /// - `next`:下一个进程的pcb 3721496ba7bSLoGin pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) { 373b5b571e0SLoGin assert!(!CurrentIrqArch::is_irq_enabled()); 3741496ba7bSLoGin 3751496ba7bSLoGin // 保存浮点寄存器 3760d6cf65aSLoGin prev.arch_info_irqsave().save_fp_state(); 3771496ba7bSLoGin // 切换浮点寄存器 3780d6cf65aSLoGin next.arch_info_irqsave().restore_fp_state(); 3791496ba7bSLoGin 3801496ba7bSLoGin // 切换fsbase 3810d6cf65aSLoGin prev.arch_info_irqsave().save_fsbase(); 3820d6cf65aSLoGin next.arch_info_irqsave().restore_fsbase(); 3831496ba7bSLoGin 3841496ba7bSLoGin // 切换gsbase 38516033951SGnoCiYeH Self::switch_gsbase(&prev, &next); 3861496ba7bSLoGin 3871496ba7bSLoGin // 切换地址空间 3881496ba7bSLoGin let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone(); 3891496ba7bSLoGin compiler_fence(Ordering::SeqCst); 3901496ba7bSLoGin 3911496ba7bSLoGin next_addr_space.read().user_mapper.utable.make_current(); 392971462beSGnoCiYeH drop(next_addr_space); 3931496ba7bSLoGin compiler_fence(Ordering::SeqCst); 3941496ba7bSLoGin // 切换内核栈 3951496ba7bSLoGin 3961496ba7bSLoGin // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁) 3970d6cf65aSLoGin let next_arch = SpinLockGuard::leak(next.arch_info_irqsave()) as *mut ArchPCBInfo; 3980d6cf65aSLoGin let prev_arch = SpinLockGuard::leak(prev.arch_info_irqsave()) as *mut ArchPCBInfo; 3991496ba7bSLoGin 400971462beSGnoCiYeH (*prev_arch).rip = switch_back as usize; 4011496ba7bSLoGin 4021496ba7bSLoGin // 恢复当前的 preempt count*2 4031496ba7bSLoGin ProcessManager::current_pcb().preempt_enable(); 4041496ba7bSLoGin ProcessManager::current_pcb().preempt_enable(); 4051496ba7bSLoGin 4061496ba7bSLoGin // 切换tss 4071496ba7bSLoGin TSSManager::current_tss().set_rsp( 4081496ba7bSLoGin x86::Ring::Ring0, 4091496ba7bSLoGin next.kernel_stack().stack_max_address().data() as u64, 4101496ba7bSLoGin ); 41140169973SLoGin PROCESS_SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev); 41240169973SLoGin PROCESS_SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next); 4131496ba7bSLoGin // kdebug!("switch tss ok"); 414de71ec25SLoGin compiler_fence(Ordering::SeqCst); 4151496ba7bSLoGin // 正式切换上下文 4161496ba7bSLoGin switch_to_inner(prev_arch, next_arch); 4171496ba7bSLoGin } 41816033951SGnoCiYeH 41916033951SGnoCiYeH unsafe fn switch_gsbase(prev: &Arc<ProcessControlBlock>, next: &Arc<ProcessControlBlock>) { 42016033951SGnoCiYeH asm!("swapgs", options(nostack, preserves_flags)); 4210d6cf65aSLoGin prev.arch_info_irqsave().save_gsbase(); 4220d6cf65aSLoGin next.arch_info_irqsave().restore_gsbase(); 42316033951SGnoCiYeH // 将下一个进程的kstack写入kernel_gsbase 4240d6cf65aSLoGin next.arch_info_irqsave().store_kernel_gsbase(); 42516033951SGnoCiYeH asm!("swapgs", options(nostack, preserves_flags)); 42616033951SGnoCiYeH } 4271496ba7bSLoGin } 4281496ba7bSLoGin 4291496ba7bSLoGin /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数 4301496ba7bSLoGin #[naked] 431971462beSGnoCiYeH unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) { 4321496ba7bSLoGin asm!( 4331496ba7bSLoGin // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): 4341496ba7bSLoGin // 4351496ba7bSLoGin // - the current parameters are passed in the registers `rdi`, `rsi`, 4361496ba7bSLoGin // - we can modify scratch registers, e.g. rax 4371496ba7bSLoGin // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we 4381496ba7bSLoGin // store them here in the first place. 4391496ba7bSLoGin concat!(" 4401496ba7bSLoGin // Save old registers, and load new ones 4411496ba7bSLoGin mov [rdi + {off_rbx}], rbx 4421496ba7bSLoGin mov rbx, [rsi + {off_rbx}] 4431496ba7bSLoGin 4441496ba7bSLoGin mov [rdi + {off_r12}], r12 4451496ba7bSLoGin mov r12, [rsi + {off_r12}] 4461496ba7bSLoGin 4471496ba7bSLoGin mov [rdi + {off_r13}], r13 4481496ba7bSLoGin mov r13, [rsi + {off_r13}] 4491496ba7bSLoGin 4501496ba7bSLoGin mov [rdi + {off_r14}], r14 4511496ba7bSLoGin mov r14, [rsi + {off_r14}] 4521496ba7bSLoGin 4531496ba7bSLoGin mov [rdi + {off_r15}], r15 4541496ba7bSLoGin mov r15, [rsi + {off_r15}] 4551496ba7bSLoGin 4561496ba7bSLoGin // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换) 4571496ba7bSLoGin mov [rdi + {off_fs}], fs 4581496ba7bSLoGin mov [rdi + {off_gs}], gs 4591496ba7bSLoGin 460971462beSGnoCiYeH // mov fs, [rsi + {off_fs}] 461971462beSGnoCiYeH // mov gs, [rsi + {off_gs}] 462971462beSGnoCiYeH 4631496ba7bSLoGin push rbp 4641496ba7bSLoGin push rax 4651496ba7bSLoGin 4661496ba7bSLoGin mov [rdi + {off_rbp}], rbp 4671496ba7bSLoGin mov rbp, [rsi + {off_rbp}] 4681496ba7bSLoGin 4691496ba7bSLoGin mov [rdi + {off_rsp}], rsp 4701496ba7bSLoGin mov rsp, [rsi + {off_rsp}] 4711496ba7bSLoGin 4721496ba7bSLoGin // // push RFLAGS (can only be modified via stack) 4731496ba7bSLoGin pushfq 4741496ba7bSLoGin // // pop RFLAGS into `self.rflags` 4751496ba7bSLoGin pop QWORD PTR [rdi + {off_rflags}] 4761496ba7bSLoGin 4771496ba7bSLoGin // // push `next.rflags` 4781496ba7bSLoGin push QWORD PTR [rsi + {off_rflags}] 4791496ba7bSLoGin // // pop into RFLAGS 4801496ba7bSLoGin popfq 4811496ba7bSLoGin 4821496ba7bSLoGin // push next rip to stack 4831496ba7bSLoGin push QWORD PTR [rsi + {off_rip}] 4841496ba7bSLoGin 4851496ba7bSLoGin 4861496ba7bSLoGin // When we return, we cannot even guarantee that the return address on the stack, points to 4871496ba7bSLoGin // the calling function. Thus, we have to execute this Rust hook by 4881496ba7bSLoGin // ourselves, which will unlock the contexts before the later switch. 4891496ba7bSLoGin 4901496ba7bSLoGin // Note that switch_finish_hook will be responsible for executing `ret`. 4911496ba7bSLoGin jmp {switch_hook} 4921496ba7bSLoGin "), 4931496ba7bSLoGin 4941496ba7bSLoGin off_rflags = const(offset_of!(ArchPCBInfo, rflags)), 4951496ba7bSLoGin 4961496ba7bSLoGin off_rbx = const(offset_of!(ArchPCBInfo, rbx)), 4971496ba7bSLoGin off_r12 = const(offset_of!(ArchPCBInfo, r12)), 4981496ba7bSLoGin off_r13 = const(offset_of!(ArchPCBInfo, r13)), 4991496ba7bSLoGin off_r14 = const(offset_of!(ArchPCBInfo, r14)), 5001496ba7bSLoGin off_rbp = const(offset_of!(ArchPCBInfo, rbp)), 5011496ba7bSLoGin off_rsp = const(offset_of!(ArchPCBInfo, rsp)), 5021496ba7bSLoGin off_r15 = const(offset_of!(ArchPCBInfo, r15)), 5031496ba7bSLoGin off_rip = const(offset_of!(ArchPCBInfo, rip)), 5041496ba7bSLoGin off_fs = const(offset_of!(ArchPCBInfo, fs)), 5051496ba7bSLoGin off_gs = const(offset_of!(ArchPCBInfo, gs)), 5061496ba7bSLoGin 5071496ba7bSLoGin switch_hook = sym crate::process::switch_finish_hook, 5081496ba7bSLoGin options(noreturn), 5091496ba7bSLoGin ); 5101496ba7bSLoGin } 5111496ba7bSLoGin 5121496ba7bSLoGin /// 从`switch_to_inner`返回后,执行这个函数 5131496ba7bSLoGin /// 5141496ba7bSLoGin /// 也就是说,当进程再次被调度时,会从这里开始执行 5151496ba7bSLoGin #[inline(never)] 5161496ba7bSLoGin unsafe extern "sysv64" fn switch_back() { 5171496ba7bSLoGin asm!(concat!( 5181496ba7bSLoGin " 5191496ba7bSLoGin pop rax 5201496ba7bSLoGin pop rbp 5211496ba7bSLoGin " 5221496ba7bSLoGin )) 5231496ba7bSLoGin } 5241496ba7bSLoGin 5251496ba7bSLoGin pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! { 5261496ba7bSLoGin // 以下代码不能发生中断 5271496ba7bSLoGin CurrentIrqArch::interrupt_disable(); 5281496ba7bSLoGin 5291496ba7bSLoGin let current_pcb = ProcessManager::current_pcb(); 5301496ba7bSLoGin let trap_frame_vaddr = VirtAddr::new( 5311496ba7bSLoGin current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(), 5321496ba7bSLoGin ); 5331496ba7bSLoGin // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr); 5341496ba7bSLoGin let new_rip = VirtAddr::new(ret_from_intr as usize); 5351496ba7bSLoGin 5361496ba7bSLoGin assert!( 5371496ba7bSLoGin (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(), 5381496ba7bSLoGin "arch_switch_to_user(): current_rsp >= fake trap 5391496ba7bSLoGin frame vaddr, this may cause some illegal access to memory! 5401496ba7bSLoGin rsp: {:#x}, trap_frame_vaddr: {:#x}", 5411496ba7bSLoGin x86::current::registers::rsp() as usize, 5421496ba7bSLoGin trap_frame_vaddr.data() 5431496ba7bSLoGin ); 5441496ba7bSLoGin 5451496ba7bSLoGin let mut arch_guard = current_pcb.arch_info_irqsave(); 5461496ba7bSLoGin arch_guard.rsp = trap_frame_vaddr.data(); 5471496ba7bSLoGin 54816033951SGnoCiYeH arch_guard.fs = USER_DS; 54916033951SGnoCiYeH arch_guard.gs = USER_DS; 55016033951SGnoCiYeH 55116033951SGnoCiYeH // 将内核gs数据压进cpu 55216033951SGnoCiYeH arch_guard.store_kernel_gsbase(); 5531496ba7bSLoGin 5541496ba7bSLoGin switch_fs_and_gs( 55516033951SGnoCiYeH SegmentSelector::from_bits_truncate(arch_guard.fs.bits()), 55616033951SGnoCiYeH SegmentSelector::from_bits_truncate(arch_guard.gs.bits()), 5571496ba7bSLoGin ); 5581496ba7bSLoGin arch_guard.rip = new_rip.data(); 5591496ba7bSLoGin 5601496ba7bSLoGin drop(arch_guard); 5611496ba7bSLoGin 5621496ba7bSLoGin // 删除kthread的标志 5631496ba7bSLoGin current_pcb.flags().remove(ProcessFlags::KTHREAD); 5641496ba7bSLoGin current_pcb.worker_private().take(); 5651496ba7bSLoGin 566*f0c87a89SGnoCiYeH *current_pcb.sched_info().sched_policy.write_irqsave() = crate::sched::SchedPolicy::CFS; 567*f0c87a89SGnoCiYeH 5681496ba7bSLoGin let mut trap_frame = TrapFrame::new(); 5691496ba7bSLoGin 5701496ba7bSLoGin compiler_fence(Ordering::SeqCst); 5711496ba7bSLoGin Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| { 5721496ba7bSLoGin panic!( 5731496ba7bSLoGin "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}", 5741496ba7bSLoGin pid = current_pcb.pid(), 5751496ba7bSLoGin e = e 5761496ba7bSLoGin ); 5771496ba7bSLoGin }); 5781496ba7bSLoGin compiler_fence(Ordering::SeqCst); 5791496ba7bSLoGin 5801496ba7bSLoGin // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题! 5811496ba7bSLoGin 5821496ba7bSLoGin drop(current_pcb); 5831496ba7bSLoGin 5841496ba7bSLoGin compiler_fence(Ordering::SeqCst); 5851496ba7bSLoGin ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data()); 5861496ba7bSLoGin } 5871496ba7bSLoGin 5881496ba7bSLoGin /// 由于需要依赖ret来切换到用户态,所以不能inline 5891496ba7bSLoGin #[inline(never)] 5901496ba7bSLoGin unsafe extern "sysv64" fn ready_to_switch_to_user( 5911496ba7bSLoGin trap_frame: TrapFrame, 5921496ba7bSLoGin trapframe_vaddr: usize, 5931496ba7bSLoGin new_rip: usize, 5941496ba7bSLoGin ) -> ! { 5951496ba7bSLoGin *(trapframe_vaddr as *mut TrapFrame) = trap_frame; 596*f0c87a89SGnoCiYeH compiler_fence(Ordering::SeqCst); 5971496ba7bSLoGin asm!( 59816033951SGnoCiYeH "swapgs", 5991496ba7bSLoGin "mov rsp, {trapframe_vaddr}", 6001496ba7bSLoGin "push {new_rip}", 6011496ba7bSLoGin "ret", 6021496ba7bSLoGin trapframe_vaddr = in(reg) trapframe_vaddr, 6031496ba7bSLoGin new_rip = in(reg) new_rip 6041496ba7bSLoGin ); 6051496ba7bSLoGin unreachable!() 6061496ba7bSLoGin } 607*f0c87a89SGnoCiYeH 608*f0c87a89SGnoCiYeH // bitflags! { 609*f0c87a89SGnoCiYeH // pub struct ProcessThreadFlags: u32 { 610*f0c87a89SGnoCiYeH // /* 611*f0c87a89SGnoCiYeH // * thread information flags 612*f0c87a89SGnoCiYeH // * - these are process state flags that various assembly files 613*f0c87a89SGnoCiYeH // * may need to access 614*f0c87a89SGnoCiYeH // */ 615*f0c87a89SGnoCiYeH // const TIF_NOTIFY_RESUME = 1 << 1; /* callback before returning to user */ 616*f0c87a89SGnoCiYeH // const TIF_SIGPENDING = 1 << 2; /* signal pending */ 617*f0c87a89SGnoCiYeH // const TIF_NEED_RESCHED = 1 << 3; /* rescheduling necessary */ 618*f0c87a89SGnoCiYeH // const TIF_SINGLESTEP = 1 << 4; /* reenable singlestep on user return*/ 619*f0c87a89SGnoCiYeH // const TIF_SSBD = 1 << 5; /* Speculative store bypass disable */ 620*f0c87a89SGnoCiYeH // const TIF_SPEC_IB = 1 << 9; /* Indirect branch speculation mitigation */ 621*f0c87a89SGnoCiYeH // const TIF_SPEC_L1D_FLUSH = 1 << 10; /* Flush L1D on mm switches (processes) */ 622*f0c87a89SGnoCiYeH // const TIF_USER_RETURN_NOTIFY = 1 << 11; /* notify kernel of userspace return */ 623*f0c87a89SGnoCiYeH // const TIF_UPROBE = 1 << 12; /* breakpointed or singlestepping */ 624*f0c87a89SGnoCiYeH // const TIF_PATCH_PENDING = 1 << 13; /* pending live patching update */ 625*f0c87a89SGnoCiYeH // const TIF_NEED_FPU_LOAD = 1 << 14; /* load FPU on return to userspace */ 626*f0c87a89SGnoCiYeH // const TIF_NOCPUID = 1 << 15; /* CPUID is not accessible in userland */ 627*f0c87a89SGnoCiYeH // const TIF_NOTSC = 1 << 16; /* TSC is not accessible in userland */ 628*f0c87a89SGnoCiYeH // const TIF_NOTIFY_SIGNAL = 1 << 17; /* signal notifications exist */ 629*f0c87a89SGnoCiYeH // const TIF_MEMDIE = 1 << 20; /* is terminating due to OOM killer */ 630*f0c87a89SGnoCiYeH // const TIF_POLLING_NRFLAG = 1 << 21; /* idle is polling for TIF_NEED_RESCHED */ 631*f0c87a89SGnoCiYeH // const TIF_IO_BITMAP = 1 << 22; /* uses I/O bitmap */ 632*f0c87a89SGnoCiYeH // const TIF_SPEC_FORCE_UPDATE = 1 << 23; /* Force speculation MSR update in context switch */ 633*f0c87a89SGnoCiYeH // const TIF_FORCED_TF = 1 << 24; /* true if TF in eflags artificially */ 634*f0c87a89SGnoCiYeH // const TIF_BLOCKSTEP = 1 << 25; /* set when we want DEBUGCTLMSR_BTF */ 635*f0c87a89SGnoCiYeH // const TIF_LAZY_MMU_UPDATES = 1 << 27; /* task is updating the mmu lazily */ 636*f0c87a89SGnoCiYeH // const TIF_ADDR32 = 1 << 29; /* 32-bit address space on 64 bits */ 637*f0c87a89SGnoCiYeH // } 638*f0c87a89SGnoCiYeH // } 639