1 use core::{
2 arch::asm,
3 intrinsics::unlikely,
4 mem::ManuallyDrop,
5 sync::atomic::{compiler_fence, Ordering},
6 };
7
8 use alloc::{
9 string::String,
10 sync::{Arc, Weak},
11 vec::Vec,
12 };
13
14 use kdepends::memoffset::offset_of;
15 use system_error::SystemError;
16 use x86::{controlregs::Cr4, segmentation::SegmentSelector};
17
18 use crate::{
19 arch::process::table::TSSManager,
20 exception::InterruptArch,
21 kerror, kwarn,
22 libs::spinlock::SpinLockGuard,
23 mm::{
24 percpu::{PerCpu, PerCpuVar},
25 VirtAddr,
26 },
27 process::{
28 fork::{CloneFlags, KernelCloneArgs},
29 KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult,
30 SWITCH_RESULT,
31 },
32 syscall::Syscall,
33 };
34
35 use self::{
36 kthread::kernel_thread_bootstrap_stage1,
37 syscall::ARCH_SET_FS,
38 table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
39 };
40
41 use super::{fpu::FpState, interrupt::TrapFrame, syscall::X86_64GSData, CurrentIrqArch};
42
43 pub mod idle;
44 pub mod kthread;
45 pub mod syscall;
46 pub mod table;
47
extern "C" {
    /// Return-from-interrupt routine, defined outside this file.
    ///
    /// In this file only its address is used: it is stored as the `rip` at which a
    /// task starts executing (see `copy_thread` and `arch_switch_to_user`).
    fn ret_from_intr();
}
52
/// Storage type, aligned to 32768 bytes, for the idle process's kernel stack.
#[allow(dead_code)]
#[repr(align(32768))]
union InitProcUnion {
    /// Holds the kernel stack of the idle process.
    idle_stack: [u8; 32768],
}
59
/// Zero-initialized idle-stack storage, placed in the `.data.init_proc_union`
/// link section and exported under its unmangled name.
///
/// NOTE(review): "BSP" presumably means the bootstrap processor — confirm.
#[link_section = ".data.init_proc_union"]
#[no_mangle]
static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion {
    idle_stack: [0; 32768],
};
65
/// Architecture-specific information stored in a PCB.
///
/// `switch_to_inner` addresses several of these fields by their `offset_of!`
/// offsets from hand-written assembly.
#[derive(Debug)]
#[allow(dead_code)]
pub struct ArchPCBInfo {
    /// RFLAGS; saved for the outgoing task and reloaded for the incoming task by
    /// `switch_to_inner`.
    rflags: usize,
    // rbx, r12-r15: saved and reloaded by `switch_to_inner`.
    rbx: usize,
    r12: usize,
    r13: usize,
    r14: usize,
    r15: usize,
    /// Saved frame pointer; initialized to the top of the kernel stack by `new`.
    rbp: usize,
    /// Saved stack pointer; loaded into `rsp` by `switch_to_inner`.
    rsp: usize,
    /// Address pushed onto the new stack by `switch_to_inner` as the resume address.
    rip: usize,
    /// Storage for a CR2 value; only exposed through `cr2_mut` in this file.
    cr2: usize,
    /// Saved FS base (see `save_fsbase` / `restore_fsbase`).
    fsbase: usize,
    /// Saved GS base (see `save_gsbase` / `restore_gsbase`).
    gsbase: usize,
    /// Saved FS segment selector.
    fs: SegmentSelector,
    /// Saved GS segment selector.
    gs: SegmentSelector,
    /// Holds the PCB's syscall stack and the user-mode rsp stashed during a syscall.
    gsdata: X86_64GSData,
    /// State of the floating-point registers; `None` until first saved.
    fp_state: Option<FpState>,
}
89
90 #[allow(dead_code)]
91 impl ArchPCBInfo {
92 /// 创建一个新的ArchPCBInfo
93 ///
94 /// ## 参数
95 ///
96 /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。
97 ///
98 /// ## 返回值
99 ///
100 /// 返回一个新的ArchPCBInfo
101 #[inline(never)]
new(kstack: &KernelStack) -> Self102 pub fn new(kstack: &KernelStack) -> Self {
103 let mut r = Self {
104 rflags: 0,
105 rbx: 0,
106 r12: 0,
107 r13: 0,
108 r14: 0,
109 r15: 0,
110 rbp: 0,
111 rsp: 0,
112 rip: 0,
113 cr2: 0,
114 fsbase: 0,
115 gsbase: 0,
116 gsdata: X86_64GSData {
117 kaddr: VirtAddr::new(0),
118 uaddr: VirtAddr::new(0),
119 },
120 fs: KERNEL_DS,
121 gs: KERNEL_DS,
122 fp_state: None,
123 };
124
125 r.rsp = kstack.stack_max_address().data() - 8;
126 r.rbp = kstack.stack_max_address().data();
127
128 return r;
129 }
130
set_stack(&mut self, stack: VirtAddr)131 pub fn set_stack(&mut self, stack: VirtAddr) {
132 self.rsp = stack.data();
133 }
134
set_stack_base(&mut self, stack_base: VirtAddr)135 pub fn set_stack_base(&mut self, stack_base: VirtAddr) {
136 self.rbp = stack_base.data();
137 }
138
rbp(&self) -> usize139 pub fn rbp(&self) -> usize {
140 self.rbp
141 }
142
push_to_stack(&mut self, value: usize)143 pub unsafe fn push_to_stack(&mut self, value: usize) {
144 self.rsp -= core::mem::size_of::<usize>();
145 *(self.rsp as *mut usize) = value;
146 }
147
pop_from_stack(&mut self) -> usize148 pub unsafe fn pop_from_stack(&mut self) -> usize {
149 let value = *(self.rsp as *const usize);
150 self.rsp += core::mem::size_of::<usize>();
151 value
152 }
153
save_fp_state(&mut self)154 pub fn save_fp_state(&mut self) {
155 if self.fp_state.is_none() {
156 self.fp_state = Some(FpState::new());
157 }
158
159 self.fp_state.as_mut().unwrap().save();
160 }
161
restore_fp_state(&mut self)162 pub fn restore_fp_state(&mut self) {
163 if unlikely(self.fp_state.is_none()) {
164 return;
165 }
166
167 self.fp_state.as_mut().unwrap().restore();
168 }
169
170 /// 返回浮点寄存器结构体的副本
fp_state(&self) -> &Option<FpState>171 pub fn fp_state(&self) -> &Option<FpState> {
172 &self.fp_state
173 }
174
175 // 清空浮点寄存器
clear_fp_state(&mut self)176 pub fn clear_fp_state(&mut self) {
177 if unlikely(self.fp_state.is_none()) {
178 kwarn!("fp_state is none");
179 return;
180 }
181
182 self.fp_state.as_mut().unwrap().clear();
183 }
save_fsbase(&mut self)184 pub unsafe fn save_fsbase(&mut self) {
185 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
186 self.fsbase = x86::current::segmentation::rdfsbase() as usize;
187 } else {
188 self.fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE) as usize;
189 }
190 }
191
save_gsbase(&mut self)192 pub unsafe fn save_gsbase(&mut self) {
193 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
194 self.gsbase = x86::current::segmentation::rdgsbase() as usize;
195 } else {
196 self.gsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE) as usize;
197 }
198 }
199
restore_fsbase(&mut self)200 pub unsafe fn restore_fsbase(&mut self) {
201 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
202 x86::current::segmentation::wrfsbase(self.fsbase as u64);
203 } else {
204 x86::msr::wrmsr(x86::msr::IA32_FS_BASE, self.fsbase as u64);
205 }
206 }
207
restore_gsbase(&mut self)208 pub unsafe fn restore_gsbase(&mut self) {
209 if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
210 x86::current::segmentation::wrgsbase(self.gsbase as u64);
211 } else {
212 x86::msr::wrmsr(x86::msr::IA32_GS_BASE, self.gsbase as u64);
213 }
214 }
215
216 /// 将gsdata写入KernelGsbase寄存器
store_kernel_gsbase(&self)217 pub unsafe fn store_kernel_gsbase(&self) {
218 x86::msr::wrmsr(
219 x86::msr::IA32_KERNEL_GSBASE,
220 &self.gsdata as *const X86_64GSData as u64,
221 );
222 }
223
224 /// ### 初始化系统调用栈,不得与PCB内核栈冲突(即传入的应该是一个新的栈,避免栈损坏)
init_syscall_stack(&mut self, stack: &KernelStack)225 pub fn init_syscall_stack(&mut self, stack: &KernelStack) {
226 self.gsdata.set_kstack(stack.stack_max_address() - 8);
227 }
228
fsbase(&self) -> usize229 pub fn fsbase(&self) -> usize {
230 self.fsbase
231 }
232
gsbase(&self) -> usize233 pub fn gsbase(&self) -> usize {
234 self.gsbase
235 }
236
cr2_mut(&mut self) -> &mut usize237 pub fn cr2_mut(&mut self) -> &mut usize {
238 &mut self.cr2
239 }
240
fp_state_mut(&mut self) -> &mut Option<FpState>241 pub fn fp_state_mut(&mut self) -> &mut Option<FpState> {
242 &mut self.fp_state
243 }
244
245 /// ### 克隆ArchPCBInfo,需要注意gsdata也是对应clone的
clone_all(&self) -> Self246 pub fn clone_all(&self) -> Self {
247 Self {
248 rflags: self.rflags,
249 rbx: self.rbx,
250 r12: self.r12,
251 r13: self.r13,
252 r14: self.r14,
253 r15: self.r15,
254 rbp: self.rbp,
255 rsp: self.rsp,
256 rip: self.rip,
257 cr2: self.cr2,
258 fsbase: self.fsbase,
259 gsbase: self.gsbase,
260 fs: self.fs.clone(),
261 gs: self.gs.clone(),
262 gsdata: self.gsdata.clone(),
263 fp_state: self.fp_state,
264 }
265 }
266
267 // ### 从另一个ArchPCBInfo处clone,gsdata会被保留
clone_from(&mut self, from: &Self)268 pub fn clone_from(&mut self, from: &Self) {
269 let gsdata = self.gsdata.clone();
270 *self = from.clone_all();
271 self.gsdata = gsdata;
272 }
273 }
274
275 impl ProcessControlBlock {
276 /// 获取当前进程的pcb
arch_current_pcb() -> Arc<Self>277 pub fn arch_current_pcb() -> Arc<Self> {
278 // 获取栈指针
279 let ptr = VirtAddr::new(x86::current::registers::rsp() as usize);
280
281 let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1)));
282
283 // 从内核栈的最低地址处取出pcb的地址
284 let p = stack_base.data() as *const *const ProcessControlBlock;
285 if unlikely((unsafe { *p }).is_null()) {
286 kerror!("p={:p}", p);
287 panic!("current_pcb is null");
288 }
289 unsafe {
290 // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下
291 let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> =
292 ManuallyDrop::new(Weak::from_raw(*p));
293
294 let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap();
295 return new_arc;
296 }
297 }
298 }
299
300 impl ProcessManager {
arch_init()301 pub fn arch_init() {
302 {
303 // 初始化进程切换结果 per cpu变量
304 let mut switch_res_vec: Vec<SwitchResult> = Vec::new();
305 for _ in 0..PerCpu::MAX_CPU_NUM {
306 switch_res_vec.push(SwitchResult::new());
307 }
308 unsafe {
309 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap());
310 }
311 }
312 }
313 /// fork的过程中复制线程
314 ///
315 /// 由于这个过程与具体的架构相关,所以放在这里
copy_thread( current_pcb: &Arc<ProcessControlBlock>, new_pcb: &Arc<ProcessControlBlock>, clone_args: KernelCloneArgs, current_trapframe: &TrapFrame, ) -> Result<(), SystemError>316 pub fn copy_thread(
317 current_pcb: &Arc<ProcessControlBlock>,
318 new_pcb: &Arc<ProcessControlBlock>,
319 clone_args: KernelCloneArgs,
320 current_trapframe: &TrapFrame,
321 ) -> Result<(), SystemError> {
322 let clone_flags = clone_args.flags;
323 let mut child_trapframe = current_trapframe.clone();
324
325 // 子进程的返回值为0
326 child_trapframe.set_return_value(0);
327
328 // 设置子进程的栈基址(开始执行中断返回流程时的栈基址)
329 let mut new_arch_guard = unsafe { new_pcb.arch_info() };
330 let kernel_stack_guard = new_pcb.kernel_stack();
331
332 // 设置子进程在内核态开始执行时的rsp、rbp
333 new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address());
334
335 let trap_frame_vaddr: VirtAddr =
336 kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>();
337 new_arch_guard.set_stack(trap_frame_vaddr);
338
339 // 拷贝栈帧
340 unsafe {
341 let usp = clone_args.stack;
342 if usp != 0 {
343 child_trapframe.rsp = usp as u64;
344 }
345 let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
346 *trap_frame_ptr = child_trapframe;
347 }
348
349 let current_arch_guard = current_pcb.arch_info_irqsave();
350 new_arch_guard.fsbase = current_arch_guard.fsbase;
351 new_arch_guard.gsbase = current_arch_guard.gsbase;
352 new_arch_guard.fs = current_arch_guard.fs;
353 new_arch_guard.gs = current_arch_guard.gs;
354 new_arch_guard.fp_state = current_arch_guard.fp_state.clone();
355
356 // 拷贝浮点寄存器的状态
357 if let Some(fp_state) = current_arch_guard.fp_state.as_ref() {
358 new_arch_guard.fp_state = Some(*fp_state);
359 }
360 drop(current_arch_guard);
361
362 // 设置返回地址(子进程开始执行的指令地址)
363 if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
364 let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
365 new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
366 } else {
367 new_arch_guard.rip = ret_from_intr as usize;
368 }
369
370 // 设置tls
371 if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
372 drop(new_arch_guard);
373 Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?;
374 }
375
376 return Ok(());
377 }
378
379 /// 切换进程
380 ///
381 /// ## 参数
382 ///
383 /// - `prev`:上一个进程的pcb
384 /// - `next`:下一个进程的pcb
switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>)385 pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) {
386 assert!(CurrentIrqArch::is_irq_enabled() == false);
387
388 // 保存浮点寄存器
389 prev.arch_info_irqsave().save_fp_state();
390 // 切换浮点寄存器
391 next.arch_info_irqsave().restore_fp_state();
392
393 // 切换fsbase
394 prev.arch_info_irqsave().save_fsbase();
395 next.arch_info_irqsave().restore_fsbase();
396
397 // 切换gsbase
398 Self::switch_gsbase(&prev, &next);
399
400 // 切换地址空间
401 let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone();
402 compiler_fence(Ordering::SeqCst);
403
404 next_addr_space.read().user_mapper.utable.make_current();
405 drop(next_addr_space);
406 compiler_fence(Ordering::SeqCst);
407 // 切换内核栈
408
409 // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁)
410 let next_arch = SpinLockGuard::leak(next.arch_info_irqsave()) as *mut ArchPCBInfo;
411 let prev_arch = SpinLockGuard::leak(prev.arch_info_irqsave()) as *mut ArchPCBInfo;
412
413 (*prev_arch).rip = switch_back as usize;
414
415 // 恢复当前的 preempt count*2
416 ProcessManager::current_pcb().preempt_enable();
417 ProcessManager::current_pcb().preempt_enable();
418
419 // 切换tss
420 TSSManager::current_tss().set_rsp(
421 x86::Ring::Ring0,
422 next.kernel_stack().stack_max_address().data() as u64,
423 );
424 SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev);
425 SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next);
426 // kdebug!("switch tss ok");
427 compiler_fence(Ordering::SeqCst);
428 // 正式切换上下文
429 switch_to_inner(prev_arch, next_arch);
430 }
431
switch_gsbase(prev: &Arc<ProcessControlBlock>, next: &Arc<ProcessControlBlock>)432 unsafe fn switch_gsbase(prev: &Arc<ProcessControlBlock>, next: &Arc<ProcessControlBlock>) {
433 asm!("swapgs", options(nostack, preserves_flags));
434 prev.arch_info_irqsave().save_gsbase();
435 next.arch_info_irqsave().restore_gsbase();
436 // 将下一个进程的kstack写入kernel_gsbase
437 next.arch_info_irqsave().store_kernel_gsbase();
438 asm!("swapgs", options(nostack, preserves_flags));
439 }
440 }
441
/// Saves the context of `prev`, loads the context of `next`, then jumps to the
/// `switch_finish_hook` hook function.
///
/// `prev` arrives in `rdi` and `next` in `rsi`; the fields of both `ArchPCBInfo`
/// values are addressed through the `offset_of!` constants passed to the asm block.
/// `rbp` and `rax` are pushed onto the old stack before its `rsp` is saved;
/// `switch_back` pops them again when `prev` is resumed.
#[naked]
unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) {
    asm!(
        // As a quick reminder for those who are unfamiliar with the System V ABI (extern "sysv64"):
        //
        // - the current parameters are passed in the registers `rdi`, `rsi`,
        // - we can modify scratch registers, e.g. rax
        // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we
        //   store them here in the first place.
        concat!("
        // Save old registers, and load new ones
        mov [rdi + {off_rbx}], rbx
        mov rbx, [rsi + {off_rbx}]

        mov [rdi + {off_r12}], r12
        mov r12, [rsi + {off_r12}]

        mov [rdi + {off_r13}], r13
        mov r13, [rsi + {off_r13}]

        mov [rdi + {off_r14}], r14
        mov r14, [rsi + {off_r14}]

        mov [rdi + {off_r15}], r15
        mov r15, [rsi + {off_r15}]

        // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换)
        mov [rdi + {off_fs}], fs
        mov [rdi + {off_gs}], gs

        // mov fs, [rsi + {off_fs}]
        // mov gs, [rsi + {off_gs}]

        push rbp
        push rax

        mov [rdi + {off_rbp}], rbp
        mov rbp, [rsi + {off_rbp}]

        mov [rdi + {off_rsp}], rsp
        mov rsp, [rsi + {off_rsp}]

        // // push RFLAGS (can only be modified via stack)
        pushfq
        // // pop RFLAGS into `self.rflags`
        pop QWORD PTR [rdi + {off_rflags}]

        // // push `next.rflags`
        push QWORD PTR [rsi + {off_rflags}]
        // // pop into RFLAGS
        popfq

        // push next rip to stack
        push QWORD PTR [rsi + {off_rip}]


        // When we return, we cannot even guarantee that the return address on the stack, points to
        // the calling function. Thus, we have to execute this Rust hook by
        // ourselves, which will unlock the contexts before the later switch.

        // Note that switch_finish_hook will be responsible for executing `ret`.
        jmp {switch_hook}
        "),

        off_rflags = const(offset_of!(ArchPCBInfo, rflags)),

        off_rbx = const(offset_of!(ArchPCBInfo, rbx)),
        off_r12 = const(offset_of!(ArchPCBInfo, r12)),
        off_r13 = const(offset_of!(ArchPCBInfo, r13)),
        off_r14 = const(offset_of!(ArchPCBInfo, r14)),
        off_rbp = const(offset_of!(ArchPCBInfo, rbp)),
        off_rsp = const(offset_of!(ArchPCBInfo, rsp)),
        off_r15 = const(offset_of!(ArchPCBInfo, r15)),
        off_rip = const(offset_of!(ArchPCBInfo, rip)),
        off_fs = const(offset_of!(ArchPCBInfo, fs)),
        off_gs = const(offset_of!(ArchPCBInfo, gs)),

        switch_hook = sym crate::process::switch_finish_hook,
        options(noreturn),
    );
}
524
/// Executed after "returning" from `switch_to_inner`.
///
/// In other words, when a process is scheduled again it starts executing here:
/// `switch_process` stores this function's address as the saved `rip` of the
/// process being switched out.
///
/// Pops `rax` and `rbp`, the two registers `switch_to_inner` pushed onto the old
/// stack before saving its `rsp`.
#[inline(never)]
unsafe extern "sysv64" fn switch_back() {
    asm!(concat!(
        "
        pop rax
        pop rbp
        "
    ))
}
537
/// Turns the current kernel thread into a user process by executing `path`, then
/// enters it through `ready_to_switch_to_user`. Never returns.
///
/// ## Parameters
///
/// - `path`: path of the program passed to `do_execve`
/// - `argv`: argument vector passed to `do_execve`
/// - `envp`: environment vector passed to `do_execve`
///
/// # Panics
///
/// Panics if the current `rsp` is not below the address reserved for the trap
/// frame, or if `do_execve` fails.
///
/// # Safety
///
/// Disables interrupts, rewrites the current PCB's arch info, writes a trap frame
/// at the top of the current kernel stack and finally replaces the stack pointer.
pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {
    // No interrupt may occur in the code below.
    CurrentIrqArch::interrupt_disable();

    let current_pcb = ProcessManager::current_pcb();
    // The trap frame is placed at the very top of the current kernel stack.
    let trap_frame_vaddr = VirtAddr::new(
        current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(),
    );
    // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr);
    let new_rip = VirtAddr::new(ret_from_intr as usize);

    // The stack currently in use must lie entirely below the trap-frame area.
    assert!(
        (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(),
        "arch_switch_to_user(): current_rsp >= fake trap
        frame vaddr, this may cause some illegal access to memory!
        rsp: {:#x}, trap_frame_vaddr: {:#x}",
        x86::current::registers::rsp() as usize,
        trap_frame_vaddr.data()
    );

    let mut arch_guard = current_pcb.arch_info_irqsave();
    arch_guard.rsp = trap_frame_vaddr.data();

    arch_guard.fs = USER_DS;
    arch_guard.gs = USER_DS;

    // Load the kernel gs data into the CPU.
    arch_guard.store_kernel_gsbase();

    switch_fs_and_gs(
        SegmentSelector::from_bits_truncate(arch_guard.fs.bits()),
        SegmentSelector::from_bits_truncate(arch_guard.gs.bits()),
    );
    arch_guard.rip = new_rip.data();

    drop(arch_guard);

    // Remove the kthread flag.
    current_pcb.flags().remove(ProcessFlags::KTHREAD);
    current_pcb.worker_private().take();

    let mut trap_frame = TrapFrame::new();

    compiler_fence(Ordering::SeqCst);
    Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| {
        panic!(
            "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}",
            pid = current_pcb.pid(),
            e = e
        );
    });
    compiler_fence(Ordering::SeqCst);

    // Important! From here on, every reference-counted variable, dynamically
    // allocated variable and lock guard above must already have been dropped;
    // otherwise memory-safety problems may occur.

    drop(current_pcb);

    compiler_fence(Ordering::SeqCst);
    ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data());
}
598
/// Final step of entering user mode.
///
/// Must not be inlined, because it relies on `ret` to switch to user mode.
///
/// Writes `trap_frame` to `trapframe_vaddr`, executes `swapgs`, points `rsp` at
/// the trap frame, pushes `new_rip` and executes `ret`, so execution continues at
/// `new_rip` with the trap frame directly above the stack pointer.
///
/// # Safety
///
/// `trapframe_vaddr` must be writable memory large enough for a `TrapFrame`, and
/// `new_rip` must be the address of code that consumes that frame.
#[inline(never)]
unsafe extern "sysv64" fn ready_to_switch_to_user(
    trap_frame: TrapFrame,
    trapframe_vaddr: usize,
    new_rip: usize,
) -> ! {
    // Place the prepared trap frame at its final location.
    *(trapframe_vaddr as *mut TrapFrame) = trap_frame;
    asm!(
        "swapgs",
        "mov rsp, {trapframe_vaddr}",
        "push {new_rip}",
        "ret",
        trapframe_vaddr = in(reg) trapframe_vaddr,
        new_rip = in(reg) new_rip
    );
    // The `ret` above transfers control to `new_rip`; execution is not expected
    // to reach this point.
    unreachable!()
}
617