1 use alloc::vec::Vec; 2 use core::{intrinsics::unlikely, sync::atomic::Ordering}; 3 4 use alloc::{string::ToString, sync::Arc}; 5 use log::error; 6 use system_error::SystemError; 7 8 use crate::{ 9 arch::{interrupt::TrapFrame, ipc::signal::Signal}, 10 filesystem::procfs::procfs_register_pid, 11 ipc::signal::flush_signal_handlers, 12 libs::rwlock::RwLock, 13 mm::VirtAddr, 14 namespaces::{create_new_namespaces, namespace::USER_NS, pid_namespace::PidStrcut}, 15 process::ProcessFlags, 16 sched::{sched_cgroup_fork, sched_fork}, 17 smp::core::smp_get_processor_id, 18 syscall::user_access::UserBufferWriter, 19 }; 20 21 use super::{ 22 kthread::{KernelThreadPcbPrivate, WorkerPrivate}, 23 KernelStack, Pid, ProcessControlBlock, ProcessManager, 24 }; 25 const MAX_PID_NS_LEVEL: usize = 32; 26 27 bitflags! { 28 /// 进程克隆标志 29 pub struct CloneFlags: u64 { 30 /// 在进程间共享虚拟内存空间 31 const CLONE_VM = 0x00000100; 32 /// 在进程间共享文件系统信息 33 const CLONE_FS = 0x00000200; 34 /// 共享打开的文件 35 const CLONE_FILES = 0x00000400; 36 /// 克隆时,与父进程共享信号处理结构体 37 const CLONE_SIGHAND = 0x00000800; 38 /// 返回进程的文件描述符 39 const CLONE_PIDFD = 0x00001000; 40 /// 使克隆对象成为父进程的跟踪对象 41 const CLONE_PTRACE = 0x00002000; 42 /// 在执行 exec() 或 _exit() 之前挂起父进程的执行 43 const CLONE_VFORK = 0x00004000; 44 /// 使克隆对象的父进程为调用进程的父进程 45 const CLONE_PARENT = 0x00008000; 46 /// 拷贝线程 47 const CLONE_THREAD = 0x00010000; 48 /// 创建一个新的命名空间,其中包含独立的文件系统挂载点层次结构。 49 const CLONE_NEWNS = 0x00020000; 50 /// 与父进程共享 System V 信号量。 51 const CLONE_SYSVSEM = 0x00040000; 52 /// 设置其线程本地存储 53 const CLONE_SETTLS = 0x00080000; 54 /// 设置partent_tid地址为子进程线程 ID 55 const CLONE_PARENT_SETTID = 0x00100000; 56 /// 在子进程中设置一个清除线程 ID 的用户空间地址 57 const CLONE_CHILD_CLEARTID = 0x00200000; 58 /// 创建一个新线程,将其设置为分离状态 59 const CLONE_DETACHED = 0x00400000; 60 /// 使其在创建者进程或线程视角下成为无法跟踪的。 61 const CLONE_UNTRACED = 0x00800000; 62 /// 设置其子进程线程 ID 63 const CLONE_CHILD_SETTID = 0x01000000; 64 /// 将其放置在一个新的 cgroup 命名空间中 65 const CLONE_NEWCGROUP = 0x02000000; 66 /// 将其放置在一个新的 UTS 命名空间中 67 const CLONE_NEWUTS = 0x04000000; 68 /// 将其放置在一个新的 IPC 命名空间中 69 const CLONE_NEWIPC = 0x08000000; 70 /// 将其放置在一个新的用户命名空间中 71 const CLONE_NEWUSER = 0x10000000; 72 /// 将其放置在一个新的 PID 命名空间中 73 const CLONE_NEWPID = 0x20000000; 74 /// 将其放置在一个新的网络命名空间中 75 const CLONE_NEWNET = 0x40000000; 76 /// 在新的 I/O 上下文中运行它 77 const CLONE_IO = 0x80000000; 78 /// 克隆时,与父进程共享信号结构体 79 const CLONE_SIGNAL = 0x00010000 | 0x00000800; 80 /// 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT 81 const CLONE_CLEAR_SIGHAND = 0x100000000; 82 } 83 } 84 85 /// ## clone与clone3系统调用的参数载体 86 /// 87 /// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活 88 /// 89 /// 仅仅作为参数传递 90 #[allow(dead_code)] 91 #[derive(Debug, Clone)] 92 pub struct KernelCloneArgs { 93 pub flags: CloneFlags, 94 95 // 下列属性均来自用户空间 96 pub pidfd: VirtAddr, 97 pub child_tid: VirtAddr, 98 pub parent_tid: VirtAddr, 99 pub set_tid: Vec<usize>, 100 101 /// 进程退出时发送的信号 102 pub exit_signal: Signal, 103 104 pub stack: usize, 105 // clone3用到 106 pub stack_size: usize, 107 pub tls: usize, 108 109 pub set_tid_size: usize, 110 pub cgroup: i32, 111 112 pub io_thread: bool, 113 pub kthread: bool, 114 pub idle: bool, 115 pub func: VirtAddr, 116 pub fn_arg: VirtAddr, 117 // cgrp 和 cset? 118 } 119 120 impl KernelCloneArgs { new() -> Self121 pub fn new() -> Self { 122 let null_addr = VirtAddr::new(0); 123 Self { 124 flags: unsafe { CloneFlags::from_bits_unchecked(0) }, 125 pidfd: null_addr, 126 child_tid: null_addr, 127 parent_tid: null_addr, 128 set_tid: Vec::with_capacity(MAX_PID_NS_LEVEL), 129 exit_signal: Signal::SIGCHLD, 130 stack: 0, 131 stack_size: 0, 132 tls: 0, 133 set_tid_size: 0, 134 cgroup: 0, 135 io_thread: false, 136 kthread: false, 137 idle: false, 138 func: null_addr, 139 fn_arg: null_addr, 140 } 141 } 142 } 143 144 impl ProcessManager { 145 /// 创建一个新进程 146 /// 147 /// ## 参数 148 /// 149 /// - `current_trapframe`: 当前进程的trapframe 150 /// - `clone_flags`: 进程克隆标志 151 /// 152 /// ## 返回值 153 /// 154 /// - 成功:返回新进程的pid 155 /// - 失败:返回Err(SystemError),fork失败的话,子线程不会执行。 156 /// 157 /// ## Safety 158 /// 159 /// - fork失败的话,子线程不会执行。 fork( current_trapframe: &TrapFrame, clone_flags: CloneFlags, ) -> Result<Pid, SystemError>160 pub fn fork( 161 current_trapframe: &TrapFrame, 162 clone_flags: CloneFlags, 163 ) -> Result<Pid, SystemError> { 164 let current_pcb = ProcessManager::current_pcb(); 165 166 let new_kstack: KernelStack = KernelStack::new()?; 167 168 let name = current_pcb.basic().name().to_string(); 169 170 let pcb = ProcessControlBlock::new(name, new_kstack); 171 172 // TODO: 注意!这里设置tty的操作不符合Linux的行为!(毕竟创建进程不一定要fork,也可以用clone来创建) 173 // 正确做法应该是在实现进程组之后去管理前台进程组。 174 pcb.sig_info_mut() 175 .set_tty(current_pcb.sig_info_irqsave().tty()); 176 177 let mut args = KernelCloneArgs::new(); 178 args.flags = clone_flags; 179 args.exit_signal = Signal::SIGCHLD; 180 Self::copy_process(¤t_pcb, &pcb, args, current_trapframe).map_err(|e| { 181 error!( 182 "fork: Failed to copy process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 183 current_pcb.pid(), 184 pcb.pid(), 185 e 186 ); 187 e 188 })?; 189 ProcessManager::add_pcb(pcb.clone()); 190 191 // 向procfs注册进程 192 procfs_register_pid(pcb.pid()).unwrap_or_else(|e| { 193 panic!( 194 "fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}", 195 pcb.pid(), 196 e 197 ) 198 }); 199 200 pcb.sched_info().set_on_cpu(Some(smp_get_processor_id())); 201 202 ProcessManager::wakeup(&pcb).unwrap_or_else(|e| { 203 panic!( 204 "fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}", 205 pcb.pid(), 206 e 207 ) 208 }); 209 210 return Ok(pcb.pid()); 211 } 212 copy_flags( clone_flags: &CloneFlags, new_pcb: &Arc<ProcessControlBlock>, ) -> Result<(), SystemError>213 fn copy_flags( 214 clone_flags: &CloneFlags, 215 new_pcb: &Arc<ProcessControlBlock>, 216 ) -> Result<(), SystemError> { 217 if clone_flags.contains(CloneFlags::CLONE_VM) { 218 new_pcb.flags().insert(ProcessFlags::VFORK); 219 } 220 *new_pcb.flags.get_mut() = *ProcessManager::current_pcb().flags(); 221 return Ok(()); 222 } 223 224 /// 拷贝进程的地址空间 225 /// 226 /// ## 参数 227 /// 228 /// - `clone_vm`: 是否与父进程共享地址空间。true表示共享 229 /// - `new_pcb`: 新进程的pcb 230 /// 231 /// ## 返回值 232 /// 233 /// - 成功:返回Ok(()) 234 /// - 失败:返回Err(SystemError) 235 /// 236 /// ## Panic 237 /// 238 /// - 如果当前进程没有用户地址空间,则panic 239 #[inline(never)] copy_mm( clone_flags: &CloneFlags, current_pcb: &Arc<ProcessControlBlock>, new_pcb: &Arc<ProcessControlBlock>, ) -> Result<(), SystemError>240 fn copy_mm( 241 clone_flags: &CloneFlags, 242 current_pcb: &Arc<ProcessControlBlock>, 243 new_pcb: &Arc<ProcessControlBlock>, 244 ) -> Result<(), SystemError> { 245 let old_address_space = current_pcb.basic().user_vm().unwrap_or_else(|| { 246 panic!( 247 "copy_mm: Failed to get address space of current process, current pid: [{:?}]", 248 current_pcb.pid() 249 ) 250 }); 251 252 if clone_flags.contains(CloneFlags::CLONE_VM) { 253 unsafe { new_pcb.basic_mut().set_user_vm(Some(old_address_space)) }; 254 return Ok(()); 255 } 256 let new_address_space = old_address_space.write_irqsave().try_clone().unwrap_or_else(|e| { 257 panic!( 258 "copy_mm: Failed to clone address space of current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 259 current_pcb.pid(), new_pcb.pid(), e 260 ) 261 }); 262 unsafe { new_pcb.basic_mut().set_user_vm(Some(new_address_space)) }; 263 return Ok(()); 264 } 265 266 #[inline(never)] copy_namespaces( clone_flags: &CloneFlags, current_pcb: &Arc<ProcessControlBlock>, new_pcb: &Arc<ProcessControlBlock>, ) -> Result<(), SystemError>267 fn copy_namespaces( 268 clone_flags: &CloneFlags, 269 current_pcb: &Arc<ProcessControlBlock>, 270 new_pcb: &Arc<ProcessControlBlock>, 271 ) -> Result<(), SystemError> { 272 if !clone_flags.contains(CloneFlags::CLONE_NEWNS) 273 && !clone_flags.contains(CloneFlags::CLONE_NEWUTS) 274 && !clone_flags.contains(CloneFlags::CLONE_NEWIPC) 275 && !clone_flags.contains(CloneFlags::CLONE_NEWPID) 276 && !clone_flags.contains(CloneFlags::CLONE_NEWNET) 277 && !clone_flags.contains(CloneFlags::CLONE_NEWCGROUP) 278 { 279 new_pcb.set_nsproxy(current_pcb.get_nsproxy().read().clone()); 280 return Ok(()); 281 } 282 283 if clone_flags.contains(CloneFlags::CLONE_NEWIPC) 284 && clone_flags.contains(CloneFlags::CLONE_SYSVSEM) 285 { 286 return Err(SystemError::EINVAL); 287 } 288 289 let new_nsproxy = create_new_namespaces(clone_flags.bits(), current_pcb, USER_NS.clone())?; 290 *new_pcb.nsproxy.write() = new_nsproxy; 291 Ok(()) 292 } 293 294 #[inline(never)] copy_files( clone_flags: &CloneFlags, current_pcb: &Arc<ProcessControlBlock>, new_pcb: &Arc<ProcessControlBlock>, ) -> Result<(), SystemError>295 fn copy_files( 296 clone_flags: &CloneFlags, 297 current_pcb: &Arc<ProcessControlBlock>, 298 new_pcb: &Arc<ProcessControlBlock>, 299 ) -> Result<(), SystemError> { 300 // 如果不共享文件描述符表,则拷贝文件描述符表 301 if !clone_flags.contains(CloneFlags::CLONE_FILES) { 302 let new_fd_table = current_pcb.basic().fd_table().unwrap().read().clone(); 303 let new_fd_table = Arc::new(RwLock::new(new_fd_table)); 304 new_pcb.basic_mut().set_fd_table(Some(new_fd_table)); 305 } else { 306 // 如果共享文件描述符表,则直接拷贝指针 307 new_pcb 308 .basic_mut() 309 .set_fd_table(current_pcb.basic().fd_table().clone()); 310 } 311 312 return Ok(()); 313 } 314 315 #[allow(dead_code)] copy_sighand( clone_flags: &CloneFlags, current_pcb: &Arc<ProcessControlBlock>, new_pcb: &Arc<ProcessControlBlock>, ) -> Result<(), SystemError>316 fn copy_sighand( 317 clone_flags: &CloneFlags, 318 current_pcb: &Arc<ProcessControlBlock>, 319 new_pcb: &Arc<ProcessControlBlock>, 320 ) -> Result<(), SystemError> { 321 // // 将信号的处理函数设置为default(除了那些被手动屏蔽的) 322 if clone_flags.contains(CloneFlags::CLONE_CLEAR_SIGHAND) { 323 flush_signal_handlers(new_pcb.clone(), false); 324 } 325 326 if clone_flags.contains(CloneFlags::CLONE_SIGHAND) { 327 new_pcb.sig_struct_irqsave().handlers = current_pcb.sig_struct_irqsave().handlers; 328 } 329 return Ok(()); 330 } 331 332 /// 拷贝进程信息 333 /// 334 /// ## panic: 335 /// 某一步拷贝失败时会引发panic 336 /// 例如:copy_mm等失败时会触发panic 337 /// 338 /// ## 参数 339 /// 340 /// - clone_flags 标志位 341 /// - current_pcb 拷贝源pcb 342 /// - pcb 目标pcb 343 /// 344 /// ## return 345 /// - 发生错误时返回Err(SystemError) 346 #[inline(never)] copy_process( current_pcb: &Arc<ProcessControlBlock>, pcb: &Arc<ProcessControlBlock>, clone_args: KernelCloneArgs, current_trapframe: &TrapFrame, ) -> Result<(), SystemError>347 pub fn copy_process( 348 current_pcb: &Arc<ProcessControlBlock>, 349 pcb: &Arc<ProcessControlBlock>, 350 clone_args: KernelCloneArgs, 351 current_trapframe: &TrapFrame, 352 ) -> Result<(), SystemError> { 353 let clone_flags = clone_args.flags; 354 // 不允许与不同namespace的进程共享根目录 355 if (clone_flags == (CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS)) 356 || clone_flags == (CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_FS) 357 { 358 return Err(SystemError::EINVAL); 359 } 360 361 // 线程组必须共享信号,分离线程只能在线程组内启动。 362 if clone_flags.contains(CloneFlags::CLONE_THREAD) 363 && !clone_flags.contains(CloneFlags::CLONE_SIGHAND) 364 { 365 return Err(SystemError::EINVAL); 366 } 367 368 // 共享信号处理器意味着共享vm。 369 // 线程组也意味着共享vm。阻止这种情况可以简化其他代码。 370 if clone_flags.contains(CloneFlags::CLONE_SIGHAND) 371 && !clone_flags.contains(CloneFlags::CLONE_VM) 372 { 373 return Err(SystemError::EINVAL); 374 } 375 376 // TODO: 处理CLONE_PARENT 与 SIGNAL_UNKILLABLE的情况 377 378 // 如果新进程使用不同的 pid 或 namespace, 379 // 则不允许它与分叉任务共享线程组。 380 if clone_flags.contains(CloneFlags::CLONE_THREAD) 381 && clone_flags.contains(CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_NEWPID) 382 { 383 return Err(SystemError::EINVAL); 384 // TODO: 判断新进程与当前进程namespace是否相同,不同则返回错误 385 } 386 387 // 如果新进程将处于不同的time namespace, 388 // 则不能让它共享vm或线程组。 389 if clone_flags.contains(CloneFlags::CLONE_THREAD | CloneFlags::CLONE_VM) { 390 // TODO: 判断time namespace,不同则返回错误 391 } 392 393 if clone_flags.contains(CloneFlags::CLONE_PIDFD) 394 && clone_flags.contains(CloneFlags::CLONE_DETACHED | CloneFlags::CLONE_THREAD) 395 { 396 return Err(SystemError::EINVAL); 397 } 398 399 // TODO: 克隆前应该锁信号处理,等待克隆完成后再处理 400 401 // 克隆架构相关 402 let guard = current_pcb.arch_info_irqsave(); 403 unsafe { pcb.arch_info().clone_from(&guard) }; 404 drop(guard); 405 406 // 为内核线程设置WorkerPrivate 407 if current_pcb.flags().contains(ProcessFlags::KTHREAD) { 408 *pcb.worker_private() = 409 Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new())); 410 } 411 412 // 设置clear_child_tid,在线程结束时将其置0以通知父进程 413 if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) { 414 pcb.thread.write_irqsave().clear_child_tid = Some(clone_args.child_tid); 415 } 416 417 // 设置child_tid,意味着子线程能够知道自己的id 418 if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) { 419 pcb.thread.write_irqsave().set_child_tid = Some(clone_args.child_tid); 420 } 421 422 // 将子进程/线程的id存储在用户态传进的地址中 423 if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) { 424 let mut writer = UserBufferWriter::new( 425 clone_args.parent_tid.data() as *mut i32, 426 core::mem::size_of::<i32>(), 427 true, 428 )?; 429 430 writer.copy_one_to_user(&(pcb.pid().0 as i32), 0)?; 431 } 432 433 sched_fork(pcb).unwrap_or_else(|e| { 434 panic!( 435 "fork: Failed to set sched info from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 436 current_pcb.pid(), pcb.pid(), e 437 ) 438 }); 439 440 // 拷贝标志位 441 Self::copy_flags(&clone_flags, pcb).unwrap_or_else(|e| { 442 panic!( 443 "fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 444 current_pcb.pid(), pcb.pid(), e 445 ) 446 }); 447 448 // 拷贝用户地址空间 449 Self::copy_mm(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| { 450 panic!( 451 "fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 452 current_pcb.pid(), pcb.pid(), e 453 ) 454 }); 455 456 Self::copy_namespaces(&clone_flags, current_pcb, pcb).unwrap_or_else(|e|{ 457 panic!("fork: Failed to copy namespace form current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 458 current_pcb.pid(), pcb.pid(), e) 459 }); 460 461 // 拷贝文件描述符表 462 Self::copy_files(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| { 463 panic!( 464 "fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 465 current_pcb.pid(), pcb.pid(), e 466 ) 467 }); 468 469 // 拷贝信号相关数据 470 Self::copy_sighand(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| { 471 panic!( 472 "fork: Failed to copy sighand from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 473 current_pcb.pid(), pcb.pid(), e 474 ) 475 }); 476 477 // 拷贝线程 478 Self::copy_thread(current_pcb, pcb, &clone_args, current_trapframe).unwrap_or_else(|e| { 479 panic!( 480 "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", 481 current_pcb.pid(), pcb.pid(), e 482 ) 483 }); 484 if current_pcb.pid() != Pid(0) { 485 let new_pid = PidStrcut::alloc_pid( 486 pcb.get_nsproxy().read().pid_namespace.clone(), // 获取命名空间 487 clone_args.set_tid.clone(), 488 )?; 489 *pcb.thread_pid.write() = new_pid; 490 } 491 // 设置线程组id、组长 492 if clone_flags.contains(CloneFlags::CLONE_THREAD) { 493 pcb.thread.write_irqsave().group_leader = 494 current_pcb.thread.read_irqsave().group_leader.clone(); 495 unsafe { 496 let ptr = pcb.as_ref() as *const ProcessControlBlock as *mut ProcessControlBlock; 497 (*ptr).tgid = current_pcb.tgid; 498 } 499 } else { 500 pcb.thread.write_irqsave().group_leader = Arc::downgrade(pcb); 501 unsafe { 502 let ptr = pcb.as_ref() as *const ProcessControlBlock as *mut ProcessControlBlock; 503 (*ptr).tgid = pcb.tgid; 504 } 505 } 506 507 // CLONE_PARENT re-uses the old parent 508 if clone_flags.contains(CloneFlags::CLONE_PARENT | CloneFlags::CLONE_THREAD) { 509 *pcb.real_parent_pcb.write_irqsave() = 510 current_pcb.real_parent_pcb.read_irqsave().clone(); 511 512 if clone_flags.contains(CloneFlags::CLONE_THREAD) { 513 pcb.exit_signal.store(Signal::INVALID, Ordering::SeqCst); 514 } else { 515 let leader = current_pcb.thread.read_irqsave().group_leader(); 516 if unlikely(leader.is_none()) { 517 panic!( 518 "fork: Failed to get leader of current process, current pid: [{:?}]", 519 current_pcb.pid() 520 ); 521 } 522 523 pcb.exit_signal.store( 524 leader.unwrap().exit_signal.load(Ordering::SeqCst), 525 Ordering::SeqCst, 526 ); 527 } 528 } else { 529 // 新创建的进程,设置其父进程为当前进程 530 *pcb.real_parent_pcb.write_irqsave() = Arc::downgrade(current_pcb); 531 pcb.exit_signal 532 .store(clone_args.exit_signal, Ordering::SeqCst); 533 } 534 535 // todo: 增加线程组相关的逻辑。 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/fork.c#2437 536 537 sched_cgroup_fork(pcb); 538 539 Ok(()) 540 } 541 } 542