1 #include "process.h"
2 #include <common/err.h>
3 #include <common/kthread.h>
4 #include <common/spinlock.h>
5 #include <filesystem/procfs/procfs.h>
6 
7 extern spinlock_t process_global_pid_write_lock;
8 extern long process_global_pid;
9 
10 extern void kernel_thread_func(void);
11 
12 int process_copy_files(uint64_t clone_flags, struct process_control_block *pcb);
13 int process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb);
14 int process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb);
15 int process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start,
16                         uint64_t stack_size, struct pt_regs *current_regs);
17 
18 extern int process_copy_sighand(uint64_t clone_flags, struct process_control_block *pcb);
19 extern int process_copy_signal(uint64_t clone_flags, struct process_control_block *pcb);
20 extern void process_exit_sighand(struct process_control_block *pcb);
21 extern void process_exit_signal(struct process_control_block *pcb);
22 
23 /**
24  * @brief fork当前进程
25  *
26  * @param regs 新的寄存器值
27  * @param clone_flags 克隆标志
28  * @param stack_start 堆栈开始地址
29  * @param stack_size 堆栈大小
30  * @return unsigned long
31  */
do_fork(struct pt_regs * regs,unsigned long clone_flags,unsigned long stack_start,unsigned long stack_size)32 unsigned long do_fork(struct pt_regs *regs, unsigned long clone_flags, unsigned long stack_start,
33                       unsigned long stack_size)
34 {
35     int retval = 0;
36     struct process_control_block *tsk = NULL;
37 
38     // 为新的进程分配栈空间,并将pcb放置在底部
39     tsk = (struct process_control_block *)kzalloc(STACK_SIZE, 0);
40     barrier();
41 
42     if (tsk == NULL)
43     {
44         retval = -ENOMEM;
45         return retval;
46     }
47 
48     barrier();
49     memset(tsk, 0, sizeof(struct process_control_block));
50     io_mfence();
51     // 将当前进程的pcb复制到新的pcb内
52     memcpy(tsk, current_pcb, sizeof(struct process_control_block));
53     tsk->worker_private = NULL;
54     io_mfence();
55 
56     // 初始化进程的循环链表结点
57     list_init(&tsk->list);
58 
59     io_mfence();
60     // 判断是否为内核态调用fork
61     if ((current_pcb->flags & PF_KTHREAD) && stack_start != 0)
62         tsk->flags |= PF_KFORK;
63 
64     if (tsk->flags & PF_KTHREAD)
65     {
66         // 对于内核线程,设置其worker私有信息
67         retval = kthread_set_worker_private(tsk);
68         if (IS_ERR_VALUE(retval))
69             goto copy_flags_failed;
70         tsk->virtual_runtime = 0;
71     }
72     tsk->priority = 2;
73     tsk->preempt_count = 0;
74 
75     // 增加全局的pid并赋值给新进程的pid
76     spin_lock(&process_global_pid_write_lock);
77     tsk->pid = process_global_pid++;
78     barrier();
79     // 加入到进程链表中
80     // todo: 对pcb_list_lock加锁
81     tsk->prev_pcb = &initial_proc_union.pcb;
82     barrier();
83     tsk->next_pcb = initial_proc_union.pcb.next_pcb;
84     barrier();
85     initial_proc_union.pcb.next_pcb = tsk;
86     barrier();
87     tsk->parent_pcb = current_pcb;
88     barrier();
89 
90     spin_unlock(&process_global_pid_write_lock);
91 
92     tsk->cpu_id = proc_current_cpu_id;
93     tsk->state = PROC_UNINTERRUPTIBLE;
94 
95     tsk->parent_pcb = current_pcb;
96     wait_queue_init(&tsk->wait_child_proc_exit, NULL);
97     barrier();
98     list_init(&tsk->list);
99 
100     retval = -ENOMEM;
101 
102     // 拷贝标志位
103     retval = process_copy_flags(clone_flags, tsk);
104     if (retval)
105         goto copy_flags_failed;
106 
107     // 拷贝内存空间分布结构体
108     retval = process_copy_mm(clone_flags, tsk);
109     if (retval)
110         goto copy_mm_failed;
111 
112     // 拷贝文件
113     retval = process_copy_files(clone_flags, tsk);
114     if (retval)
115         goto copy_files_failed;
116 
117     // 拷贝信号处理函数
118     retval = process_copy_sighand(clone_flags, tsk);
119     if (retval)
120         goto copy_sighand_failed;
121 
122     retval = process_copy_signal(clone_flags, tsk);
123     if (retval)
124         goto copy_signal_failed;
125 
126     // 拷贝线程结构体
127     retval = process_copy_thread(clone_flags, tsk, stack_start, stack_size, regs);
128     if (retval)
129         goto copy_thread_failed;
130 
131     // 拷贝成功
132     retval = tsk->pid;
133 
134     tsk->flags &= ~PF_KFORK;
135 
136     // 唤醒进程
137     process_wakeup(tsk);
138 
139     //创建对应procfs文件
140     procfs_register_pid(tsk->pid);
141 
142     return retval;
143 
144 copy_thread_failed:;
145     // 回收线程
146     process_exit_thread(tsk);
147 copy_files_failed:;
148     // 回收文件
149     process_exit_files(tsk);
150 copy_sighand_failed:;
151     process_exit_sighand(tsk);
152 copy_signal_failed:;
153     process_exit_signal(tsk);
154 copy_mm_failed:;
155     // 回收内存空间分布结构体
156     process_exit_mm(tsk);
157 copy_flags_failed:;
158     kfree(tsk);
159     return retval;
160 }
161 
162 /**
163  * @brief 拷贝当前进程的标志位
164  *
165  * @param clone_flags 克隆标志位
166  * @param pcb 新的进程的pcb
167  * @return uint64_t
168  */
process_copy_flags(uint64_t clone_flags,struct process_control_block * pcb)169 int process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb)
170 {
171     if (clone_flags & CLONE_VM)
172         pcb->flags |= PF_VFORK;
173     return 0;
174 }
175 
176 /**
177  * @brief 拷贝当前进程的文件描述符等信息
178  *
179  * @param clone_flags 克隆标志位
180  * @param pcb 新的进程的pcb
181  * @return uint64_t
182  */
process_copy_files(uint64_t clone_flags,struct process_control_block * pcb)183 int process_copy_files(uint64_t clone_flags, struct process_control_block *pcb)
184 {
185     int retval = 0;
186     // 如果CLONE_FS被置位,那么子进程与父进程共享文件描述符
187     // 文件描述符已经在复制pcb时被拷贝
188     if (clone_flags & CLONE_FS)
189         return retval;
190 
191     // 为新进程拷贝新的文件描述符
192     for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
193     {
194         if (current_pcb->fds[i] == NULL)
195             continue;
196 
197         pcb->fds[i] = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
198         memcpy(pcb->fds[i], current_pcb->fds[i], sizeof(struct vfs_file_t));
199     }
200 
201     return retval;
202 }
203 
204 /**
205  * @brief 拷贝当前进程的内存空间分布结构体信息
206  *
207  * @param clone_flags 克隆标志位
208  * @param pcb 新的进程的pcb
209  * @return uint64_t
210  */
process_copy_mm(uint64_t clone_flags,struct process_control_block * pcb)211 int process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb)
212 {
213     int retval = 0;
214     // 与父进程共享内存空间
215     if (clone_flags & CLONE_VM)
216     {
217         pcb->mm = current_pcb->mm;
218 
219         return retval;
220     }
221 
222     // 分配新的内存空间分布结构体
223     struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
224     memset(new_mms, 0, sizeof(struct mm_struct));
225 
226     memcpy(new_mms, current_pcb->mm, sizeof(struct mm_struct));
227     new_mms->vmas = NULL;
228     pcb->mm = new_mms;
229 
230     // 分配顶层页表, 并设置顶层页表的物理地址
231     new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
232     // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
233     memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
234 
235     // 拷贝内核空间的页表指针
236     memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]->mm->pgd) + 256,
237            PAGE_4K_SIZE / 2);
238 
239     uint64_t *current_pgd = (uint64_t *)phys_2_virt(current_pcb->mm->pgd);
240 
241     uint64_t *new_pml4t = (uint64_t *)phys_2_virt(new_mms->pgd);
242 
243     // 拷贝用户空间的vma
244     struct vm_area_struct *vma = current_pcb->mm->vmas;
245     while (vma != NULL)
246     {
247         if (vma->vm_end > USER_MAX_LINEAR_ADDR || vma->vm_flags & VM_DONTCOPY)
248         {
249             vma = vma->vm_next;
250             continue;
251         }
252 
253         int64_t vma_size = vma->vm_end - vma->vm_start;
254         // kdebug("vma_size=%ld, vm_start=%#018lx", vma_size, vma->vm_start);
255         if (vma_size > PAGE_2M_SIZE / 2)
256         {
257             int page_to_alloc = (PAGE_2M_ALIGN(vma_size)) >> PAGE_2M_SHIFT;
258             for (int i = 0; i < page_to_alloc; ++i)
259             {
260                 uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
261 
262                 struct vm_area_struct *new_vma = NULL;
263                 int ret = mm_create_vma(new_mms, vma->vm_start + i * PAGE_2M_SIZE, PAGE_2M_SIZE, vma->vm_flags,
264                                         vma->vm_ops, &new_vma);
265                 // 防止内存泄露
266                 if (unlikely(ret == -EEXIST))
267                     free_pages(Phy_to_2M_Page(pa), 1);
268                 else
269                     mm_map_vma(new_vma, pa, 0, PAGE_2M_SIZE);
270 
271                 memcpy((void *)phys_2_virt(pa), (void *)(vma->vm_start + i * PAGE_2M_SIZE),
272                        (vma_size >= PAGE_2M_SIZE) ? PAGE_2M_SIZE : vma_size);
273                 vma_size -= PAGE_2M_SIZE;
274             }
275         }
276         else
277         {
278             uint64_t map_size = PAGE_4K_ALIGN(vma_size);
279             uint64_t va = (uint64_t)kmalloc(map_size, 0);
280 
281             struct vm_area_struct *new_vma = NULL;
282             int ret = mm_create_vma(new_mms, vma->vm_start, map_size, vma->vm_flags, vma->vm_ops, &new_vma);
283             // 防止内存泄露
284             if (unlikely(ret == -EEXIST))
285                 kfree((void *)va);
286             else
287                 mm_map_vma(new_vma, virt_2_phys(va), 0, map_size);
288 
289             memcpy((void *)va, (void *)vma->vm_start, vma_size);
290         }
291         vma = vma->vm_next;
292     }
293 
294     return retval;
295 }
296 
297 /**
298  * @brief 重写内核栈中的rbp地址
299  *
300  * @param new_regs 子进程的reg
301  * @param new_pcb 子进程的pcb
302  * @return int
303  */
process_rewrite_rbp(struct pt_regs * new_regs,struct process_control_block * new_pcb)304 static int process_rewrite_rbp(struct pt_regs *new_regs, struct process_control_block *new_pcb)
305 {
306 
307     uint64_t new_top = ((uint64_t)new_pcb) + STACK_SIZE;
308     uint64_t old_top = (uint64_t)(current_pcb) + STACK_SIZE;
309 
310     uint64_t *rbp = &new_regs->rbp;
311     uint64_t *tmp = rbp;
312 
313     // 超出内核栈范围
314     if ((uint64_t)*rbp >= old_top || (uint64_t)*rbp < (old_top - STACK_SIZE))
315         return 0;
316 
317     while (1)
318     {
319         // 计算delta
320         uint64_t delta = old_top - *rbp;
321         // 计算新的rbp值
322         uint64_t newVal = new_top - delta;
323 
324         // 新的值不合法
325         if (unlikely((uint64_t)newVal >= new_top || (uint64_t)newVal < (new_top - STACK_SIZE)))
326             break;
327         // 将新的值写入对应位置
328         *rbp = newVal;
329         // 跳转栈帧
330         rbp = (uint64_t *)*rbp;
331     }
332 
333     // 设置内核态fork返回到enter_syscall_int()函数内的时候,rsp寄存器的值
334     new_regs->rsp = new_top - (old_top - new_regs->rsp);
335     return 0;
336 }
337 
338 /**
339  * @brief 拷贝当前进程的线程结构体
340  *
341  * @param clone_flags 克隆标志位
342  * @param pcb 新的进程的pcb
343  * @return uint64_t
344  */
process_copy_thread(uint64_t clone_flags,struct process_control_block * pcb,uint64_t stack_start,uint64_t stack_size,struct pt_regs * current_regs)345 int process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start,
346                         uint64_t stack_size, struct pt_regs *current_regs)
347 {
348     // 将线程结构体放置在pcb后方
349     struct thread_struct *thd = (struct thread_struct *)(pcb + 1);
350     memset(thd, 0, sizeof(struct thread_struct));
351     pcb->thread = thd;
352 
353     struct pt_regs *child_regs = NULL;
354     // 拷贝栈空间
355     if (pcb->flags & PF_KFORK) // 内核态下的fork
356     {
357         // 内核态下则拷贝整个内核栈
358         uint32_t size = ((uint64_t)current_pcb) + STACK_SIZE - (uint64_t)(current_regs);
359 
360         child_regs = (struct pt_regs *)(((uint64_t)pcb) + STACK_SIZE - size);
361         memcpy(child_regs, (void *)current_regs, size);
362         barrier();
363         // 然后重写新的栈中,每个栈帧的rbp值
364         process_rewrite_rbp(child_regs, pcb);
365     }
366     else
367     {
368         child_regs = (struct pt_regs *)((uint64_t)pcb + STACK_SIZE - sizeof(struct pt_regs));
369         memcpy(child_regs, current_regs, sizeof(struct pt_regs));
370         barrier();
371         child_regs->rsp = stack_start;
372     }
373 
374     // 设置子进程的返回值为0
375     child_regs->rax = 0;
376     if (pcb->flags & PF_KFORK)
377         thd->rbp =
378             (uint64_t)(child_regs + 1); // 设置新的内核线程开始执行时的rbp(也就是进入ret_from_system_call时的rbp)
379     else
380         thd->rbp = (uint64_t)pcb + STACK_SIZE;
381 
382     // 设置新的内核线程开始执行的时候的rsp
383     thd->rsp = (uint64_t)child_regs;
384     thd->fs = current_pcb->thread->fs;
385     thd->gs = current_pcb->thread->gs;
386 
387     // 根据是否为内核线程、是否在内核态fork,设置进程的开始执行的地址
388     if (pcb->flags & PF_KFORK)
389         thd->rip = (uint64_t)ret_from_system_call;
390     else if (pcb->flags & PF_KTHREAD && (!(pcb->flags & PF_KFORK)))
391         thd->rip = (uint64_t)kernel_thread_func;
392     else
393         thd->rip = (uint64_t)ret_from_system_call;
394 
395     return 0;
396 }