1 #include "process.h"
2 #include <common/err.h>
3 #include <common/kthread.h>
4 #include <common/spinlock.h>
5 
6 extern spinlock_t process_global_pid_write_lock;
7 extern long process_global_pid;
8 
9 extern void kernel_thread_func(void);
10 extern uint64_t rs_procfs_register_pid(uint64_t);
11 extern uint64_t rs_procfs_unregister_pid(uint64_t);
12 extern void *rs_dup_fpstate();
13 
14 extern int process_copy_files(uint64_t clone_flags, struct process_control_block *pcb);
15 int process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb);
16 int process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb);
17 int process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start,
18                         uint64_t stack_size, struct pt_regs *current_regs);
19 
20 extern int process_copy_sighand(uint64_t clone_flags, struct process_control_block *pcb);
21 extern int process_copy_signal(uint64_t clone_flags, struct process_control_block *pcb);
22 extern void process_exit_sighand(struct process_control_block *pcb);
23 extern void process_exit_signal(struct process_control_block *pcb);
24 
25 /**
26  * @brief fork当前进程
27  *
28  * @param regs 新的寄存器值
29  * @param clone_flags 克隆标志
30  * @param stack_start 堆栈开始地址
31  * @param stack_size 堆栈大小
32  * @return unsigned long
33  */
do_fork(struct pt_regs * regs,unsigned long clone_flags,unsigned long stack_start,unsigned long stack_size)34 unsigned long do_fork(struct pt_regs *regs, unsigned long clone_flags, unsigned long stack_start,
35                       unsigned long stack_size)
36 {
37     int retval = 0;
38     struct process_control_block *tsk = NULL;
39 
40     // 为新的进程分配栈空间,并将pcb放置在底部
41     tsk = (struct process_control_block *)kzalloc(STACK_SIZE, 0);
42     barrier();
43 
44     if (tsk == NULL)
45     {
46         retval = -ENOMEM;
47         return retval;
48     }
49 
50     barrier();
51     memset(tsk, 0, sizeof(struct process_control_block));
52     io_mfence();
53     // 将当前进程的pcb复制到新的pcb内
54     memcpy(tsk, current_pcb, sizeof(struct process_control_block));
55     tsk->worker_private = NULL;
56     io_mfence();
57 
58     // 初始化进程的循环链表结点
59     list_init(&tsk->list);
60 
61     io_mfence();
62     // 判断是否为内核态调用fork
63     if ((current_pcb->flags & PF_KTHREAD) && stack_start != 0)
64         tsk->flags |= PF_KFORK;
65 
66     if (tsk->flags & PF_KTHREAD)
67     {
68         // 对于内核线程,设置其worker私有信息
69         retval = kthread_set_worker_private(tsk);
70         if (IS_ERR_VALUE(retval))
71             goto copy_flags_failed;
72         tsk->virtual_runtime = 0;
73     }
74     tsk->priority = 2;
75     tsk->preempt_count = 0;
76 
77     // 增加全局的pid并赋值给新进程的pid
78     spin_lock(&process_global_pid_write_lock);
79     tsk->pid = process_global_pid++;
80     barrier();
81     // 加入到进程链表中
82     // todo: 对pcb_list_lock加锁
83     tsk->prev_pcb = &initial_proc_union.pcb;
84     barrier();
85     tsk->next_pcb = initial_proc_union.pcb.next_pcb;
86     barrier();
87     initial_proc_union.pcb.next_pcb = tsk;
88     barrier();
89     tsk->parent_pcb = current_pcb;
90     barrier();
91 
92     spin_unlock(&process_global_pid_write_lock);
93 
94     tsk->cpu_id = proc_current_cpu_id;
95     tsk->state = PROC_UNINTERRUPTIBLE;
96 
97     tsk->parent_pcb = current_pcb;
98     wait_queue_init(&tsk->wait_child_proc_exit, NULL);
99     barrier();
100     list_init(&tsk->list);
101 
102     retval = -ENOMEM;
103 
104     // 拷贝标志位
105     retval = process_copy_flags(clone_flags, tsk);
106     if (retval)
107         goto copy_flags_failed;
108 
109     // 拷贝内存空间分布结构体
110     retval = process_copy_mm(clone_flags, tsk);
111     if (retval)
112         goto copy_mm_failed;
113 
114     // 拷贝文件
115     retval = process_copy_files(clone_flags, tsk);
116     if (retval)
117         goto copy_files_failed;
118 
119     // 拷贝信号处理函数
120     retval = process_copy_sighand(clone_flags, tsk);
121     if (retval)
122         goto copy_sighand_failed;
123 
124     retval = process_copy_signal(clone_flags, tsk);
125     if (retval)
126         goto copy_signal_failed;
127 
128     // 拷贝线程结构体
129     retval = process_copy_thread(clone_flags, tsk, stack_start, stack_size, regs);
130     if (retval)
131         goto copy_thread_failed;
132 
133     // 拷贝成功
134     retval = tsk->pid;
135 
136     tsk->flags &= ~PF_KFORK;
137 
138     // 创建对应procfs文件
139     rs_procfs_register_pid(tsk->pid);
140 
141     // 唤醒进程
142     process_wakeup(tsk);
143 
144     return retval;
145 
146 copy_thread_failed:;
147     // 回收线程
148     process_exit_thread(tsk);
149 copy_files_failed:;
150     // 回收文件
151     process_exit_files(tsk);
152     rs_procfs_unregister_pid(tsk->pid);
153 copy_sighand_failed:;
154     process_exit_sighand(tsk);
155 copy_signal_failed:;
156     process_exit_signal(tsk);
157 copy_mm_failed:;
158     // 回收内存空间分布结构体
159     process_exit_mm(tsk);
160 copy_flags_failed:;
161     kfree(tsk);
162     return retval;
163 }
164 
165 /**
166  * @brief 拷贝当前进程的标志位
167  *
168  * @param clone_flags 克隆标志位
169  * @param pcb 新的进程的pcb
170  * @return uint64_t
171  */
process_copy_flags(uint64_t clone_flags,struct process_control_block * pcb)172 int process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb)
173 {
174     if (clone_flags & CLONE_VM)
175         pcb->flags |= PF_VFORK;
176     return 0;
177 }
178 
179 /**
180  * @brief 拷贝当前进程的内存空间分布结构体信息
181  *
182  * @param clone_flags 克隆标志位
183  * @param pcb 新的进程的pcb
184  * @return uint64_t
185  */
process_copy_mm(uint64_t clone_flags,struct process_control_block * pcb)186 int process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb)
187 {
188     int retval = 0;
189     // 与父进程共享内存空间
190     if (clone_flags & CLONE_VM)
191     {
192         pcb->mm = current_pcb->mm;
193 
194         return retval;
195     }
196 
197     // 分配新的内存空间分布结构体
198     struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
199     memset(new_mms, 0, sizeof(struct mm_struct));
200 
201     memcpy(new_mms, current_pcb->mm, sizeof(struct mm_struct));
202     new_mms->vmas = NULL;
203     pcb->mm = new_mms;
204 
205     // 分配顶层页表, 并设置顶层页表的物理地址
206     new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
207     // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
208     memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
209 
210     // 拷贝内核空间的页表指针
211     memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]->mm->pgd) + 256,
212            PAGE_4K_SIZE / 2);
213 
214     uint64_t *current_pgd = (uint64_t *)phys_2_virt(current_pcb->mm->pgd);
215 
216     uint64_t *new_pml4t = (uint64_t *)phys_2_virt(new_mms->pgd);
217 
218     // 拷贝用户空间的vma
219     struct vm_area_struct *vma = current_pcb->mm->vmas;
220     while (vma != NULL)
221     {
222         if (vma->vm_end > USER_MAX_LINEAR_ADDR || vma->vm_flags & VM_DONTCOPY)
223         {
224             vma = vma->vm_next;
225             continue;
226         }
227 
228         int64_t vma_size = vma->vm_end - vma->vm_start;
229         // kdebug("vma_size=%ld, vm_start=%#018lx", vma_size, vma->vm_start);
230         if (vma_size > PAGE_2M_SIZE / 2)
231         {
232             int page_to_alloc = (PAGE_2M_ALIGN(vma_size)) >> PAGE_2M_SHIFT;
233             for (int i = 0; i < page_to_alloc; ++i)
234             {
235                 uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
236 
237                 struct vm_area_struct *new_vma = NULL;
238                 int ret = mm_create_vma(new_mms, vma->vm_start + i * PAGE_2M_SIZE, PAGE_2M_SIZE, vma->vm_flags,
239                                         vma->vm_ops, &new_vma);
240                 // 防止内存泄露
241                 if (unlikely(ret == -EEXIST))
242                     free_pages(Phy_to_2M_Page(pa), 1);
243                 else
244                     mm_map_vma(new_vma, pa, 0, PAGE_2M_SIZE);
245 
246                 memcpy((void *)phys_2_virt(pa), (void *)(vma->vm_start + i * PAGE_2M_SIZE),
247                        (vma_size >= PAGE_2M_SIZE) ? PAGE_2M_SIZE : vma_size);
248                 vma_size -= PAGE_2M_SIZE;
249             }
250         }
251         else
252         {
253             uint64_t map_size = PAGE_4K_ALIGN(vma_size);
254             uint64_t va = (uint64_t)kmalloc(map_size, 0);
255 
256             struct vm_area_struct *new_vma = NULL;
257             int ret = mm_create_vma(new_mms, vma->vm_start, map_size, vma->vm_flags, vma->vm_ops, &new_vma);
258             // 防止内存泄露
259             if (unlikely(ret == -EEXIST))
260                 kfree((void *)va);
261             else
262                 mm_map_vma(new_vma, virt_2_phys(va), 0, map_size);
263 
264             memcpy((void *)va, (void *)vma->vm_start, vma_size);
265         }
266         vma = vma->vm_next;
267     }
268 
269     return retval;
270 }
271 
272 /**
273  * @brief 重写内核栈中的rbp地址
274  *
275  * @param new_regs 子进程的reg
276  * @param new_pcb 子进程的pcb
277  * @return int
278  */
process_rewrite_rbp(struct pt_regs * new_regs,struct process_control_block * new_pcb)279 static int process_rewrite_rbp(struct pt_regs *new_regs, struct process_control_block *new_pcb)
280 {
281 
282     uint64_t new_top = ((uint64_t)new_pcb) + STACK_SIZE;
283     uint64_t old_top = (uint64_t)(current_pcb) + STACK_SIZE;
284 
285     uint64_t *rbp = &new_regs->rbp;
286     uint64_t *tmp = rbp;
287 
288     // 超出内核栈范围
289     if ((uint64_t)*rbp >= old_top || (uint64_t)*rbp < (old_top - STACK_SIZE))
290         return 0;
291 
292     while (1)
293     {
294         // 计算delta
295         uint64_t delta = old_top - *rbp;
296         // 计算新的rbp值
297         uint64_t newVal = new_top - delta;
298 
299         // 新的值不合法
300         if (unlikely((uint64_t)newVal >= new_top || (uint64_t)newVal < (new_top - STACK_SIZE)))
301             break;
302         // 将新的值写入对应位置
303         *rbp = newVal;
304         // 跳转栈帧
305         rbp = (uint64_t *)*rbp;
306     }
307 
308     // 设置内核态fork返回到enter_syscall_int()函数内的时候,rsp寄存器的值
309     new_regs->rsp = new_top - (old_top - new_regs->rsp);
310     return 0;
311 }
312 
313 /**
314  * @brief 拷贝当前进程的线程结构体
315  *
316  * @param clone_flags 克隆标志位
317  * @param pcb 新的进程的pcb
318  * @return uint64_t
319  */
process_copy_thread(uint64_t clone_flags,struct process_control_block * pcb,uint64_t stack_start,uint64_t stack_size,struct pt_regs * current_regs)320 int process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start,
321                         uint64_t stack_size, struct pt_regs *current_regs)
322 {
323     // 将线程结构体放置在pcb后方
324     struct thread_struct *thd = (struct thread_struct *)(pcb + 1);
325     memset(thd, 0, sizeof(struct thread_struct));
326     pcb->thread = thd;
327 
328     struct pt_regs *child_regs = NULL;
329     // 拷贝栈空间
330     if (pcb->flags & PF_KFORK) // 内核态下的fork
331     {
332         // 内核态下则拷贝整个内核栈
333         uint32_t size = ((uint64_t)current_pcb) + STACK_SIZE - (uint64_t)(current_regs);
334 
335         child_regs = (struct pt_regs *)(((uint64_t)pcb) + STACK_SIZE - size);
336         memcpy(child_regs, (void *)current_regs, size);
337 
338         barrier();
339         // 然后重写新的栈中,每个栈帧的rbp值
340         process_rewrite_rbp(child_regs, pcb);
341     }
342     else
343     {
344         child_regs = (struct pt_regs *)((uint64_t)pcb + STACK_SIZE - sizeof(struct pt_regs));
345         memcpy(child_regs, current_regs, sizeof(struct pt_regs));
346         barrier();
347         child_regs->rsp = stack_start;
348     }
349 
350     // 设置子进程的返回值为0
351     child_regs->rax = 0;
352     if (pcb->flags & PF_KFORK)
353         thd->rbp = (uint64_t)(child_regs + 1); // 设置新的内核线程开始执行时的rbp(也就是进入ret_from_intr时的rbp)
354     else
355         thd->rbp = (uint64_t)pcb + STACK_SIZE;
356 
357     // 设置新的内核线程开始执行的时候的rsp
358     thd->rsp = (uint64_t)child_regs;
359     thd->fs = current_pcb->thread->fs;
360     thd->gs = current_pcb->thread->gs;
361 
362     // 根据是否为内核线程、是否在内核态fork,设置进程的开始执行的地址
363     if (pcb->flags & PF_KFORK)
364         thd->rip = (uint64_t)ret_from_intr;
365     else if (pcb->flags & PF_KTHREAD && (!(pcb->flags & PF_KFORK)))
366         thd->rip = (uint64_t)kernel_thread_func;
367     else
368         thd->rip = (uint64_t)ret_from_intr;
369 
370     pcb->fp_state = rs_dup_fpstate();
371 
372     return 0;
373 }