xref: /DragonOS/kernel/src/sched/fair.rs (revision f79998f626801329580c782fd05e36cb2027f474)
1 use core::intrinsics::likely;
2 use core::intrinsics::unlikely;
3 use core::mem::swap;
4 use core::sync::atomic::fence;
5 use core::sync::atomic::{AtomicU64, Ordering};
6 
7 use crate::libs::rbtree::RBTree;
8 use crate::libs::spinlock::SpinLock;
9 use crate::process::ProcessControlBlock;
10 use crate::process::ProcessFlags;
11 use crate::sched::clock::ClockUpdataFlag;
12 use crate::sched::{cpu_rq, SchedFeature, SCHED_FEATURES};
13 use crate::smp::core::smp_get_processor_id;
14 use crate::time::jiffies::TICK_NESC;
15 use crate::time::timer::clock;
16 use crate::time::NSEC_PER_MSEC;
17 use alloc::sync::{Arc, Weak};
18 
19 use super::idle::IdleScheduler;
20 use super::pelt::{add_positive, sub_positive, SchedulerAvg, UpdateAvgFlags, PELT_MIN_DIVIDER};
21 use super::{
22     CpuRunQueue, DequeueFlag, EnqueueFlag, LoadWeight, OnRq, SchedPolicy, Scheduler, TaskGroup,
23     WakeupFlags, SCHED_CAPACITY_SHIFT,
24 };
25 
26 /// Parameter that sets the minimum preemption granularity for CPU-bound tasks.
27 /// Defaults to 0.75 ms multiplied by (1 + log2(number of CPUs)), in nanoseconds.
28 /// This value affects how tasks are preempted when they are CPU-bound.
29 static SYSCTL_SHCED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
30 /// Normalized minimum preemption granularity parameter
31 #[allow(dead_code)]
32 static NORMALIZED_SYSCTL_SCHED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
33 
34 static SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
35 #[allow(dead_code)]
36 static NORMALIZED_SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
37 
38 /// Preset number of tasks that one scheduling-latency period is sized for
39 static SCHED_NR_LATENCY: AtomicU64 = AtomicU64::new(8);
40 
41 /// A scheduling entity: it may represent a process, a process group, a user, or some other grouping
42 #[derive(Debug)]
43 pub struct FairSchedEntity {
44     /// Load-related state
45     pub load: LoadWeight,
46     pub deadline: u64,
47     pub min_deadline: u64,
48 
49     /// Whether this entity is on a run queue
50     pub on_rq: OnRq,
51     /// Time at which the current run of this scheduling entity started
52     pub exec_start: u64,
53     /// Total execution time
54     pub sum_exec_runtime: u64,
55     /// Virtual runtime
56     pub vruntime: u64,
57     /// The entity's lag. It equals the entity's weight multiplied by (V - v_i), where V is the queue's average virtual runtime and v_i is this entity's virtual runtime
58     pub vlag: i64,
59     /// Runtime slice
60     pub slice: u64,
61     /// sum_exec_runtime recorded the last time this entity was scheduled
62     pub prev_sum_exec_runtime: u64,
63 
64     pub avg: SchedulerAvg,
65 
66     /// Parent node
67     parent: Weak<FairSchedEntity>,
68 
69     pub depth: u32,
70 
71     /// Weak reference to itself
72     self_ref: Weak<FairSchedEntity>,
73 
74     /// The CFS run queue this entity is on
75     cfs_rq: Weak<CfsRunQueue>,
76 
77     /// Private cfs run queue owned by the group
78     my_cfs_rq: Option<Arc<CfsRunQueue>>,
79 
80     runnable_weight: u64,
81 
82     pcb: Weak<ProcessControlBlock>,
83 }
84 
85 impl FairSchedEntity {
86     pub fn new() -> Arc<Self> {
87         let ret = Arc::new(Self {
88             parent: Weak::new(),
89             self_ref: Weak::new(),
90             pcb: Weak::new(),
91             cfs_rq: Weak::new(),
92             my_cfs_rq: None,
93             on_rq: OnRq::None,
94             slice: SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst),
95             load: Default::default(),
96             deadline: Default::default(),
97             min_deadline: Default::default(),
98             exec_start: Default::default(),
99             sum_exec_runtime: Default::default(),
100             vruntime: Default::default(),
101             vlag: Default::default(),
102             prev_sum_exec_runtime: Default::default(),
103             avg: Default::default(),
104             depth: Default::default(),
105             runnable_weight: Default::default(),
106         });
107 
108         ret.force_mut().self_ref = Arc::downgrade(&ret);
109 
110         ret
111     }
112 }
113 
114 impl FairSchedEntity {
115     pub fn self_arc(&self) -> Arc<FairSchedEntity> {
116         self.self_ref.upgrade().unwrap()
117     }
118 
119     #[inline]
120     pub fn on_rq(&self) -> bool {
121         self.on_rq != OnRq::None
122     }
123 
124     pub fn pcb(&self) -> Arc<ProcessControlBlock> {
125         self.pcb.upgrade().unwrap()
126     }
127 
128     pub fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) {
129         self.pcb = pcb
130     }
131 
132     #[inline]
133     pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
134         self.cfs_rq.upgrade().unwrap()
135     }
136 
137     pub fn set_cfs(&mut self, cfs: Weak<CfsRunQueue>) {
138         self.cfs_rq = cfs;
139     }
140 
141     pub fn parent(&self) -> Option<Arc<FairSchedEntity>> {
142         self.parent.upgrade()
143     }
144 
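    /// Obtain a `&mut Self` from a shared reference by casting through a raw pointer.
    /// This bypasses the borrow checker, so callers must guarantee exclusive access
    /// (e.g. by holding the owning run queue's lock) while the returned reference is alive.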
145     #[allow(clippy::mut_from_ref)]
146     pub fn force_mut(&self) -> &mut Self {
147         unsafe { &mut *(self as *const Self as usize as *mut Self) }
148     }
149 
150     /// Whether this scheduling entity is owned by a task (process)
151     #[inline]
152     pub fn is_task(&self) -> bool {
153         // TODO: scheduling groups
154         true
155     }
156 
157     #[inline]
158     pub fn is_idle(&self) -> bool {
159         if self.is_task() {
160             return self.pcb().sched_info().policy() == SchedPolicy::IDLE;
161         }
162 
163         return self.cfs_rq().is_idle();
164     }
165 
166     pub fn clear_buddies(&self) {
167         let mut se = self.self_arc();
168 
169         Self::for_each_in_group(&mut se, |se| {
170             let binding = se.cfs_rq();
171             let cfs_rq = binding.force_mut();
172 
173             if let Some(next) = cfs_rq.next.upgrade() {
174                 if !Arc::ptr_eq(&next, &se) {
175                     return (false, true);
176                 }
177             }
178             cfs_rq.next = Weak::new();
179             return (true, true);
180         });
181     }
182 
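    /// Convert a wall-clock runtime delta into a vruntime delta by scaling it with the
    /// entity's weight relative to the NICE_0 reference load: entities with a larger
    /// weight accrue vruntime more slowly (e.g. at twice the reference weight, a 10 ms
    /// runtime delta yields roughly a 5 ms vruntime delta).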
183     pub fn calculate_delta_fair(&self, delta: u64) -> u64 {
184         if unlikely(self.load.weight != LoadWeight::NICE_0_LOAD_SHIFT as u64) {
185             return self
186                 .force_mut()
187                 .load
188                 .calculate_delta(delta, LoadWeight::NICE_0_LOAD_SHIFT as u64);
189         };
190 
191         delta
192     }
193 
194     /// Update the weight information within the group
195     pub fn update_cfs_group(&self) {
196         if self.my_cfs_rq.is_none() {
197             return;
198         }
199 
200         let group_cfs = self.my_cfs_rq.clone().unwrap();
201 
202         let shares = group_cfs.task_group().shares;
203 
204         if unlikely(self.load.weight != shares) {
205             // TODO: reweight
206             self.cfs_rq()
207                 .force_mut()
208                 .reweight_entity(self.self_arc(), shares);
209         }
210     }
211 
212     /// Walk up the se's group hierarchy, applying a closure to each entity.
213     /// If the closure reports that the caller should return, iteration stops early.
214     /// Note: this function changes what `se` points to (it ends up at the topmost parent reached).
215     /// Parameters:
216     /// - se: the scheduling entity to start from
217     /// - f: closure applied to each entity; it returns (no_break, should_continue). When no_break is false the loop exits; when should_continue is false the caller itself should return.
218     ///
219     /// Return value:
220     /// - bool: should_continue; when false, the caller should return
221     /// - Option<Arc<FairSchedEntity>>: what se finally points to
222     pub fn for_each_in_group(
223         se: &mut Arc<FairSchedEntity>,
224         mut f: impl FnMut(Arc<FairSchedEntity>) -> (bool, bool),
225     ) -> (bool, Option<Arc<FairSchedEntity>>) {
226         let mut should_continue;
227         let ret;
228         // This loops all the way up to the root node.
229         // For example, with a task group A and a process B belonging to A, B's time allocation depends on both A's weight and B's own weight.
230         loop {
231             let (no_break, flag) = f(se.clone());
232             should_continue = flag;
233             if !no_break || !should_continue {
234                 ret = Some(se.clone());
235                 break;
236             }
237 
238             let parent = se.parent();
239             if parent.is_none() {
240                 ret = None;
241                 break;
242             }
243 
244             *se = parent.unwrap();
245         }
246 
247         (should_continue, ret)
248     }
249 
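    /// Runnable contribution of this entity: for a task it is simply whether the task
    /// is on a run queue; for a group entity it is the cached number of runnable tasks
    /// in the group's own cfs queue (runnable_weight).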
250     pub fn runnable(&self) -> u64 {
251         if self.is_task() {
252             return self.on_rq as u64;
253         } else {
254             self.runnable_weight
255         }
256     }
257 
258     /// Update the load averages of the task and its cfs_rq
259     pub fn propagate_entity_load_avg(&mut self) -> bool {
260         if self.is_task() {
261             return false;
262         }
263 
264         let binding = self.my_cfs_rq.clone().unwrap();
265         let gcfs_rq = binding.force_mut();
266 
267         if gcfs_rq.propagate == 0 {
268             return false;
269         }
270 
271         gcfs_rq.propagate = 0;
272 
273         let binding = self.cfs_rq();
274         let cfs_rq = binding.force_mut();
275 
276         cfs_rq.add_task_group_propagate(gcfs_rq.prop_runnable_sum);
277 
278         cfs_rq.update_task_group_util(self.self_arc(), gcfs_rq);
279         cfs_rq.update_task_group_runnable(self.self_arc(), gcfs_rq);
280         cfs_rq.update_task_group_load(self.self_arc(), gcfs_rq);
281 
282         return true;
283     }
284 
285     /// Update runnable_weight
286     pub fn update_runnable(&mut self) {
287         if !self.is_task() {
288             self.runnable_weight = self.my_cfs_rq.clone().unwrap().h_nr_running;
289         }
290     }
291 
292     /// Initialize the entity's running averages
293     pub fn init_entity_runnable_average(&mut self) {
294         self.avg = SchedulerAvg::default();
295 
296         if self.is_task() {
297             self.avg.load_avg = LoadWeight::scale_load_down(self.load.weight) as usize;
298         }
299     }
300 }
301 
302 /// The CFS run queue; it must be guaranteed to be per-CPU
303 #[allow(dead_code)]
304 #[derive(Debug)]
305 pub struct CfsRunQueue {
306     load: LoadWeight,
307 
308     /// Global counter of running scheduling entities, used for load balancing
309     nr_running: u64,
310     /// Hierarchical task counter for this CPU core
311     pub h_nr_running: u64,
312     /// Execution clock
313     exec_clock: u64,
314     /// Minimum virtual runtime
315     min_vruntime: u64,
316     /// Remaining runtime
317     runtime_remaining: u64,
318 
319     /// Red-black tree holding the scheduling entities
320     pub(super) entities: RBTree<u64, Arc<FairSchedEntity>>,
321 
322     /// IDLE
323     idle: usize,
324 
325     idle_nr_running: u64,
326 
327     pub idle_h_nr_running: u64,
328 
329     /// The scheduling entity currently running
330     current: Weak<FairSchedEntity>,
331     /// The next entity to schedule
332     next: Weak<FairSchedEntity>,
333     /// The last scheduling entity
334     last: Weak<FairSchedEntity>,
335     /// Scheduling entity to skip
336     skip: Weak<FairSchedEntity>,
337 
338     avg_load: i64,
339     avg_vruntime: i64,
340 
341     last_update_time_copy: u64,
342 
343     pub avg: SchedulerAvg,
344 
345     rq: Weak<CpuRunQueue>,
346     /// The task group that owns this queue
347     task_group: Weak<TaskGroup>,
348 
349     pub throttled_clock: u64,
350     pub throttled_clock_pelt: u64,
351     pub throttled_clock_pelt_time: u64,
352     pub throttled_pelt_idle: u64,
353 
354     pub throttled: bool,
355     pub throttled_count: u64,
356 
357     pub removed: SpinLock<CfsRemoved>,
358 
359     pub propagate: isize,
360     pub prop_runnable_sum: isize,
361 }
362 
363 #[derive(Debug, Default)]
364 pub struct CfsRemoved {
365     pub nr: u32,
366     pub load_avg: usize,
367     pub util_avg: usize,
368     pub runnable_avg: usize,
369 }
370 
371 impl CfsRunQueue {
372     pub fn new() -> Self {
373         Self {
374             load: LoadWeight::default(),
375             nr_running: 0,
376             h_nr_running: 0,
377             exec_clock: 0,
378             min_vruntime: 1 << 20,
379             entities: RBTree::new(),
380             idle: 0,
381             idle_nr_running: 0,
382             idle_h_nr_running: 0,
383             current: Weak::new(),
384             next: Weak::new(),
385             last: Weak::new(),
386             skip: Weak::new(),
387             avg_load: 0,
388             avg_vruntime: 0,
389             last_update_time_copy: 0,
390             avg: SchedulerAvg::default(),
391             rq: Weak::new(),
392             task_group: Weak::new(),
393             throttled_clock: 0,
394             throttled_clock_pelt: 0,
395             throttled_clock_pelt_time: 0,
396             throttled_pelt_idle: 0,
397             throttled: false,
398             throttled_count: 0,
399             removed: SpinLock::new(CfsRemoved::default()),
400             propagate: 0,
401             prop_runnable_sum: 0,
402             runtime_remaining: 0,
403         }
404     }
405 
406     #[inline]
407     pub fn rq(&self) -> Arc<CpuRunQueue> {
408         self.rq.upgrade().unwrap()
409     }
410 
411     #[inline]
412     pub fn set_rq(&mut self, rq: Weak<CpuRunQueue>) {
413         self.rq = rq;
414     }
415 
416     #[inline]
417     #[allow(clippy::mut_from_ref)]
418     pub fn force_mut(&self) -> &mut Self {
419         unsafe { &mut *(self as *const Self as usize as *mut Self) }
420     }
421 
422     #[inline]
423     pub fn is_idle(&self) -> bool {
424         self.idle > 0
425     }
426 
427     #[inline]
428     pub fn current(&self) -> Option<Arc<FairSchedEntity>> {
429         self.current.upgrade()
430     }
431 
432     #[inline]
433     pub fn set_current(&mut self, curr: Weak<FairSchedEntity>) {
434         self.current = curr
435     }
436 
437     #[inline]
438     pub fn next(&self) -> Option<Arc<FairSchedEntity>> {
439         self.next.upgrade()
440     }
441 
442     pub fn task_group(&self) -> Arc<TaskGroup> {
443         self.task_group.upgrade().unwrap()
444     }
445 
446     #[allow(dead_code)]
447     #[inline]
448     pub const fn bandwidth_used() -> bool {
449         false
450     }
451 
452     /// ## Compute the scheduling period. The basic idea is that every task runs at least once within one period.
453     /// This ensures all tasks get to execute and prevents some tasks from being starved for a long time.
454     pub fn sched_period(nr_running: u64) -> u64 {
455         if unlikely(nr_running > SCHED_NR_LATENCY.load(Ordering::SeqCst)) {
456             // If the number of currently active tasks exceeds the preset scheduling-latency task count,
457             // the period becomes the number of active tasks times the minimum preemption granularity.
458             return nr_running * SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);
459         } else {
460             // Otherwise, the period is set to SCHED_NR_LATENCY.
461             return SCHED_NR_LATENCY.load(Ordering::SeqCst);
462         }
463     }
464 
465     /// ## Compute the size of a task's virtual-runtime slice
466     ///
467     /// vruntime = runtime / weight
468     #[allow(dead_code)]
469     pub fn sched_vslice(&self, entity: Arc<FairSchedEntity>) -> u64 {
470         let slice = self.sched_slice(entity.clone());
471         return entity.calculate_delta_fair(slice);
472     }
473 
474     /// ## Compute the size of a task's actual (wall-clock) runtime slice
475     #[allow(dead_code)]
476     pub fn sched_slice(&self, mut entity: Arc<FairSchedEntity>) -> u64 {
477         let mut nr_running = self.nr_running;
478         if SCHED_FEATURES.contains(SchedFeature::ALT_PERIOD) {
479             nr_running = self.h_nr_running;
480         }
481 
482         // Compute the whole slice for one scheduling period
483         let mut slice = Self::sched_period(nr_running + (!entity.on_rq()) as u64);
484 
485         // This loops all the way up to the root node.
486         // For example, with a task group A and a process B belonging to A, B's time allocation depends on both A's weight and B's own weight.
487         FairSchedEntity::for_each_in_group(&mut entity, |se| {
488             if unlikely(!se.on_rq()) {
489                 se.cfs_rq().force_mut().load.update_load_add(se.load.weight);
490             }
491             slice = se
492                 .cfs_rq()
493                 .force_mut()
494                 .load
495                 .calculate_delta(slice, se.load.weight);
496 
497             (true, true)
498         });
499 
500         if SCHED_FEATURES.contains(SchedFeature::BASE_SLICE) {
501             // TODO: IDLE?
502             let min_gran = SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);
503 
504             slice = min_gran.max(slice)
505         }
506 
507         slice
508     }
509 
510     /// ## When the time slice expires, check whether the current task needs to be preempted.
511     /// If so, preempt it and make sure it cannot be re-elected as the next task to run through "buddy favours" toward other tasks.
512     #[allow(dead_code)]
513     pub fn check_preempt_tick(&mut self, curr: Arc<FairSchedEntity>) {
514         // Compute this entity's ideal runtime
515         let ideal_runtime = self.sched_slice(curr.clone());
516 
517         let delta_exec = curr.sum_exec_runtime - curr.prev_sum_exec_runtime;
518 
519         if delta_exec > ideal_runtime {
520             // The actual runtime exceeded the ideal runtime
521             self.rq().resched_current();
522 
523             self.clear_buddies(&curr);
524             return;
525         }
526 
527         if delta_exec < SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst) {
528             return;
529         }
530 
531         todo!()
532     }
533 
534     pub fn clear_buddies(&mut self, se: &Arc<FairSchedEntity>) {
535         if let Some(next) = self.next.upgrade() {
536             if Arc::ptr_eq(&next, se) {
537                 se.clear_buddies();
538             }
539         }
540     }
541 
542     /// Handle a scheduling entity's time-slice expiry event
543     pub fn entity_tick(&mut self, curr: Arc<FairSchedEntity>, queued: bool) {
544         // Update the runtime statistics of the current scheduling entity
545         self.update_current();
546 
547         self.update_load_avg(&curr, UpdateAvgFlags::UPDATE_TG);
548 
549         // Update group-scheduling state
550         curr.update_cfs_group();
551 
552         if queued {
553             self.rq().resched_current();
554             return;
555         }
556     }
557 
558     /// Update the runtime statistics of the current scheduling entity
559     pub fn update_current(&mut self) {
560         let curr = self.current();
561         if unlikely(curr.is_none()) {
562             return;
563         }
564 
565         let now = self.rq().clock_task();
566         let curr = curr.unwrap();
567 
568         fence(Ordering::SeqCst);
569         if unlikely(now <= curr.exec_start) {
570             // warn!(
571             //     "update_current return now <= curr.exec_start now {now} execstart {}",
572             //     curr.exec_start
573             // );
574             return;
575         }
576 
577         fence(Ordering::SeqCst);
578         let delta_exec = now - curr.exec_start;
579 
580         let curr = curr.force_mut();
581 
582         curr.exec_start = now;
583 
584         curr.sum_exec_runtime += delta_exec;
585 
586         // Increase the virtual runtime by the actual runtime, weighted by the entity's load
587         curr.vruntime += curr.calculate_delta_fair(delta_exec);
588         fence(Ordering::SeqCst);
589         self.update_deadline(&curr.self_arc());
590         self.update_min_vruntime();
591 
592         self.account_cfs_rq_runtime(delta_exec);
593     }
594 
595     /// Check whether the current cfs queue's runtime has run out
596     fn account_cfs_rq_runtime(&mut self, delta_exec: u64) {
597         if likely(self.runtime_remaining > delta_exec) {
598             self.runtime_remaining -= delta_exec;
599             // error!("runtime_remaining {}", self.runtime_remaining);
600             return;
601         }
602 
603         // warn!(
604         //     "runtime_remaining {} delta exec {delta_exec} nr_running {}",
605         //     self.runtime_remaining,
606         //     self.nr_running
607         // );
608         // FIXME: for now simply hand out another time slice
609         self.runtime_remaining = 5000 * NSEC_PER_MSEC as u64;
610 
611         if likely(self.current().is_some()) && self.nr_running > 1 {
612             // error!("account_cfs_rq_runtime");
613             self.rq().resched_current();
614         }
615     }
616 
617     /// Recompute the deadline; if the vruntime has reached it, trigger a reschedule
618     pub fn update_deadline(&mut self, se: &Arc<FairSchedEntity>) {
619         // error!("vruntime {} deadline {}", se.vruntime, se.deadline);
620         if se.vruntime < se.deadline {
621             return;
622         }
623 
624         se.force_mut().slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);
625 
626         se.force_mut().deadline = se.vruntime + se.calculate_delta_fair(se.slice);
627 
628         if self.nr_running > 1 {
629             self.rq().resched_current();
630             self.clear_buddies(se);
631         }
632     }
633 
634     /// ## Update the minimum virtual runtime
635     pub fn update_min_vruntime(&mut self) {
636         let curr = self.current();
637 
638         let mut vruntime = self.min_vruntime;
639 
640         if curr.is_some() {
641             let curr = curr.as_ref().unwrap();
642             if curr.on_rq() {
643                 vruntime = curr.vruntime;
644             } else {
645                 self.set_current(Weak::default());
646             }
647         }
648 
649         // Find the scheduling entity with the smallest virtual runtime
650         let leftmost = self.entities.get_first();
651         if let Some(leftmost) = leftmost {
652             let se = leftmost.1;
653 
654             if curr.is_none() {
655                 vruntime = se.vruntime;
656             } else {
657                 vruntime = vruntime.min(se.vruntime);
658             }
659         }
660 
661         self.min_vruntime = self.__update_min_vruntime(vruntime);
662     }
663 
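    /// Advance min_vruntime monotonically. Because entity keys (and therefore
    /// avg_vruntime) are stored relative to min_vruntime, moving min_vruntime forward
    /// by `delta` also subtracts `avg_load * delta` from avg_vruntime so the
    /// weighted-average bookkeeping stays consistent.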
664     fn __update_min_vruntime(&mut self, vruntime: u64) -> u64 {
665         let mut min_vruntime = self.min_vruntime;
666 
667         let delta = vruntime as i64 - min_vruntime as i64;
668         if delta > 0 {
669             self.avg_vruntime -= self.avg_load * delta;
670             min_vruntime = vruntime;
671         }
672 
673         return min_vruntime;
674     }
675 
676     /// Whether se is the currently running entity
677     pub fn is_curr(&self, se: &Arc<FairSchedEntity>) -> bool {
678         if self.current().is_none() {
679             false
680         } else {
681             // Check whether the current entity and the given se are the same
682             Arc::ptr_eq(se, self.current().as_ref().unwrap())
683         }
684     }
685 
686     /// Re-weight an entity on this queue, fixing up its queue load, vruntime and deadline
687     pub fn reweight_entity(&mut self, se: Arc<FairSchedEntity>, weight: u64) {
688         // Check whether it is the current task
689         let is_curr = self.is_curr(&se);
690 
691         // If se is on the queue
692         if se.on_rq() {
693             // If it is the current task
694             if is_curr {
695                 self.update_current();
696             } else {
697                 // Otherwise, dequeue it
698                 self.inner_dequeue_entity(&se);
699             }
700 
701             // Subtract its weight
702             self.load.update_load_sub(se.load.weight);
703         }
704 
705         self.dequeue_load_avg(&se);
706 
707         if !se.on_rq() {
708             se.force_mut().vlag = se.vlag * se.load.weight as i64 / weight as i64;
709         } else {
710             self.reweight_eevdf(&se, weight);
711         }
712         se.force_mut().load.update_load_set(weight);
713 
714         // SMP
715         let divider = se.avg.get_pelt_divider();
716         se.force_mut().avg.load_avg = LoadWeight::scale_load_down(se.load.weight) as usize
717             * se.avg.load_sum as usize
718             / divider;
719 
720         self.enqueue_load_avg(se.clone());
721 
722         if se.on_rq() {
723             self.load.update_load_add(se.load.weight);
724             if !is_curr {
725                 self.inner_enqueue_entity(&se);
726             }
727 
728             self.update_min_vruntime();
729         }
730     }
731 
732     /// Recompute a scheduling entity's vruntime and deadline for a new weight (EEVDF reweighting)
733     fn reweight_eevdf(&mut self, se: &Arc<FairSchedEntity>, weight: u64) {
734         let old_weight = se.load.weight;
735         let avg_vruntime = self.avg_vruntime();
736         let mut vlag;
737         if avg_vruntime != se.vruntime {
738             vlag = avg_vruntime as i64 - se.vruntime as i64;
739             vlag = vlag * old_weight as i64 / weight as i64;
740             se.force_mut().vruntime = (avg_vruntime as i64 - vlag) as u64;
741         }
742 
743         let mut vslice = se.deadline as i64 - avg_vruntime as i64;
744         vslice = vslice * old_weight as i64 / weight as i64;
745         se.force_mut().deadline = avg_vruntime + vslice as u64;
746     }
747 
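    /// Weighted average virtual runtime V of the queue, used by EEVDF for lag and
    /// eligibility checks. avg_vruntime/avg_load track sum(w_i * (v_i - min_vruntime))
    /// and sum(w_i) over queued entities; the current entity is folded in on the fly,
    /// and the result is shifted back by min_vruntime.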
748     fn avg_vruntime(&self) -> u64 {
749         let curr = self.current();
750         let mut avg = self.avg_vruntime;
751         let mut load = self.avg_load;
752 
753         if let Some(curr) = curr {
754             if curr.on_rq() {
755                 let weight = LoadWeight::scale_load_down(curr.load.weight);
756                 avg += self.entity_key(&curr) * weight as i64;
757                 load += weight as i64;
758             }
759         }
760 
761         if load > 0 {
762             if avg < 0 {
763                 avg -= load - 1;
764             }
765 
766             avg /= load;
767         }
768 
769         return self.min_vruntime + avg as u64;
770     }
771 
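    /// Key used for the avg_vruntime bookkeeping: the entity's vruntime expressed
    /// relative to the queue's min_vruntime, which keeps the running sums small.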
772     #[inline]
773     pub fn entity_key(&self, se: &Arc<FairSchedEntity>) -> i64 {
774         return se.vruntime as i64 - self.min_vruntime as i64;
775     }
776 
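    /// Maintain the running sums behind avg_vruntime() when an entity is inserted:
    /// avg_vruntime accumulates key * weight and avg_load accumulates the weight.
    /// avg_vruntime_sub() below undoes this when the entity is removed.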
777     pub fn avg_vruntime_add(&mut self, se: &Arc<FairSchedEntity>) {
778         let weight = LoadWeight::scale_load_down(se.load.weight);
779 
780         let key = self.entity_key(se);
781 
782         let avg_vruntime = self.avg_vruntime + key * weight as i64;
783 
784         self.avg_vruntime = avg_vruntime;
785         self.avg_load += weight as i64;
786     }
787 
788     pub fn avg_vruntime_sub(&mut self, se: &Arc<FairSchedEntity>) {
789         let weight = LoadWeight::scale_load_down(se.load.weight);
790 
791         let key = self.entity_key(se);
792 
793         let avg_vruntime = self.avg_vruntime - key * weight as i64;
794 
795         self.avg_vruntime = avg_vruntime;
796         self.avg_load -= weight as i64;
797     }
798 
799     /// Compute the initial vruntime and related fields when placing an entity on the queue.
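    /// The entity's stored lag is first scaled by (queue load + its own weight) / queue load,
    /// so that re-adding its weight does not shift the queue's average vruntime; then
    /// vruntime = V - lag and deadline = vruntime + vslice (vslice is halved for ENQUEUE_INITIAL).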
800     fn place_entity(&mut self, se: Arc<FairSchedEntity>, flags: EnqueueFlag) {
801         let vruntime = self.avg_vruntime();
802         let mut lag = 0;
803 
804         let se = se.force_mut();
805         se.slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);
806 
807         let mut vslice = se.calculate_delta_fair(se.slice);
808 
809         if self.nr_running > 0 {
810             let curr = self.current();
811 
812             lag = se.vlag;
813 
814             let mut load = self.avg_load;
815 
816             if let Some(curr) = curr {
817                 if curr.on_rq() {
818                     load += LoadWeight::scale_load_down(curr.load.weight) as i64;
819                 }
820             }
821 
822             lag *= load + LoadWeight::scale_load_down(se.load.weight) as i64;
823 
824             if load == 0 {
825                 load = 1;
826             }
827 
828             lag /= load;
829         }
830 
831         se.vruntime = vruntime - lag as u64;
832 
833         if flags.contains(EnqueueFlag::ENQUEUE_INITIAL) {
834             vslice /= 2;
835         }
836 
837         se.deadline = se.vruntime + vslice;
838     }
839 
840     /// Update load averages
841     fn update_load_avg(&mut self, se: &Arc<FairSchedEntity>, flags: UpdateAvgFlags) {
842         let now = self.cfs_rq_clock_pelt();
843 
844         if se.avg.last_update_time > 0 && !flags.contains(UpdateAvgFlags::SKIP_AGE_LOAD) {
845             se.force_mut().update_load_avg(self, now);
846         }
847 
848         let mut decayed = self.update_self_load_avg(now);
849         decayed |= se.force_mut().propagate_entity_load_avg() as u32;
850 
851         if se.avg.last_update_time > 0 && flags.contains(UpdateAvgFlags::DO_ATTACH) {
852             todo!()
853         } else if flags.contains(UpdateAvgFlags::DO_ATTACH) {
854             self.detach_entity_load_avg(se);
855         } else if decayed > 0 {
856             // cfs_rq_util_change
857 
858             todo!()
859         }
860     }
861 
862     /// Detach the entity's load averages from its cfs queue
863     fn detach_entity_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
864         self.dequeue_load_avg(se);
865 
866         sub_positive(&mut self.avg.util_avg, se.avg.util_avg);
867         sub_positive(&mut (self.avg.util_sum as usize), se.avg.util_sum as usize);
868         self.avg.util_sum = self
869             .avg
870             .util_sum
871             .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
872 
873         sub_positive(&mut self.avg.runnable_avg, se.avg.runnable_avg);
874         sub_positive(
875             &mut (self.avg.runnable_sum as usize),
876             se.avg.runnable_sum as usize,
877         );
878         self.avg.runnable_sum = self
879             .avg
880             .runnable_sum
881             .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
882 
883         self.propagate = 1;
884         self.prop_runnable_sum += se.avg.load_sum as isize;
885     }
886 
887     fn update_self_load_avg(&mut self, now: u64) -> u32 {
888         let mut removed_load = 0;
889         let mut removed_util = 0;
890         let mut removed_runnable = 0;
891 
892         let mut decayed = 0;
893 
894         if self.removed.lock().nr > 0 {
895             let mut removed_guard = self.removed.lock();
896             let divider = self.avg.get_pelt_divider();
897 
898             swap::<usize>(&mut removed_guard.util_avg, &mut removed_util);
899             swap::<usize>(&mut removed_guard.load_avg, &mut removed_load);
900             swap::<usize>(&mut removed_guard.runnable_avg, &mut removed_runnable);
901 
902             removed_guard.nr = 0;
903 
904             let mut r = removed_load;
905 
906             sub_positive(&mut self.avg.load_avg, r);
907             sub_positive(&mut (self.avg.load_sum as usize), r * divider);
908 
909             self.avg.load_sum = self
910                 .avg
911                 .load_sum
912                 .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
913 
914             r = removed_util;
915             sub_positive(&mut self.avg.util_avg, r);
916             sub_positive(&mut (self.avg.util_sum as usize), r * divider);
917             self.avg.util_sum = self
918                 .avg
919                 .util_sum
920                 .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
921 
922             r = removed_runnable;
923             sub_positive(&mut self.avg.runnable_avg, r);
924             sub_positive(&mut (self.avg.runnable_sum as usize), r * divider);
925             self.avg.runnable_sum = self
926                 .avg
927                 .runnable_sum
928                 .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
929 
930             drop(removed_guard);
931             self.add_task_group_propagate(
932                 -(removed_runnable as isize * divider as isize) >> SCHED_CAPACITY_SHIFT,
933             );
934 
935             decayed = 1;
936         }
937 
938         decayed |= self.__update_load_avg(now) as u32;
939 
940         self.last_update_time_copy = self.avg.last_update_time;
941 
942         return decayed;
943     }
944 
945     fn __update_load_avg(&mut self, now: u64) -> bool {
946         if self.avg.update_load_sum(
947             now,
948             LoadWeight::scale_load_down(self.load.weight) as u32,
949             self.h_nr_running as u32,
950             self.current().is_some() as u32,
951         ) {
952             self.avg.update_load_avg(1);
953             return true;
954         }
955 
956         return false;
957     }
958 
959     fn add_task_group_propagate(&mut self, runnable_sum: isize) {
960         self.propagate = 1;
961         self.prop_runnable_sum += runnable_sum;
962     }
963 
964     /// Enqueue the entity onto this queue
965     pub fn enqueue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: EnqueueFlag) {
966         let is_curr = self.is_curr(se);
967 
968         if is_curr {
969             self.place_entity(se.clone(), flags);
970         }
971 
972         self.update_current();
973 
974         self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG | UpdateAvgFlags::DO_ATTACH);
975 
976         se.force_mut().update_runnable();
977 
978         se.update_cfs_group();
979 
980         if !is_curr {
981             self.place_entity(se.clone(), flags);
982         }
983 
984         self.account_entity_enqueue(se);
985 
986         if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
987             se.force_mut().exec_start = 0;
988         }
989 
990         if !is_curr {
991             self.inner_enqueue_entity(se);
992         }
993 
994         se.force_mut().on_rq = OnRq::Queued;
995 
996         if self.nr_running == 1 {
997             // The only entity on the queue is the one just enqueued above
998             // TODO: throttle
999         }
1000     }
1001 
1002     pub fn dequeue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: DequeueFlag) {
1003         let mut action = UpdateAvgFlags::UPDATE_TG;
1004 
1005         if se.is_task() && se.on_rq == OnRq::Migrating {
1006             action |= UpdateAvgFlags::DO_DETACH;
1007         }
1008 
1009         self.update_current();
1010 
1011         self.update_load_avg(se, action);
1012 
1013         se.force_mut().update_runnable();
1014 
1015         self.clear_buddies(se);
1016 
1017         self.update_entity_lag(se);
1018 
1019         if let Some(curr) = self.current() {
1020             if !Arc::ptr_eq(&curr, se) {
1021                 self.inner_dequeue_entity(se);
1022             }
1023         } else {
1024             self.inner_dequeue_entity(se);
1025         }
1026 
1027         se.force_mut().on_rq = OnRq::None;
1028 
1029         self.account_entity_dequeue(se);
1030 
1031         // return_cfs_rq_runtime
1032 
1033         se.update_cfs_group();
1034 
1035         if flags & (DequeueFlag::DEQUEUE_SAVE | DequeueFlag::DEQUEUE_MOVE)
1036             != DequeueFlag::DEQUEUE_SAVE
1037         {
1038             self.update_min_vruntime();
1039         }
1040 
1041         if self.nr_running == 0 {
1042             self.update_idle_clock_pelt()
1043         }
1044     }
1045 
1046     /// Put the previously scheduled task back onto the queue
1047     pub fn put_prev_entity(&mut self, prev: Arc<FairSchedEntity>) {
1048         if prev.on_rq() {
1049             self.update_current();
1050         }
1051 
1052         if prev.on_rq() {
1053             self.inner_enqueue_entity(&prev);
1054         }
1055 
1056         self.set_current(Weak::default());
1057     }
1058 
1059     /// Set the next task to run as current
1060     pub fn set_next_entity(&mut self, se: &Arc<FairSchedEntity>) {
1061         self.clear_buddies(se);
1062 
1063         if se.on_rq() {
1064             self.inner_dequeue_entity(se);
1065             self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG);
1066             se.force_mut().vlag = se.deadline as i64;
1067         }
1068 
1069         self.set_current(Arc::downgrade(se));
1070 
1071         se.force_mut().prev_sum_exec_runtime = se.sum_exec_runtime;
1072     }
1073 
1074     fn update_idle_clock_pelt(&mut self) {
1075         let throttled = if unlikely(self.throttled_count > 0) {
1076             u64::MAX
1077         } else {
1078             self.throttled_clock_pelt_time
1079         };
1080 
1081         self.throttled_clock_pelt = throttled;
1082     }
1083 
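    /// Record the entity's lag (V - v_i) as it leaves the queue, clamped to
    /// +/- calculate_delta_fair(max(TICK_NESC, 2 * slice)), so that place_entity()
    /// can restore a bounded amount of owed or excess service on the next enqueue.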
1084     fn update_entity_lag(&mut self, se: &Arc<FairSchedEntity>) {
1085         let lag = self.avg_vruntime() as i64 - se.vruntime as i64;
1086 
1087         let limit = se.calculate_delta_fair((TICK_NESC as u64).max(2 * se.slice)) as i64;
1088 
1089         se.force_mut().vlag = if lag < -limit {
1090             -limit
1091         } else if lag > limit {
1092             limit
1093         } else {
1094             lag
1095         }
1096     }
1097 
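    /// Bookkeeping for an entity joining the queue: add its weight to the queue load,
    /// push tasks onto the per-CPU cfs_tasks list and bump the running counters;
    /// account_entity_dequeue() below undoes this.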
1098     fn account_entity_enqueue(&mut self, se: &Arc<FairSchedEntity>) {
1099         self.load.update_load_add(se.load.weight);
1100 
1101         if se.is_task() {
1102             let rq = self.rq();
1103             let (rq, _guard) = rq.self_lock();
1104             // TODO:numa
1105             rq.cfs_tasks.push_back(se.clone());
1106         }
1107         self.nr_running += 1;
1108         if se.is_idle() {
1109             self.idle_nr_running += 1;
1110         }
1111     }
1112 
1113     fn account_entity_dequeue(&mut self, se: &Arc<FairSchedEntity>) {
1114         self.load.update_load_sub(se.load.weight);
1115 
1116         if se.is_task() {
1117             let rq = self.rq();
1118             let (rq, _guard) = rq.self_lock();
1119 
1120             // TODO:numa
1121             let _ = rq.cfs_tasks.extract_if(|x| Arc::ptr_eq(x, se));
1122         }
1123 
1124         self.nr_running -= 1;
1125         if se.is_idle() {
1126             self.idle_nr_running -= 1;
1127         }
1128     }
1129 
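    /// Insert the entity into the red-black tree keyed by its vruntime and fold its
    /// weight into the avg_vruntime/avg_load sums; inner_dequeue_entity() below removes
    /// it again (shifting colliding keys, since the tree is keyed by raw vruntime).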
1130     pub fn inner_enqueue_entity(&mut self, se: &Arc<FairSchedEntity>) {
1131         self.avg_vruntime_add(se);
1132         se.force_mut().min_deadline = se.deadline;
1133         self.entities.insert(se.vruntime, se.clone());
1134         // warn!(
1135         //     "enqueue pcb {:?} cfsrq {:?}",
1136         //     se.pcb().pid(),
1137         //     self.entities
1138         //         .iter()
1139         //         .map(|x| (x.0, x.1.pcb().pid()))
1140         //         .collect::<Vec<_>>()
1141         // );
1142         // send_to_default_serial8250_port(
1143         //     format!(
1144         //         "enqueue pcb {:?} cfsrq {:?}\n",
1145         //         se.pcb().pid(),
1146         //         self.entities
1147         //             .iter()
1148         //             .map(|x| (x.0, x.1.pcb().pid()))
1149         //             .collect::<Vec<_>>()
1150         //     )
1151         //     .as_bytes(),
1152         // );
1153     }
1154 
1155     fn inner_dequeue_entity(&mut self, se: &Arc<FairSchedEntity>) {
1156         // warn!(
1157         //     "before dequeue pcb {:?} cfsrq {:?}",
1158         //     se.pcb().pid(),
1159         //     self.entities
1160         //         .iter()
1161         //         .map(|x| (x.0, x.1.pcb().pid()))
1162         //         .collect::<Vec<_>>()
1163         // );
1164 
1165         // send_to_default_serial8250_port(
1166         //     format!(
1167         //         "before dequeue pcb {:?} cfsrq {:?}\n",
1168         //         se.pcb().pid(),
1169         //         self.entities
1170         //             .iter()
1171         //             .map(|x| (x.0, x.1.pcb().pid()))
1172         //             .collect::<Vec<_>>()
1173         //     )
1174         //     .as_bytes(),
1175         // );
1176 
1177         let mut i = 1;
1178         while let Some(rm) = self.entities.remove(&se.vruntime) {
1179             if Arc::ptr_eq(&rm, se) {
1180                 break;
1181             }
1182             rm.force_mut().vruntime += i;
1183             self.entities.insert(rm.vruntime, rm);
1184 
1185             i += 1;
1186         }
1187         // send_to_default_serial8250_port(
1188         //     format!(
1189         //         "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}\n",
1190         //         se.pcb().pid(),
1191         //         remove.pcb().pid(),
1192         //         self.entities
1193         //             .iter()
1194         //             .map(|x| (x.0, x.1.pcb().pid()))
1195         //             .collect::<Vec<_>>()
1196         //     )
1197         //     .as_bytes(),
1198         // );
1199         // warn!(
1200         //     "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}",
1201         //     se.pcb().pid(),
1202         //     remove.pcb().pid(),
1203         //     self.entities
1204         //         .iter()
1205         //         .map(|x| (x.0, x.1.pcb().pid()))
1206         //         .collect::<Vec<_>>()
1207         // );
1208         self.avg_vruntime_sub(se);
1209     }
1210 
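    /// Add the entity's PELT load contribution to this queue's aggregated
    /// load_avg/load_sum; dequeue_load_avg() below removes it with saturation and keeps
    /// load_sum at or above load_avg * PELT_MIN_DIVIDER.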
1211     pub fn enqueue_load_avg(&mut self, se: Arc<FairSchedEntity>) {
1212         self.avg.load_avg += se.avg.load_avg;
1213         self.avg.load_sum += LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
1214     }
1215 
1216     pub fn dequeue_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
1217         if self.avg.load_avg > se.avg.load_avg {
1218             self.avg.load_avg -= se.avg.load_avg;
1219         } else {
1220             self.avg.load_avg = 0;
1221         };
1222 
1223         let se_load = LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
1224 
1225         if self.avg.load_sum > se_load {
1226             self.avg.load_sum -= se_load;
1227         } else {
1228             self.avg.load_sum = 0;
1229         }
1230 
1231         self.avg.load_sum = self
1232             .avg
1233             .load_sum
1234             .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64)
1235     }
1236 
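    /// Propagate a child group's utilization up the hierarchy: copy the group queue's
    /// util_avg into its owning entity, recompute util_sum from the PELT divider, and
    /// apply the resulting deltas to this (parent) queue's averages.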
1237     pub fn update_task_group_util(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
1238         let mut delta_sum = gcfs_rq.avg.load_avg as isize - se.avg.load_avg as isize;
1239         let delta_avg = delta_sum;
1240 
1241         if delta_avg == 0 {
1242             return;
1243         }
1244 
1245         let divider = self.avg.get_pelt_divider();
1246 
1247         let se = se.force_mut();
1248         se.avg.util_avg = gcfs_rq.avg.util_avg;
1249         let new_sum = se.avg.util_avg * divider;
1250         delta_sum = new_sum as isize - se.avg.util_sum as isize;
1251 
1252         se.avg.util_sum = new_sum as u64;
1253 
1254         add_positive(&mut (self.avg.util_avg as isize), delta_avg);
1255         add_positive(&mut (self.avg.util_sum as isize), delta_sum);
1256 
1257         self.avg.util_sum = self
1258             .avg
1259             .util_sum
1260             .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
1261     }
1262 
1263     pub fn update_task_group_runnable(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
1264         let mut delta_sum = gcfs_rq.avg.runnable_avg as isize - se.avg.runnable_avg as isize;
1265         let delta_avg = delta_sum;
1266 
1267         if delta_avg == 0 {
1268             return;
1269         }
1270 
1271         let divider = self.avg.get_pelt_divider();
1272 
1273         let se = se.force_mut();
1274         se.avg.runnable_avg = gcfs_rq.avg.runnable_avg;
1275         let new_sum = se.avg.runnable_sum * divider as u64;
1276         delta_sum = new_sum as isize - se.avg.runnable_sum as isize;
1277 
1278         se.avg.runnable_sum = new_sum;
1279 
1280         add_positive(&mut (self.avg.runnable_avg as isize), delta_avg);
1281         add_positive(&mut (self.avg.runnable_sum as isize), delta_sum);
1282 
1283         self.avg.runnable_sum = self
1284             .avg
1285             .runnable_sum
1286             .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
1287     }
1288 
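    /// Propagate a child group's accumulated runnable load (prop_runnable_sum) up the
    /// hierarchy: derive a new load_sum/load_avg for the group's owning entity and
    /// apply the resulting deltas to this (parent) queue's load averages.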
1289     pub fn update_task_group_load(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &mut CfsRunQueue) {
1290         let mut runnable_sum = gcfs_rq.prop_runnable_sum;
1291 
1292         let mut load_sum = 0;
1293 
1294         if runnable_sum == 0 {
1295             return;
1296         }
1297 
1298         gcfs_rq.prop_runnable_sum = 0;
1299 
1300         let divider = self.avg.get_pelt_divider();
1301 
1302         if runnable_sum >= 0 {
1303             runnable_sum += se.avg.load_sum as isize;
1304             runnable_sum = runnable_sum.min(divider as isize);
1305         } else {
1306             if LoadWeight::scale_load_down(gcfs_rq.load.weight) > 0 {
1307                 load_sum = gcfs_rq.avg.load_sum / LoadWeight::scale_load_down(gcfs_rq.load.weight);
1308             }
1309 
1310             runnable_sum = se.avg.load_sum.min(load_sum) as isize;
1311         }
1312 
1313         let running_sum = se.avg.util_sum as isize >> SCHED_CAPACITY_SHIFT;
1314         runnable_sum = runnable_sum.max(running_sum);
1315 
1316         load_sum = LoadWeight::scale_load_down(se.load.weight) * runnable_sum as u64;
1317         let load_avg = load_sum / divider as u64;
1318 
1319         let delta_avg = load_avg as isize - se.avg.load_avg as isize;
1320         if delta_avg == 0 {
1321             return;
1322         }
1323 
1324         let delta_sum = load_sum as isize
1325             - LoadWeight::scale_load_down(se.load.weight) as isize * se.avg.load_sum as isize;
1326 
1327         let se = se.force_mut();
1328         se.avg.load_sum = runnable_sum as u64;
1329         se.avg.load_avg = load_avg as usize;
1330 
1331         add_positive(&mut (self.avg.load_avg as isize), delta_avg);
1332         add_positive(&mut (self.avg.util_sum as isize), delta_sum);
1333 
1334         self.avg.load_sum = self
1335             .avg
1336             .load_sum
1337             .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
1338     }
1339 
1340     /// Pick the next task to run
1341     pub fn pick_next_entity(&self) -> Option<Arc<FairSchedEntity>> {
1342         if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
1343             && self.next().is_some()
1344             && self.entity_eligible(&self.next().unwrap())
1345         {
1346             return self.next();
1347         }
1348         self.entities.get_first().map(|val| val.1.clone())
1349     }
1350 
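    /// EEVDF eligibility test: an entity may be picked only if its vruntime does not
    /// exceed the queue's weighted average vruntime. The comparison is performed as
    /// sum(w_i * key_i) >= key(se) * sum(w_i) to avoid a division.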
1351     pub fn entity_eligible(&self, se: &Arc<FairSchedEntity>) -> bool {
1352         let curr = self.current();
1353         let mut avg = self.avg_vruntime;
1354         let mut load = self.avg_load;
1355 
1356         if let Some(curr) = curr {
1357             if curr.on_rq() {
1358                 let weight = LoadWeight::scale_load_down(curr.load.weight);
1359 
1360                 avg += self.entity_key(&curr) * weight as i64;
1361                 load += weight as i64;
1362             }
1363         }
1364 
1365         return avg >= self.entity_key(se) * load;
1366     }
1367 }
1368 
1369 pub struct CompletelyFairScheduler;
1370 
1371 impl CompletelyFairScheduler {
1372     /// Walk se and pse up to their nearest common ancestor (the level where they share a cfs queue)
1373     fn find_matching_se(se: &mut Arc<FairSchedEntity>, pse: &mut Arc<FairSchedEntity>) {
1374         let mut se_depth = se.depth;
1375         let mut pse_depth = pse.depth;
1376 
1377         while se_depth > pse_depth {
1378             se_depth -= 1;
1379             *se = se.parent().unwrap();
1380         }
1381 
1382         while pse_depth > se_depth {
1383             pse_depth -= 1;
1384             *pse = pse.parent().unwrap();
1385         }
1386 
1387         while !Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq()) {
1388             *se = se.parent().unwrap();
1389             *pse = pse.parent().unwrap();
1390         }
1391     }
1392 }
1393 
1394 impl Scheduler for CompletelyFairScheduler {
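    /// Enqueue a task's scheduling entity. The first pass walks up the hierarchy and
    /// enqueues every entity not yet on its cfs queue; the second pass continues to the
    /// root, refreshing load averages, runnable weights and group shares; finally the
    /// run queue's nr_running count is increased.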
1395     fn enqueue(
1396         rq: &mut CpuRunQueue,
1397         pcb: Arc<crate::process::ProcessControlBlock>,
1398         mut flags: EnqueueFlag,
1399     ) {
1400         let mut se = pcb.sched_info().sched_entity();
1401         let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
1402         let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
1403             if se.on_rq() {
1404                 return (false, true);
1405             }
1406 
1407             let binding = se.cfs_rq();
1408             let cfs_rq = binding.force_mut();
1409             cfs_rq.enqueue_entity(&se, flags);
1410 
1411             cfs_rq.h_nr_running += 1;
1412             cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;
1413 
1414             if cfs_rq.is_idle() {
1415                 idle_h_nr_running = true;
1416             }
1417 
1418             // TODO: cfs_rq_throttled
1419 
1420             flags = EnqueueFlag::ENQUEUE_WAKEUP;
1421 
1422             return (true, true);
1423         });
1424 
1425         if !should_continue {
1426             return;
1427         }
1428 
1429         if let Some(mut se) = se {
1430             FairSchedEntity::for_each_in_group(&mut se, |se| {
1431                 let binding = se.cfs_rq();
1432                 let cfs_rq = binding.force_mut();
1433 
1434                 cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);
1435 
1436                 let se = se.force_mut();
1437                 se.update_runnable();
1438 
1439                 se.update_cfs_group();
1440 
1441                 cfs_rq.h_nr_running += 1;
1442                 cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;
1443 
1444                 if cfs_rq.is_idle() {
1445                     idle_h_nr_running = true;
1446                 }
1447 
1448                 // TODO: cfs_rq_throttled
1449 
1450                 return (true, true);
1451             });
1452         }
1453 
1454         rq.add_nr_running(1);
1455     }
1456 
1457     fn dequeue(
1458         rq: &mut CpuRunQueue,
1459         pcb: Arc<crate::process::ProcessControlBlock>,
1460         mut flags: DequeueFlag,
1461     ) {
1462         let mut se = pcb.sched_info().sched_entity();
1463         let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
1464         let task_sleep = flags.contains(DequeueFlag::DEQUEUE_SLEEP);
1465         let was_sched_idle = rq.sched_idle_rq();
1466 
1467         let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
1468             let binding = se.cfs_rq();
1469             let cfs_rq = binding.force_mut();
1470             cfs_rq.dequeue_entity(&se, flags);
1471 
1472             cfs_rq.h_nr_running -= 1;
1473             cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;
1474 
1475             if cfs_rq.is_idle() {
1476                 idle_h_nr_running = true;
1477             }
1478 
1479             // TODO: cfs_rq_throttled
1480 
1481             if cfs_rq.load.weight > 0 {
1482                 let sep = se.parent();
1483 
1484                 if task_sleep && sep.is_some() {
1485                     todo!()
1486                 }
1487             }
1488 
1489             flags |= DequeueFlag::DEQUEUE_SLEEP;
1490 
1491             return (true, true);
1492         });
1493 
1494         if !should_continue {
1495             return;
1496         }
1497 
1498         if let Some(mut se) = se {
1499             FairSchedEntity::for_each_in_group(&mut se, |se| {
1500                 let binding = se.cfs_rq();
1501                 let cfs_rq = binding.force_mut();
1502 
1503                 cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);
1504 
1505                 let se = se.force_mut();
1506                 se.update_runnable();
1507 
1508                 se.update_cfs_group();
1509 
1510                 cfs_rq.h_nr_running -= 1;
1511                 cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;
1512 
1513                 if cfs_rq.is_idle() {
1514                     idle_h_nr_running = true;
1515                 }
1516 
1517                 // TODO: cfs_rq_throttled
1518 
1519                 return (true, true);
1520             });
1521         }
1522 
1523         rq.sub_nr_running(1);
1524 
1525         if unlikely(!was_sched_idle && rq.sched_idle_rq()) {
1526             rq.next_balance = clock();
1527         }
1528     }
1529 
1530     fn yield_task(rq: &mut CpuRunQueue) {
1531         let curr = rq.current();
1532         let se = curr.sched_info().sched_entity();
1533         let binding = se.cfs_rq();
1534         let cfs_rq = binding.force_mut();
1535 
1536         if unlikely(rq.nr_running == 1) {
1537             return;
1538         }
1539 
1540         cfs_rq.clear_buddies(&se);
1541 
1542         rq.update_rq_clock();
1543 
1544         cfs_rq.update_current();
1545 
1546         rq.clock_updata_flags |= ClockUpdataFlag::RQCF_REQ_SKIP;
1547 
1548         se.force_mut().deadline += se.calculate_delta_fair(se.slice);
1549     }
1550 
1551     fn check_preempt_currnet(
1552         rq: &mut CpuRunQueue,
1553         pcb: &Arc<crate::process::ProcessControlBlock>,
1554         wake_flags: WakeupFlags,
1555     ) {
1556         let curr = rq.current();
1557         let mut se = curr.sched_info().sched_entity();
1558         let mut pse = pcb.sched_info().sched_entity();
1559 
1560         if unlikely(Arc::ptr_eq(&se, &pse)) {
1561             return;
1562         }
1563 
1564         // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/fair.c#8160
1565 
1566         let _next_buddy_mark = if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
1567             && !wake_flags.contains(WakeupFlags::WF_FORK)
1568         {
1569             FairSchedEntity::for_each_in_group(&mut pse, |se| {
1570                 if !se.on_rq() {
1571                     return (false, true);
1572                 }
1573 
1574                 if se.is_idle() {
1575                     return (false, true);
1576                 }
1577 
1578                 se.cfs_rq().force_mut().next = Arc::downgrade(&se);
1579 
1580                 return (true, true);
1581             });
1582             true
1583         } else {
1584             false
1585         };
1586 
1587         if curr.flags().contains(ProcessFlags::NEED_SCHEDULE) {
1588             return;
1589         }
1590 
1591         if unlikely(curr.sched_info().policy() == SchedPolicy::IDLE)
1592             && likely(pcb.sched_info().policy() != SchedPolicy::IDLE)
1593         {
1594             rq.resched_current();
1595             return;
1596         }
1597 
1598         if unlikely(pcb.sched_info().policy() != SchedPolicy::CFS)
1599             || !SCHED_FEATURES.contains(SchedFeature::WAKEUP_PREEMPTION)
1600         {
1601             return;
1602         }
1603 
1604         Self::find_matching_se(&mut se, &mut pse);
1605 
1606         let cse_is_idle = se.is_idle();
1607         let pse_is_idle = pse.is_idle();
1608 
1609         if cse_is_idle && !pse_is_idle {
1610             rq.resched_current();
1611             return;
1612         }
1613 
1614         if cse_is_idle != pse_is_idle {
1615             return;
1616         }
1617 
1618         let cfs_rq = se.cfs_rq();
1619         cfs_rq.force_mut().update_current();
1620 
1621         if let Some((_, pick_se)) = cfs_rq.entities.get_first() {
1622             if Arc::ptr_eq(pick_se, &pse) {
1623                 rq.resched_current();
1624                 return;
1625             }
1626         }
1627     }
1628 
1629     fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<crate::process::ProcessControlBlock>> {
1630         let mut cfs_rq = Some(rq.cfs_rq());
1631         if cfs_rq.as_ref().unwrap().nr_running == 0 {
1632             return None;
1633         }
1634 
1635         let mut se;
1636         loop {
1637             let cfs = cfs_rq.unwrap();
1638             let cfs = cfs.force_mut();
1639             let curr = cfs.current();
1640             if let Some(curr) = curr {
1641                 if curr.on_rq() {
1642                     cfs.update_current();
1643                 } else {
1644                     cfs.set_current(Weak::default());
1645                 }
1646             }
1647 
1648             se = cfs.pick_next_entity();
1649             match se.clone() {
1650                 Some(val) => cfs_rq = val.my_cfs_rq.clone(),
1651                 None => {
1652                     break;
1653                 }
1654             }
1655 
1656             if cfs_rq.is_none() {
1657                 break;
1658             }
1659         }
1660 
1661         se.map(|se| se.pcb())
1662     }
1663 
1664     fn tick(_rq: &mut CpuRunQueue, pcb: Arc<crate::process::ProcessControlBlock>, queued: bool) {
1665         let mut se = pcb.sched_info().sched_entity();
1666 
1667         FairSchedEntity::for_each_in_group(&mut se, |se| {
1668             let binding = se.clone();
1669             let binding = binding.cfs_rq();
1670             let cfs_rq = binding.force_mut();
1671 
1672             cfs_rq.entity_tick(se, queued);
1673             (true, true)
1674         });
1675     }
1676 
1677     fn task_fork(pcb: Arc<ProcessControlBlock>) {
1678         let rq = cpu_rq(smp_get_processor_id().data() as usize);
1679         let se = pcb.sched_info().sched_entity();
1680 
1681         let (rq, _guard) = rq.self_lock();
1682 
1683         rq.update_rq_clock();
1684 
1685         let binding = se.cfs_rq();
1686         let cfs_rq = binding.force_mut();
1687 
1688         if cfs_rq.current().is_some() {
1689             cfs_rq.update_current();
1690         }
1691 
1692         cfs_rq.place_entity(se.clone(), EnqueueFlag::ENQUEUE_INITIAL);
1693     }
1694 
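    /// Pick the next task to run. When prev is absent or not a CFS task, the previous
    /// task is handed back to its own scheduler class and the cfs hierarchy is walked
    /// from the top, making each picked entity current via set_next_entity(). Otherwise
    /// the pick is compared against prev and, if they differ, put_prev_entity() and
    /// set_next_entity() are applied up to the entities' common level before returning
    /// the picked task.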
1695     fn pick_next_task(
1696         rq: &mut CpuRunQueue,
1697         prev: Option<Arc<ProcessControlBlock>>,
1698     ) -> Option<Arc<ProcessControlBlock>> {
1699         let mut cfs_rq = rq.cfs_rq();
1700         if rq.nr_running == 0 {
1701             return None;
1702         }
1703 
1704         if prev.is_none()
1705             || (prev.is_some() && prev.as_ref().unwrap().sched_info().policy() != SchedPolicy::CFS)
1706         {
1707             if let Some(prev) = prev {
1708                 match prev.sched_info().policy() {
1709                     SchedPolicy::RT => todo!(),
1710                     SchedPolicy::FIFO => todo!(),
1711                     SchedPolicy::CFS => todo!(),
1712                     SchedPolicy::IDLE => IdleScheduler::put_prev_task(rq, prev),
1713                 }
1714             }
1715             let mut se;
1716             loop {
1717                 match cfs_rq.pick_next_entity() {
1718                     Some(s) => se = s,
1719                     None => return None,
1720                 }
1721 
1722                 cfs_rq.force_mut().set_next_entity(&se);
1723 
1724                 match &se.my_cfs_rq {
1725                     Some(q) => cfs_rq = q.clone(),
1726                     None => break,
1727                 }
1728             }
1729 
1730             return Some(se.pcb());
1731         }
1732 
1733         let prev = prev.unwrap();
1734         let se = cfs_rq.pick_next_entity();
1735 
1736         if let Some(mut se) = se {
1737             loop {
1738                 let curr = cfs_rq.current();
1739                 if let Some(current) = curr {
1740                     if current.on_rq() {
1741                         cfs_rq.force_mut().update_current()
1742                     } else {
1743                         cfs_rq.force_mut().set_current(Weak::default());
1744                     }
1745                 }
1746 
1747                 match cfs_rq.pick_next_entity() {
1748                     Some(e) => se = e,
1749                     None => break,
1750                 }
1751 
1752                 if let Some(q) = se.my_cfs_rq.clone() {
1753                     cfs_rq = q;
1754                 } else {
1755                     break;
1756                 }
1757             }
1758 
1759             let p = se.pcb();
1760 
1761             if !Arc::ptr_eq(&prev, &p) {
1762                 let mut pse = prev.sched_info().sched_entity();
1763 
1764                 while !(Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq())
1765                     && Arc::ptr_eq(&se.cfs_rq(), &cfs_rq))
1766                 {
1767                     let se_depth = se.depth;
1768                     let pse_depth = pse.depth;
1769 
1770                     if se_depth <= pse_depth {
1771                         pse.cfs_rq().force_mut().put_prev_entity(pse.clone());
1772                         pse = pse.parent().unwrap();
1773                     }
1774 
1775                     if se_depth >= pse_depth {
1776                         se.cfs_rq().force_mut().set_next_entity(&se);
1777                         se = se.parent().unwrap();
1778                     }
1779                 }
1780 
1781                 cfs_rq.force_mut().put_prev_entity(pse);
1782                 cfs_rq.force_mut().set_next_entity(&se);
1783             }
1784 
1785             return Some(p);
1786         } else {
1787             return None;
1788         }
1789     }
1790 
1791     fn put_prev_task(_rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>) {
1792         let mut se = prev.sched_info().sched_entity();
1793 
1794         FairSchedEntity::for_each_in_group(&mut se, |se| {
1795             let cfs = se.cfs_rq();
1796             cfs.force_mut().put_prev_entity(se);
1797 
1798             return (true, true);
1799         });
1800     }
1801 }
1802