xref: /DragonOS/kernel/src/sched/fair.rs (revision cf7f801e1d50ee5b04cb728e4251a57f4183bfbc)
1 use core::intrinsics::likely;
2 use core::intrinsics::unlikely;
3 use core::mem::swap;
4 use core::sync::atomic::fence;
5 use core::sync::atomic::{AtomicU64, Ordering};
6 
7 use crate::libs::rbtree::RBTree;
8 use crate::libs::spinlock::SpinLock;
9 use crate::process::ProcessControlBlock;
10 use crate::process::ProcessFlags;
11 use crate::sched::clock::ClockUpdataFlag;
12 use crate::sched::{cpu_rq, SchedFeature, SCHED_FEATURES};
13 use crate::smp::core::smp_get_processor_id;
14 use crate::time::jiffies::TICK_NESC;
15 use crate::time::timer::clock;
16 use crate::time::NSEC_PER_MSEC;
17 use alloc::sync::{Arc, Weak};
18 
19 use super::idle::IdleScheduler;
20 use super::pelt::{add_positive, sub_positive, SchedulerAvg, UpdateAvgFlags, PELT_MIN_DIVIDER};
21 use super::{
22     CpuRunQueue, DequeueFlag, EnqueueFlag, LoadWeight, OnRq, SchedPolicy, Scheduler, TaskGroup,
23     WakeupFlags, SCHED_CAPACITY_SHIFT,
24 };
25 
26 /// 用于设置 CPU-bound 任务的最小抢占粒度的参数。
27 /// 默认值为 0.75 毫秒乘以(1 加上 CPU 数量的二进制对数),单位为纳秒。
28 /// 这个值影响到任务在 CPU-bound 情况下的抢占行为。
29 static SYSCTL_SHCED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
30 /// 规范化最小抢占粒度参数
31 #[allow(dead_code)]
32 static NORMALIZED_SYSCTL_SCHED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
33 
34 static SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
35 #[allow(dead_code)]
36 static NORMALIZED_SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
37 
38 /// 预设的调度延迟任务数量
39 static SCHED_NR_LATENCY: AtomicU64 = AtomicU64::new(8);
40 
41 /// 调度实体单位,一个调度实体可以是一个进程、一个进程组或者是一个用户等等划分
42 #[derive(Debug)]
43 pub struct FairSchedEntity {
44     /// 负载相关
45     pub load: LoadWeight,
46     pub deadline: u64,
47     pub min_deadline: u64,
48 
49     /// 是否在运行队列中
50     pub on_rq: OnRq,
51     /// 当前调度实体的开始执行时间
52     pub exec_start: u64,
53     /// 总运行时长
54     pub sum_exec_runtime: u64,
55     /// 虚拟运行时间
56     pub vruntime: u64,
57     /// 进程的调度延迟 它等于进程的权重(weight)乘以(V - v_i),其中V是系统当前的时间,v_i是进程的运行时间
58     pub vlag: i64,
59     // 运行时间片
60     pub slice: u64,
61     /// 上一个调度实体运行总时间
62     pub prev_sum_exec_runtime: u64,
63 
64     pub avg: SchedulerAvg,
65 
66     /// 父节点
67     parent: Weak<FairSchedEntity>,
68 
69     pub depth: u32,
70 
71     /// 指向自身
72     self_ref: Weak<FairSchedEntity>,
73 
74     /// 所在的CFS运行队列
75     cfs_rq: Weak<CfsRunQueue>,
76 
77     /// group持有的私有cfs队列
78     my_cfs_rq: Option<Arc<CfsRunQueue>>,
79 
80     runnable_weight: u64,
81 
82     pcb: Weak<ProcessControlBlock>,
83 }
84 
85 impl FairSchedEntity {
86     pub fn new() -> Arc<Self> {
87         let ret = Arc::new(Self {
88             parent: Weak::new(),
89             self_ref: Weak::new(),
90             pcb: Weak::new(),
91             cfs_rq: Weak::new(),
92             my_cfs_rq: None,
93             on_rq: OnRq::None,
94             slice: SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst),
95             load: Default::default(),
96             deadline: Default::default(),
97             min_deadline: Default::default(),
98             exec_start: Default::default(),
99             sum_exec_runtime: Default::default(),
100             vruntime: Default::default(),
101             vlag: Default::default(),
102             prev_sum_exec_runtime: Default::default(),
103             avg: Default::default(),
104             depth: Default::default(),
105             runnable_weight: Default::default(),
106         });
107 
108         ret.force_mut().self_ref = Arc::downgrade(&ret);
109 
110         ret
111     }
112 }
113 
114 impl FairSchedEntity {
115     pub fn self_arc(&self) -> Arc<FairSchedEntity> {
116         self.self_ref.upgrade().unwrap()
117     }
118 
119     #[inline]
120     pub fn on_rq(&self) -> bool {
121         self.on_rq != OnRq::None
122     }
123 
124     pub fn pcb(&self) -> Arc<ProcessControlBlock> {
125         self.pcb.upgrade().unwrap()
126     }
127 
128     pub fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) {
129         self.pcb = pcb
130     }
131 
132     #[inline]
133     pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
134         self.cfs_rq.upgrade().unwrap()
135     }
136 
137     pub fn set_cfs(&mut self, cfs: Weak<CfsRunQueue>) {
138         self.cfs_rq = cfs;
139     }
140 
141     pub fn parent(&self) -> Option<Arc<FairSchedEntity>> {
142         self.parent.upgrade()
143     }
144 
145     #[allow(clippy::mut_from_ref)]
146     pub fn force_mut(&self) -> &mut Self {
147         unsafe {
148             let p = self as *const Self as usize;
149             (p as *mut Self).as_mut().unwrap()
150         }
151     }
152 
153     /// 判断是否是进程持有的调度实体
154     #[inline]
155     pub fn is_task(&self) -> bool {
156         // TODO: 调度组
157         true
158     }
159 
160     #[inline]
161     pub fn is_idle(&self) -> bool {
162         if self.is_task() {
163             return self.pcb().sched_info().policy() == SchedPolicy::IDLE;
164         }
165 
166         return self.cfs_rq().is_idle();
167     }
168 
169     pub fn clear_buddies(&self) {
170         let mut se = self.self_arc();
171 
172         Self::for_each_in_group(&mut se, |se| {
173             let binding = se.cfs_rq();
174             let cfs_rq = binding.force_mut();
175 
176             if let Some(next) = cfs_rq.next.upgrade() {
177                 if !Arc::ptr_eq(&next, &se) {
178                     return (false, true);
179                 }
180             }
181             cfs_rq.next = Weak::new();
182             return (true, true);
183         });
184     }
185 
186     pub fn calculate_delta_fair(&self, delta: u64) -> u64 {
187         if unlikely(self.load.weight != LoadWeight::NICE_0_LOAD_SHIFT as u64) {
188             return self
189                 .force_mut()
190                 .load
191                 .calculate_delta(delta, LoadWeight::NICE_0_LOAD_SHIFT as u64);
192         };
193 
194         delta
195     }
196 
197     /// 更新组内的权重信息
198     pub fn update_cfs_group(&self) {
199         if self.my_cfs_rq.is_none() {
200             return;
201         }
202 
203         let group_cfs = self.my_cfs_rq.clone().unwrap();
204 
205         let shares = group_cfs.task_group().shares;
206 
207         if unlikely(self.load.weight != shares) {
208             // TODO: reweight
209             self.cfs_rq()
210                 .force_mut()
211                 .reweight_entity(self.self_arc(), shares);
212         }
213     }
214 
215     /// 遍历se组,如果返回false则需要调用的函数return,
216     /// 会将se指向其顶层parent
217     /// 该函数会改变se指向
218     /// 参数:
219     /// - se: 对应调度实体
220     /// - f: 对调度实体执行操作的闭包,返回值对应(no_break,should_continue),no_break为假时,退出循环,should_continue为假时表示需要将调用者return
221     ///
222     /// 返回值:
223     /// - bool: 是否需要调度者return
224     /// - Option<Arc<FairSchedEntity>>:最终se的指向
225     pub fn for_each_in_group(
226         se: &mut Arc<FairSchedEntity>,
227         mut f: impl FnMut(Arc<FairSchedEntity>) -> (bool, bool),
228     ) -> (bool, Option<Arc<FairSchedEntity>>) {
229         let mut should_continue;
230         let ret;
231         // 这一步是循环计算,直到根节点
232         // 比如有任务组 A ,有进程B,B属于A任务组,那么B的时间分配依赖于A组的权重以及B进程自己的权重
233         loop {
234             let (no_break, flag) = f(se.clone());
235             should_continue = flag;
236             if !no_break || !should_continue {
237                 ret = Some(se.clone());
238                 break;
239             }
240 
241             let parent = se.parent();
242             if parent.is_none() {
243                 ret = None;
244                 break;
245             }
246 
247             *se = parent.unwrap();
248         }
249 
250         (should_continue, ret)
251     }
252 
253     pub fn runnable(&self) -> u64 {
254         if self.is_task() {
255             return self.on_rq as u64;
256         } else {
257             self.runnable_weight
258         }
259     }
260 
261     /// 更新task和其cfsrq的负载均值
262     pub fn propagate_entity_load_avg(&mut self) -> bool {
263         if self.is_task() {
264             return false;
265         }
266 
267         let binding = self.my_cfs_rq.clone().unwrap();
268         let gcfs_rq = binding.force_mut();
269 
270         if gcfs_rq.propagate == 0 {
271             return false;
272         }
273 
274         gcfs_rq.propagate = 0;
275 
276         let binding = self.cfs_rq();
277         let cfs_rq = binding.force_mut();
278 
279         cfs_rq.add_task_group_propagate(gcfs_rq.prop_runnable_sum);
280 
281         cfs_rq.update_task_group_util(self.self_arc(), gcfs_rq);
282         cfs_rq.update_task_group_runnable(self.self_arc(), gcfs_rq);
283         cfs_rq.update_task_group_load(self.self_arc(), gcfs_rq);
284 
285         return true;
286     }
287 
288     /// 更新runnable_weight
289     pub fn update_runnable(&mut self) {
290         if !self.is_task() {
291             self.runnable_weight = self.my_cfs_rq.clone().unwrap().h_nr_running;
292         }
293     }
294 
295     /// 初始化实体运行均值
296     pub fn init_entity_runnable_average(&mut self) {
297         self.avg = SchedulerAvg::default();
298 
299         if self.is_task() {
300             self.avg.load_avg = LoadWeight::scale_load_down(self.load.weight) as usize;
301         }
302     }
303 }
304 
305 /// CFS的运行队列,这个队列需确保是percpu的
306 #[allow(dead_code)]
307 #[derive(Debug)]
308 pub struct CfsRunQueue {
309     load: LoadWeight,
310 
311     /// 全局运行的调度实体计数器,用于负载均衡
312     nr_running: u64,
313     /// 针对特定 CPU 核心的任务计数器
314     pub h_nr_running: u64,
315     /// 运行时间
316     exec_clock: u64,
317     /// 最少虚拟运行时间
318     min_vruntime: u64,
319     /// remain runtime
320     runtime_remaining: u64,
321 
322     /// 存放调度实体的红黑树
323     pub(super) entities: RBTree<u64, Arc<FairSchedEntity>>,
324 
325     /// IDLE
326     idle: usize,
327 
328     idle_nr_running: u64,
329 
330     pub idle_h_nr_running: u64,
331 
332     /// 当前运行的调度实体
333     current: Weak<FairSchedEntity>,
334     /// 下一个调度的实体
335     next: Weak<FairSchedEntity>,
336     /// 最后的调度实体
337     last: Weak<FairSchedEntity>,
338     /// 跳过运行的调度实体
339     skip: Weak<FairSchedEntity>,
340 
341     avg_load: i64,
342     avg_vruntime: i64,
343 
344     last_update_time_copy: u64,
345 
346     pub avg: SchedulerAvg,
347 
348     rq: Weak<CpuRunQueue>,
349     /// 拥有此队列的taskgroup
350     task_group: Weak<TaskGroup>,
351 
352     pub throttled_clock: u64,
353     pub throttled_clock_pelt: u64,
354     pub throttled_clock_pelt_time: u64,
355     pub throttled_pelt_idle: u64,
356 
357     pub throttled: bool,
358     pub throttled_count: u64,
359 
360     pub removed: SpinLock<CfsRemoved>,
361 
362     pub propagate: isize,
363     pub prop_runnable_sum: isize,
364 }
365 
366 #[derive(Debug, Default)]
367 pub struct CfsRemoved {
368     pub nr: u32,
369     pub load_avg: usize,
370     pub util_avg: usize,
371     pub runnable_avg: usize,
372 }
373 
374 impl CfsRunQueue {
375     pub fn new() -> Self {
376         Self {
377             load: LoadWeight::default(),
378             nr_running: 0,
379             h_nr_running: 0,
380             exec_clock: 0,
381             min_vruntime: 1 << 20,
382             entities: RBTree::new(),
383             idle: 0,
384             idle_nr_running: 0,
385             idle_h_nr_running: 0,
386             current: Weak::new(),
387             next: Weak::new(),
388             last: Weak::new(),
389             skip: Weak::new(),
390             avg_load: 0,
391             avg_vruntime: 0,
392             last_update_time_copy: 0,
393             avg: SchedulerAvg::default(),
394             rq: Weak::new(),
395             task_group: Weak::new(),
396             throttled_clock: 0,
397             throttled_clock_pelt: 0,
398             throttled_clock_pelt_time: 0,
399             throttled_pelt_idle: 0,
400             throttled: false,
401             throttled_count: 0,
402             removed: SpinLock::new(CfsRemoved::default()),
403             propagate: 0,
404             prop_runnable_sum: 0,
405             runtime_remaining: 0,
406         }
407     }
408 
409     #[inline]
410     pub fn rq(&self) -> Arc<CpuRunQueue> {
411         self.rq.upgrade().unwrap()
412     }
413 
414     #[inline]
415     pub fn set_rq(&mut self, rq: Weak<CpuRunQueue>) {
416         self.rq = rq;
417     }
418 
419     #[inline]
420     #[allow(clippy::mut_from_ref)]
421     pub fn force_mut(&self) -> &mut Self {
422         unsafe {
423             (self as *const Self as usize as *mut Self)
424                 .as_mut()
425                 .unwrap()
426         }
427     }
428 
429     #[inline]
430     pub fn is_idle(&self) -> bool {
431         self.idle > 0
432     }
433 
434     #[inline]
435     pub fn current(&self) -> Option<Arc<FairSchedEntity>> {
436         self.current.upgrade()
437     }
438 
439     #[inline]
440     pub fn set_current(&mut self, curr: Weak<FairSchedEntity>) {
441         self.current = curr
442     }
443 
444     #[inline]
445     pub fn next(&self) -> Option<Arc<FairSchedEntity>> {
446         self.next.upgrade()
447     }
448 
449     pub fn task_group(&self) -> Arc<TaskGroup> {
450         self.task_group.upgrade().unwrap()
451     }
452 
453     #[allow(dead_code)]
454     #[inline]
455     pub const fn bandwidth_used() -> bool {
456         false
457     }
458 
459     /// ## 计算调度周期,基本思想是在一个周期内让每个任务都至少运行一次。
460     /// 这样可以确保所有的任务都能够得到执行,而且可以避免某些任务被长时间地阻塞。
461     pub fn sched_period(nr_running: u64) -> u64 {
462         if unlikely(nr_running > SCHED_NR_LATENCY.load(Ordering::SeqCst)) {
463             // 如果当前活跃的任务数量超过了预设的调度延迟任务数量
464             // 调度周期的长度将直接设置为活跃任务数量乘以最小抢占粒度
465             return nr_running * SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);
466         } else {
467             // 如果活跃任务数量未超过预设的延迟任务数量,那么调度周期的长度将设置为SCHED_NR_LATENCY
468             return SCHED_NR_LATENCY.load(Ordering::SeqCst);
469         }
470     }
471 
472     /// ## 计算调度任务的虚拟运行时间片大小
473     ///
474     /// vruntime = runtime / weight
475     #[allow(dead_code)]
476     pub fn sched_vslice(&self, entity: Arc<FairSchedEntity>) -> u64 {
477         let slice = self.sched_slice(entity.clone());
478         return entity.calculate_delta_fair(slice);
479     }
480 
481     /// ## 计算调度任务的实际运行时间片大小
482     #[allow(dead_code)]
483     pub fn sched_slice(&self, mut entity: Arc<FairSchedEntity>) -> u64 {
484         let mut nr_running = self.nr_running;
485         if SCHED_FEATURES.contains(SchedFeature::ALT_PERIOD) {
486             nr_running = self.h_nr_running;
487         }
488 
489         // 计算一个调度周期的整个slice
490         let mut slice = Self::sched_period(nr_running + (!entity.on_rq()) as u64);
491 
492         // 这一步是循环计算,直到根节点
493         // 比如有任务组 A ,有进程B,B属于A任务组,那么B的时间分配依赖于A组的权重以及B进程自己的权重
494         FairSchedEntity::for_each_in_group(&mut entity, |se| {
495             if unlikely(!se.on_rq()) {
496                 se.cfs_rq().force_mut().load.update_load_add(se.load.weight);
497             }
498             slice = se
499                 .cfs_rq()
500                 .force_mut()
501                 .load
502                 .calculate_delta(slice, se.load.weight);
503 
504             (true, true)
505         });
506 
507         if SCHED_FEATURES.contains(SchedFeature::BASE_SLICE) {
508             // TODO: IDLE?
509             let min_gran = SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);
510 
511             slice = min_gran.max(slice)
512         }
513 
514         slice
515     }
516 
517     /// ## 在时间片到期时检查当前任务是否需要被抢占,
518     /// 如果需要,则抢占当前任务,并确保不会由于与其他任务的“好友偏爱(buddy favours)”而重新选举为下一个运行的任务。
519     #[allow(dead_code)]
520     pub fn check_preempt_tick(&mut self, curr: Arc<FairSchedEntity>) {
521         // 计算理想状态下该调度实体的理想运行时间
522         let ideal_runtime = self.sched_slice(curr.clone());
523 
524         let delta_exec = curr.sum_exec_runtime - curr.prev_sum_exec_runtime;
525 
526         if delta_exec > ideal_runtime {
527             // 表明实际运行时间长于理想运行时间
528             self.rq().resched_current();
529 
530             self.clear_buddies(&curr);
531             return;
532         }
533 
534         if delta_exec < SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst) {
535             return;
536         }
537 
538         todo!()
539     }
540 
541     pub fn clear_buddies(&mut self, se: &Arc<FairSchedEntity>) {
542         if let Some(next) = self.next.upgrade() {
543             if Arc::ptr_eq(&next, se) {
544                 se.clear_buddies();
545             }
546         }
547     }
548 
549     /// 处理调度实体的时间片到期事件
550     pub fn entity_tick(&mut self, curr: Arc<FairSchedEntity>, queued: bool) {
551         // 更新当前调度实体的运行时间统计信息
552         self.update_current();
553 
554         self.update_load_avg(&curr, UpdateAvgFlags::UPDATE_TG);
555 
556         // 更新组调度相关
557         curr.update_cfs_group();
558 
559         if queued {
560             self.rq().resched_current();
561             return;
562         }
563     }
564 
565     /// 更新当前调度实体的运行时间统计信息
566     pub fn update_current(&mut self) {
567         let curr = self.current();
568         if unlikely(curr.is_none()) {
569             return;
570         }
571 
572         let now = self.rq().clock_task();
573         let curr = curr.unwrap();
574 
575         fence(Ordering::SeqCst);
576         if unlikely(now <= curr.exec_start) {
577             // warn!(
578             //     "update_current return now <= curr.exec_start now {now} execstart {}",
579             //     curr.exec_start
580             // );
581             return;
582         }
583 
584         fence(Ordering::SeqCst);
585         let delta_exec = now - curr.exec_start;
586 
587         let curr = curr.force_mut();
588 
589         curr.exec_start = now;
590 
591         curr.sum_exec_runtime += delta_exec;
592 
593         // 根据实际运行时长加权增加虚拟运行时长
594         curr.vruntime += curr.calculate_delta_fair(delta_exec);
595         fence(Ordering::SeqCst);
596         self.update_deadline(&curr.self_arc());
597         self.update_min_vruntime();
598 
599         self.account_cfs_rq_runtime(delta_exec);
600     }
601 
602     /// 计算当前cfs队列的运行时间是否到期
603     fn account_cfs_rq_runtime(&mut self, delta_exec: u64) {
604         if likely(self.runtime_remaining > delta_exec) {
605             self.runtime_remaining -= delta_exec;
606             // error!("runtime_remaining {}", self.runtime_remaining);
607             return;
608         }
609 
610         // warn!(
611         //     "runtime_remaining {} delta exec {delta_exec} nr_running {}",
612         //     self.runtime_remaining,
613         //     self.nr_running
614         // );
615         // fixme: 目前只是简单分配一个时间片
616         self.runtime_remaining = 5000 * NSEC_PER_MSEC as u64;
617 
618         if likely(self.current().is_some()) && self.nr_running > 1 {
619             // error!("account_cfs_rq_runtime");
620             self.rq().resched_current();
621         }
622     }
623 
624     /// 计算deadline,如果vruntime到期会重调度
625     pub fn update_deadline(&mut self, se: &Arc<FairSchedEntity>) {
626         // error!("vruntime {} deadline {}", se.vruntime, se.deadline);
627         if se.vruntime < se.deadline {
628             return;
629         }
630 
631         se.force_mut().slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);
632 
633         se.force_mut().deadline = se.vruntime + se.calculate_delta_fair(se.slice);
634 
635         if self.nr_running > 1 {
636             self.rq().resched_current();
637             self.clear_buddies(se);
638         }
639     }
640 
641     /// ## 更新最小虚拟运行时间
642     pub fn update_min_vruntime(&mut self) {
643         let curr = self.current();
644 
645         let mut vruntime = self.min_vruntime;
646 
647         if curr.is_some() {
648             let curr = curr.as_ref().unwrap();
649             if curr.on_rq() {
650                 vruntime = curr.vruntime;
651             } else {
652                 self.set_current(Weak::default());
653             }
654         }
655 
656         // 找到最小虚拟运行时间的调度实体
657         let leftmost = self.entities.get_first();
658         if let Some(leftmost) = leftmost {
659             let se = leftmost.1;
660 
661             if curr.is_none() {
662                 vruntime = se.vruntime;
663             } else {
664                 vruntime = vruntime.min(se.vruntime);
665             }
666         }
667 
668         self.min_vruntime = self.__update_min_vruntime(vruntime);
669     }
670 
671     fn __update_min_vruntime(&mut self, vruntime: u64) -> u64 {
672         let mut min_vruntime = self.min_vruntime;
673 
674         let delta = vruntime as i64 - min_vruntime as i64;
675         if delta > 0 {
676             self.avg_vruntime -= self.avg_load * delta;
677             min_vruntime = vruntime;
678         }
679 
680         return min_vruntime;
681     }
682 
683     // 判断是否为当前任务
684     pub fn is_curr(&self, se: &Arc<FairSchedEntity>) -> bool {
685         if self.current().is_none() {
686             false
687         } else {
688             // 判断当前和传入的se是否相等
689             Arc::ptr_eq(se, self.current().as_ref().unwrap())
690         }
691     }
692 
693     // 修改后
694     pub fn reweight_entity(&mut self, se: Arc<FairSchedEntity>, weight: u64) {
695         // 判断是否为当前任务
696         let is_curr = self.is_curr(&se);
697 
698         // 如果se在队列中
699         if se.on_rq() {
700             // 如果是当前任务
701             if is_curr {
702                 self.update_current();
703             } else {
704                 // 否则,出队
705                 self.inner_dequeue_entity(&se);
706             }
707 
708             // 减去该权重
709             self.load.update_load_sub(se.load.weight);
710         }
711 
712         self.dequeue_load_avg(&se);
713 
714         if !se.on_rq() {
715             se.force_mut().vlag = se.vlag * se.load.weight as i64 / weight as i64;
716         } else {
717             self.reweight_eevdf(&se, weight);
718         }
719         se.force_mut().load.update_load_set(weight);
720 
721         // SMP
722         let divider = se.avg.get_pelt_divider();
723         se.force_mut().avg.load_avg = LoadWeight::scale_load_down(se.load.weight) as usize
724             * se.avg.load_sum as usize
725             / divider;
726 
727         self.enqueue_load_avg(se.clone());
728 
729         if se.on_rq() {
730             self.load.update_load_add(se.load.weight);
731             if !is_curr {
732                 self.inner_enqueue_entity(&se);
733             }
734 
735             self.update_min_vruntime();
736         }
737     }
738 
739     /// 用于重新计算调度实体(sched_entity)的权重(weight)和虚拟运行时间(vruntime)
740     fn reweight_eevdf(&mut self, se: &Arc<FairSchedEntity>, weight: u64) {
741         let old_weight = se.load.weight;
742         let avg_vruntime = self.avg_vruntime();
743         let mut vlag;
744         if avg_vruntime != se.vruntime {
745             vlag = avg_vruntime as i64 - se.vruntime as i64;
746             vlag = vlag * old_weight as i64 / weight as i64;
747             se.force_mut().vruntime = (avg_vruntime as i64 - vlag) as u64;
748         }
749 
750         let mut vslice = se.deadline as i64 - avg_vruntime as i64;
751         vslice = vslice * old_weight as i64 / weight as i64;
752         se.force_mut().deadline = avg_vruntime + vslice as u64;
753     }
754 
755     fn avg_vruntime(&self) -> u64 {
756         let curr = self.current();
757         let mut avg = self.avg_vruntime;
758         let mut load = self.avg_load;
759 
760         if let Some(curr) = curr {
761             if curr.on_rq() {
762                 let weight = LoadWeight::scale_load_down(curr.load.weight);
763                 avg += self.entity_key(&curr) * weight as i64;
764                 load += weight as i64;
765             }
766         }
767 
768         if load > 0 {
769             if avg < 0 {
770                 avg -= load - 1;
771             }
772 
773             avg /= load;
774         }
775 
776         return self.min_vruntime + avg as u64;
777     }
778 
779     #[inline]
780     pub fn entity_key(&self, se: &Arc<FairSchedEntity>) -> i64 {
781         return se.vruntime as i64 - self.min_vruntime as i64;
782     }
783 
784     pub fn avg_vruntime_add(&mut self, se: &Arc<FairSchedEntity>) {
785         let weight = LoadWeight::scale_load_down(se.load.weight);
786 
787         let key = self.entity_key(se);
788 
789         let avg_vruntime = self.avg_vruntime + key * weight as i64;
790 
791         self.avg_vruntime = avg_vruntime;
792         self.avg_load += weight as i64;
793     }
794 
795     pub fn avg_vruntime_sub(&mut self, se: &Arc<FairSchedEntity>) {
796         let weight = LoadWeight::scale_load_down(se.load.weight);
797 
798         let key = self.entity_key(se);
799 
800         let avg_vruntime = self.avg_vruntime - key * weight as i64;
801 
802         self.avg_vruntime = avg_vruntime;
803         self.avg_load -= weight as i64;
804     }
805 
806     /// 为调度实体计算初始vruntime等信息
807     fn place_entity(&mut self, se: Arc<FairSchedEntity>, flags: EnqueueFlag) {
808         let vruntime = self.avg_vruntime();
809         let mut lag = 0;
810 
811         let se = se.force_mut();
812         se.slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);
813 
814         let mut vslice = se.calculate_delta_fair(se.slice);
815 
816         if self.nr_running > 0 {
817             let curr = self.current();
818 
819             lag = se.vlag;
820 
821             let mut load = self.avg_load;
822 
823             if let Some(curr) = curr {
824                 if curr.on_rq() {
825                     load += LoadWeight::scale_load_down(curr.load.weight) as i64;
826                 }
827             }
828 
829             lag *= load + LoadWeight::scale_load_down(se.load.weight) as i64;
830 
831             if load == 0 {
832                 load = 1;
833             }
834 
835             lag /= load;
836         }
837 
838         se.vruntime = vruntime - lag as u64;
839 
840         if flags.contains(EnqueueFlag::ENQUEUE_INITIAL) {
841             vslice /= 2;
842         }
843 
844         se.deadline = se.vruntime + vslice;
845     }
846 
847     /// 更新负载均值
848     fn update_load_avg(&mut self, se: &Arc<FairSchedEntity>, flags: UpdateAvgFlags) {
849         let now = self.cfs_rq_clock_pelt();
850 
851         if se.avg.last_update_time > 0 && !flags.contains(UpdateAvgFlags::SKIP_AGE_LOAD) {
852             se.force_mut().update_load_avg(self, now);
853         }
854 
855         let mut decayed = self.update_self_load_avg(now);
856         decayed |= se.force_mut().propagate_entity_load_avg() as u32;
857 
858         if se.avg.last_update_time > 0 && flags.contains(UpdateAvgFlags::DO_ATTACH) {
859             todo!()
860         } else if flags.contains(UpdateAvgFlags::DO_ATTACH) {
861             self.detach_entity_load_avg(se);
862         } else if decayed > 0 {
863             // cfs_rq_util_change
864 
865             todo!()
866         }
867     }
868 
869     /// 将实体的负载均值与对应cfs分离
870     fn detach_entity_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
871         self.dequeue_load_avg(se);
872 
873         sub_positive(&mut self.avg.util_avg, se.avg.util_avg);
874         sub_positive(&mut (self.avg.util_sum as usize), se.avg.util_sum as usize);
875         self.avg.util_sum = self
876             .avg
877             .util_sum
878             .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
879 
880         sub_positive(&mut self.avg.runnable_avg, se.avg.runnable_avg);
881         sub_positive(
882             &mut (self.avg.runnable_sum as usize),
883             se.avg.runnable_sum as usize,
884         );
885         self.avg.runnable_sum = self
886             .avg
887             .runnable_sum
888             .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
889 
890         self.propagate = 1;
891         self.prop_runnable_sum += se.avg.load_sum as isize;
892     }
893 
894     fn update_self_load_avg(&mut self, now: u64) -> u32 {
895         let mut removed_load = 0;
896         let mut removed_util = 0;
897         let mut removed_runnable = 0;
898 
899         let mut decayed = 0;
900 
901         if self.removed.lock().nr > 0 {
902             let mut removed_guard = self.removed.lock();
903             let divider = self.avg.get_pelt_divider();
904 
905             swap::<usize>(&mut removed_guard.util_avg, &mut removed_util);
906             swap::<usize>(&mut removed_guard.load_avg, &mut removed_load);
907             swap::<usize>(&mut removed_guard.runnable_avg, &mut removed_runnable);
908 
909             removed_guard.nr = 0;
910 
911             let mut r = removed_load;
912 
913             sub_positive(&mut self.avg.load_avg, r);
914             sub_positive(&mut (self.avg.load_sum as usize), r * divider);
915 
916             self.avg.load_sum = self
917                 .avg
918                 .load_sum
919                 .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
920 
921             r = removed_util;
922             sub_positive(&mut self.avg.util_avg, r);
923             sub_positive(&mut (self.avg.util_sum as usize), r * divider);
924             self.avg.util_sum = self
925                 .avg
926                 .util_sum
927                 .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
928 
929             r = removed_runnable;
930             sub_positive(&mut self.avg.runnable_avg, r);
931             sub_positive(&mut (self.avg.runnable_sum as usize), r * divider);
932             self.avg.runnable_sum = self
933                 .avg
934                 .runnable_sum
935                 .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
936 
937             drop(removed_guard);
938             self.add_task_group_propagate(
939                 -(removed_runnable as isize * divider as isize) >> SCHED_CAPACITY_SHIFT,
940             );
941 
942             decayed = 1;
943         }
944 
945         decayed |= self.__update_load_avg(now) as u32;
946 
947         self.last_update_time_copy = self.avg.last_update_time;
948 
949         return decayed;
950     }
951 
952     fn __update_load_avg(&mut self, now: u64) -> bool {
953         if self.avg.update_load_sum(
954             now,
955             LoadWeight::scale_load_down(self.load.weight) as u32,
956             self.h_nr_running as u32,
957             self.current().is_some() as u32,
958         ) {
959             self.avg.update_load_avg(1);
960             return true;
961         }
962 
963         return false;
964     }
965 
966     fn add_task_group_propagate(&mut self, runnable_sum: isize) {
967         self.propagate = 1;
968         self.prop_runnable_sum += runnable_sum;
969     }
970 
971     /// 将实体加入队列
972     pub fn enqueue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: EnqueueFlag) {
973         let is_curr = self.is_curr(se);
974 
975         if is_curr {
976             self.place_entity(se.clone(), flags);
977         }
978 
979         self.update_current();
980 
981         self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG | UpdateAvgFlags::DO_ATTACH);
982 
983         se.force_mut().update_runnable();
984 
985         se.update_cfs_group();
986 
987         if !is_curr {
988             self.place_entity(se.clone(), flags);
989         }
990 
991         self.account_entity_enqueue(se);
992 
993         if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
994             se.force_mut().exec_start = 0;
995         }
996 
997         if !is_curr {
998             self.inner_enqueue_entity(se);
999         }
1000 
1001         se.force_mut().on_rq = OnRq::Queued;
1002 
1003         if self.nr_running == 1 {
1004             // 只有上面加入的
1005             // TODO: throttle
1006         }
1007     }
1008 
1009     pub fn dequeue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: DequeueFlag) {
1010         let mut action = UpdateAvgFlags::UPDATE_TG;
1011 
1012         if se.is_task() && se.on_rq == OnRq::Migrating {
1013             action |= UpdateAvgFlags::DO_DETACH;
1014         }
1015 
1016         self.update_current();
1017 
1018         self.update_load_avg(se, action);
1019 
1020         se.force_mut().update_runnable();
1021 
1022         self.clear_buddies(se);
1023 
1024         self.update_entity_lag(se);
1025 
1026         if let Some(curr) = self.current() {
1027             if !Arc::ptr_eq(&curr, se) {
1028                 self.inner_dequeue_entity(se);
1029             }
1030         } else {
1031             self.inner_dequeue_entity(se);
1032         }
1033 
1034         se.force_mut().on_rq = OnRq::None;
1035 
1036         self.account_entity_dequeue(se);
1037 
1038         // return_cfs_rq_runtime
1039 
1040         se.update_cfs_group();
1041 
1042         if flags & (DequeueFlag::DEQUEUE_SAVE | DequeueFlag::DEQUEUE_MOVE)
1043             != DequeueFlag::DEQUEUE_SAVE
1044         {
1045             self.update_min_vruntime();
1046         }
1047 
1048         if self.nr_running == 0 {
1049             self.update_idle_clock_pelt()
1050         }
1051     }
1052 
1053     /// 将前一个调度的task放回队列
1054     pub fn put_prev_entity(&mut self, prev: Arc<FairSchedEntity>) {
1055         if prev.on_rq() {
1056             self.update_current();
1057         }
1058 
1059         if prev.on_rq() {
1060             self.inner_enqueue_entity(&prev);
1061         }
1062 
1063         self.set_current(Weak::default());
1064     }
1065 
1066     /// 将下一个运行的task设置为current
1067     pub fn set_next_entity(&mut self, se: &Arc<FairSchedEntity>) {
1068         self.clear_buddies(se);
1069 
1070         if se.on_rq() {
1071             self.inner_dequeue_entity(se);
1072             self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG);
1073             se.force_mut().vlag = se.deadline as i64;
1074         }
1075 
1076         self.set_current(Arc::downgrade(se));
1077 
1078         se.force_mut().prev_sum_exec_runtime = se.sum_exec_runtime;
1079     }
1080 
1081     fn update_idle_clock_pelt(&mut self) {
1082         let throttled = if unlikely(self.throttled_count > 0) {
1083             u64::MAX
1084         } else {
1085             self.throttled_clock_pelt_time
1086         };
1087 
1088         self.throttled_clock_pelt = throttled;
1089     }
1090 
1091     fn update_entity_lag(&mut self, se: &Arc<FairSchedEntity>) {
1092         let lag = self.avg_vruntime() as i64 - se.vruntime as i64;
1093 
1094         let limit = se.calculate_delta_fair((TICK_NESC as u64).max(2 * se.slice)) as i64;
1095 
1096         se.force_mut().vlag = if lag < -limit {
1097             -limit
1098         } else if lag > limit {
1099             limit
1100         } else {
1101             lag
1102         }
1103     }
1104 
1105     fn account_entity_enqueue(&mut self, se: &Arc<FairSchedEntity>) {
1106         self.load.update_load_add(se.load.weight);
1107 
1108         if se.is_task() {
1109             let rq = self.rq();
1110             let (rq, _guard) = rq.self_lock();
1111             // TODO:numa
1112             rq.cfs_tasks.push_back(se.clone());
1113         }
1114         self.nr_running += 1;
1115         if se.is_idle() {
1116             self.idle_nr_running += 1;
1117         }
1118     }
1119 
1120     fn account_entity_dequeue(&mut self, se: &Arc<FairSchedEntity>) {
1121         self.load.update_load_sub(se.load.weight);
1122 
1123         if se.is_task() {
1124             let rq = self.rq();
1125             let (rq, _guard) = rq.self_lock();
1126 
1127             // TODO:numa
1128             let _ = rq.cfs_tasks.extract_if(|x| Arc::ptr_eq(x, se));
1129         }
1130 
1131         self.nr_running -= 1;
1132         if se.is_idle() {
1133             self.idle_nr_running -= 1;
1134         }
1135     }
1136 
1137     pub fn inner_enqueue_entity(&mut self, se: &Arc<FairSchedEntity>) {
1138         self.avg_vruntime_add(se);
1139         se.force_mut().min_deadline = se.deadline;
1140         self.entities.insert(se.vruntime, se.clone());
1141         // warn!(
1142         //     "enqueue pcb {:?} cfsrq {:?}",
1143         //     se.pcb().pid(),
1144         //     self.entities
1145         //         .iter()
1146         //         .map(|x| (x.0, x.1.pcb().pid()))
1147         //         .collect::<Vec<_>>()
1148         // );
1149         // send_to_default_serial8250_port(
1150         //     format!(
1151         //         "enqueue pcb {:?} cfsrq {:?}\n",
1152         //         se.pcb().pid(),
1153         //         self.entities
1154         //             .iter()
1155         //             .map(|x| (x.0, x.1.pcb().pid()))
1156         //             .collect::<Vec<_>>()
1157         //     )
1158         //     .as_bytes(),
1159         // );
1160     }
1161 
1162     fn inner_dequeue_entity(&mut self, se: &Arc<FairSchedEntity>) {
1163         // warn!(
1164         //     "before dequeue pcb {:?} cfsrq {:?}",
1165         //     se.pcb().pid(),
1166         //     self.entities
1167         //         .iter()
1168         //         .map(|x| (x.0, x.1.pcb().pid()))
1169         //         .collect::<Vec<_>>()
1170         // );
1171 
1172         // send_to_default_serial8250_port(
1173         //     format!(
1174         //         "before dequeue pcb {:?} cfsrq {:?}\n",
1175         //         se.pcb().pid(),
1176         //         self.entities
1177         //             .iter()
1178         //             .map(|x| (x.0, x.1.pcb().pid()))
1179         //             .collect::<Vec<_>>()
1180         //     )
1181         //     .as_bytes(),
1182         // );
1183 
1184         let mut i = 1;
1185         while let Some(rm) = self.entities.remove(&se.vruntime) {
1186             if Arc::ptr_eq(&rm, se) {
1187                 break;
1188             }
1189             rm.force_mut().vruntime += i;
1190             self.entities.insert(rm.vruntime, rm);
1191 
1192             i += 1;
1193         }
1194         // send_to_default_serial8250_port(
1195         //     format!(
1196         //         "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}\n",
1197         //         se.pcb().pid(),
1198         //         remove.pcb().pid(),
1199         //         self.entities
1200         //             .iter()
1201         //             .map(|x| (x.0, x.1.pcb().pid()))
1202         //             .collect::<Vec<_>>()
1203         //     )
1204         //     .as_bytes(),
1205         // );
1206         // warn!(
1207         //     "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}",
1208         //     se.pcb().pid(),
1209         //     remove.pcb().pid(),
1210         //     self.entities
1211         //         .iter()
1212         //         .map(|x| (x.0, x.1.pcb().pid()))
1213         //         .collect::<Vec<_>>()
1214         // );
1215         self.avg_vruntime_sub(se);
1216     }
1217 
1218     pub fn enqueue_load_avg(&mut self, se: Arc<FairSchedEntity>) {
1219         self.avg.load_avg += se.avg.load_avg;
1220         self.avg.load_sum += LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
1221     }
1222 
1223     pub fn dequeue_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
1224         if self.avg.load_avg > se.avg.load_avg {
1225             self.avg.load_avg -= se.avg.load_avg;
1226         } else {
1227             self.avg.load_avg = 0;
1228         };
1229 
1230         let se_load = LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
1231 
1232         if self.avg.load_sum > se_load {
1233             self.avg.load_sum -= se_load;
1234         } else {
1235             self.avg.load_sum = 0;
1236         }
1237 
1238         self.avg.load_sum = self
1239             .avg
1240             .load_sum
1241             .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64)
1242     }
1243 
1244     pub fn update_task_group_util(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
1245         let mut delta_sum = gcfs_rq.avg.load_avg as isize - se.avg.load_avg as isize;
1246         let delta_avg = delta_sum;
1247 
1248         if delta_avg == 0 {
1249             return;
1250         }
1251 
1252         let divider = self.avg.get_pelt_divider();
1253 
1254         let se = se.force_mut();
1255         se.avg.util_avg = gcfs_rq.avg.util_avg;
1256         let new_sum = se.avg.util_avg * divider;
1257         delta_sum = new_sum as isize - se.avg.util_sum as isize;
1258 
1259         se.avg.util_sum = new_sum as u64;
1260 
1261         add_positive(&mut (self.avg.util_avg as isize), delta_avg);
1262         add_positive(&mut (self.avg.util_sum as isize), delta_sum);
1263 
1264         self.avg.util_sum = self
1265             .avg
1266             .util_sum
1267             .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
1268     }
1269 
1270     pub fn update_task_group_runnable(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
1271         let mut delta_sum = gcfs_rq.avg.runnable_avg as isize - se.avg.runnable_avg as isize;
1272         let delta_avg = delta_sum;
1273 
1274         if delta_avg == 0 {
1275             return;
1276         }
1277 
1278         let divider = self.avg.get_pelt_divider();
1279 
1280         let se = se.force_mut();
1281         se.avg.runnable_avg = gcfs_rq.avg.runnable_avg;
1282         let new_sum = se.avg.runnable_sum * divider as u64;
1283         delta_sum = new_sum as isize - se.avg.runnable_sum as isize;
1284 
1285         se.avg.runnable_sum = new_sum;
1286 
1287         add_positive(&mut (self.avg.runnable_avg as isize), delta_avg);
1288         add_positive(&mut (self.avg.runnable_sum as isize), delta_sum);
1289 
1290         self.avg.runnable_sum = self
1291             .avg
1292             .runnable_sum
1293             .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
1294     }
1295 
1296     pub fn update_task_group_load(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &mut CfsRunQueue) {
1297         let mut runnable_sum = gcfs_rq.prop_runnable_sum;
1298 
1299         let mut load_sum = 0;
1300 
1301         if runnable_sum == 0 {
1302             return;
1303         }
1304 
1305         gcfs_rq.prop_runnable_sum = 0;
1306 
1307         let divider = self.avg.get_pelt_divider();
1308 
1309         if runnable_sum >= 0 {
1310             runnable_sum += se.avg.load_sum as isize;
1311             runnable_sum = runnable_sum.min(divider as isize);
1312         } else {
1313             if LoadWeight::scale_load_down(gcfs_rq.load.weight) > 0 {
1314                 load_sum = gcfs_rq.avg.load_sum / LoadWeight::scale_load_down(gcfs_rq.load.weight);
1315             }
1316 
1317             runnable_sum = se.avg.load_sum.min(load_sum) as isize;
1318         }
1319 
1320         let running_sum = se.avg.util_sum as isize >> SCHED_CAPACITY_SHIFT;
1321         runnable_sum = runnable_sum.max(running_sum);
1322 
1323         load_sum = LoadWeight::scale_load_down(se.load.weight) * runnable_sum as u64;
1324         let load_avg = load_sum / divider as u64;
1325 
1326         let delta_avg = load_avg as isize - se.avg.load_avg as isize;
1327         if delta_avg == 0 {
1328             return;
1329         }
1330 
1331         let delta_sum = load_sum as isize
1332             - LoadWeight::scale_load_down(se.load.weight) as isize * se.avg.load_sum as isize;
1333 
1334         let se = se.force_mut();
1335         se.avg.load_sum = runnable_sum as u64;
1336         se.avg.load_avg = load_avg as usize;
1337 
1338         add_positive(&mut (self.avg.load_avg as isize), delta_avg);
1339         add_positive(&mut (self.avg.util_sum as isize), delta_sum);
1340 
1341         self.avg.load_sum = self
1342             .avg
1343             .load_sum
1344             .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
1345     }
1346 
1347     /// pick下一个运行的task
1348     pub fn pick_next_entity(&self) -> Option<Arc<FairSchedEntity>> {
1349         if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
1350             && self.next().is_some()
1351             && self.entity_eligible(&self.next().unwrap())
1352         {
1353             return self.next();
1354         }
1355         self.entities.get_first().map(|val| val.1.clone())
1356     }
1357 
1358     pub fn entity_eligible(&self, se: &Arc<FairSchedEntity>) -> bool {
1359         let curr = self.current();
1360         let mut avg = self.avg_vruntime;
1361         let mut load = self.avg_load;
1362 
1363         if let Some(curr) = curr {
1364             if curr.on_rq() {
1365                 let weight = LoadWeight::scale_load_down(curr.load.weight);
1366 
1367                 avg += self.entity_key(&curr) * weight as i64;
1368                 load += weight as i64;
1369             }
1370         }
1371 
1372         return avg >= self.entity_key(se) * load;
1373     }
1374 }
1375 
1376 impl Default for CfsRunQueue {
1377     fn default() -> Self {
1378         Self::new()
1379     }
1380 }
1381 pub struct CompletelyFairScheduler;
1382 
1383 impl CompletelyFairScheduler {
1384     /// 寻找到最近公共组长
1385     fn find_matching_se(se: &mut Arc<FairSchedEntity>, pse: &mut Arc<FairSchedEntity>) {
1386         let mut se_depth = se.depth;
1387         let mut pse_depth = pse.depth;
1388 
1389         while se_depth > pse_depth {
1390             se_depth -= 1;
1391             *se = se.parent().unwrap();
1392         }
1393 
1394         while pse_depth > se_depth {
1395             pse_depth -= 1;
1396             *pse = pse.parent().unwrap();
1397         }
1398 
1399         while !Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq()) {
1400             *se = se.parent().unwrap();
1401             *pse = pse.parent().unwrap();
1402         }
1403     }
1404 }
1405 
1406 impl Scheduler for CompletelyFairScheduler {
1407     fn enqueue(
1408         rq: &mut CpuRunQueue,
1409         pcb: Arc<crate::process::ProcessControlBlock>,
1410         mut flags: EnqueueFlag,
1411     ) {
1412         let mut se = pcb.sched_info().sched_entity();
1413         let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
1414         let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
1415             if se.on_rq() {
1416                 return (false, true);
1417             }
1418 
1419             let binding = se.cfs_rq();
1420             let cfs_rq = binding.force_mut();
1421             cfs_rq.enqueue_entity(&se, flags);
1422 
1423             cfs_rq.h_nr_running += 1;
1424             cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;
1425 
1426             if cfs_rq.is_idle() {
1427                 idle_h_nr_running = true;
1428             }
1429 
1430             // TODO: cfs_rq_throttled
1431 
1432             flags = EnqueueFlag::ENQUEUE_WAKEUP;
1433 
1434             return (true, true);
1435         });
1436 
1437         if !should_continue {
1438             return;
1439         }
1440 
1441         if let Some(mut se) = se {
1442             FairSchedEntity::for_each_in_group(&mut se, |se| {
1443                 let binding = se.cfs_rq();
1444                 let cfs_rq = binding.force_mut();
1445 
1446                 cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);
1447 
1448                 let se = se.force_mut();
1449                 se.update_runnable();
1450 
1451                 se.update_cfs_group();
1452 
1453                 cfs_rq.h_nr_running += 1;
1454                 cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;
1455 
1456                 if cfs_rq.is_idle() {
1457                     idle_h_nr_running = true;
1458                 }
1459 
1460                 // TODO: cfs_rq_throttled
1461 
1462                 return (true, true);
1463             });
1464         }
1465 
1466         rq.add_nr_running(1);
1467     }
1468 
1469     fn dequeue(
1470         rq: &mut CpuRunQueue,
1471         pcb: Arc<crate::process::ProcessControlBlock>,
1472         mut flags: DequeueFlag,
1473     ) {
1474         let mut se = pcb.sched_info().sched_entity();
1475         let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
1476         let task_sleep = flags.contains(DequeueFlag::DEQUEUE_SLEEP);
1477         let was_sched_idle = rq.sched_idle_rq();
1478 
1479         let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
1480             let binding = se.cfs_rq();
1481             let cfs_rq = binding.force_mut();
1482             cfs_rq.dequeue_entity(&se, flags);
1483 
1484             cfs_rq.h_nr_running -= 1;
1485             cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;
1486 
1487             if cfs_rq.is_idle() {
1488                 idle_h_nr_running = true;
1489             }
1490 
1491             // TODO: cfs_rq_throttled
1492 
1493             if cfs_rq.load.weight > 0 {
1494                 let sep = se.parent();
1495 
1496                 if task_sleep && sep.is_some() {
1497                     todo!()
1498                 }
1499             }
1500 
1501             flags |= DequeueFlag::DEQUEUE_SLEEP;
1502 
1503             return (true, true);
1504         });
1505 
1506         if !should_continue {
1507             return;
1508         }
1509 
1510         if let Some(mut se) = se {
1511             FairSchedEntity::for_each_in_group(&mut se, |se| {
1512                 let binding = se.cfs_rq();
1513                 let cfs_rq = binding.force_mut();
1514 
1515                 cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);
1516 
1517                 let se = se.force_mut();
1518                 se.update_runnable();
1519 
1520                 se.update_cfs_group();
1521 
1522                 cfs_rq.h_nr_running -= 1;
1523                 cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;
1524 
1525                 if cfs_rq.is_idle() {
1526                     idle_h_nr_running = true;
1527                 }
1528 
1529                 // TODO: cfs_rq_throttled
1530 
1531                 return (true, true);
1532             });
1533         }
1534 
1535         rq.sub_nr_running(1);
1536 
1537         if unlikely(!was_sched_idle && rq.sched_idle_rq()) {
1538             rq.next_balance = clock();
1539         }
1540     }
1541 
1542     fn yield_task(rq: &mut CpuRunQueue) {
1543         let curr = rq.current();
1544         let se = curr.sched_info().sched_entity();
1545         let binding = se.cfs_rq();
1546         let cfs_rq = binding.force_mut();
1547 
1548         if unlikely(rq.nr_running == 1) {
1549             return;
1550         }
1551 
1552         cfs_rq.clear_buddies(&se);
1553 
1554         rq.update_rq_clock();
1555 
1556         cfs_rq.update_current();
1557 
1558         rq.clock_updata_flags |= ClockUpdataFlag::RQCF_REQ_SKIP;
1559 
1560         se.force_mut().deadline += se.calculate_delta_fair(se.slice);
1561     }
1562 
1563     fn check_preempt_currnet(
1564         rq: &mut CpuRunQueue,
1565         pcb: &Arc<crate::process::ProcessControlBlock>,
1566         wake_flags: WakeupFlags,
1567     ) {
1568         let curr = rq.current();
1569         let mut se = curr.sched_info().sched_entity();
1570         let mut pse = pcb.sched_info().sched_entity();
1571 
1572         if unlikely(Arc::ptr_eq(&se, &pse)) {
1573             return;
1574         }
1575 
1576         // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/fair.c#8160
1577 
1578         let _next_buddy_mark = if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
1579             && !wake_flags.contains(WakeupFlags::WF_FORK)
1580         {
1581             FairSchedEntity::for_each_in_group(&mut pse, |se| {
1582                 if !se.on_rq() {
1583                     return (false, true);
1584                 }
1585 
1586                 if se.is_idle() {
1587                     return (false, true);
1588                 }
1589 
1590                 se.cfs_rq().force_mut().next = Arc::downgrade(&se);
1591 
1592                 return (true, true);
1593             });
1594             true
1595         } else {
1596             false
1597         };
1598 
1599         if curr.flags().contains(ProcessFlags::NEED_SCHEDULE) {
1600             return;
1601         }
1602 
1603         if unlikely(curr.sched_info().policy() == SchedPolicy::IDLE)
1604             && likely(pcb.sched_info().policy() != SchedPolicy::IDLE)
1605         {
1606             rq.resched_current();
1607             return;
1608         }
1609 
1610         if unlikely(pcb.sched_info().policy() != SchedPolicy::CFS)
1611             || !SCHED_FEATURES.contains(SchedFeature::WAKEUP_PREEMPTION)
1612         {
1613             return;
1614         }
1615 
1616         Self::find_matching_se(&mut se, &mut pse);
1617 
1618         let cse_is_idle = se.is_idle();
1619         let pse_is_idle = pse.is_idle();
1620 
1621         if cse_is_idle && !pse_is_idle {
1622             rq.resched_current();
1623             return;
1624         }
1625 
1626         if cse_is_idle != pse_is_idle {
1627             return;
1628         }
1629 
1630         let cfs_rq = se.cfs_rq();
1631         cfs_rq.force_mut().update_current();
1632 
1633         if let Some((_, pick_se)) = cfs_rq.entities.get_first() {
1634             if Arc::ptr_eq(pick_se, &pse) {
1635                 rq.resched_current();
1636                 return;
1637             }
1638         }
1639     }
1640 
1641     fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<crate::process::ProcessControlBlock>> {
1642         let mut cfs_rq = Some(rq.cfs_rq());
1643         if cfs_rq.as_ref().unwrap().nr_running == 0 {
1644             return None;
1645         }
1646 
1647         let mut se;
1648         loop {
1649             let cfs = cfs_rq.unwrap();
1650             let cfs = cfs.force_mut();
1651             let curr = cfs.current();
1652             if let Some(curr) = curr {
1653                 if curr.on_rq() {
1654                     cfs.update_current();
1655                 } else {
1656                     cfs.set_current(Weak::default());
1657                 }
1658             }
1659 
1660             se = cfs.pick_next_entity();
1661             match se.clone() {
1662                 Some(val) => cfs_rq = val.my_cfs_rq.clone(),
1663                 None => {
1664                     break;
1665                 }
1666             }
1667 
1668             if cfs_rq.is_none() {
1669                 break;
1670             }
1671         }
1672 
1673         se.map(|se| se.pcb())
1674     }
1675 
1676     fn tick(_rq: &mut CpuRunQueue, pcb: Arc<crate::process::ProcessControlBlock>, queued: bool) {
1677         let mut se = pcb.sched_info().sched_entity();
1678 
1679         FairSchedEntity::for_each_in_group(&mut se, |se| {
1680             let binding = se.clone();
1681             let binding = binding.cfs_rq();
1682             let cfs_rq = binding.force_mut();
1683 
1684             cfs_rq.entity_tick(se, queued);
1685             (true, true)
1686         });
1687     }
1688 
1689     fn task_fork(pcb: Arc<ProcessControlBlock>) {
1690         let rq = cpu_rq(smp_get_processor_id().data() as usize);
1691         let se = pcb.sched_info().sched_entity();
1692 
1693         let (rq, _guard) = rq.self_lock();
1694 
1695         rq.update_rq_clock();
1696 
1697         let binding = se.cfs_rq();
1698         let cfs_rq = binding.force_mut();
1699 
1700         if cfs_rq.current().is_some() {
1701             cfs_rq.update_current();
1702         }
1703 
1704         cfs_rq.place_entity(se.clone(), EnqueueFlag::ENQUEUE_INITIAL);
1705     }
1706 
1707     fn pick_next_task(
1708         rq: &mut CpuRunQueue,
1709         prev: Option<Arc<ProcessControlBlock>>,
1710     ) -> Option<Arc<ProcessControlBlock>> {
1711         let mut cfs_rq = rq.cfs_rq();
1712         if rq.nr_running == 0 {
1713             return None;
1714         }
1715 
1716         if prev.is_none()
1717             || (prev.is_some() && prev.as_ref().unwrap().sched_info().policy() != SchedPolicy::CFS)
1718         {
1719             if let Some(prev) = prev {
1720                 match prev.sched_info().policy() {
1721                     SchedPolicy::RT => todo!(),
1722                     SchedPolicy::FIFO => todo!(),
1723                     SchedPolicy::CFS => todo!(),
1724                     SchedPolicy::IDLE => IdleScheduler::put_prev_task(rq, prev),
1725                 }
1726             }
1727             let mut se;
1728             loop {
1729                 match cfs_rq.pick_next_entity() {
1730                     Some(s) => se = s,
1731                     None => return None,
1732                 }
1733 
1734                 cfs_rq.force_mut().set_next_entity(&se);
1735 
1736                 match &se.my_cfs_rq {
1737                     Some(q) => cfs_rq = q.clone(),
1738                     None => break,
1739                 }
1740             }
1741 
1742             return Some(se.pcb());
1743         }
1744 
1745         let prev = prev.unwrap();
1746         let se = cfs_rq.pick_next_entity();
1747 
1748         if let Some(mut se) = se {
1749             loop {
1750                 let curr = cfs_rq.current();
1751                 if let Some(current) = curr {
1752                     if current.on_rq() {
1753                         cfs_rq.force_mut().update_current()
1754                     } else {
1755                         cfs_rq.force_mut().set_current(Weak::default());
1756                     }
1757                 }
1758 
1759                 match cfs_rq.pick_next_entity() {
1760                     Some(e) => se = e,
1761                     None => break,
1762                 }
1763 
1764                 if let Some(q) = se.my_cfs_rq.clone() {
1765                     cfs_rq = q;
1766                 } else {
1767                     break;
1768                 }
1769             }
1770 
1771             let p = se.pcb();
1772 
1773             if !Arc::ptr_eq(&prev, &p) {
1774                 let mut pse = prev.sched_info().sched_entity();
1775 
1776                 while !(Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq())
1777                     && Arc::ptr_eq(&se.cfs_rq(), &cfs_rq))
1778                 {
1779                     let se_depth = se.depth;
1780                     let pse_depth = pse.depth;
1781 
1782                     if se_depth <= pse_depth {
1783                         pse.cfs_rq().force_mut().put_prev_entity(pse.clone());
1784                         pse = pse.parent().unwrap();
1785                     }
1786 
1787                     if se_depth >= pse_depth {
1788                         se.cfs_rq().force_mut().set_next_entity(&se);
1789                         se = se.parent().unwrap();
1790                     }
1791                 }
1792 
1793                 cfs_rq.force_mut().put_prev_entity(pse);
1794                 cfs_rq.force_mut().set_next_entity(&se);
1795             }
1796 
1797             return Some(p);
1798         } else {
1799             return None;
1800         }
1801     }
1802 
1803     fn put_prev_task(_rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>) {
1804         let mut se = prev.sched_info().sched_entity();
1805 
1806         FairSchedEntity::for_each_in_group(&mut se, |se| {
1807             let cfs = se.cfs_rq();
1808             cfs.force_mut().put_prev_entity(se);
1809 
1810             return (true, true);
1811         });
1812     }
1813 }
1814