xref: /DragonOS/kernel/src/sched/fair.rs (revision 703ce5a77c1e4bebadc5aaa6cbb21595663d4477)
1 use core::intrinsics::likely;
2 use core::intrinsics::unlikely;
3 use core::mem::swap;
4 use core::sync::atomic::fence;
5 use core::sync::atomic::{AtomicU64, Ordering};
6 
7 use crate::libs::rbtree::RBTree;
8 use crate::libs::spinlock::SpinLock;
9 use crate::process::ProcessControlBlock;
10 use crate::process::ProcessFlags;
11 use crate::sched::clock::ClockUpdataFlag;
12 use crate::sched::{cpu_rq, SchedFeature, SCHED_FEATURES};
13 use crate::smp::core::smp_get_processor_id;
14 use crate::time::jiffies::TICK_NESC;
15 use crate::time::timer::clock;
16 use crate::time::NSEC_PER_MSEC;
17 use alloc::sync::{Arc, Weak};
18 
19 use super::idle::IdleScheduler;
20 use super::pelt::{add_positive, sub_positive, SchedulerAvg, UpdateAvgFlags, PELT_MIN_DIVIDER};
21 use super::{
22     CpuRunQueue, DequeueFlag, EnqueueFlag, LoadWeight, OnRq, SchedPolicy, Scheduler, TaskGroup,
23     WakeupFlags, SCHED_CAPACITY_SHIFT,
24 };
25 
/// Minimum preemption granularity for CPU-bound tasks, in nanoseconds.
/// Default is 0.75 ms (Linux scales this by 1 + log2(ncpus); here the base value is used).
/// NOTE(review): "SHCED" is a typo for "SCHED"; kept as-is because the name is
/// referenced throughout this file (and possibly beyond this chunk).
static SYSCTL_SHCED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
/// Normalized minimum preemption granularity (currently unused).
#[allow(dead_code)]
static NORMALIZED_SYSCTL_SCHED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);

/// Base time slice handed to a scheduling entity, in nanoseconds (0.75 ms).
static SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
/// Normalized base slice (currently unused).
#[allow(dead_code)]
static NORMALIZED_SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);

/// Scheduling-latency task count: number of runnable tasks one scheduling
/// period is expected to cover (see `CfsRunQueue::sched_period`).
static SCHED_NR_LATENCY: AtomicU64 = AtomicU64::new(8);
/// A scheduling entity: the unit managed by the CFS scheduler. It can
/// represent a single task, a task group, a user, or any other grouping.
#[derive(Debug)]
pub struct FairSchedEntity {
    /// Load weight of this entity (drives vruntime scaling).
    pub load: LoadWeight,
    /// Virtual deadline used by the EEVDF-style picker.
    pub deadline: u64,
    /// Minimum deadline within this entity's subtree.
    pub min_deadline: u64,

    /// Run-queue membership state (`OnRq::None` when not queued).
    pub on_rq: OnRq,
    /// Timestamp (rq clock) when the entity last started executing.
    pub exec_start: u64,
    /// Total accumulated real execution time.
    pub sum_exec_runtime: u64,
    /// Virtual runtime (weight-scaled execution time).
    pub vruntime: u64,
    /// Virtual lag: roughly weight * (V - v_i), where V is the queue's
    /// average virtual time and v_i this entity's vruntime.
    pub vlag: i64,
    // Time slice assigned to this entity, in nanoseconds.
    pub slice: u64,
    /// `sum_exec_runtime` snapshot taken when the entity was scheduled in.
    pub prev_sum_exec_runtime: u64,

    /// PELT load-average tracking state.
    pub avg: SchedulerAvg,

    /// Parent entity in the group hierarchy (empty at the root).
    parent: Weak<FairSchedEntity>,

    /// Depth in the group hierarchy.
    pub depth: u32,

    /// Weak self-reference so `&self` methods can hand out `Arc`s.
    self_ref: Weak<FairSchedEntity>,

    /// The CFS run queue this entity is queued on.
    cfs_rq: Weak<CfsRunQueue>,

    /// The private CFS queue owned by this entity when it is a group.
    my_cfs_rq: Option<Arc<CfsRunQueue>>,

    /// Cached number of runnable tasks below this entity (group input).
    runnable_weight: u64,

    /// Backing process, when this entity represents a task.
    pcb: Weak<ProcessControlBlock>,
}
84 
85 impl FairSchedEntity {
86     pub fn new() -> Arc<Self> {
87         let ret = Arc::new(Self {
88             parent: Weak::new(),
89             self_ref: Weak::new(),
90             pcb: Weak::new(),
91             cfs_rq: Weak::new(),
92             my_cfs_rq: None,
93             on_rq: OnRq::None,
94             slice: SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst),
95             load: Default::default(),
96             deadline: Default::default(),
97             min_deadline: Default::default(),
98             exec_start: Default::default(),
99             sum_exec_runtime: Default::default(),
100             vruntime: Default::default(),
101             vlag: Default::default(),
102             prev_sum_exec_runtime: Default::default(),
103             avg: Default::default(),
104             depth: Default::default(),
105             runnable_weight: Default::default(),
106         });
107 
108         ret.force_mut().self_ref = Arc::downgrade(&ret);
109 
110         ret
111     }
112 }
113 
impl FairSchedEntity {
    /// Upgrades the stored weak self-reference into an `Arc`.
    /// Panics if called before `new()` finished wiring `self_ref`.
    pub fn self_arc(&self) -> Arc<FairSchedEntity> {
        self.self_ref.upgrade().unwrap()
    }

    /// Whether this entity is currently on a run queue (queued or migrating).
    #[inline]
    pub fn on_rq(&self) -> bool {
        self.on_rq != OnRq::None
    }

    /// The owning process. Panics if the PCB was dropped or never set.
    pub fn pcb(&self) -> Arc<ProcessControlBlock> {
        self.pcb.upgrade().unwrap()
    }

    /// Sets the backing process control block.
    pub fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.pcb = pcb
    }

    /// The CFS run queue this entity belongs to. Panics if unset.
    #[inline]
    pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
        self.cfs_rq.upgrade().unwrap()
    }

    /// Sets the owning CFS run queue.
    pub fn set_cfs(&mut self, cfs: Weak<CfsRunQueue>) {
        self.cfs_rq = cfs;
    }

    /// Parent entity in the group hierarchy, if any.
    pub fn parent(&self) -> Option<Arc<FairSchedEntity>> {
        self.parent.upgrade()
    }

    /// Obtains a mutable reference from a shared one.
    ///
    /// NOTE(review): this casts `&self` to `&mut Self` and is only sound if
    /// callers guarantee exclusive access (e.g. while holding the rq lock);
    /// producing two aliasing `&mut` references is undefined behavior.
    #[allow(clippy::mut_from_ref)]
    pub fn force_mut(&self) -> &mut Self {
        unsafe {
            let p = self as *const Self as usize;
            (p as *mut Self).as_mut().unwrap()
        }
    }

    /// Whether this entity represents a task (as opposed to a group).
    #[inline]
    pub fn is_task(&self) -> bool {
        // TODO: task groups are not implemented yet, so every entity is a task.
        true
    }

    /// Whether this entity (or, for a group, its queue) is idle-class.
    #[inline]
    pub fn is_idle(&self) -> bool {
        if self.is_task() {
            return self.pcb().sched_info().policy() == SchedPolicy::IDLE;
        }

        return self.cfs_rq().is_idle();
    }

    /// Clears the "next buddy" hint on every queue up the hierarchy that
    /// currently points at this entity; stops at the first level that does not.
    pub fn clear_buddies(&self) {
        let mut se = self.self_arc();

        Self::for_each_in_group(&mut se, |se| {
            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();

            if let Some(next) = cfs_rq.next.upgrade() {
                if !Arc::ptr_eq(&next, &se) {
                    // This level's hint points elsewhere: stop walking up.
                    return (false, true);
                }
            }
            cfs_rq.next = Weak::new();
            return (true, true);
        });
    }

    /// Converts a real-time delta into a virtual-time delta by scaling
    /// with this entity's weight.
    ///
    /// NOTE(review): the comparison uses `NICE_0_LOAD_SHIFT`, which by its
    /// name is a shift amount rather than the NICE-0 load *value* — verify
    /// against `LoadWeight`'s definition that this is intended.
    pub fn calculate_delta_fair(&self, delta: u64) -> u64 {
        if unlikely(self.load.weight != LoadWeight::NICE_0_LOAD_SHIFT as u64) {
            return self
                .force_mut()
                .load
                .calculate_delta(delta, LoadWeight::NICE_0_LOAD_SHIFT as u64);
        };

        delta
    }

    /// Propagates group weight (shares) changes onto this group entity.
    pub fn update_cfs_group(&self) {
        if self.my_cfs_rq.is_none() {
            // Not a group entity: nothing to do.
            return;
        }

        let group_cfs = self.my_cfs_rq.clone().unwrap();

        let shares = group_cfs.task_group().shares;

        if unlikely(self.load.weight != shares) {
            // TODO: recompute shares from group load (reweight).
            self.cfs_rq()
                .force_mut()
                .reweight_entity(self.self_arc(), shares);
        }
    }

    /// Walks `se` and its ancestors, applying `f` at each level, leaving
    /// `se` pointing at the last entity visited.
    ///
    /// `f` returns `(no_break, should_continue)`: when `no_break` is false
    /// the walk stops; when `should_continue` is false the *caller* is
    /// expected to return as well.
    ///
    /// Returns:
    /// - `bool`: the final `should_continue` value
    /// - `Option<Arc<FairSchedEntity>>`: the last entity visited (None when
    ///   the walk ran off the root)
    pub fn for_each_in_group(
        se: &mut Arc<FairSchedEntity>,
        mut f: impl FnMut(Arc<FairSchedEntity>) -> (bool, bool),
    ) -> (bool, Option<Arc<FairSchedEntity>>) {
        let mut should_continue;
        let ret;
        // Iterate up to the root: e.g. if task B belongs to group A, B's
        // share of time depends on both A's weight and B's own weight.
        loop {
            let (no_break, flag) = f(se.clone());
            should_continue = flag;
            if !no_break || !should_continue {
                ret = Some(se.clone());
                break;
            }

            let parent = se.parent();
            if parent.is_none() {
                ret = None;
                break;
            }

            *se = parent.unwrap();
        }

        (should_continue, ret)
    }

    /// Runnable contribution: the queued state (0/1) for a task, or the
    /// cached runnable task count for a group.
    pub fn runnable(&self) -> u64 {
        if self.is_task() {
            return self.on_rq as u64;
        } else {
            self.runnable_weight
        }
    }

    /// Propagates a group's pending PELT deltas up to the parent queue.
    /// Returns true when anything was propagated.
    pub fn propagate_entity_load_avg(&mut self) -> bool {
        if self.is_task() {
            return false;
        }

        let binding = self.my_cfs_rq.clone().unwrap();
        let gcfs_rq = binding.force_mut();

        if gcfs_rq.propagate == 0 {
            return false;
        }

        gcfs_rq.propagate = 0;

        let binding = self.cfs_rq();
        let cfs_rq = binding.force_mut();

        cfs_rq.add_task_group_propagate(gcfs_rq.prop_runnable_sum);

        cfs_rq.update_task_group_util(self.self_arc(), gcfs_rq);
        cfs_rq.update_task_group_runnable(self.self_arc(), gcfs_rq);
        cfs_rq.update_task_group_load(self.self_arc(), gcfs_rq);

        return true;
    }

    /// Refreshes `runnable_weight` for group entities from the group queue.
    pub fn update_runnable(&mut self) {
        if !self.is_task() {
            self.runnable_weight = self.my_cfs_rq.clone().unwrap().h_nr_running;
        }
    }

    /// Initializes PELT averages for a fresh entity; tasks start with
    /// their full (scaled-down) weight as `load_avg`.
    pub fn init_entity_runnable_average(&mut self) {
        self.avg = SchedulerAvg::default();

        if self.is_task() {
            self.avg.load_avg = LoadWeight::scale_load_down(self.load.weight) as usize;
        }
    }
}
304 
/// The CFS run queue; one instance must exist per CPU.
#[allow(dead_code)]
#[derive(Debug)]
pub struct CfsRunQueue {
    /// Aggregate load of all queued entities.
    load: LoadWeight,

    /// Number of entities directly on this queue (used for load balancing).
    nr_running: u64,
    /// Hierarchical task count for this CPU (includes child groups).
    pub h_nr_running: u64,
    /// Accumulated execution clock.
    exec_clock: u64,
    /// Monotonic baseline: smallest virtual runtime on this queue.
    min_vruntime: u64,
    /// Remaining runtime budget (bandwidth control).
    runtime_remaining: u64,

    /// Red-black tree of queued entities, keyed by vruntime.
    pub(super) entities: RBTree<u64, Arc<FairSchedEntity>>,

    /// Idle-class marker (nonzero means idle).
    idle: usize,

    /// Idle-class entities directly on this queue.
    idle_nr_running: u64,

    /// Hierarchical idle-class task count.
    pub idle_h_nr_running: u64,

    /// Entity currently running (not kept in the tree).
    current: Weak<FairSchedEntity>,
    /// "Next buddy" hint: preferred entity to run next.
    next: Weak<FairSchedEntity>,
    /// "Last buddy" hint: entity that ran most recently.
    last: Weak<FairSchedEntity>,
    /// "Skip buddy" hint: entity to pass over when picking.
    skip: Weak<FairSchedEntity>,

    /// Aggregated (scaled-down) load used for `avg_vruntime()`.
    avg_load: i64,
    /// Aggregated weighted vruntime keys, relative to `min_vruntime`.
    avg_vruntime: i64,

    /// Copy of `avg.last_update_time` (mirrors Linux's lock-free reader copy).
    last_update_time_copy: u64,

    /// PELT averages of the whole queue.
    pub avg: SchedulerAvg,

    /// Owning per-CPU run queue.
    rq: Weak<CpuRunQueue>,
    /// Task group that owns this queue.
    task_group: Weak<TaskGroup>,

    pub throttled_clock: u64,
    pub throttled_clock_pelt: u64,
    pub throttled_clock_pelt_time: u64,
    pub throttled_pelt_idle: u64,

    pub throttled: bool,
    pub throttled_count: u64,

    /// Contributions of entities removed remotely, folded in later.
    pub removed: SpinLock<CfsRemoved>,

    /// Nonzero when there are pending deltas to propagate upwards.
    pub propagate: isize,
    pub prop_runnable_sum: isize,
}
365 
/// Accumulated load/util/runnable contributions of entities removed from
/// this queue (e.g. on another CPU), drained by `update_self_load_avg`.
#[derive(Debug, Default)]
pub struct CfsRemoved {
    // Number of pending removed entities.
    pub nr: u32,
    pub load_avg: usize,
    pub util_avg: usize,
    pub runnable_avg: usize,
}
373 
374 impl CfsRunQueue {
375     pub fn new() -> Self {
376         Self {
377             load: LoadWeight::default(),
378             nr_running: 0,
379             h_nr_running: 0,
380             exec_clock: 0,
381             min_vruntime: 1 << 20,
382             entities: RBTree::new(),
383             idle: 0,
384             idle_nr_running: 0,
385             idle_h_nr_running: 0,
386             current: Weak::new(),
387             next: Weak::new(),
388             last: Weak::new(),
389             skip: Weak::new(),
390             avg_load: 0,
391             avg_vruntime: 0,
392             last_update_time_copy: 0,
393             avg: SchedulerAvg::default(),
394             rq: Weak::new(),
395             task_group: Weak::new(),
396             throttled_clock: 0,
397             throttled_clock_pelt: 0,
398             throttled_clock_pelt_time: 0,
399             throttled_pelt_idle: 0,
400             throttled: false,
401             throttled_count: 0,
402             removed: SpinLock::new(CfsRemoved::default()),
403             propagate: 0,
404             prop_runnable_sum: 0,
405             runtime_remaining: 0,
406         }
407     }
408 
    /// The CPU run queue owning this CFS queue. Panics if unset or dropped.
    #[inline]
    pub fn rq(&self) -> Arc<CpuRunQueue> {
        self.rq.upgrade().unwrap()
    }
413 
    /// Sets the owning CPU run queue.
    #[inline]
    pub fn set_rq(&mut self, rq: Weak<CpuRunQueue>) {
        self.rq = rq;
    }
418 
    /// Obtains a mutable reference from a shared one.
    ///
    /// NOTE(review): casts `&self` to `&mut Self`; only sound while callers
    /// serialize access (e.g. hold the rq lock) — two aliasing mutable
    /// references would be undefined behavior.
    #[inline]
    #[allow(clippy::mut_from_ref)]
    pub fn force_mut(&self) -> &mut Self {
        unsafe {
            (self as *const Self as usize as *mut Self)
                .as_mut()
                .unwrap()
        }
    }
428 
    /// Whether this queue is marked idle-class.
    #[inline]
    pub fn is_idle(&self) -> bool {
        self.idle > 0
    }
433 
    /// The entity currently running on this queue, if any.
    #[inline]
    pub fn current(&self) -> Option<Arc<FairSchedEntity>> {
        self.current.upgrade()
    }
438 
    /// Replaces the "currently running" pointer.
    #[inline]
    pub fn set_current(&mut self, curr: Weak<FairSchedEntity>) {
        self.current = curr
    }
443 
    /// The "next buddy" hint, if still alive.
    #[inline]
    pub fn next(&self) -> Option<Arc<FairSchedEntity>> {
        self.next.upgrade()
    }
448 
    /// The task group owning this queue. Panics if unset or dropped.
    pub fn task_group(&self) -> Arc<TaskGroup> {
        self.task_group.upgrade().unwrap()
    }
452 
    /// CFS bandwidth control is not implemented; always reports unused.
    #[allow(dead_code)]
    #[inline]
    pub const fn bandwidth_used() -> bool {
        false
    }
458 
459     /// ## 计算调度周期,基本思想是在一个周期内让每个任务都至少运行一次。
460     /// 这样可以确保所有的任务都能够得到执行,而且可以避免某些任务被长时间地阻塞。
461     pub fn sched_period(nr_running: u64) -> u64 {
462         if unlikely(nr_running > SCHED_NR_LATENCY.load(Ordering::SeqCst)) {
463             // 如果当前活跃的任务数量超过了预设的调度延迟任务数量
464             // 调度周期的长度将直接设置为活跃任务数量乘以最小抢占粒度
465             return nr_running * SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);
466         } else {
467             // 如果活跃任务数量未超过预设的延迟任务数量,那么调度周期的长度将设置为SCHED_NR_LATENCY
468             return SCHED_NR_LATENCY.load(Ordering::SeqCst);
469         }
470     }
471 
    /// ## Virtual-time slice for the entity.
    ///
    /// vruntime = runtime / weight, i.e. the real slice scaled by weight.
    #[allow(dead_code)]
    pub fn sched_vslice(&self, entity: Arc<FairSchedEntity>) -> u64 {
        let slice = self.sched_slice(entity.clone());
        return entity.calculate_delta_fair(slice);
    }
480 
    /// ## Real-time slice for the entity within one scheduling period.
    #[allow(dead_code)]
    pub fn sched_slice(&self, mut entity: Arc<FairSchedEntity>) -> u64 {
        let mut nr_running = self.nr_running;
        if SCHED_FEATURES.contains(SchedFeature::ALT_PERIOD) {
            nr_running = self.h_nr_running;
        }

        // Total period; count the entity itself if it is not queued yet.
        let mut slice = Self::sched_period(nr_running + (!entity.on_rq()) as u64);

        // Walk up to the root: if task B belongs to group A, B's share of
        // time depends on both A's weight and B's own weight.
        FairSchedEntity::for_each_in_group(&mut entity, |se| {
            if unlikely(!se.on_rq()) {
                // NOTE(review): this adds the weight to the *live* queue
                // load and never removes it; Linux performs this on a local
                // copy of the load — verify the permanent mutation is intended.
                se.cfs_rq().force_mut().load.update_load_add(se.load.weight);
            }
            slice = se
                .cfs_rq()
                .force_mut()
                .load
                .calculate_delta(slice, se.load.weight);

            (true, true)
        });

        if SCHED_FEATURES.contains(SchedFeature::BASE_SLICE) {
            // TODO: IDLE entities should arguably get a smaller floor.
            let min_gran = SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);

            // Never hand out less than the minimum granularity.
            slice = min_gran.max(slice)
        }

        slice
    }
516 
    /// ## Tick-time preemption check: if the running entity exceeded its
    /// ideal slice, request a reschedule and clear buddy hints so it is
    /// not immediately re-elected via buddy favours.
    #[allow(dead_code)]
    pub fn check_preempt_tick(&mut self, curr: Arc<FairSchedEntity>) {
        // Ideal runtime of this entity within the current period.
        let ideal_runtime = self.sched_slice(curr.clone());

        // Actual runtime consumed since it was scheduled in.
        let delta_exec = curr.sum_exec_runtime - curr.prev_sum_exec_runtime;

        if delta_exec > ideal_runtime {
            // Ran longer than the ideal slice: preempt.
            self.rq().resched_current();

            self.clear_buddies(&curr);
            return;
        }

        if delta_exec < SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst) {
            // Below the minimum granularity: never preempt this early.
            return;
        }

        // Remaining case (comparison against the leftmost waiter) is unimplemented.
        todo!()
    }
540 
541     pub fn clear_buddies(&mut self, se: &Arc<FairSchedEntity>) {
542         if let Some(next) = self.next.upgrade() {
543             if Arc::ptr_eq(&next, se) {
544                 se.clear_buddies();
545             }
546         }
547     }
548 
    /// Periodic-tick handling for the running entity: refresh its runtime
    /// statistics and load averages; `queued` forces a reschedule request.
    pub fn entity_tick(&mut self, curr: Arc<FairSchedEntity>, queued: bool) {
        // Update the running entity's runtime accounting.
        self.update_current();

        self.update_load_avg(&curr, UpdateAvgFlags::UPDATE_TG);

        // Propagate group weight (shares) changes.
        curr.update_cfs_group();

        if queued {
            self.rq().resched_current();
            return;
        }
    }
564 
    /// Updates the running entity's runtime statistics: charges the wall
    /// time elapsed since `exec_start` to its real and virtual runtimes,
    /// then refreshes its deadline, the queue's min_vruntime baseline and
    /// the queue runtime budget.
    pub fn update_current(&mut self) {
        let curr = self.current();
        if unlikely(curr.is_none()) {
            return;
        }

        let now = self.rq().clock_task();
        let curr = curr.unwrap();

        fence(Ordering::SeqCst);
        if unlikely(now <= curr.exec_start) {
            // Clock has not advanced past the last accounting point;
            // nothing to charge.
            return;
        }

        fence(Ordering::SeqCst);
        let delta_exec = now - curr.exec_start;

        let curr = curr.force_mut();

        curr.exec_start = now;

        curr.sum_exec_runtime += delta_exec;

        // Weight the real delta into virtual runtime.
        curr.vruntime += curr.calculate_delta_fair(delta_exec);
        fence(Ordering::SeqCst);
        self.update_deadline(&curr.self_arc());
        self.update_min_vruntime();

        self.account_cfs_rq_runtime(delta_exec);
    }
601 
    /// Charges `delta_exec` against this queue's remaining runtime budget;
    /// when the budget runs out, refills it and requests a reschedule if
    /// other tasks are waiting.
    fn account_cfs_rq_runtime(&mut self, delta_exec: u64) {
        if likely(self.runtime_remaining > delta_exec) {
            self.runtime_remaining -= delta_exec;
            return;
        }

        // FIXME: real bandwidth control is unimplemented; simply hand out
        // a fixed 5000 ms budget for now.
        self.runtime_remaining = 5000 * NSEC_PER_MSEC as u64;

        if likely(self.current().is_some()) && self.nr_running > 1 {
            self.rq().resched_current();
        }
    }
623 
    /// Refreshes the entity's virtual deadline once its vruntime has
    /// caught up with the old one; requests a reschedule when other tasks
    /// are waiting.
    pub fn update_deadline(&mut self, se: &Arc<FairSchedEntity>) {
        if se.vruntime < se.deadline {
            // Deadline not reached yet: nothing to do.
            return;
        }

        se.force_mut().slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);

        // Next deadline = current vruntime + weight-scaled slice.
        se.force_mut().deadline = se.vruntime + se.calculate_delta_fair(se.slice);

        if self.nr_running > 1 {
            self.rq().resched_current();
            self.clear_buddies(se);
        }
    }
640 
    /// ## Advances the queue's monotonic `min_vruntime` baseline from the
    /// running entity and the leftmost queued entity.
    pub fn update_min_vruntime(&mut self) {
        let curr = self.current();

        let mut vruntime = self.min_vruntime;

        if curr.is_some() {
            let curr = curr.as_ref().unwrap();
            if curr.on_rq() {
                vruntime = curr.vruntime;
            } else {
                // Running entity left the queue: drop the stale pointer.
                self.set_current(Weak::default());
            }
        }

        // The leftmost entity holds the smallest vruntime in the tree.
        let leftmost = self.entities.get_first();
        if let Some(leftmost) = leftmost {
            let se = leftmost.1;

            if curr.is_none() {
                vruntime = se.vruntime;
            } else {
                vruntime = vruntime.min(se.vruntime);
            }
        }

        self.min_vruntime = self.__update_min_vruntime(vruntime);
    }
670 
671     fn __update_min_vruntime(&mut self, vruntime: u64) -> u64 {
672         let mut min_vruntime = self.min_vruntime;
673 
674         let delta = vruntime as i64 - min_vruntime as i64;
675         if delta > 0 {
676             self.avg_vruntime -= self.avg_load * delta;
677             min_vruntime = vruntime;
678         }
679 
680         return min_vruntime;
681     }
682 
683     // 判断是否为当前任务
684     pub fn is_curr(&self, se: &Arc<FairSchedEntity>) -> bool {
685         if self.current().is_none() {
686             false
687         } else {
688             // 判断当前和传入的se是否相等
689             Arc::ptr_eq(se, self.current().as_ref().unwrap())
690         }
691     }
692 
    /// Changes `se`'s weight to `weight`, keeping its vruntime/deadline and
    /// the queue's load and PELT sums consistent (dequeue, adjust, re-enqueue).
    pub fn reweight_entity(&mut self, se: Arc<FairSchedEntity>, weight: u64) {
        // Is this the running entity?
        let is_curr = self.is_curr(&se);

        // If queued, temporarily remove it from queue accounting.
        if se.on_rq() {
            if is_curr {
                self.update_current();
            } else {
                // Not running: take it out of the rbtree.
                self.inner_dequeue_entity(&se);
            }

            // Remove its old weight from the queue load.
            self.load.update_load_sub(se.load.weight);
        }

        self.dequeue_load_avg(&se);

        if !se.on_rq() {
            // Off-queue: rescale the stored lag to the new weight.
            se.force_mut().vlag = se.vlag * se.load.weight as i64 / weight as i64;
        } else {
            self.reweight_eevdf(&se, weight);
        }
        se.force_mut().load.update_load_set(weight);

        // SMP: rebuild load_avg from load_sum under the new weight.
        let divider = se.avg.get_pelt_divider();
        se.force_mut().avg.load_avg = LoadWeight::scale_load_down(se.load.weight) as usize
            * se.avg.load_sum as usize
            / divider;

        self.enqueue_load_avg(se.clone());

        if se.on_rq() {
            self.load.update_load_add(se.load.weight);
            if !is_curr {
                self.inner_enqueue_entity(&se);
            }

            self.update_min_vruntime();
        }
    }
738 
    /// Rescales `se`'s vruntime lag and virtual deadline when its weight
    /// changes, preserving its relative position (EEVDF invariants).
    fn reweight_eevdf(&mut self, se: &Arc<FairSchedEntity>, weight: u64) {
        let old_weight = se.load.weight;
        let avg_vruntime = self.avg_vruntime();
        let mut vlag;
        if avg_vruntime != se.vruntime {
            // Scale the lag by old_weight/new_weight and reposition vruntime
            // so the weighted lag stays constant.
            vlag = avg_vruntime as i64 - se.vruntime as i64;
            vlag = vlag * old_weight as i64 / weight as i64;
            se.force_mut().vruntime = (avg_vruntime as i64 - vlag) as u64;
        }

        // Rescale the remaining virtual slice the same way.
        let mut vslice = se.deadline as i64 - avg_vruntime as i64;
        vslice = vslice * old_weight as i64 / weight as i64;
        se.force_mut().deadline = avg_vruntime + vslice as u64;
    }
754 
    /// Weighted average vruntime of the queue (the EEVDF "V"), including
    /// the running entity, expressed in absolute vruntime units.
    fn avg_vruntime(&self) -> u64 {
        let curr = self.current();
        let mut avg = self.avg_vruntime;
        let mut load = self.avg_load;

        if let Some(curr) = curr {
            if curr.on_rq() {
                // The running entity is not part of the tree aggregate;
                // fold it in here.
                let weight = LoadWeight::scale_load_down(curr.load.weight);
                avg += self.entity_key(&curr) * weight as i64;
                load += weight as i64;
            }
        }

        if load > 0 {
            if avg < 0 {
                // Round towards negative infinity (sign-biased division).
                avg -= load - 1;
            }

            avg /= load;
        }

        return self.min_vruntime + avg as u64;
    }
778 
779     #[inline]
780     pub fn entity_key(&self, se: &Arc<FairSchedEntity>) -> i64 {
781         return se.vruntime as i64 - self.min_vruntime as i64;
782     }
783 
784     pub fn avg_vruntime_add(&mut self, se: &Arc<FairSchedEntity>) {
785         let weight = LoadWeight::scale_load_down(se.load.weight);
786 
787         let key = self.entity_key(se);
788 
789         let avg_vruntime = self.avg_vruntime + key * weight as i64;
790 
791         self.avg_vruntime = avg_vruntime;
792         self.avg_load += weight as i64;
793     }
794 
795     pub fn avg_vruntime_sub(&mut self, se: &Arc<FairSchedEntity>) {
796         let weight = LoadWeight::scale_load_down(se.load.weight);
797 
798         let key = self.entity_key(se);
799 
800         let avg_vruntime = self.avg_vruntime - key * weight as i64;
801 
802         self.avg_vruntime = avg_vruntime;
803         self.avg_load -= weight as i64;
804     }
805 
    /// Computes vruntime/deadline for an entity being placed on the queue,
    /// compensating for its stored lag so overall fairness is preserved.
    fn place_entity(&mut self, se: Arc<FairSchedEntity>, flags: EnqueueFlag) {
        let vruntime = self.avg_vruntime();
        let mut lag = 0;

        let se = se.force_mut();
        se.slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);

        let mut vslice = se.calculate_delta_fair(se.slice);

        if self.nr_running > 0 {
            let curr = self.current();

            lag = se.vlag;

            let mut load = self.avg_load;

            if let Some(curr) = curr {
                if curr.on_rq() {
                    // Include the running entity's weight in the total.
                    load += LoadWeight::scale_load_down(curr.load.weight) as i64;
                }
            }

            // Scale the lag as if this entity's weight were already part
            // of the queue: lag *= (W + w_se) / W.
            lag *= load + LoadWeight::scale_load_down(se.load.weight) as i64;

            if load == 0 {
                // Guard the division below.
                load = 1;
            }

            lag /= load;
        }

        se.vruntime = vruntime - lag as u64;

        if flags.contains(EnqueueFlag::ENQUEUE_INITIAL) {
            // Brand-new tasks get half a slice so they don't dominate at once.
            vslice /= 2;
        }

        se.deadline = se.vruntime + vslice;
    }
846 
    /// Updates PELT averages for `se` and this queue, then handles the
    /// requested attach/detach transitions.
    ///
    /// NOTE(review): the branch structure diverges from Linux's
    /// `update_load_avg()`: there, attach runs when `last_update_time == 0`
    /// and DO_ATTACH is set, and the detach branch tests DO_DETACH. Here the
    /// detach branch tests DO_ATTACH again, so the DO_DETACH requested by
    /// `dequeue_entity` never detaches anything. A naive fix would reroute
    /// currently-working paths into the `todo!()` arms, so it is only
    /// flagged here pending the missing attach/util-change implementations.
    fn update_load_avg(&mut self, se: &Arc<FairSchedEntity>, flags: UpdateAvgFlags) {
        let now = self.cfs_rq_clock_pelt();

        // Age the entity's own averages unless explicitly skipped.
        if se.avg.last_update_time > 0 && !flags.contains(UpdateAvgFlags::SKIP_AGE_LOAD) {
            se.force_mut().update_load_avg(self, now);
        }

        let mut decayed = self.update_self_load_avg(now);
        decayed |= se.force_mut().propagate_entity_load_avg() as u32;

        if se.avg.last_update_time > 0 && flags.contains(UpdateAvgFlags::DO_ATTACH) {
            todo!()
        } else if flags.contains(UpdateAvgFlags::DO_ATTACH) {
            self.detach_entity_load_avg(se);
        } else if decayed > 0 {
            // cfs_rq_util_change is unimplemented.

            todo!()
        }
    }
868 
869     /// 将实体的负载均值与对应cfs分离
870     fn detach_entity_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
871         self.dequeue_load_avg(se);
872 
873         sub_positive(&mut self.avg.util_avg, se.avg.util_avg);
874         sub_positive(&mut (self.avg.util_sum as usize), se.avg.util_sum as usize);
875         self.avg.util_sum = self
876             .avg
877             .util_sum
878             .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
879 
880         sub_positive(&mut self.avg.runnable_avg, se.avg.runnable_avg);
881         sub_positive(
882             &mut (self.avg.runnable_sum as usize),
883             se.avg.runnable_sum as usize,
884         );
885         self.avg.runnable_sum = self
886             .avg
887             .runnable_sum
888             .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
889 
890         self.propagate = 1;
891         self.prop_runnable_sum += se.avg.load_sum as isize;
892     }
893 
894     fn update_self_load_avg(&mut self, now: u64) -> u32 {
895         let mut removed_load = 0;
896         let mut removed_util = 0;
897         let mut removed_runnable = 0;
898 
899         let mut decayed = 0;
900 
901         if self.removed.lock().nr > 0 {
902             let mut removed_guard = self.removed.lock();
903             let divider = self.avg.get_pelt_divider();
904 
905             swap::<usize>(&mut removed_guard.util_avg, &mut removed_util);
906             swap::<usize>(&mut removed_guard.load_avg, &mut removed_load);
907             swap::<usize>(&mut removed_guard.runnable_avg, &mut removed_runnable);
908 
909             removed_guard.nr = 0;
910 
911             let mut r = removed_load;
912 
913             sub_positive(&mut self.avg.load_avg, r);
914             sub_positive(&mut (self.avg.load_sum as usize), r * divider);
915 
916             self.avg.load_sum = self
917                 .avg
918                 .load_sum
919                 .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
920 
921             r = removed_util;
922             sub_positive(&mut self.avg.util_avg, r);
923             sub_positive(&mut (self.avg.util_sum as usize), r * divider);
924             self.avg.util_sum = self
925                 .avg
926                 .util_sum
927                 .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
928 
929             r = removed_runnable;
930             sub_positive(&mut self.avg.runnable_avg, r);
931             sub_positive(&mut (self.avg.runnable_sum as usize), r * divider);
932             self.avg.runnable_sum = self
933                 .avg
934                 .runnable_sum
935                 .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
936 
937             drop(removed_guard);
938             self.add_task_group_propagate(
939                 -(removed_runnable as isize * divider as isize) >> SCHED_CAPACITY_SHIFT,
940             );
941 
942             decayed = 1;
943         }
944 
945         decayed |= self.__update_load_avg(now) as u32;
946 
947         self.last_update_time_copy = self.avg.last_update_time;
948 
949         return decayed;
950     }
951 
952     fn __update_load_avg(&mut self, now: u64) -> bool {
953         if self.avg.update_load_sum(
954             now,
955             LoadWeight::scale_load_down(self.load.weight) as u32,
956             self.h_nr_running as u32,
957             self.current().is_some() as u32,
958         ) {
959             self.avg.update_load_avg(1);
960             return true;
961         }
962 
963         return false;
964     }
965 
    /// Records a runnable-sum delta to be propagated to the parent level
    /// on the next load-average update.
    fn add_task_group_propagate(&mut self, runnable_sum: isize) {
        self.propagate = 1;
        self.prop_runnable_sum += runnable_sum;
    }
970 
    /// Inserts `se` into this queue: refreshes statistics, places its
    /// vruntime/deadline, and updates load accounting.
    pub fn enqueue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: EnqueueFlag) {
        let is_curr = self.is_curr(se);

        // The running entity is placed before stats are refreshed...
        if is_curr {
            self.place_entity(se.clone(), flags);
        }

        self.update_current();

        self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG | UpdateAvgFlags::DO_ATTACH);

        se.force_mut().update_runnable();

        se.update_cfs_group();

        // ...while everyone else is placed afterwards, against fresh stats.
        if !is_curr {
            self.place_entity(se.clone(), flags);
        }

        self.account_entity_enqueue(se);

        if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
            // Force runtime accounting to restart on this CPU's clock.
            se.force_mut().exec_start = 0;
        }

        if !is_curr {
            self.inner_enqueue_entity(se);
        }

        se.force_mut().on_rq = OnRq::Queued;

        if self.nr_running == 1 {
            // Queue just became non-empty (only the entity added above).
            // TODO: unthrottle handling.
        }
    }
1008 
    /// Removes `se` from this queue: updates lag and load averages, takes
    /// it out of the rbtree, and refreshes the min_vruntime baseline.
    pub fn dequeue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: DequeueFlag) {
        let mut action = UpdateAvgFlags::UPDATE_TG;

        if se.is_task() && se.on_rq == OnRq::Migrating {
            // A migrating task takes its load contribution with it.
            action |= UpdateAvgFlags::DO_DETACH;
        }

        self.update_current();

        self.update_load_avg(se, action);

        se.force_mut().update_runnable();

        self.clear_buddies(se);

        self.update_entity_lag(se);

        // The running entity is not in the rbtree; only dequeue others.
        if let Some(curr) = self.current() {
            if !Arc::ptr_eq(&curr, se) {
                self.inner_dequeue_entity(se);
            }
        } else {
            self.inner_dequeue_entity(se);
        }

        se.force_mut().on_rq = OnRq::None;

        self.account_entity_dequeue(se);

        // return_cfs_rq_runtime (bandwidth) is unimplemented.

        se.update_cfs_group();

        // Skip the baseline update for a pure save (non-move) dequeue.
        if flags & (DequeueFlag::DEQUEUE_SAVE | DequeueFlag::DEQUEUE_MOVE)
            != DequeueFlag::DEQUEUE_SAVE
        {
            self.update_min_vruntime();
        }

        if self.nr_running == 0 {
            self.update_idle_clock_pelt()
        }
    }
1052 
    /// Put the previously scheduled entity back into the run queue.
    pub fn put_prev_entity(&mut self, prev: Arc<FairSchedEntity>) {
        // Charge the time `prev` spent running before it leaves the CPU.
        if prev.on_rq() {
            self.update_current();
        }

        // NOTE(review): `on_rq` is deliberately re-read after
        // update_current(); presumably the state can change there — confirm.
        if prev.on_rq() {
            self.inner_enqueue_entity(&prev);
        }

        // No entity is running on this queue any more.
        self.set_current(Weak::default());
    }
1065 
    /// Make `se` the queue's current (running) entity.
    pub fn set_next_entity(&mut self, se: &Arc<FairSchedEntity>) {
        self.clear_buddies(se);

        // A running entity is taken out of the tree; its deadline is stashed
        // in `vlag` while it runs.
        if se.on_rq() {
            self.inner_dequeue_entity(se);
            self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG);
            se.force_mut().vlag = se.deadline as i64;
        }

        self.set_current(Arc::downgrade(se));

        // Baseline used later to compute how long this run lasted.
        se.force_mut().prev_sum_exec_runtime = se.sum_exec_runtime;
    }
1080 
1081     fn update_idle_clock_pelt(&mut self) {
1082         let throttled = if unlikely(self.throttled_count > 0) {
1083             u64::MAX
1084         } else {
1085             self.throttled_clock_pelt_time
1086         };
1087 
1088         self.throttled_clock_pelt = throttled;
1089     }
1090 
1091     fn update_entity_lag(&mut self, se: &Arc<FairSchedEntity>) {
1092         let lag = self.avg_vruntime() as i64 - se.vruntime as i64;
1093 
1094         let limit = se.calculate_delta_fair((TICK_NESC as u64).max(2 * se.slice)) as i64;
1095 
1096         se.force_mut().vlag = if lag < -limit {
1097             -limit
1098         } else if lag > limit {
1099             limit
1100         } else {
1101             lag
1102         }
1103     }
1104 
    /// Add `se`'s weight and head-count to this queue's accounting.
    fn account_entity_enqueue(&mut self, se: &Arc<FairSchedEntity>) {
        self.load.update_load_add(se.load.weight);

        if se.is_task() {
            // Task-level entities are also tracked on the per-CPU task list.
            let rq = self.rq();
            let (rq, _guard) = rq.self_lock();
            // TODO:numa
            rq.cfs_tasks.push_back(se.clone());
        }
        self.nr_running += 1;
        if se.is_idle() {
            self.idle_nr_running += 1;
        }
    }
1119 
    /// Remove `se`'s weight and head-count from this queue's accounting
    /// (inverse of `account_entity_enqueue`).
    fn account_entity_dequeue(&mut self, se: &Arc<FairSchedEntity>) {
        self.load.update_load_sub(se.load.weight);

        if se.is_task() {
            let rq = self.rq();
            let (rq, _guard) = rq.self_lock();

            // TODO:numa
            // NOTE(review): if this collection's `extract_if` is lazy (like
            // std's), binding the iterator to `_` drops it without iterating,
            // and the task is never actually removed from `cfs_tasks` —
            // confirm the semantics and consume the iterator if needed.
            let _ = rq.cfs_tasks.extract_if(|x| Arc::ptr_eq(x, se));
        }

        self.nr_running -= 1;
        if se.is_idle() {
            self.idle_nr_running -= 1;
        }
    }
1136 
1137     pub fn inner_enqueue_entity(&mut self, se: &Arc<FairSchedEntity>) {
1138         self.avg_vruntime_add(se);
1139         se.force_mut().min_deadline = se.deadline;
1140         self.entities.insert(se.vruntime, se.clone());
1141         // warn!(
1142         //     "enqueue pcb {:?} cfsrq {:?}",
1143         //     se.pcb().pid(),
1144         //     self.entities
1145         //         .iter()
1146         //         .map(|x| (x.0, x.1.pcb().pid()))
1147         //         .collect::<Vec<_>>()
1148         // );
1149         // send_to_default_serial8250_port(
1150         //     format!(
1151         //         "enqueue pcb {:?} cfsrq {:?}\n",
1152         //         se.pcb().pid(),
1153         //         self.entities
1154         //             .iter()
1155         //             .map(|x| (x.0, x.1.pcb().pid()))
1156         //             .collect::<Vec<_>>()
1157         //     )
1158         //     .as_bytes(),
1159         // );
1160     }
1161 
1162     fn inner_dequeue_entity(&mut self, se: &Arc<FairSchedEntity>) {
1163         // warn!(
1164         //     "before dequeue pcb {:?} cfsrq {:?}",
1165         //     se.pcb().pid(),
1166         //     self.entities
1167         //         .iter()
1168         //         .map(|x| (x.0, x.1.pcb().pid()))
1169         //         .collect::<Vec<_>>()
1170         // );
1171 
1172         // send_to_default_serial8250_port(
1173         //     format!(
1174         //         "before dequeue pcb {:?} cfsrq {:?}\n",
1175         //         se.pcb().pid(),
1176         //         self.entities
1177         //             .iter()
1178         //             .map(|x| (x.0, x.1.pcb().pid()))
1179         //             .collect::<Vec<_>>()
1180         //     )
1181         //     .as_bytes(),
1182         // );
1183 
1184         let mut i = 1;
1185         while let Some(rm) = self.entities.remove(&se.vruntime) {
1186             if Arc::ptr_eq(&rm, se) {
1187                 break;
1188             }
1189             rm.force_mut().vruntime += i;
1190             self.entities.insert(rm.vruntime, rm);
1191 
1192             i += 1;
1193         }
1194         // send_to_default_serial8250_port(
1195         //     format!(
1196         //         "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}\n",
1197         //         se.pcb().pid(),
1198         //         remove.pcb().pid(),
1199         //         self.entities
1200         //             .iter()
1201         //             .map(|x| (x.0, x.1.pcb().pid()))
1202         //             .collect::<Vec<_>>()
1203         //     )
1204         //     .as_bytes(),
1205         // );
1206         // warn!(
1207         //     "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}",
1208         //     se.pcb().pid(),
1209         //     remove.pcb().pid(),
1210         //     self.entities
1211         //         .iter()
1212         //         .map(|x| (x.0, x.1.pcb().pid()))
1213         //         .collect::<Vec<_>>()
1214         // );
1215         self.avg_vruntime_sub(se);
1216     }
1217 
    /// Fold the entity's PELT load contribution into this queue's sums.
    pub fn enqueue_load_avg(&mut self, se: Arc<FairSchedEntity>) {
        self.avg.load_avg += se.avg.load_avg;
        self.avg.load_sum += LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
    }
1222 
1223     pub fn dequeue_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
1224         if self.avg.load_avg > se.avg.load_avg {
1225             self.avg.load_avg -= se.avg.load_avg;
1226         } else {
1227             self.avg.load_avg = 0;
1228         };
1229 
1230         let se_load = LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
1231 
1232         if self.avg.load_sum > se_load {
1233             self.avg.load_sum -= se_load;
1234         } else {
1235             self.avg.load_sum = 0;
1236         }
1237 
1238         self.avg.load_sum = self
1239             .avg
1240             .load_sum
1241             .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64)
1242     }
1243 
1244     pub fn update_task_group_util(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
1245         let mut delta_sum = gcfs_rq.avg.load_avg as isize - se.avg.load_avg as isize;
1246         let delta_avg = delta_sum;
1247 
1248         if delta_avg == 0 {
1249             return;
1250         }
1251 
1252         let divider = self.avg.get_pelt_divider();
1253 
1254         let se = se.force_mut();
1255         se.avg.util_avg = gcfs_rq.avg.util_avg;
1256         let new_sum = se.avg.util_avg * divider;
1257         delta_sum = new_sum as isize - se.avg.util_sum as isize;
1258 
1259         se.avg.util_sum = new_sum as u64;
1260 
1261         add_positive(&mut (self.avg.util_avg as isize), delta_avg);
1262         add_positive(&mut (self.avg.util_sum as isize), delta_sum);
1263 
1264         self.avg.util_sum = self
1265             .avg
1266             .util_sum
1267             .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
1268     }
1269 
1270     pub fn update_task_group_runnable(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
1271         let mut delta_sum = gcfs_rq.avg.runnable_avg as isize - se.avg.runnable_avg as isize;
1272         let delta_avg = delta_sum;
1273 
1274         if delta_avg == 0 {
1275             return;
1276         }
1277 
1278         let divider = self.avg.get_pelt_divider();
1279 
1280         let se = se.force_mut();
1281         se.avg.runnable_avg = gcfs_rq.avg.runnable_avg;
1282         let new_sum = se.avg.runnable_sum * divider as u64;
1283         delta_sum = new_sum as isize - se.avg.runnable_sum as isize;
1284 
1285         se.avg.runnable_sum = new_sum;
1286 
1287         add_positive(&mut (self.avg.runnable_avg as isize), delta_avg);
1288         add_positive(&mut (self.avg.runnable_sum as isize), delta_sum);
1289 
1290         self.avg.runnable_sum = self
1291             .avg
1292             .runnable_sum
1293             .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
1294     }
1295 
1296     pub fn update_task_group_load(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &mut CfsRunQueue) {
1297         let mut runnable_sum = gcfs_rq.prop_runnable_sum;
1298 
1299         let mut load_sum = 0;
1300 
1301         if runnable_sum == 0 {
1302             return;
1303         }
1304 
1305         gcfs_rq.prop_runnable_sum = 0;
1306 
1307         let divider = self.avg.get_pelt_divider();
1308 
1309         if runnable_sum >= 0 {
1310             runnable_sum += se.avg.load_sum as isize;
1311             runnable_sum = runnable_sum.min(divider as isize);
1312         } else {
1313             if LoadWeight::scale_load_down(gcfs_rq.load.weight) > 0 {
1314                 load_sum = gcfs_rq.avg.load_sum / LoadWeight::scale_load_down(gcfs_rq.load.weight);
1315             }
1316 
1317             runnable_sum = se.avg.load_sum.min(load_sum) as isize;
1318         }
1319 
1320         let running_sum = se.avg.util_sum as isize >> SCHED_CAPACITY_SHIFT;
1321         runnable_sum = runnable_sum.max(running_sum);
1322 
1323         load_sum = LoadWeight::scale_load_down(se.load.weight) * runnable_sum as u64;
1324         let load_avg = load_sum / divider as u64;
1325 
1326         let delta_avg = load_avg as isize - se.avg.load_avg as isize;
1327         if delta_avg == 0 {
1328             return;
1329         }
1330 
1331         let delta_sum = load_sum as isize
1332             - LoadWeight::scale_load_down(se.load.weight) as isize * se.avg.load_sum as isize;
1333 
1334         let se = se.force_mut();
1335         se.avg.load_sum = runnable_sum as u64;
1336         se.avg.load_avg = load_avg as usize;
1337 
1338         add_positive(&mut (self.avg.load_avg as isize), delta_avg);
1339         add_positive(&mut (self.avg.util_sum as isize), delta_sum);
1340 
1341         self.avg.load_sum = self
1342             .avg
1343             .load_sum
1344             .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
1345     }
1346 
1347     /// pick下一个运行的task
1348     pub fn pick_next_entity(&self) -> Option<Arc<FairSchedEntity>> {
1349         if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
1350             && self.next().is_some()
1351             && self.entity_eligible(&self.next().unwrap())
1352         {
1353             return self.next();
1354         }
1355         self.entities.get_first().map(|val| val.1.clone())
1356     }
1357 
1358     pub fn entity_eligible(&self, se: &Arc<FairSchedEntity>) -> bool {
1359         let curr = self.current();
1360         let mut avg = self.avg_vruntime;
1361         let mut load = self.avg_load;
1362 
1363         if let Some(curr) = curr {
1364             if curr.on_rq() {
1365                 let weight = LoadWeight::scale_load_down(curr.load.weight);
1366 
1367                 avg += self.entity_key(&curr) * weight as i64;
1368                 load += weight as i64;
1369             }
1370         }
1371 
1372         return avg >= self.entity_key(se) * load;
1373     }
1374 }
1375 
/// The Completely Fair Scheduler (CFS) policy: a stateless marker type whose
/// behavior lives in its `Scheduler` trait implementation below.
pub struct CompletelyFairScheduler;
1377 
impl CompletelyFairScheduler {
    /// Walk `se` and `pse` up their task-group parents until they sit in the
    /// same cfs runqueue (their nearest common ancestor level), mutating both
    /// references in place.
    ///
    /// NOTE(review): `parent().unwrap()` assumes both chains share a common
    /// ancestor; if they did not, this would panic — confirm the group
    /// hierarchy guarantees this.
    fn find_matching_se(se: &mut Arc<FairSchedEntity>, pse: &mut Arc<FairSchedEntity>) {
        // First bring both entities to the same depth.
        let mut se_depth = se.depth;
        let mut pse_depth = pse.depth;

        while se_depth > pse_depth {
            se_depth -= 1;
            *se = se.parent().unwrap();
        }

        while pse_depth > se_depth {
            pse_depth -= 1;
            *pse = pse.parent().unwrap();
        }

        // Then climb in lock-step until they share a runqueue.
        while !Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq()) {
            *se = se.parent().unwrap();
            *pse = pse.parent().unwrap();
        }
    }
}
1400 
impl Scheduler for CompletelyFairScheduler {
    /// Enqueue `pcb`, walking up the task-group hierarchy so every ancestor
    /// entity is queued and the hierarchical counters stay consistent.
    fn enqueue(
        rq: &mut CpuRunQueue,
        pcb: Arc<crate::process::ProcessControlBlock>,
        mut flags: EnqueueFlag,
    ) {
        let mut se = pcb.sched_info().sched_entity();
        // Whether the entity being pushed up counts toward idle_h_nr_running.
        let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
        // Phase 1: enqueue `se` and every not-yet-queued ancestor.
        let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
            if se.on_rq() {
                // This ancestor (and everything above) is already queued.
                return (false, true);
            }

            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();
            cfs_rq.enqueue_entity(&se, flags);

            cfs_rq.h_nr_running += 1;
            cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;

            if cfs_rq.is_idle() {
                idle_h_nr_running = true;
            }

            // TODO: cfs_rq_throttled

            // Ancestors are enqueued as wakeups.
            flags = EnqueueFlag::ENQUEUE_WAKEUP;

            return (true, true);
        });

        if !should_continue {
            return;
        }

        // Phase 2: the remaining (already-queued) ancestors only need their
        // load averages and hierarchical counters refreshed.
        if let Some(mut se) = se {
            FairSchedEntity::for_each_in_group(&mut se, |se| {
                let binding = se.cfs_rq();
                let cfs_rq = binding.force_mut();

                cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);

                let se = se.force_mut();
                se.update_runnable();

                se.update_cfs_group();

                cfs_rq.h_nr_running += 1;
                cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;

                if cfs_rq.is_idle() {
                    idle_h_nr_running = true;
                }

                // TODO: cfs_rq_throttled

                return (true, true);
            });
        }

        rq.add_nr_running(1);
    }

    /// Dequeue `pcb`, mirroring `enqueue`'s two-phase hierarchy walk.
    fn dequeue(
        rq: &mut CpuRunQueue,
        pcb: Arc<crate::process::ProcessControlBlock>,
        mut flags: DequeueFlag,
    ) {
        let mut se = pcb.sched_info().sched_entity();
        let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
        let task_sleep = flags.contains(DequeueFlag::DEQUEUE_SLEEP);
        let was_sched_idle = rq.sched_idle_rq();

        // Phase 1: dequeue `se` and each ancestor level.
        let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();
            cfs_rq.dequeue_entity(&se, flags);

            cfs_rq.h_nr_running -= 1;
            cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;

            if cfs_rq.is_idle() {
                idle_h_nr_running = true;
            }

            // TODO: cfs_rq_throttled

            // A still-loaded parent level with a sleeping task is not yet
            // implemented (upstream stops the dequeue walk here).
            if cfs_rq.load.weight > 0 {
                let sep = se.parent();

                if task_sleep && sep.is_some() {
                    todo!()
                }
            }

            flags |= DequeueFlag::DEQUEUE_SLEEP;

            return (true, true);
        });

        if !should_continue {
            return;
        }

        // Phase 2: refresh load/accounting on the remaining ancestors.
        if let Some(mut se) = se {
            FairSchedEntity::for_each_in_group(&mut se, |se| {
                let binding = se.cfs_rq();
                let cfs_rq = binding.force_mut();

                cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);

                let se = se.force_mut();
                se.update_runnable();

                se.update_cfs_group();

                cfs_rq.h_nr_running -= 1;
                cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;

                if cfs_rq.is_idle() {
                    idle_h_nr_running = true;
                }

                // TODO: cfs_rq_throttled

                return (true, true);
            });
        }

        rq.sub_nr_running(1);

        // Balance earlier if the rq just became SCHED_IDLE-only.
        if unlikely(!was_sched_idle && rq.sched_idle_rq()) {
            rq.next_balance = clock();
        }
    }

    /// Yield the CPU: charge current's runtime and push its deadline out by
    /// one slice so another entity can be picked.
    fn yield_task(rq: &mut CpuRunQueue) {
        let curr = rq.current();
        let se = curr.sched_info().sched_entity();
        let binding = se.cfs_rq();
        let cfs_rq = binding.force_mut();

        // Nothing else to yield to.
        if unlikely(rq.nr_running == 1) {
            return;
        }

        cfs_rq.clear_buddies(&se);

        rq.update_rq_clock();

        cfs_rq.update_current();

        // The clock was just updated; skip the next redundant update.
        rq.clock_updata_flags |= ClockUpdataFlag::RQCF_REQ_SKIP;

        se.force_mut().deadline += se.calculate_delta_fair(se.slice);
    }

    /// Decide whether the waking task `pcb` should preempt the running one.
    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<crate::process::ProcessControlBlock>,
        wake_flags: WakeupFlags,
    ) {
        let curr = rq.current();
        let mut se = curr.sched_info().sched_entity();
        let mut pse = pcb.sched_info().sched_entity();

        if unlikely(Arc::ptr_eq(&se, &pse)) {
            return;
        }

        // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/fair.c#8160

        // Record the waking entity as the "next buddy" hint up the hierarchy.
        let _next_buddy_mark = if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
            && !wake_flags.contains(WakeupFlags::WF_FORK)
        {
            FairSchedEntity::for_each_in_group(&mut pse, |se| {
                if !se.on_rq() {
                    return (false, true);
                }

                if se.is_idle() {
                    return (false, true);
                }

                se.cfs_rq().force_mut().next = Arc::downgrade(&se);

                return (true, true);
            });
            true
        } else {
            false
        };

        // Already marked for reschedule; nothing more to do.
        if curr.flags().contains(ProcessFlags::NEED_SCHEDULE) {
            return;
        }

        // A non-idle task always preempts an idle-policy task.
        if unlikely(curr.sched_info().policy() == SchedPolicy::IDLE)
            && likely(pcb.sched_info().policy() != SchedPolicy::IDLE)
        {
            rq.resched_current();
            return;
        }

        // Only CFS tasks participate in wakeup preemption, and only when
        // the feature is enabled.
        if unlikely(pcb.sched_info().policy() != SchedPolicy::CFS)
            || !SCHED_FEATURES.contains(SchedFeature::WAKEUP_PREEMPTION)
        {
            return;
        }

        // Compare the two entities at their common hierarchy level.
        Self::find_matching_se(&mut se, &mut pse);

        let cse_is_idle = se.is_idle();
        let pse_is_idle = pse.is_idle();

        if cse_is_idle && !pse_is_idle {
            rq.resched_current();
            return;
        }

        // Never let an idle entity preempt a non-idle one.
        if cse_is_idle != pse_is_idle {
            return;
        }

        let cfs_rq = se.cfs_rq();
        cfs_rq.force_mut().update_current();

        // Preempt when the waker is what the queue would pick next anyway.
        if let Some((_, pick_se)) = cfs_rq.entities.get_first() {
            if Arc::ptr_eq(pick_se, &pse) {
                rq.resched_current();
                return;
            }
        }
    }

    /// Pick the next runnable task by descending through group runqueues.
    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<crate::process::ProcessControlBlock>> {
        let mut cfs_rq = Some(rq.cfs_rq());
        if cfs_rq.as_ref().unwrap().nr_running == 0 {
            return None;
        }

        let mut se;
        loop {
            let cfs = cfs_rq.unwrap();
            let cfs = cfs.force_mut();
            let curr = cfs.current();
            // Keep current's accounting fresh (or drop it if it left the
            // rq) before picking at this level.
            if let Some(curr) = curr {
                if curr.on_rq() {
                    cfs.update_current();
                } else {
                    cfs.set_current(Weak::default());
                }
            }

            se = cfs.pick_next_entity();
            match se.clone() {
                Some(val) => cfs_rq = val.my_cfs_rq.clone(),
                None => {
                    break;
                }
            }

            // A task entity has no child runqueue: descent is finished.
            if cfs_rq.is_none() {
                break;
            }
        }

        se.map(|se| se.pcb())
    }

    /// Scheduler tick: run entity_tick for the task and every ancestor.
    fn tick(_rq: &mut CpuRunQueue, pcb: Arc<crate::process::ProcessControlBlock>, queued: bool) {
        let mut se = pcb.sched_info().sched_entity();

        FairSchedEntity::for_each_in_group(&mut se, |se| {
            let binding = se.clone();
            let binding = binding.cfs_rq();
            let cfs_rq = binding.force_mut();

            cfs_rq.entity_tick(se, queued);
            (true, true)
        });
    }

    /// Initial placement for a freshly forked task on the current CPU.
    fn task_fork(pcb: Arc<ProcessControlBlock>) {
        let rq = cpu_rq(smp_get_processor_id().data() as usize);
        let se = pcb.sched_info().sched_entity();

        let (rq, _guard) = rq.self_lock();

        rq.update_rq_clock();

        let binding = se.cfs_rq();
        let cfs_rq = binding.force_mut();

        if cfs_rq.current().is_some() {
            cfs_rq.update_current();
        }

        cfs_rq.place_entity(se.clone(), EnqueueFlag::ENQUEUE_INITIAL);
    }

    /// Pick the next task to run, handling the hand-off from `prev`.
    fn pick_next_task(
        rq: &mut CpuRunQueue,
        prev: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>> {
        let mut cfs_rq = rq.cfs_rq();
        if rq.nr_running == 0 {
            return None;
        }

        // Fast path: no CFS `prev` to put back — just descend and pick.
        if prev.is_none()
            || (prev.is_some() && prev.as_ref().unwrap().sched_info().policy() != SchedPolicy::CFS)
        {
            if let Some(prev) = prev {
                match prev.sched_info().policy() {
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::CFS => todo!(),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(rq, prev),
                }
            }
            let mut se;
            loop {
                match cfs_rq.pick_next_entity() {
                    Some(s) => se = s,
                    None => return None,
                }

                cfs_rq.force_mut().set_next_entity(&se);

                // Descend into the picked entity's child queue, if any.
                match &se.my_cfs_rq {
                    Some(q) => cfs_rq = q.clone(),
                    None => break,
                }
            }

            return Some(se.pcb());
        }

        // Slow path: `prev` is a CFS task; it must be put back while the
        // new pick is installed at each shared hierarchy level.
        let prev = prev.unwrap();
        let se = cfs_rq.pick_next_entity();

        if let Some(mut se) = se {
            loop {
                let curr = cfs_rq.current();
                if let Some(current) = curr {
                    if current.on_rq() {
                        cfs_rq.force_mut().update_current()
                    } else {
                        cfs_rq.force_mut().set_current(Weak::default());
                    }
                }

                match cfs_rq.pick_next_entity() {
                    Some(e) => se = e,
                    None => break,
                }

                if let Some(q) = se.my_cfs_rq.clone() {
                    cfs_rq = q;
                } else {
                    break;
                }
            }

            let p = se.pcb();

            if !Arc::ptr_eq(&prev, &p) {
                let mut pse = prev.sched_info().sched_entity();

                // Walk both entities toward their common level, putting
                // prev's chain back and marking the new chain as current.
                while !(Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq())
                    && Arc::ptr_eq(&se.cfs_rq(), &cfs_rq))
                {
                    let se_depth = se.depth;
                    let pse_depth = pse.depth;

                    if se_depth <= pse_depth {
                        pse.cfs_rq().force_mut().put_prev_entity(pse.clone());
                        pse = pse.parent().unwrap();
                    }

                    if se_depth >= pse_depth {
                        se.cfs_rq().force_mut().set_next_entity(&se);
                        se = se.parent().unwrap();
                    }
                }

                cfs_rq.force_mut().put_prev_entity(pse);
                cfs_rq.force_mut().set_next_entity(&se);
            }

            return Some(p);
        } else {
            return None;
        }
    }

    /// Put `prev` (and each ancestor entity) back onto its queue.
    fn put_prev_task(_rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>) {
        let mut se = prev.sched_info().sched_entity();

        FairSchedEntity::for_each_in_group(&mut se, |se| {
            let cfs = se.cfs_rq();
            cfs.force_mut().put_prev_entity(se);

            return (true, true);
        });
    }
}
1809