pub mod clock;
pub mod completion;
pub mod cputime;
pub mod fair;
pub mod idle;
pub mod pelt;
pub mod prio;
pub mod syscall;

use core::{
    intrinsics::{likely, unlikely},
    sync::atomic::{compiler_fence, fence, AtomicUsize, Ordering},
};

use alloc::{
    boxed::Box,
    collections::LinkedList,
    sync::{Arc, Weak},
    vec::Vec,
};
use system_error::SystemError;

use crate::{
    arch::{interrupt::ipi::send_ipi, CurrentIrqArch},
    exception::{
        ipi::{IpiKind, IpiTarget},
        InterruptArch,
    },
    libs::{
        lazy_init::Lazy,
        spinlock::{SpinLock, SpinLockGuard},
    },
    mm::percpu::{PerCpu, PerCpuVar},
    process::{ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, SchedInfo},
    sched::idle::IdleScheduler,
    smp::{core::smp_get_processor_id, cpu::ProcessorId},
    time::{clocksource::HZ, timer::clock},
};

use self::{
    clock::{ClockUpdataFlag, SchedClock},
    cputime::{irq_time_read, CpuTimeFunc, IrqTime},
    fair::{CfsRunQueue, CompletelyFairScheduler, FairSchedEntity},
    prio::PrioUtil,
};

static mut CPU_IRQ_TIME: Option<Vec<&'static mut IrqTime>> = None;

// Although the rq is per-CPU, load balancing has to modify a peer CPU's rq,
// so the lock is still required.
static CPU_RUNQUEUE: Lazy<PerCpuVar<Arc<CpuRunQueue>>> = PerCpuVar::define_lazy();

/// Sum of the runnable-task counts of all CPUs in the system.
static CALCULATE_LOAD_TASKS: AtomicUsize = AtomicUsize::new(0);

/// Interval between global-load samples: five seconds plus one tick.
const LOAD_FREQ: usize = HZ as usize * 5 + 1;

pub const SCHED_FIXEDPOINT_SHIFT: u64 = 10;
#[allow(dead_code)]
pub const SCHED_FIXEDPOINT_SCALE: u64 = 1 << SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SHIFT: u64 = SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SCALE: u64 = 1 << SCHED_CAPACITY_SHIFT;

#[inline]
pub fn cpu_irq_time(cpu: usize) -> &'static mut IrqTime {
    unsafe { CPU_IRQ_TIME.as_mut().unwrap()[cpu] }
}

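/// Returns the run queue of the given CPU.
///
/// A minimal usage sketch (the lock-then-tick pattern used by `scheduler_tick`
/// further below):
/// ```ignore
/// let rq = cpu_rq(smp_get_processor_id().data() as usize);
/// let (rq, guard) = rq.self_lock();
/// rq.update_rq_clock();
/// drop(guard);
/// ```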
#[inline]
pub fn cpu_rq(cpu: usize) -> Arc<CpuRunQueue> {
    CPU_RUNQUEUE.ensure();
    unsafe {
        CPU_RUNQUEUE
            .get()
            .force_get(ProcessorId::new(cpu as u32))
            .clone()
    }
}

lazy_static! {
    pub static ref SCHED_FEATURES: SchedFeature = SchedFeature::GENTLE_FAIR_SLEEPERS
        | SchedFeature::START_DEBIT
        | SchedFeature::LAST_BUDDY
        | SchedFeature::CACHE_HOT_BUDDY
        | SchedFeature::WAKEUP_PREEMPTION
        | SchedFeature::NONTASK_CAPACITY
        | SchedFeature::TTWU_QUEUE
        | SchedFeature::SIS_UTIL
        | SchedFeature::RT_PUSH_IPI
        | SchedFeature::ALT_PERIOD
        | SchedFeature::BASE_SLICE
        | SchedFeature::UTIL_EST
        | SchedFeature::UTIL_EST_FASTUP;
}
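
// The feature set is consulted at runtime; a minimal sketch (illustrative only,
// `rq` and `pcb` stand for values obtained elsewhere):
//
//     if SCHED_FEATURES.contains(SchedFeature::WAKEUP_PREEMPTION) {
//         rq.check_preempt_currnet(&pcb, WakeupFlags::WF_TTWU);
//     }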

pub trait Scheduler {
    /// ## Called when a task becomes runnable: the scheduling entity (task) is
    /// placed into the red-black tree and `nr_running` is incremented.
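    ///
    /// Dispatch is per policy; a minimal call-site sketch (see
    /// `CpuRunQueue::enqueue_task` below for the real one):
    /// ```ignore
    /// match pcb.sched_info().policy() {
    ///     SchedPolicy::CFS => CompletelyFairScheduler::enqueue(rq, pcb, flags),
    ///     SchedPolicy::IDLE => IdleScheduler::enqueue(rq, pcb, flags),
    ///     _ => todo!(),
    /// }
    /// ```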
    fn enqueue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag);

    /// ## Called when a task stops being runnable: the corresponding scheduling
    /// entity is removed from the red-black tree and `nr_running` is decremented.
    fn dequeue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag);

    /// ## Voluntarily yield the CPU; essentially a dequeue immediately followed
    /// by an enqueue.
    fn yield_task(rq: &mut CpuRunQueue);

    /// ## Check whether a task that just became runnable may preempt the
    /// currently running task.
    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<ProcessControlBlock>,
        flags: WakeupFlags,
    );

    /// ## Pick the most suitable task to run next.
    #[allow(dead_code)]
    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<ProcessControlBlock>>;

    /// ## Pick the most suitable task to run next, taking the previously
    /// running task (`pcb`) into account.
    fn pick_next_task(
        rq: &mut CpuRunQueue,
        pcb: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>>;

    /// ## Called from the timer tick; may lead to a task switch. This drives
    /// runtime preemption.
    fn tick(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, queued: bool);

    /// ## Called at fork time when the new task should join CFS.
    fn task_fork(pcb: Arc<ProcessControlBlock>);

    fn put_prev_task(rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>);
}

/// Scheduling policy
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SchedPolicy {
    /// Real-time process
    RT,
    /// First in, first out
    FIFO,
    /// Completely fair scheduling
    CFS,
    /// IDLE
    IDLE,
}

#[allow(dead_code)]
pub struct TaskGroup {
    /// Scheduling entities managed by CFS, one per CPU
    entitys: Vec<Arc<FairSchedEntity>>,
    /// Per-CPU CFS run queues
    cfs: Vec<Arc<CfsRunQueue>>,
    /// Parent group
    parent: Option<Arc<TaskGroup>>,

    shares: u64,
}

#[derive(Debug, Default)]
pub struct LoadWeight {
    /// Load weight
    pub weight: u64,
    /// Cached reciprocal of `weight`, to speed up the arithmetic
    pub inv_weight: u32,
}

impl LoadWeight {
    /// Used to keep weights within a suitable range
    pub const SCHED_FIXEDPOINT_SHIFT: u32 = 10;

    pub const WMULT_SHIFT: u32 = 32;
    pub const WMULT_CONST: u32 = !0;

    pub const NICE_0_LOAD_SHIFT: u32 = Self::SCHED_FIXEDPOINT_SHIFT + Self::SCHED_FIXEDPOINT_SHIFT;

    pub fn update_load_add(&mut self, inc: u64) {
        self.weight += inc;
        self.inv_weight = 0;
    }

    pub fn update_load_sub(&mut self, dec: u64) {
        self.weight -= dec;
        self.inv_weight = 0;
    }

    pub fn update_load_set(&mut self, weight: u64) {
        self.weight = weight;
        self.inv_weight = 0;
    }

    /// ## Refresh the cached reciprocal of the load weight.
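    ///
    /// Worked sketch: for a nice-0 weight of 1024, `scale_load_down` yields
    /// `w = 2`, so `inv_weight = WMULT_CONST / 2` (about `2^31`). A later
    /// multiply by `inv_weight` followed by a right shift of `WMULT_SHIFT`
    /// (32) bits is therefore a fixed-point division by `w`.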
    pub fn update_inv_weight(&mut self) {
        // Already up to date
        if likely(self.inv_weight != 0) {
            return;
        }

        let w = Self::scale_load_down(self.weight);

        if unlikely(w >= Self::WMULT_CONST as u64) {
            // The high bits are in use
            self.inv_weight = 1;
        } else if unlikely(w == 0) {
            // Take the largest possible reciprocal
            self.inv_weight = Self::WMULT_CONST;
        } else {
            // Compute the reciprocal
            self.inv_weight = Self::WMULT_CONST / w as u32;
        }
    }

    /// ## Compute a task's weighted execution-time delta.
    ///
    /// Formula: `(delta_exec * (weight * self.inv_weight)) >> WMULT_SHIFT`
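    ///
    /// Worked example: if `weight == self.weight == 1024`, then
    /// `fact = scale_load_down(1024) = 2` and `inv_weight = WMULT_CONST / 2`,
    /// so `fact * inv_weight` is about `2^32`; after the 32-bit shift the
    /// result is (up to rounding) `delta_exec` itself, i.e. equal weights
    /// leave the execution-time delta unchanged.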
    pub fn calculate_delta(&mut self, delta_exec: u64, weight: u64) -> u64 {
        // Reduce precision
        let mut fact = Self::scale_load_down(weight);

        // Record the high 32 bits of `fact`
        let mut fact_hi = (fact >> 32) as u32;
        // Used to undo the adjustments below
        let mut shift = Self::WMULT_SHIFT;

        self.update_inv_weight();

        if unlikely(fact_hi != 0) {
            // The high 32 bits still hold data: find the highest set bit,
            // then shrink `fact` accordingly.
            let fs = 32 - fact_hi.leading_zeros();
            shift -= fs;

            // Make sure the high 32 bits are all zero
            fact >>= fs;
        }

        // At this point `fact` is known to fit in 32 bits
        fact *= self.inv_weight as u64;

        fact_hi = (fact >> 32) as u32;

        if fact_hi != 0 {
            // The high 32 bits still hold data: find the highest set bit,
            // then shrink `fact` accordingly.
            let fs = 32 - fact_hi.leading_zeros();
            shift -= fs;

            // Make sure the high 32 bits are all zero
            fact >>= fs;
        }

        return ((delta_exec as u128 * fact as u128) >> shift) as u64;
    }

    /// ## Scale a load weight down into a small range, i.e. compute at reduced
    /// precision.
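    ///
    /// E.g. `scale_load_down(1 << 20) == 1024`, while nonzero weights are
    /// clamped below at 2 so that small weights do not collapse to 0 after
    /// the shift.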
    pub const fn scale_load_down(mut weight: u64) -> u64 {
        if weight != 0 {
            weight >>= Self::SCHED_FIXEDPOINT_SHIFT;

            if weight < 2 {
                weight = 2;
            }
        }
        weight
    }

    #[allow(dead_code)]
    pub const fn scale_load(weight: u64) -> u64 {
        weight << Self::SCHED_FIXEDPOINT_SHIFT
    }
}

pub trait SchedArch {
    /// Enable scheduling on the current core.
    fn enable_sched_local();
    /// Disable scheduling on the current core.
    #[allow(dead_code)]
    fn disable_sched_local();

    /// Initialization performed before scheduling is enabled for the first time.
    ///
    /// Unlike `sched_init`, this only does work such as setting up the clock.
    fn initial_setup_sched_local() {}
}

/// ## The per-CPU run queue; it holds the sub-runqueues of the individual schedulers.
#[allow(dead_code)]
#[derive(Debug)]
pub struct CpuRunQueue {
    lock: SpinLock<()>,
    lock_on_who: AtomicUsize,

    cpu: usize,
    clock_task: u64,
    clock: u64,
    prev_irq_time: u64,
    clock_updata_flags: ClockUpdataFlag,

    /// Overloaded: set once the queue holds two or more runnable tasks
    overload: bool,

    next_balance: u64,

    /// Number of runnable tasks
    nr_running: usize,

    /// Number of uninterruptible (blocked) tasks
    nr_uninterruptible: usize,

    /// Time of the last global-load update
    cala_load_update: usize,
    cala_load_active: usize,

    /// The CFS run queue
    cfs: Arc<CfsRunQueue>,

    clock_pelt: u64,
    lost_idle_time: u64,
    clock_idle: u64,

    cfs_tasks: LinkedList<Arc<FairSchedEntity>>,

    /// Most recent scheduling statistics
    sched_info: SchedInfo,

    /// The process currently running on this run queue
    current: Weak<ProcessControlBlock>,

    idle: Weak<ProcessControlBlock>,
}

impl CpuRunQueue {
    pub fn new(cpu: usize) -> Self {
        Self {
            lock: SpinLock::new(()),
            lock_on_who: AtomicUsize::new(usize::MAX),
            cpu,
            clock_task: 0,
            clock: 0,
            prev_irq_time: 0,
            clock_updata_flags: ClockUpdataFlag::empty(),
            overload: false,
            next_balance: 0,
            nr_running: 0,
            nr_uninterruptible: 0,
            // The first global-load sample is due LOAD_FREQ ticks after boot.
            cala_load_update: clock() as usize + LOAD_FREQ,
            cala_load_active: 0,
            cfs: Arc::new(CfsRunQueue::new()),
            clock_pelt: 0,
            lost_idle_time: 0,
            clock_idle: 0,
            cfs_tasks: LinkedList::new(),
            sched_info: SchedInfo::default(),
            current: Weak::new(),
            idle: Weak::new(),
        }
    }

    /// This function may only be called with interrupts disabled!!!
    /// It returns a mutable reference to the rq; note that the caller must keep
    /// the returned guard (the second value) alive for the whole access, so the
    /// function is effectively unsafe and correctness is the caller's burden.
    /// In interrupt context, with interrupts disabled, it is safe.
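    ///
    /// Usage sketch (the pattern used by `scheduler_tick` and `__schedule`):
    /// ```ignore
    /// let rq = cpu_rq(cpu);
    /// let (rq, guard) = rq.self_lock(); // rq: &mut CpuRunQueue
    /// rq.update_rq_clock();
    /// drop(guard); // release the lock when done
    /// ```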
    pub fn self_lock(&self) -> (&mut Self, Option<SpinLockGuard<()>>) {
        if self.lock.is_locked()
            && smp_get_processor_id().data() as usize == self.lock_on_who.load(Ordering::SeqCst)
        {
            // Already locked on this CPU, so the reference can be handed out directly
            (
                unsafe {
                    (self as *const Self as usize as *mut Self)
                        .as_mut()
                        .unwrap()
                },
                None,
            )
        } else {
            // Otherwise take the lock first
            let guard = self.lock();
            (
                unsafe {
                    (self as *const Self as usize as *mut Self)
                        .as_mut()
                        .unwrap()
                },
                Some(guard),
            )
        }
    }

    fn lock(&self) -> SpinLockGuard<()> {
        let guard = self.lock.lock_irqsave();

        // Record which CPU took the lock
        self.lock_on_who
            .store(smp_get_processor_id().data() as usize, Ordering::SeqCst);

        guard
    }

    pub fn enqueue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag) {
        if !flags.contains(EnqueueFlag::ENQUEUE_NOCLOCK) {
            self.update_rq_clock();
        }

        if !flags.contains(EnqueueFlag::ENQUEUE_RESTORE) {
            let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave();
            if sched_info.last_queued == 0 {
                sched_info.upgrade().last_queued = self.clock;
            }
        }

        match pcb.sched_info().policy() {
            SchedPolicy::CFS => CompletelyFairScheduler::enqueue(self, pcb, flags),
            SchedPolicy::FIFO => todo!(),
            SchedPolicy::RT => todo!(),
            SchedPolicy::IDLE => IdleScheduler::enqueue(self, pcb, flags),
        }

        // TODO: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c#239
    }

    pub fn dequeue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) {
        // TODO: sched_core

        if !flags.contains(DequeueFlag::DEQUEUE_NOCLOCK) {
            self.update_rq_clock()
        }

        if !flags.contains(DequeueFlag::DEQUEUE_SAVE) {
            let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave();

            if sched_info.last_queued > 0 {
                let delta = self.clock - sched_info.last_queued;

                let mut sched_info = sched_info.upgrade();
                sched_info.last_queued = 0;
                sched_info.run_delay += delta as usize;

                self.sched_info.run_delay += delta as usize;
            }
        }

        match pcb.sched_info().policy() {
            SchedPolicy::CFS => CompletelyFairScheduler::dequeue(self, pcb, flags),
            SchedPolicy::FIFO => todo!(),
            SchedPolicy::RT => todo!(),
            SchedPolicy::IDLE => IdleScheduler::dequeue(self, pcb, flags),
        }
    }

    /// Activate a task: enqueue it on this run queue.
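    ///
    /// A wake-up path sketch (a minimal illustration, assuming the rq lock is
    /// already held via `self_lock`, in the spirit of Linux's `ttwu_do_activate`):
    /// ```ignore
    /// rq.activate_task(&pcb, EnqueueFlag::ENQUEUE_WAKEUP | EnqueueFlag::ENQUEUE_NOCLOCK);
    /// rq.check_preempt_currnet(&pcb, WakeupFlags::WF_TTWU);
    /// ```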
    pub fn activate_task(&mut self, pcb: &Arc<ProcessControlBlock>, mut flags: EnqueueFlag) {
        if *pcb.sched_info().on_rq.lock_irqsave() == OnRq::Migrating {
            flags |= EnqueueFlag::ENQUEUE_MIGRATED;
        }

        if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
            todo!()
        }

        self.enqueue_task(pcb.clone(), flags);

        *pcb.sched_info().on_rq.lock_irqsave() = OnRq::Queued;
    }

    /// Check whether the given task may preempt the currently running task.
    #[allow(clippy::comparison_chain)]
    pub fn check_preempt_currnet(&mut self, pcb: &Arc<ProcessControlBlock>, flags: WakeupFlags) {
        if pcb.sched_info().policy() == self.current().sched_info().policy() {
            match self.current().sched_info().policy() {
                SchedPolicy::CFS => {
                    CompletelyFairScheduler::check_preempt_currnet(self, pcb, flags)
                }
                SchedPolicy::FIFO => todo!(),
                SchedPolicy::RT => todo!(),
                SchedPolicy::IDLE => IdleScheduler::check_preempt_currnet(self, pcb, flags),
            }
        } else if pcb.sched_info().policy() < self.current().sched_info().policy() {
            // The incoming task's policy has higher priority
            self.resched_current();
        }

        if *self.current().sched_info().on_rq.lock_irqsave() == OnRq::Queued
            && self.current().flags().contains(ProcessFlags::NEED_SCHEDULE)
        {
            self.clock_updata_flags
                .insert(ClockUpdataFlag::RQCF_REQ_SKIP);
        }
    }

    /// Deactivate a task: remove it from this run queue.
    pub fn deactivate_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) {
        *pcb.sched_info().on_rq.lock_irqsave() = if flags.contains(DequeueFlag::DEQUEUE_SLEEP) {
            OnRq::None
        } else {
            OnRq::Migrating
        };

        self.dequeue_task(pcb, flags);
    }

    #[inline]
    pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
        self.cfs.clone()
    }

    /// Update the run-queue clock.
    pub fn update_rq_clock(&mut self) {
        // This clock update should be skipped
        if self
            .clock_updata_flags
            .contains(ClockUpdataFlag::RQCF_ACT_SKIP)
        {
            return;
        }

        let clock = SchedClock::sched_clock_cpu(self.cpu);
        if clock < self.clock {
            return;
        }

        let delta = clock - self.clock;
        self.clock += delta;
        // error!("clock {}", self.clock);
        self.update_rq_clock_task(delta);
    }

    /// Update the task clock (run-queue time minus IRQ time).
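    ///
    /// In effect `clock_task += delta - irq_delta`: time spent servicing hard
    /// IRQs is deducted, so task runtime accounting excludes interrupt time.
    /// E.g. a wall-clock `delta` of 10ms with 2ms of IRQ time advances
    /// `clock_task` by 8ms.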
    pub fn update_rq_clock_task(&mut self, mut delta: u64) {
        let mut irq_delta = irq_time_read(self.cpu) - self.prev_irq_time;
        // if self.cpu == 0 {
        //     error!(
        //         "cpu 0 delta {delta} irq_delta {} irq_time_read(self.cpu) {} self.prev_irq_time {}",
        //         irq_delta,
        //         irq_time_read(self.cpu),
        //         self.prev_irq_time
        //     );
        // }
        compiler_fence(Ordering::SeqCst);

        if irq_delta > delta {
            irq_delta = delta;
        }

        self.prev_irq_time += irq_delta;

        delta -= irq_delta;

        // todo: psi?

        // send_to_default_serial8250_port(format!("\n{delta}\n",).as_bytes());
        compiler_fence(Ordering::SeqCst);
        self.clock_task += delta;
        compiler_fence(Ordering::SeqCst);
        // if self.cpu == 0 {
        //     error!("cpu {} clock_task {}", self.cpu, self.clock_task);
        // }
        // todo: pelt?
    }

    /// Count the currently active (runnable + uninterruptible) tasks and return
    /// the change since the last sample.
    fn calculate_load_fold_active(&mut self, adjust: usize) -> usize {
        let mut nr_active = self.nr_running - adjust;
        nr_active += self.nr_uninterruptible;
        let mut delta = 0;

        if nr_active != self.cala_load_active {
            // The delta can be conceptually negative; use wrapping arithmetic
            // (as Linux does with signed longs) so the later fetch_add on
            // CALCULATE_LOAD_TASKS cancels out correctly.
            delta = nr_active.wrapping_sub(self.cala_load_active);
            self.cala_load_active = nr_active;
        }

        delta
    }

    /// ## Fold this CPU's load into the global count, from the timer tick.
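    ///
    /// The sampling window is `LOAD_FREQ = 5 * HZ + 1` ticks, i.e. roughly once
    /// every five seconds per CPU; the extra tick keeps the sampling from
    /// running in lockstep with exactly-periodic (per-second) activity.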
    pub fn calculate_global_load_tick(&mut self) {
        if clock() < self.cala_load_update as u64 {
            // The next update is not due yet
            return;
        }

        let delta = self.calculate_load_fold_active(0);

        if delta != 0 {
            CALCULATE_LOAD_TASKS.fetch_add(delta, Ordering::SeqCst);
        }

        self.cala_load_update += LOAD_FREQ;
    }

    pub fn add_nr_running(&mut self, nr_running: usize) {
        let prev = self.nr_running;

        self.nr_running = prev + nr_running;
        if prev < 2 && self.nr_running >= 2 && !self.overload {
            self.overload = true;
        }
    }

    pub fn sub_nr_running(&mut self, count: usize) {
        self.nr_running -= count;
    }

    /// Is this run queue effectively idle (only SCHED_IDLE tasks on it)?
    pub fn sched_idle_rq(&self) -> bool {
        return unlikely(
            self.nr_running == self.cfs.idle_h_nr_running as usize && self.nr_running > 0,
        );
    }

    #[inline]
    pub fn current(&self) -> Arc<ProcessControlBlock> {
        self.current.upgrade().unwrap()
    }

    #[inline]
    pub fn set_current(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.current = pcb;
    }

    #[inline]
    pub fn set_idle(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.idle = pcb;
    }

    #[inline]
    pub fn clock_task(&self) -> u64 {
        self.clock_task
    }

    /// Ask for the current task of this run queue to be rescheduled.
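    /// On the local CPU this just sets `ProcessFlags::NEED_SCHEDULE`; for a
    /// remote CPU a reschedule IPI is sent instead. A typical call-site sketch:
    /// ```ignore
    /// // a task with a higher-priority policy just became runnable on `rq`
    /// if pcb.sched_info().policy() < rq.current().sched_info().policy() {
    ///     rq.resched_current();
    /// }
    /// ```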
    pub fn resched_current(&self) {
        let current = self.current();

        // Already marked as needing a reschedule?
        if unlikely(current.flags().contains(ProcessFlags::NEED_SCHEDULE)) {
            return;
        }

        let cpu = self.cpu;

        if cpu == smp_get_processor_id().data() as usize {
            // assert!(
            //     Arc::ptr_eq(&current, &ProcessManager::current_pcb()),
            //     "rq current name {} process current {}",
            //     current.basic().name().to_string(),
            //     ProcessManager::current_pcb().basic().name().to_string(),
            // );
            // Mark the current task as needing a reschedule
            ProcessManager::current_pcb()
                .flags()
                .insert(ProcessFlags::NEED_SCHEDULE);
            return;
        }

        // Send a reschedule IPI to the target CPU
        send_resched_ipi(ProcessorId::new(cpu as u32));
    }

    /// Pick the next task to run.
    pub fn pick_next_task(&mut self, prev: Arc<ProcessControlBlock>) -> Arc<ProcessControlBlock> {
        if likely(prev.sched_info().policy() >= SchedPolicy::CFS)
            && self.nr_running == self.cfs.h_nr_running as usize
        {
            let p = CompletelyFairScheduler::pick_next_task(self, Some(prev.clone()));

            if let Some(pcb) = p.as_ref() {
                return pcb.clone();
            } else {
                // error!(
                //     "pick idle cfs rq {:?}",
                //     self.cfs_rq()
                //         .entities
                //         .iter()
                //         .map(|x| x.1.pid)
                //         .collect::<Vec<_>>()
                // );
                match prev.sched_info().policy() {
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::CFS => CompletelyFairScheduler::put_prev_task(self, prev),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(self, prev),
                }
                // Fall back to the idle task
                return self.idle.upgrade().unwrap();
            }
        }

        todo!()
    }
}

bitflags! {
    pub struct SchedFeature:u32 {
        /// Only give sleeping tasks 50% of their service deficit: a woken task
        /// still receives some service, but cannot claim excessive resources.
        const GENTLE_FAIR_SLEEPERS = 1 << 0;
        /// Place new tasks ahead so that already-running tasks are not starved.
        const START_DEBIT = 1 << 1;
        /// Prefer the task that was woken last, since it may access data the
        /// waker just touched, improving cache locality.
        const NEXT_BUDDY = 1 << 2;
        /// Prefer the task that ran last, since it may access the same data as
        /// before, improving cache locality.
        const LAST_BUDDY = 1 << 3;
        /// Consider a task's buddy cache-hot and resist migrating the buddy,
        /// improving cache locality.
        const CACHE_HOT_BUDDY = 1 << 4;
        /// Allow wakeups to preempt the current task.
        const WAKEUP_PREEMPTION = 1 << 5;
        /// Decrease CPU capacity by the time not spent running tasks.
        const NONTASK_CAPACITY = 1 << 6;
        /// Queue remote wakeups on the target CPU and process them with the
        /// scheduler IPI, reducing run-queue lock contention.
        const TTWU_QUEUE = 1 << 7;
        /// On wakeup, try to limit pointless scanning of the last-level-cache
        /// (LLC) domain.
        const SIS_UTIL = 1 << 8;
        /// For RT (real-time) task migration, send IPIs to reduce lock
        /// contention between CPUs.
        const RT_PUSH_IPI = 1 << 9;
        /// Enable estimated CPU utilization in scheduling decisions.
        const UTIL_EST = 1 << 10;
        const UTIL_EST_FASTUP = 1 << 11;
        /// Enable the alternative scheduling period.
        const ALT_PERIOD = 1 << 12;
        /// Enable the base time slice.
        const BASE_SLICE = 1 << 13;
    }

    pub struct EnqueueFlag: u8 {
        const ENQUEUE_WAKEUP = 0x01;
        const ENQUEUE_RESTORE = 0x02;
        const ENQUEUE_MOVE = 0x04;
        const ENQUEUE_NOCLOCK = 0x08;

        const ENQUEUE_MIGRATED = 0x40;

        const ENQUEUE_INITIAL = 0x80;
    }

    pub struct DequeueFlag: u8 {
        const DEQUEUE_SLEEP = 0x01;
        const DEQUEUE_SAVE = 0x02; /* Matches ENQUEUE_RESTORE */
        const DEQUEUE_MOVE = 0x04; /* Matches ENQUEUE_MOVE */
        const DEQUEUE_NOCLOCK = 0x08; /* Matches ENQUEUE_NOCLOCK */
    }

    pub struct WakeupFlags: u8 {
        /* Wake flags. The first three directly map to some SD flag value */
        const WF_EXEC = 0x02; /* Wakeup after exec; maps to SD_BALANCE_EXEC */
        const WF_FORK = 0x04; /* Wakeup after fork; maps to SD_BALANCE_FORK */
        const WF_TTWU = 0x08; /* Wakeup; maps to SD_BALANCE_WAKE */

        const WF_SYNC = 0x10; /* Waker goes to sleep after wakeup */
        const WF_MIGRATED = 0x20; /* Internal use, task got migrated */
        const WF_CURRENT_CPU = 0x40; /* Prefer to move the wakee to the current CPU. */
    }

    pub struct SchedMode: u8 {
        /*
         * Constants for the sched_mode argument of __schedule().
         *
         * The mode argument allows RT enabled kernels to differentiate a
         * preemption from blocking on an 'sleeping' spin/rwlock. Note that
         * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
         * optimize the AND operation out and just check for zero.
         */
        /// The task is not requeued during the switch and must be woken manually.
        const SM_NONE = 0x0;
        /// Requeue the task: the current task was preempted (e.g. by the tick).
        const SM_PREEMPT = 0x1;
        /// RT-lock related.
        const SM_RTLOCK_WAIT = 0x2;
        /// By default identical to SM_PREEMPT.
        const SM_MASK_PREEMPT = Self::SM_PREEMPT.bits;
    }
}

#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OnRq {
    Queued,
    Migrating,
    None,
}

impl ProcessManager {
    pub fn update_process_times(user_tick: bool) {
        let pcb = Self::current_pcb();
        CpuTimeFunc::irqtime_account_process_tick(&pcb, user_tick, 1);

        scheduler_tick();
    }
}

/// ## Called on every timer tick.
pub fn scheduler_tick() {
    fence(Ordering::SeqCst);
    // Index of the current CPU
    let cpu_idx = smp_get_processor_id().data() as usize;

    // Run queue of the current CPU
    let rq = cpu_rq(cpu_idx);

    let (rq, guard) = rq.self_lock();

    // Task currently running on this run queue
    let current = rq.current();

    // Update the run-queue clock
    rq.update_rq_clock();

    match current.sched_info().policy() {
        SchedPolicy::CFS => CompletelyFairScheduler::tick(rq, current, false),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::RT => todo!(),
        SchedPolicy::IDLE => IdleScheduler::tick(rq, current, false),
    }

    rq.calculate_global_load_tick();

    drop(guard);
    // TODO: load balancing
}

/// ## Perform a schedule.
/// Panics (via assert) if `preempt_count` is not 0.
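///
/// A voluntary-sleep sketch (illustrative; it assumes the caller has already
/// marked the current task as sleeping through the process-state API):
/// ```ignore
/// // Give up the CPU. With SM_NONE the task is not requeued and must be
/// // woken explicitly later.
/// schedule(SchedMode::SM_NONE);
/// ```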
#[inline]
pub fn schedule(sched_mod: SchedMode) {
    let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
    assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
    __schedule(sched_mod);
}

/// ## Perform a schedule.
/// Unlike `schedule`, this does not check `preempt_count`,
/// which makes it suitable for contexts such as the timer interrupt.
pub fn __schedule(sched_mod: SchedMode) {
    let cpu = smp_get_processor_id().data() as usize;
    let rq = cpu_rq(cpu);

    let mut prev = rq.current();
    if let ProcessState::Exited(_) = prev.clone().sched_info().inner_lock_read_irqsave().state() {
        // schedule() was entered from process exit
        prev = ProcessManager::current_pcb();
    }

    // TODO: hrtick_clear(rq);

    let (rq, _guard) = rq.self_lock();

    rq.clock_updata_flags = ClockUpdataFlag::from_bits_truncate(rq.clock_updata_flags.bits() << 1);

    rq.update_rq_clock();
    rq.clock_updata_flags = ClockUpdataFlag::RQCF_UPDATE;

    // kBUG!(
    //     "before cfs rq pcbs {:?}\nvruntimes {:?}\n",
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.pcb().pid() })
    //         .collect::<Vec<_>>(),
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.vruntime })
    //         .collect::<Vec<_>>(),
    // );
    // warn!(
    //     "before cfs rq {:?} prev {:?}",
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.pcb().pid() })
    //         .collect::<Vec<_>>(),
    //     prev.pid()
    // );

    // error!("prev pid {:?} {:?}", prev.pid(), prev.sched_info().policy());
    if !sched_mod.contains(SchedMode::SM_MASK_PREEMPT)
        && prev.sched_info().policy() != SchedPolicy::IDLE
        && prev.sched_info().inner_lock_read_irqsave().is_mark_sleep()
    {
        // warn!("deactivate_task prev {:?}", prev.pid());
        // TODO: signals need to be handled here
        // https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c?r=&mo=172979&fi=6578#6630
        rq.deactivate_task(
            prev.clone(),
            DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
        );
    }

    let next = rq.pick_next_task(prev.clone());

    // kBUG!(
    //     "after cfs rq pcbs {:?}\nvruntimes {:?}\n",
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.pcb().pid() })
    //         .collect::<Vec<_>>(),
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.vruntime })
    //         .collect::<Vec<_>>(),
    // );

    // error!("next {:?}", next.pid());

    prev.flags().remove(ProcessFlags::NEED_SCHEDULE);
    fence(Ordering::SeqCst);
    if likely(!Arc::ptr_eq(&prev, &next)) {
        rq.set_current(Arc::downgrade(&next));
        // warn!(
        //     "switch_process prev {:?} next {:?} sched_mode {sched_mod:?}",
        //     prev.pid(),
        //     next.pid()
        // );

        // send_to_default_serial8250_port(
        //     format!(
        //         "switch_process prev {:?} next {:?} sched_mode {sched_mod:?}\n",
        //         prev.pid(),
        //         next.pid()
        //     )
        //     .as_bytes(),
        // );

        // CurrentApic.send_eoi();
        compiler_fence(Ordering::SeqCst);

        unsafe { ProcessManager::switch_process(prev, next) };
    } else {
        assert!(
            Arc::ptr_eq(&ProcessManager::current_pcb(), &prev),
            "{}",
            ProcessManager::current_pcb().basic().name()
        );
    }
}

pub fn sched_fork(pcb: &Arc<ProcessControlBlock>) -> Result<(), SystemError> {
    let mut prio_guard = pcb.sched_info().prio_data.write_irqsave();
    let current = ProcessManager::current_pcb();

    prio_guard.prio = current.sched_info().prio_data.read_irqsave().normal_prio;

    if PrioUtil::dl_prio(prio_guard.prio) {
        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
    } else if PrioUtil::rt_prio(prio_guard.prio) {
        let policy = &pcb.sched_info().sched_policy;
        *policy.write_irqsave() = SchedPolicy::RT;
    } else {
        let policy = &pcb.sched_info().sched_policy;
        *policy.write_irqsave() = SchedPolicy::CFS;
    }

    pcb.sched_info()
        .sched_entity()
        .force_mut()
        .init_entity_runnable_average();

    Ok(())
}

pub fn sched_cgroup_fork(pcb: &Arc<ProcessControlBlock>) {
    __set_task_cpu(pcb, smp_get_processor_id());
    match pcb.sched_info().policy() {
        SchedPolicy::RT => todo!(),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::CFS => CompletelyFairScheduler::task_fork(pcb.clone()),
        SchedPolicy::IDLE => todo!(),
    }
}

fn __set_task_cpu(pcb: &Arc<ProcessControlBlock>, cpu: ProcessorId) {
    // TODO/FIXME: group scheduling is not implemented yet
    let se = pcb.sched_info().sched_entity();
    let rq = cpu_rq(cpu.data() as usize);
    se.force_mut().set_cfs(Arc::downgrade(&rq.cfs));
}

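/// # Boot-time usage (an illustrative sketch; `Arch` stands in for the
/// platform's `SchedArch` implementor, whose concrete name lives in the arch layer)
/// ```ignore
/// sched_init(); // allocate the per-CPU run queues and IRQ-time slots
/// // then, on each core, before scheduling is switched on:
/// <Arch as SchedArch>::initial_setup_sched_local();
/// <Arch as SchedArch>::enable_sched_local();
/// ```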
#[inline(never)]
pub fn sched_init() {
    // Initialize the per-CPU variables
    unsafe {
        CPU_IRQ_TIME = Some(Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize));
        CPU_IRQ_TIME
            .as_mut()
            .unwrap()
            .resize_with(PerCpu::MAX_CPU_NUM as usize, || Box::leak(Box::default()));

        let mut cpu_runqueue = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize);
        for cpu in 0..PerCpu::MAX_CPU_NUM as usize {
            let rq = Arc::new(CpuRunQueue::new(cpu));
            rq.cfs.force_mut().set_rq(Arc::downgrade(&rq));
            cpu_runqueue.push(rq);
        }

        CPU_RUNQUEUE.init(PerCpuVar::new(cpu_runqueue).unwrap());
    };
}

#[inline]
pub fn send_resched_ipi(cpu: ProcessorId) {
    send_ipi(IpiKind::KickCpu, IpiTarget::Specified(cpu));
}