1 pub mod clock; 2 pub mod completion; 3 pub mod cputime; 4 pub mod fair; 5 pub mod idle; 6 pub mod pelt; 7 pub mod prio; 8 9 use core::{ 10 intrinsics::{likely, unlikely}, 11 sync::atomic::{compiler_fence, fence, AtomicUsize, Ordering}, 12 }; 13 14 use alloc::{ 15 boxed::Box, 16 collections::LinkedList, 17 sync::{Arc, Weak}, 18 vec::Vec, 19 }; 20 use system_error::SystemError; 21 22 use crate::{ 23 arch::{interrupt::ipi::send_ipi, CurrentIrqArch}, 24 exception::{ 25 ipi::{IpiKind, IpiTarget}, 26 InterruptArch, 27 }, 28 libs::{ 29 lazy_init::Lazy, 30 spinlock::{SpinLock, SpinLockGuard}, 31 }, 32 mm::percpu::{PerCpu, PerCpuVar}, 33 process::{ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, SchedInfo}, 34 sched::idle::IdleScheduler, 35 smp::{core::smp_get_processor_id, cpu::ProcessorId}, 36 time::{clocksource::HZ, timer::clock}, 37 }; 38 39 use self::{ 40 clock::{ClockUpdataFlag, SchedClock}, 41 cputime::{irq_time_read, CpuTimeFunc, IrqTime}, 42 fair::{CfsRunQueue, CompletelyFairScheduler, FairSchedEntity}, 43 prio::PrioUtil, 44 }; 45 46 static mut CPU_IRQ_TIME: Option<Vec<&'static mut IrqTime>> = None; 47 48 // 这里虽然rq是percpu的,但是在负载均衡的时候需要修改对端cpu的rq,所以仍需加锁 49 static CPU_RUNQUEUE: Lazy<PerCpuVar<Arc<CpuRunQueue>>> = PerCpuVar::define_lazy(); 50 51 /// 用于记录系统中所有 CPU 的可执行进程数量的总和。 52 static CALCULATE_LOAD_TASKS: AtomicUsize = AtomicUsize::new(0); 53 54 const LOAD_FREQ: usize = HZ as usize * 5 + 1; 55 56 pub const SCHED_FIXEDPOINT_SHIFT: u64 = 10; 57 #[allow(dead_code)] 58 pub const SCHED_FIXEDPOINT_SCALE: u64 = 1 << SCHED_FIXEDPOINT_SHIFT; 59 #[allow(dead_code)] 60 pub const SCHED_CAPACITY_SHIFT: u64 = SCHED_FIXEDPOINT_SHIFT; 61 #[allow(dead_code)] 62 pub const SCHED_CAPACITY_SCALE: u64 = 1 << SCHED_CAPACITY_SHIFT; 63 64 #[inline] 65 pub fn cpu_irq_time(cpu: usize) -> &'static mut IrqTime { 66 unsafe { CPU_IRQ_TIME.as_mut().unwrap()[cpu] } 67 } 68 69 #[inline] 70 pub fn cpu_rq(cpu: usize) -> Arc<CpuRunQueue> { 71 CPU_RUNQUEUE.ensure(); 72 unsafe { 73 CPU_RUNQUEUE 74 .get() 75 .force_get(ProcessorId::new(cpu as u32)) 76 .clone() 77 } 78 } 79 80 lazy_static! { 81 pub static ref SCHED_FEATURES: SchedFeature = SchedFeature::GENTLE_FAIR_SLEEPERS 82 | SchedFeature::START_DEBIT 83 | SchedFeature::LAST_BUDDY 84 | SchedFeature::CACHE_HOT_BUDDY 85 | SchedFeature::WAKEUP_PREEMPTION 86 | SchedFeature::NONTASK_CAPACITY 87 | SchedFeature::TTWU_QUEUE 88 | SchedFeature::SIS_UTIL 89 | SchedFeature::RT_PUSH_IPI 90 | SchedFeature::ALT_PERIOD 91 | SchedFeature::BASE_SLICE 92 | SchedFeature::UTIL_EST 93 | SchedFeature::UTIL_EST_FASTUP; 94 } 95 96 pub trait Scheduler { 97 /// ## 加入当任务进入可运行状态时调用。它将调度实体(任务)放到红黑树中,增加nr_running变量的值。 98 fn enqueue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag); 99 100 /// ## 当任务不再可运行时被调用,对应的调度实体被移出红黑树。它减少nr_running变量的值。 101 fn dequeue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag); 102 103 /// ## 主动让出cpu,这个函数的行为基本上是出队,紧接着入队 104 fn yield_task(rq: &mut CpuRunQueue); 105 106 /// ## 检查进入可运行状态的任务能否抢占当前正在运行的任务 107 fn check_preempt_currnet( 108 rq: &mut CpuRunQueue, 109 pcb: &Arc<ProcessControlBlock>, 110 flags: WakeupFlags, 111 ); 112 113 /// ## 选择接下来最适合运行的任务 114 fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<ProcessControlBlock>>; 115 116 /// ## 选择接下来最适合运行的任务 117 fn pick_next_task( 118 rq: &mut CpuRunQueue, 119 pcb: Option<Arc<ProcessControlBlock>>, 120 ) -> Option<Arc<ProcessControlBlock>>; 121 122 /// ## 被时间滴答函数调用,它可能导致进程切换。驱动了运行时抢占。 123 fn tick(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, queued: bool); 124 125 /// ## 在进程fork时,如需加入cfs,则调用 126 fn task_fork(pcb: Arc<ProcessControlBlock>); 127 128 fn put_prev_task(rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>); 129 } 130 131 /// 调度策略 132 #[allow(dead_code)] 133 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 134 pub enum SchedPolicy { 135 /// 实时进程 136 RT, 137 /// 先进先出调度 138 FIFO, 139 /// 完全公平调度 140 CFS, 141 /// IDLE 142 IDLE, 143 } 144 145 #[allow(dead_code)] 146 pub struct TaskGroup { 147 /// CFS管理的调度实体,percpu的 148 entitys: Vec<Arc<FairSchedEntity>>, 149 /// 每个CPU的CFS运行队列 150 cfs: Vec<Arc<CfsRunQueue>>, 151 /// 父节点 152 parent: Option<Arc<TaskGroup>>, 153 154 shares: u64, 155 } 156 157 #[derive(Debug, Default)] 158 pub struct LoadWeight { 159 /// 负载权重 160 pub weight: u64, 161 /// weight的倒数,方便计算 162 pub inv_weight: u32, 163 } 164 165 impl LoadWeight { 166 /// 用于限制权重在一个合适的区域内 167 pub const SCHED_FIXEDPOINT_SHIFT: u32 = 10; 168 169 pub const WMULT_SHIFT: u32 = 32; 170 pub const WMULT_CONST: u32 = !0; 171 172 pub const NICE_0_LOAD_SHIFT: u32 = Self::SCHED_FIXEDPOINT_SHIFT + Self::SCHED_FIXEDPOINT_SHIFT; 173 174 pub fn update_load_add(&mut self, inc: u64) { 175 self.weight += inc; 176 self.inv_weight = 0; 177 } 178 179 pub fn update_load_sub(&mut self, dec: u64) { 180 self.weight -= dec; 181 self.inv_weight = 0; 182 } 183 184 pub fn update_load_set(&mut self, weight: u64) { 185 self.weight = weight; 186 self.inv_weight = 0; 187 } 188 189 /// ## 更新负载权重的倒数 190 pub fn update_inv_weight(&mut self) { 191 // 已经更新 192 if likely(self.inv_weight != 0) { 193 return; 194 } 195 196 let w = Self::scale_load_down(self.weight); 197 198 if unlikely(w >= Self::WMULT_CONST as u64) { 199 // 高位有数据 200 self.inv_weight = 1; 201 } else if unlikely(w == 0) { 202 // 倒数去最大 203 self.inv_weight = Self::WMULT_CONST; 204 } else { 205 // 计算倒数 206 self.inv_weight = Self::WMULT_CONST / w as u32; 207 } 208 } 209 210 /// ## 计算任务的执行时间差 211 /// 212 /// 计算公式:(delta_exec * (weight * self.inv_weight)) >> WMULT_SHIFT 213 pub fn calculate_delta(&mut self, delta_exec: u64, weight: u64) -> u64 { 214 // 降低精度 215 let mut fact = Self::scale_load_down(weight); 216 217 // 记录fact高32位 218 let mut fact_hi = (fact >> 32) as u32; 219 // 用于恢复 220 let mut shift = Self::WMULT_SHIFT; 221 222 self.update_inv_weight(); 223 224 if unlikely(fact_hi != 0) { 225 // 这里表示高32位还有数据 226 // 需要计算最高位,然后继续调整fact 227 let fs = 32 - fact_hi.leading_zeros(); 228 shift -= fs; 229 230 // 确保高32位全为0 231 fact >>= fs; 232 } 233 234 // 这里确定了fact已经在32位内 235 fact *= self.inv_weight as u64; 236 237 fact_hi = (fact >> 32) as u32; 238 239 if fact_hi != 0 { 240 // 这里表示高32位还有数据 241 // 需要计算最高位,然后继续调整fact 242 let fs = 32 - fact_hi.leading_zeros(); 243 shift -= fs; 244 245 // 确保高32位全为0 246 fact >>= fs; 247 } 248 249 return ((delta_exec as u128 * fact as u128) >> shift) as u64; 250 } 251 252 /// ## 将负载权重缩小到到一个小的范围中计算,相当于减小精度计算 253 pub const fn scale_load_down(mut weight: u64) -> u64 { 254 if weight != 0 { 255 weight >>= Self::SCHED_FIXEDPOINT_SHIFT; 256 257 if weight < 2 { 258 weight = 2; 259 } 260 } 261 weight 262 } 263 264 #[allow(dead_code)] 265 pub const fn scale_load(weight: u64) -> u64 { 266 weight << Self::SCHED_FIXEDPOINT_SHIFT 267 } 268 } 269 270 pub trait SchedArch { 271 /// 开启当前核心的调度 272 fn enable_sched_local(); 273 /// 关闭当前核心的调度 274 fn disable_sched_local(); 275 276 /// 在第一次开启调度之前,进行初始化工作。 277 /// 278 /// 注意区别于sched_init,这个函数只是做初始化时钟的工作等等。 279 fn initial_setup_sched_local() {} 280 } 281 282 /// ## PerCpu的运行队列,其中维护了各个调度器对应的rq 283 #[allow(dead_code)] 284 #[derive(Debug)] 285 pub struct CpuRunQueue { 286 lock: SpinLock<()>, 287 lock_on_who: AtomicUsize, 288 289 cpu: usize, 290 clock_task: u64, 291 clock: u64, 292 prev_irq_time: u64, 293 clock_updata_flags: ClockUpdataFlag, 294 295 /// 过载 296 overload: bool, 297 298 next_balance: u64, 299 300 /// 运行任务数 301 nr_running: usize, 302 303 /// 被阻塞的任务数量 304 nr_uninterruptible: usize, 305 306 /// 记录上次更新负载时间 307 cala_load_update: usize, 308 cala_load_active: usize, 309 310 /// CFS调度器 311 cfs: Arc<CfsRunQueue>, 312 313 clock_pelt: u64, 314 lost_idle_time: u64, 315 clock_idle: u64, 316 317 cfs_tasks: LinkedList<Arc<FairSchedEntity>>, 318 319 /// 最近一次的调度信息 320 sched_info: SchedInfo, 321 322 /// 当前在运行队列上执行的进程 323 current: Weak<ProcessControlBlock>, 324 325 idle: Weak<ProcessControlBlock>, 326 } 327 328 impl CpuRunQueue { 329 pub fn new(cpu: usize) -> Self { 330 Self { 331 lock: SpinLock::new(()), 332 lock_on_who: AtomicUsize::new(usize::MAX), 333 cpu, 334 clock_task: 0, 335 clock: 0, 336 prev_irq_time: 0, 337 clock_updata_flags: ClockUpdataFlag::empty(), 338 overload: false, 339 next_balance: 0, 340 nr_running: 0, 341 nr_uninterruptible: 0, 342 cala_load_update: (clock() + (5 * HZ + 1)) as usize, 343 cala_load_active: 0, 344 cfs: Arc::new(CfsRunQueue::new()), 345 clock_pelt: 0, 346 lost_idle_time: 0, 347 clock_idle: 0, 348 cfs_tasks: LinkedList::new(), 349 sched_info: SchedInfo::default(), 350 current: Weak::new(), 351 idle: Weak::new(), 352 } 353 } 354 355 /// 此函数只能在关中断的情况下使用!!! 356 /// 获取到rq的可变引用,需要注意的是返回的第二个值需要确保其生命周期 357 /// 所以可以说这个函数是unsafe的,需要确保正确性 358 /// 在中断上下文,关中断的情况下,此函数是安全的 359 pub fn self_lock(&self) -> (&mut Self, Option<SpinLockGuard<()>>) { 360 if self.lock.is_locked() 361 && smp_get_processor_id().data() as usize == self.lock_on_who.load(Ordering::SeqCst) 362 { 363 // 在本cpu已上锁则可以直接拿 364 ( 365 unsafe { &mut *(self as *const Self as usize as *mut Self) }, 366 None, 367 ) 368 } else { 369 // 否则先上锁再拿 370 let guard = self.lock(); 371 ( 372 unsafe { &mut *(self as *const Self as usize as *mut Self) }, 373 Some(guard), 374 ) 375 } 376 } 377 378 fn lock(&self) -> SpinLockGuard<()> { 379 let guard = self.lock.lock_irqsave(); 380 381 // 更新在哪一个cpu上锁 382 self.lock_on_who 383 .store(smp_get_processor_id().data() as usize, Ordering::SeqCst); 384 385 guard 386 } 387 388 pub fn enqueue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag) { 389 if !flags.contains(EnqueueFlag::ENQUEUE_NOCLOCK) { 390 self.update_rq_clock(); 391 } 392 393 if !flags.contains(EnqueueFlag::ENQUEUE_RESTORE) { 394 let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave(); 395 if sched_info.last_queued == 0 { 396 sched_info.upgrade().last_queued = self.clock; 397 } 398 } 399 400 match pcb.sched_info().policy() { 401 SchedPolicy::CFS => CompletelyFairScheduler::enqueue(self, pcb, flags), 402 SchedPolicy::FIFO => todo!(), 403 SchedPolicy::RT => todo!(), 404 SchedPolicy::IDLE => IdleScheduler::enqueue(self, pcb, flags), 405 } 406 407 // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c#239 408 } 409 410 pub fn dequeue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) { 411 // TODO:sched_core 412 413 if !flags.contains(DequeueFlag::DEQUEUE_NOCLOCK) { 414 self.update_rq_clock() 415 } 416 417 if !flags.contains(DequeueFlag::DEQUEUE_SAVE) { 418 let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave(); 419 420 if sched_info.last_queued > 0 { 421 let delta = self.clock - sched_info.last_queued; 422 423 let mut sched_info = sched_info.upgrade(); 424 sched_info.last_queued = 0; 425 sched_info.run_delay += delta as usize; 426 427 self.sched_info.run_delay += delta as usize; 428 } 429 } 430 431 match pcb.sched_info().policy() { 432 SchedPolicy::CFS => CompletelyFairScheduler::dequeue(self, pcb, flags), 433 SchedPolicy::FIFO => todo!(), 434 SchedPolicy::RT => todo!(), 435 SchedPolicy::IDLE => todo!(), 436 } 437 } 438 439 /// 启用一个任务,将加入队列 440 pub fn activate_task(&mut self, pcb: &Arc<ProcessControlBlock>, mut flags: EnqueueFlag) { 441 if *pcb.sched_info().on_rq.lock_irqsave() == OnRq::Migrating { 442 flags |= EnqueueFlag::ENQUEUE_MIGRATED; 443 } 444 445 if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) { 446 todo!() 447 } 448 449 self.enqueue_task(pcb.clone(), flags); 450 451 *pcb.sched_info().on_rq.lock_irqsave() = OnRq::Queued; 452 } 453 454 /// 检查对应的task是否可以抢占当前运行的task 455 #[allow(clippy::comparison_chain)] 456 pub fn check_preempt_currnet(&mut self, pcb: &Arc<ProcessControlBlock>, flags: WakeupFlags) { 457 if pcb.sched_info().policy() == self.current().sched_info().policy() { 458 match self.current().sched_info().policy() { 459 SchedPolicy::CFS => { 460 CompletelyFairScheduler::check_preempt_currnet(self, pcb, flags) 461 } 462 SchedPolicy::FIFO => todo!(), 463 SchedPolicy::RT => todo!(), 464 SchedPolicy::IDLE => IdleScheduler::check_preempt_currnet(self, pcb, flags), 465 } 466 } else if pcb.sched_info().policy() < self.current().sched_info().policy() { 467 // 调度优先级更高 468 self.resched_current(); 469 } 470 471 if *self.current().sched_info().on_rq.lock_irqsave() == OnRq::Queued 472 && self.current().flags().contains(ProcessFlags::NEED_SCHEDULE) 473 { 474 self.clock_updata_flags 475 .insert(ClockUpdataFlag::RQCF_REQ_SKIP); 476 } 477 } 478 479 /// 禁用一个任务,将离开队列 480 pub fn deactivate_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) { 481 *pcb.sched_info().on_rq.lock_irqsave() = if flags.contains(DequeueFlag::DEQUEUE_SLEEP) { 482 OnRq::None 483 } else { 484 OnRq::Migrating 485 }; 486 487 self.dequeue_task(pcb, flags); 488 } 489 490 #[inline] 491 pub fn cfs_rq(&self) -> Arc<CfsRunQueue> { 492 self.cfs.clone() 493 } 494 495 /// 更新rq时钟 496 pub fn update_rq_clock(&mut self) { 497 // 需要跳过这次时钟更新 498 if self 499 .clock_updata_flags 500 .contains(ClockUpdataFlag::RQCF_ACT_SKIP) 501 { 502 return; 503 } 504 505 let clock = SchedClock::sched_clock_cpu(self.cpu); 506 if clock < self.clock { 507 return; 508 } 509 510 let delta = clock - self.clock; 511 self.clock += delta; 512 // kerror!("clock {}", self.clock); 513 self.update_rq_clock_task(delta); 514 } 515 516 /// 更新任务时钟 517 pub fn update_rq_clock_task(&mut self, mut delta: u64) { 518 let mut irq_delta = irq_time_read(self.cpu) - self.prev_irq_time; 519 // if self.cpu == 0 { 520 // kerror!( 521 // "cpu 0 delta {delta} irq_delta {} irq_time_read(self.cpu) {} self.prev_irq_time {}", 522 // irq_delta, 523 // irq_time_read(self.cpu), 524 // self.prev_irq_time 525 // ); 526 // } 527 compiler_fence(Ordering::SeqCst); 528 529 if irq_delta > delta { 530 irq_delta = delta; 531 } 532 533 self.prev_irq_time += irq_delta; 534 535 delta -= irq_delta; 536 537 // todo: psi? 538 539 // send_to_default_serial8250_port(format!("\n{delta}\n",).as_bytes()); 540 compiler_fence(Ordering::SeqCst); 541 self.clock_task += delta; 542 compiler_fence(Ordering::SeqCst); 543 // if self.cpu == 0 { 544 // kerror!("cpu {} clock_task {}", self.cpu, self.clock_task); 545 // } 546 // todo: pelt? 547 } 548 549 /// 计算当前进程中的可执行数量 550 fn calculate_load_fold_active(&mut self, adjust: usize) -> usize { 551 let mut nr_active = self.nr_running - adjust; 552 nr_active += self.nr_uninterruptible; 553 let mut delta = 0; 554 555 if nr_active != self.cala_load_active { 556 delta = nr_active - self.cala_load_active; 557 self.cala_load_active = nr_active; 558 } 559 560 delta 561 } 562 563 /// ## tick计算全局负载 564 pub fn calculate_global_load_tick(&mut self) { 565 if clock() < self.cala_load_update as u64 { 566 // 如果当前时间在上次更新时间之前,则直接返回 567 return; 568 } 569 570 let delta = self.calculate_load_fold_active(0); 571 572 if delta != 0 { 573 CALCULATE_LOAD_TASKS.fetch_add(delta, Ordering::SeqCst); 574 } 575 576 self.cala_load_update += LOAD_FREQ; 577 } 578 579 pub fn add_nr_running(&mut self, nr_running: usize) { 580 let prev = self.nr_running; 581 582 self.nr_running = prev + nr_running; 583 if prev < 2 && self.nr_running >= 2 && !self.overload { 584 self.overload = true; 585 } 586 } 587 588 pub fn sub_nr_running(&mut self, count: usize) { 589 self.nr_running -= count; 590 } 591 592 /// 在运行idle? 593 pub fn sched_idle_rq(&self) -> bool { 594 return unlikely( 595 self.nr_running == self.cfs.idle_h_nr_running as usize && self.nr_running > 0, 596 ); 597 } 598 599 #[inline] 600 pub fn current(&self) -> Arc<ProcessControlBlock> { 601 self.current.upgrade().unwrap() 602 } 603 604 #[inline] 605 pub fn set_current(&mut self, pcb: Weak<ProcessControlBlock>) { 606 self.current = pcb; 607 } 608 609 #[inline] 610 pub fn set_idle(&mut self, pcb: Weak<ProcessControlBlock>) { 611 self.idle = pcb; 612 } 613 614 #[inline] 615 pub fn clock_task(&self) -> u64 { 616 self.clock_task 617 } 618 619 /// 重新调度当前进程 620 pub fn resched_current(&self) { 621 let current = self.current(); 622 623 // 又需要被调度? 624 if unlikely(current.flags().contains(ProcessFlags::NEED_SCHEDULE)) { 625 return; 626 } 627 628 let cpu = self.cpu; 629 630 if cpu == smp_get_processor_id().data() as usize { 631 // assert!( 632 // Arc::ptr_eq(¤t, &ProcessManager::current_pcb()), 633 // "rq current name {} process current {}", 634 // current.basic().name().to_string(), 635 // ProcessManager::current_pcb().basic().name().to_string(), 636 // ); 637 // 设置需要调度 638 ProcessManager::current_pcb() 639 .flags() 640 .insert(ProcessFlags::NEED_SCHEDULE); 641 return; 642 } 643 644 // 向目标cpu发送重调度ipi 645 send_resched_ipi(ProcessorId::new(cpu as u32)); 646 } 647 648 /// 选择下一个task 649 pub fn pick_next_task(&mut self, prev: Arc<ProcessControlBlock>) -> Arc<ProcessControlBlock> { 650 if likely(prev.sched_info().policy() >= SchedPolicy::CFS) 651 && self.nr_running == self.cfs.h_nr_running as usize 652 { 653 let p = CompletelyFairScheduler::pick_next_task(self, Some(prev.clone())); 654 655 if let Some(pcb) = p.as_ref() { 656 return pcb.clone(); 657 } else { 658 // kerror!( 659 // "pick idle cfs rq {:?}", 660 // self.cfs_rq() 661 // .entities 662 // .iter() 663 // .map(|x| x.1.pid) 664 // .collect::<Vec<_>>() 665 // ); 666 match prev.sched_info().policy() { 667 SchedPolicy::FIFO => todo!(), 668 SchedPolicy::RT => todo!(), 669 SchedPolicy::CFS => CompletelyFairScheduler::put_prev_task(self, prev), 670 SchedPolicy::IDLE => IdleScheduler::put_prev_task(self, prev), 671 } 672 // 选择idle 673 return self.idle.upgrade().unwrap(); 674 } 675 } 676 677 todo!() 678 } 679 } 680 681 bitflags! { 682 pub struct SchedFeature:u32 { 683 /// 给予睡眠任务仅有 50% 的服务赤字。这意味着睡眠任务在被唤醒后会获得一定的服务,但不能过多地占用资源。 684 const GENTLE_FAIR_SLEEPERS = 1 << 0; 685 /// 将新任务排在前面,以避免已经运行的任务被饿死 686 const START_DEBIT = 1 << 1; 687 /// 在调度时优先选择上次唤醒的任务,因为它可能会访问之前唤醒的任务所使用的数据,从而提高缓存局部性。 688 const NEXT_BUDDY = 1 << 2; 689 /// 在调度时优先选择上次运行的任务,因为它可能会访问与之前运行的任务相同的数据,从而提高缓存局部性。 690 const LAST_BUDDY = 1 << 3; 691 /// 认为任务的伙伴(buddy)在缓存中是热点,减少缓存伙伴被迁移的可能性,从而提高缓存局部性。 692 const CACHE_HOT_BUDDY = 1 << 4; 693 /// 允许唤醒时抢占当前任务。 694 const WAKEUP_PREEMPTION = 1 << 5; 695 /// 基于任务未运行时间来减少 CPU 的容量。 696 const NONTASK_CAPACITY = 1 << 6; 697 /// 将远程唤醒排队到目标 CPU,并使用调度器 IPI 处理它们,以减少运行队列锁的争用。 698 const TTWU_QUEUE = 1 << 7; 699 /// 在唤醒时尝试限制对最后级联缓存(LLC)域的无谓扫描。 700 const SIS_UTIL = 1 << 8; 701 /// 在 RT(Real-Time)任务迁移时,通过发送 IPI 来减少 CPU 之间的锁竞争。 702 const RT_PUSH_IPI = 1 << 9; 703 /// 启用估计的 CPU 利用率功能,用于调度决策。 704 const UTIL_EST = 1 << 10; 705 const UTIL_EST_FASTUP = 1 << 11; 706 /// 启用备选调度周期 707 const ALT_PERIOD = 1 << 12; 708 /// 启用基本时间片 709 const BASE_SLICE = 1 << 13; 710 } 711 712 pub struct EnqueueFlag: u8 { 713 const ENQUEUE_WAKEUP = 0x01; 714 const ENQUEUE_RESTORE = 0x02; 715 const ENQUEUE_MOVE = 0x04; 716 const ENQUEUE_NOCLOCK = 0x08; 717 718 const ENQUEUE_MIGRATED = 0x40; 719 720 const ENQUEUE_INITIAL = 0x80; 721 } 722 723 pub struct DequeueFlag: u8 { 724 const DEQUEUE_SLEEP = 0x01; 725 const DEQUEUE_SAVE = 0x02; /* Matches ENQUEUE_RESTORE */ 726 const DEQUEUE_MOVE = 0x04; /* Matches ENQUEUE_MOVE */ 727 const DEQUEUE_NOCLOCK = 0x08; /* Matches ENQUEUE_NOCLOCK */ 728 } 729 730 pub struct WakeupFlags: u8 { 731 /* Wake flags. The first three directly map to some SD flag value */ 732 const WF_EXEC = 0x02; /* Wakeup after exec; maps to SD_BALANCE_EXEC */ 733 const WF_FORK = 0x04; /* Wakeup after fork; maps to SD_BALANCE_FORK */ 734 const WF_TTWU = 0x08; /* Wakeup; maps to SD_BALANCE_WAKE */ 735 736 const WF_SYNC = 0x10; /* Waker goes to sleep after wakeup */ 737 const WF_MIGRATED = 0x20; /* Internal use, task got migrated */ 738 const WF_CURRENT_CPU = 0x40; /* Prefer to move the wakee to the current CPU. */ 739 } 740 741 pub struct SchedMode: u8 { 742 /* 743 * Constants for the sched_mode argument of __schedule(). 744 * 745 * The mode argument allows RT enabled kernels to differentiate a 746 * preemption from blocking on an 'sleeping' spin/rwlock. Note that 747 * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to 748 * optimize the AND operation out and just check for zero. 749 */ 750 /// 在调度过程中不会再次进入队列,即需要手动唤醒 751 const SM_NONE = 0x0; 752 /// 重新加入队列,即当前进程被抢占,需要时钟调度 753 const SM_PREEMPT = 0x1; 754 /// rt相关 755 const SM_RTLOCK_WAIT = 0x2; 756 /// 默认与SM_PREEMPT相同 757 const SM_MASK_PREEMPT = Self::SM_PREEMPT.bits; 758 } 759 } 760 761 #[derive(Copy, Clone, Debug, PartialEq)] 762 pub enum OnRq { 763 Queued, 764 Migrating, 765 None, 766 } 767 768 impl ProcessManager { 769 pub fn update_process_times(user_tick: bool) { 770 let pcb = Self::current_pcb(); 771 CpuTimeFunc::irqtime_account_process_tick(&pcb, user_tick, 1); 772 773 scheduler_tick(); 774 } 775 } 776 777 /// ## 时钟tick时调用此函数 778 pub fn scheduler_tick() { 779 fence(Ordering::SeqCst); 780 // 获取当前CPU索引 781 let cpu_idx = smp_get_processor_id().data() as usize; 782 783 // 获取当前CPU的请求队列 784 let rq = cpu_rq(cpu_idx); 785 786 let (rq, guard) = rq.self_lock(); 787 788 // 获取当前请求队列的当前请求 789 let current = rq.current(); 790 791 // 更新请求队列时钟 792 rq.update_rq_clock(); 793 794 match current.sched_info().policy() { 795 SchedPolicy::CFS => CompletelyFairScheduler::tick(rq, current, false), 796 SchedPolicy::FIFO => todo!(), 797 SchedPolicy::RT => todo!(), 798 SchedPolicy::IDLE => IdleScheduler::tick(rq, current, false), 799 } 800 801 rq.calculate_global_load_tick(); 802 803 drop(guard); 804 // TODO:处理负载均衡 805 } 806 807 /// ## 执行调度 808 /// 若preempt_count不为0则报错 809 #[inline] 810 pub fn schedule(sched_mod: SchedMode) { 811 let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; 812 assert_eq!(ProcessManager::current_pcb().preempt_count(), 0); 813 __schedule(sched_mod); 814 } 815 816 /// ## 执行调度 817 /// 此函数与schedule的区别为,该函数不会检查preempt_count 818 /// 适用于时钟中断等场景 819 pub fn __schedule(sched_mod: SchedMode) { 820 let cpu = smp_get_processor_id().data() as usize; 821 let rq = cpu_rq(cpu); 822 823 let mut prev = rq.current(); 824 if let ProcessState::Exited(_) = prev.clone().sched_info().inner_lock_read_irqsave().state() { 825 // 从exit进的Schedule 826 prev = ProcessManager::current_pcb(); 827 } 828 829 // TODO: hrtick_clear(rq); 830 831 let (rq, _guard) = rq.self_lock(); 832 833 rq.clock_updata_flags = ClockUpdataFlag::from_bits_truncate(rq.clock_updata_flags.bits() << 1); 834 835 rq.update_rq_clock(); 836 rq.clock_updata_flags = ClockUpdataFlag::RQCF_UPDATE; 837 838 // kBUG!( 839 // "before cfs rq pcbs {:?}\nvruntimes {:?}\n", 840 // rq.cfs 841 // .entities 842 // .iter() 843 // .map(|x| { x.1.pcb().pid() }) 844 // .collect::<Vec<_>>(), 845 // rq.cfs 846 // .entities 847 // .iter() 848 // .map(|x| { x.1.vruntime }) 849 // .collect::<Vec<_>>(), 850 // ); 851 // kwarn!( 852 // "before cfs rq {:?} prev {:?}", 853 // rq.cfs 854 // .entities 855 // .iter() 856 // .map(|x| { x.1.pcb().pid() }) 857 // .collect::<Vec<_>>(), 858 // prev.pid() 859 // ); 860 861 // kerror!("prev pid {:?} {:?}", prev.pid(), prev.sched_info().policy()); 862 if !sched_mod.contains(SchedMode::SM_MASK_PREEMPT) 863 && prev.sched_info().policy() != SchedPolicy::IDLE 864 && prev.sched_info().inner_lock_read_irqsave().is_mark_sleep() 865 { 866 // kwarn!("deactivate_task prev {:?}", prev.pid()); 867 // TODO: 这里需要处理信号 868 // https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c?r=&mo=172979&fi=6578#6630 869 rq.deactivate_task( 870 prev.clone(), 871 DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK, 872 ); 873 } 874 875 let next = rq.pick_next_task(prev.clone()); 876 877 // kBUG!( 878 // "after cfs rq pcbs {:?}\nvruntimes {:?}\n", 879 // rq.cfs 880 // .entities 881 // .iter() 882 // .map(|x| { x.1.pcb().pid() }) 883 // .collect::<Vec<_>>(), 884 // rq.cfs 885 // .entities 886 // .iter() 887 // .map(|x| { x.1.vruntime }) 888 // .collect::<Vec<_>>(), 889 // ); 890 891 // kerror!("next {:?}", next.pid()); 892 893 prev.flags().remove(ProcessFlags::NEED_SCHEDULE); 894 fence(Ordering::SeqCst); 895 if likely(!Arc::ptr_eq(&prev, &next)) { 896 rq.set_current(Arc::downgrade(&next)); 897 // kwarn!( 898 // "switch_process prev {:?} next {:?} sched_mode {sched_mod:?}", 899 // prev.pid(), 900 // next.pid() 901 // ); 902 903 // send_to_default_serial8250_port( 904 // format!( 905 // "switch_process prev {:?} next {:?} sched_mode {sched_mod:?}\n", 906 // prev.pid(), 907 // next.pid() 908 // ) 909 // .as_bytes(), 910 // ); 911 912 // CurrentApic.send_eoi(); 913 compiler_fence(Ordering::SeqCst); 914 915 unsafe { ProcessManager::switch_process(prev, next) }; 916 } else { 917 assert!( 918 Arc::ptr_eq(&ProcessManager::current_pcb(), &prev), 919 "{}", 920 ProcessManager::current_pcb().basic().name() 921 ); 922 } 923 } 924 925 pub fn sched_fork(pcb: &Arc<ProcessControlBlock>) -> Result<(), SystemError> { 926 let mut prio_guard = pcb.sched_info().prio_data.write_irqsave(); 927 let current = ProcessManager::current_pcb(); 928 929 prio_guard.prio = current.sched_info().prio_data.read_irqsave().normal_prio; 930 931 if PrioUtil::dl_prio(prio_guard.prio) { 932 return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); 933 } else if PrioUtil::rt_prio(prio_guard.prio) { 934 let policy = &pcb.sched_info().sched_policy; 935 *policy.write_irqsave() = SchedPolicy::RT; 936 } else { 937 let policy = &pcb.sched_info().sched_policy; 938 *policy.write_irqsave() = SchedPolicy::CFS; 939 } 940 941 pcb.sched_info() 942 .sched_entity() 943 .force_mut() 944 .init_entity_runnable_average(); 945 946 Ok(()) 947 } 948 949 pub fn sched_cgroup_fork(pcb: &Arc<ProcessControlBlock>) { 950 __set_task_cpu(pcb, smp_get_processor_id()); 951 match pcb.sched_info().policy() { 952 SchedPolicy::RT => todo!(), 953 SchedPolicy::FIFO => todo!(), 954 SchedPolicy::CFS => CompletelyFairScheduler::task_fork(pcb.clone()), 955 SchedPolicy::IDLE => todo!(), 956 } 957 } 958 959 fn __set_task_cpu(pcb: &Arc<ProcessControlBlock>, cpu: ProcessorId) { 960 // TODO: Fixme There is not implement group sched; 961 let se = pcb.sched_info().sched_entity(); 962 let rq = cpu_rq(cpu.data() as usize); 963 se.force_mut().set_cfs(Arc::downgrade(&rq.cfs)); 964 } 965 966 #[inline(never)] 967 pub fn sched_init() { 968 // 初始化percpu变量 969 unsafe { 970 CPU_IRQ_TIME = Some(Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize)); 971 CPU_IRQ_TIME 972 .as_mut() 973 .unwrap() 974 .resize_with(PerCpu::MAX_CPU_NUM as usize, || Box::leak(Box::default())); 975 976 let mut cpu_runqueue = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize); 977 for cpu in 0..PerCpu::MAX_CPU_NUM as usize { 978 let rq = Arc::new(CpuRunQueue::new(cpu)); 979 rq.cfs.force_mut().set_rq(Arc::downgrade(&rq)); 980 cpu_runqueue.push(rq); 981 } 982 983 CPU_RUNQUEUE.init(PerCpuVar::new(cpu_runqueue).unwrap()); 984 }; 985 } 986 987 #[inline] 988 pub fn send_resched_ipi(cpu: ProcessorId) { 989 send_ipi(IpiKind::KickCpu, IpiTarget::Specified(cpu)); 990 } 991