pub mod clock;
pub mod completion;
pub mod cputime;
pub mod fair;
pub mod idle;
pub mod pelt;
pub mod prio;
pub mod syscall;

use core::{
    intrinsics::{likely, unlikely},
    sync::atomic::{compiler_fence, fence, AtomicUsize, Ordering},
};

use alloc::{
    boxed::Box,
    collections::LinkedList,
    sync::{Arc, Weak},
    vec::Vec,
};
use system_error::SystemError;

use crate::{
    arch::{interrupt::ipi::send_ipi, CurrentIrqArch},
    exception::{
        ipi::{IpiKind, IpiTarget},
        InterruptArch,
    },
    libs::{
        lazy_init::Lazy,
        spinlock::{SpinLock, SpinLockGuard},
    },
    mm::percpu::{PerCpu, PerCpuVar},
    process::{ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, SchedInfo},
    sched::idle::IdleScheduler,
    smp::{core::smp_get_processor_id, cpu::ProcessorId},
    time::{clocksource::HZ, timer::clock},
};

use self::{
    clock::{ClockUpdataFlag, SchedClock},
    cputime::{irq_time_read, CpuTimeFunc, IrqTime},
    fair::{CfsRunQueue, CompletelyFairScheduler, FairSchedEntity},
    prio::PrioUtil,
};

/// Per-CPU IRQ-time accounting slots, indexed by CPU id.
///
/// Initialized once in `sched_init()` (before secondary CPUs are brought up)
/// and never resized afterwards; entries are `Box::leak`ed so the `&'static
/// mut` references live for the whole kernel lifetime.
// NOTE(review): a `static mut` handed out as `&'static mut` is only sound
// because each CPU touches exclusively its own slot — confirm no cross-CPU
// access exists before refactoring.
static mut CPU_IRQ_TIME: Option<Vec<&'static mut IrqTime>> = None;

// Although each rq is per-CPU, load balancing needs to modify a *peer*
// CPU's rq, so the rq must still carry its own lock.
static CPU_RUNQUEUE: Lazy<PerCpuVar<Arc<CpuRunQueue>>> = PerCpuVar::define_lazy();

/// Global sum of runnable tasks across all CPUs, folded in periodically by
/// `CpuRunQueue::calculate_global_load_tick()`.
static CALCULATE_LOAD_TASKS: AtomicUsize = AtomicUsize::new(0);

/// Interval (in clock ticks) between global load-average folds: 5 s + 1 tick,
/// the extra tick staggering the folds across CPUs (same trick as Linux).
const LOAD_FREQ: usize = HZ as usize * 5 + 1;

pub const SCHED_FIXEDPOINT_SHIFT: u64 = 10;
#[allow(dead_code)]
pub const SCHED_FIXEDPOINT_SCALE: u64 = 1 << SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SHIFT: u64 = SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SCALE: u64 = 1 << SCHED_CAPACITY_SHIFT;

/// Returns the IRQ-time accounting slot of `cpu`.
///
/// # Panics
/// Panics if `sched_init()` has not run yet or `cpu` is out of range.
// NOTE(review): this moves a `&'static mut` out of the static on every call,
// so two callers on the same CPU can alias — relies on per-CPU discipline.
#[inline]
pub fn cpu_irq_time(cpu: usize) -> &'static mut IrqTime {
    unsafe { CPU_IRQ_TIME.as_mut().unwrap()[cpu] }
}

/// Returns (a clone of) the runqueue of `cpu`, initializing the per-CPU
/// storage on first use.
#[inline]
pub fn cpu_rq(cpu: usize) -> Arc<CpuRunQueue> {
    CPU_RUNQUEUE.ensure();
    // SAFETY: `force_get` only reads another CPU's slot; the slots are
    // written once during `sched_init` and are immutable afterwards.
    unsafe {
        CPU_RUNQUEUE
            .get()
            .force_get(ProcessorId::new(cpu as u32))
            .clone()
    }
}

lazy_static! {
    /// Default set of enabled scheduler features (mirrors Linux's
    /// `sched_features` defaults).
    pub static ref SCHED_FEATURES: SchedFeature = SchedFeature::GENTLE_FAIR_SLEEPERS
        | SchedFeature::START_DEBIT
        | SchedFeature::LAST_BUDDY
        | SchedFeature::CACHE_HOT_BUDDY
        | SchedFeature::WAKEUP_PREEMPTION
        | SchedFeature::NONTASK_CAPACITY
        | SchedFeature::TTWU_QUEUE
        | SchedFeature::SIS_UTIL
        | SchedFeature::RT_PUSH_IPI
        | SchedFeature::ALT_PERIOD
        | SchedFeature::BASE_SLICE
        | SchedFeature::UTIL_EST
        | SchedFeature::UTIL_EST_FASTUP;
}

/// Interface every scheduling class (CFS, RT, idle, …) implements,
/// modeled after Linux's `struct sched_class`. All hooks take the rq
/// already locked by the caller.
pub trait Scheduler {
    /// Called when a task becomes runnable: inserts its scheduling entity
    /// into the class's structures (e.g. the CFS red-black tree) and bumps
    /// `nr_running`.
    fn enqueue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag);

    /// Called when a task is no longer runnable: removes its entity from the
    /// class's structures and decrements `nr_running`.
    fn dequeue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag);

    /// Voluntarily yield the CPU; essentially a dequeue immediately
    /// followed by an enqueue.
    fn yield_task(rq: &mut CpuRunQueue);

    /// Decide whether a newly-runnable task may preempt the currently
    /// running one.
    // NOTE(review): name keeps the historical "currnet" typo because it is
    // part of the public trait surface; renaming would break implementors.
    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<ProcessControlBlock>,
        flags: WakeupFlags,
    );

    /// Pick the most suitable task to run next (without side effects).
    #[allow(dead_code)]
    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<ProcessControlBlock>>;

    /// Pick the most suitable task to run next, given the task being
    /// switched out.
    fn pick_next_task(
        rq: &mut CpuRunQueue,
        pcb: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>>;

    /// Called from the periodic timer tick; may request a task switch.
    /// This is what drives time-slice preemption.
    fn tick(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, queued: bool);

    /// Called at fork time when the child should join this class (e.g. CFS).
    fn task_fork(pcb: Arc<ProcessControlBlock>);

    /// Put the previously running task back into the class's structures.
    fn put_prev_task(rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>);
}

/// Scheduling policy of a task. Ordering matters: a *smaller* variant means
/// a higher scheduling class (see `CpuRunQueue::check_preempt_currnet`,
/// which uses `<` on policies).
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SchedPolicy {
    /// Real-time tasks.
    RT,
    /// First-in-first-out scheduling.
    FIFO,
    /// Completely Fair Scheduler.
    CFS,
    /// Idle tasks (lowest class).
    IDLE,
}
147 #[allow(dead_code)] 148 pub struct TaskGroup { 149 /// CFS管理的调度实体,percpu的 150 entitys: Vec<Arc<FairSchedEntity>>, 151 /// 每个CPU的CFS运行队列 152 cfs: Vec<Arc<CfsRunQueue>>, 153 /// 父节点 154 parent: Option<Arc<TaskGroup>>, 155 156 shares: u64, 157 } 158 159 #[derive(Debug, Default)] 160 pub struct LoadWeight { 161 /// 负载权重 162 pub weight: u64, 163 /// weight的倒数,方便计算 164 pub inv_weight: u32, 165 } 166 167 impl LoadWeight { 168 /// 用于限制权重在一个合适的区域内 169 pub const SCHED_FIXEDPOINT_SHIFT: u32 = 10; 170 171 pub const WMULT_SHIFT: u32 = 32; 172 pub const WMULT_CONST: u32 = !0; 173 174 pub const NICE_0_LOAD_SHIFT: u32 = Self::SCHED_FIXEDPOINT_SHIFT + Self::SCHED_FIXEDPOINT_SHIFT; 175 176 pub fn update_load_add(&mut self, inc: u64) { 177 self.weight += inc; 178 self.inv_weight = 0; 179 } 180 181 pub fn update_load_sub(&mut self, dec: u64) { 182 self.weight -= dec; 183 self.inv_weight = 0; 184 } 185 186 pub fn update_load_set(&mut self, weight: u64) { 187 self.weight = weight; 188 self.inv_weight = 0; 189 } 190 191 /// ## 更新负载权重的倒数 192 pub fn update_inv_weight(&mut self) { 193 // 已经更新 194 if likely(self.inv_weight != 0) { 195 return; 196 } 197 198 let w = Self::scale_load_down(self.weight); 199 200 if unlikely(w >= Self::WMULT_CONST as u64) { 201 // 高位有数据 202 self.inv_weight = 1; 203 } else if unlikely(w == 0) { 204 // 倒数去最大 205 self.inv_weight = Self::WMULT_CONST; 206 } else { 207 // 计算倒数 208 self.inv_weight = Self::WMULT_CONST / w as u32; 209 } 210 } 211 212 /// ## 计算任务的执行时间差 213 /// 214 /// 计算公式:(delta_exec * (weight * self.inv_weight)) >> WMULT_SHIFT 215 pub fn calculate_delta(&mut self, delta_exec: u64, weight: u64) -> u64 { 216 // 降低精度 217 let mut fact = Self::scale_load_down(weight); 218 219 // 记录fact高32位 220 let mut fact_hi = (fact >> 32) as u32; 221 // 用于恢复 222 let mut shift = Self::WMULT_SHIFT; 223 224 self.update_inv_weight(); 225 226 if unlikely(fact_hi != 0) { 227 // 这里表示高32位还有数据 228 // 需要计算最高位,然后继续调整fact 229 let fs = 32 - fact_hi.leading_zeros(); 230 shift -= fs; 
231 232 // 确保高32位全为0 233 fact >>= fs; 234 } 235 236 // 这里确定了fact已经在32位内 237 fact *= self.inv_weight as u64; 238 239 fact_hi = (fact >> 32) as u32; 240 241 if fact_hi != 0 { 242 // 这里表示高32位还有数据 243 // 需要计算最高位,然后继续调整fact 244 let fs = 32 - fact_hi.leading_zeros(); 245 shift -= fs; 246 247 // 确保高32位全为0 248 fact >>= fs; 249 } 250 251 return ((delta_exec as u128 * fact as u128) >> shift) as u64; 252 } 253 254 /// ## 将负载权重缩小到到一个小的范围中计算,相当于减小精度计算 255 pub const fn scale_load_down(mut weight: u64) -> u64 { 256 if weight != 0 { 257 weight >>= Self::SCHED_FIXEDPOINT_SHIFT; 258 259 if weight < 2 { 260 weight = 2; 261 } 262 } 263 weight 264 } 265 266 #[allow(dead_code)] 267 pub const fn scale_load(weight: u64) -> u64 { 268 weight << Self::SCHED_FIXEDPOINT_SHIFT 269 } 270 } 271 272 pub trait SchedArch { 273 /// 开启当前核心的调度 274 fn enable_sched_local(); 275 /// 关闭当前核心的调度 276 #[allow(dead_code)] 277 fn disable_sched_local(); 278 279 /// 在第一次开启调度之前,进行初始化工作。 280 /// 281 /// 注意区别于sched_init,这个函数只是做初始化时钟的工作等等。 282 fn initial_setup_sched_local() {} 283 } 284 285 /// ## PerCpu的运行队列,其中维护了各个调度器对应的rq 286 #[allow(dead_code)] 287 #[derive(Debug)] 288 pub struct CpuRunQueue { 289 lock: SpinLock<()>, 290 lock_on_who: AtomicUsize, 291 292 cpu: usize, 293 clock_task: u64, 294 clock: u64, 295 prev_irq_time: u64, 296 clock_updata_flags: ClockUpdataFlag, 297 298 /// 过载 299 overload: bool, 300 301 next_balance: u64, 302 303 /// 运行任务数 304 nr_running: usize, 305 306 /// 被阻塞的任务数量 307 nr_uninterruptible: usize, 308 309 /// 记录上次更新负载时间 310 cala_load_update: usize, 311 cala_load_active: usize, 312 313 /// CFS调度器 314 cfs: Arc<CfsRunQueue>, 315 316 clock_pelt: u64, 317 lost_idle_time: u64, 318 clock_idle: u64, 319 320 cfs_tasks: LinkedList<Arc<FairSchedEntity>>, 321 322 /// 最近一次的调度信息 323 sched_info: SchedInfo, 324 325 /// 当前在运行队列上执行的进程 326 current: Weak<ProcessControlBlock>, 327 328 idle: Weak<ProcessControlBlock>, 329 } 330 331 impl CpuRunQueue { 332 pub fn new(cpu: usize) -> Self { 333 Self { 334 lock: 
SpinLock::new(()), 335 lock_on_who: AtomicUsize::new(usize::MAX), 336 cpu, 337 clock_task: 0, 338 clock: 0, 339 prev_irq_time: 0, 340 clock_updata_flags: ClockUpdataFlag::empty(), 341 overload: false, 342 next_balance: 0, 343 nr_running: 0, 344 nr_uninterruptible: 0, 345 cala_load_update: (clock() + (5 * HZ + 1)) as usize, 346 cala_load_active: 0, 347 cfs: Arc::new(CfsRunQueue::new()), 348 clock_pelt: 0, 349 lost_idle_time: 0, 350 clock_idle: 0, 351 cfs_tasks: LinkedList::new(), 352 sched_info: SchedInfo::default(), 353 current: Weak::new(), 354 idle: Weak::new(), 355 } 356 } 357 358 /// 此函数只能在关中断的情况下使用!!! 359 /// 获取到rq的可变引用,需要注意的是返回的第二个值需要确保其生命周期 360 /// 所以可以说这个函数是unsafe的,需要确保正确性 361 /// 在中断上下文,关中断的情况下,此函数是安全的 362 pub fn self_lock(&self) -> (&mut Self, Option<SpinLockGuard<()>>) { 363 if self.lock.is_locked() 364 && smp_get_processor_id().data() as usize == self.lock_on_who.load(Ordering::SeqCst) 365 { 366 // 在本cpu已上锁则可以直接拿 367 ( 368 unsafe { 369 (self as *const Self as usize as *mut Self) 370 .as_mut() 371 .unwrap() 372 }, 373 None, 374 ) 375 } else { 376 // 否则先上锁再拿 377 let guard = self.lock(); 378 ( 379 unsafe { 380 (self as *const Self as usize as *mut Self) 381 .as_mut() 382 .unwrap() 383 }, 384 Some(guard), 385 ) 386 } 387 } 388 389 fn lock(&self) -> SpinLockGuard<()> { 390 let guard = self.lock.lock_irqsave(); 391 392 // 更新在哪一个cpu上锁 393 self.lock_on_who 394 .store(smp_get_processor_id().data() as usize, Ordering::SeqCst); 395 396 guard 397 } 398 399 pub fn enqueue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag) { 400 if !flags.contains(EnqueueFlag::ENQUEUE_NOCLOCK) { 401 self.update_rq_clock(); 402 } 403 404 if !flags.contains(EnqueueFlag::ENQUEUE_RESTORE) { 405 let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave(); 406 if sched_info.last_queued == 0 { 407 sched_info.upgrade().last_queued = self.clock; 408 } 409 } 410 411 match pcb.sched_info().policy() { 412 SchedPolicy::CFS => CompletelyFairScheduler::enqueue(self, pcb, 
flags), 413 SchedPolicy::FIFO => todo!(), 414 SchedPolicy::RT => todo!(), 415 SchedPolicy::IDLE => IdleScheduler::enqueue(self, pcb, flags), 416 } 417 418 // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c#239 419 } 420 421 pub fn dequeue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) { 422 // TODO:sched_core 423 424 if !flags.contains(DequeueFlag::DEQUEUE_NOCLOCK) { 425 self.update_rq_clock() 426 } 427 428 if !flags.contains(DequeueFlag::DEQUEUE_SAVE) { 429 let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave(); 430 431 if sched_info.last_queued > 0 { 432 let delta = self.clock - sched_info.last_queued; 433 434 let mut sched_info = sched_info.upgrade(); 435 sched_info.last_queued = 0; 436 sched_info.run_delay += delta as usize; 437 438 self.sched_info.run_delay += delta as usize; 439 } 440 } 441 442 match pcb.sched_info().policy() { 443 SchedPolicy::CFS => CompletelyFairScheduler::dequeue(self, pcb, flags), 444 SchedPolicy::FIFO => todo!(), 445 SchedPolicy::RT => todo!(), 446 SchedPolicy::IDLE => IdleScheduler::dequeue(self, pcb, flags), 447 } 448 } 449 450 /// 启用一个任务,将加入队列 451 pub fn activate_task(&mut self, pcb: &Arc<ProcessControlBlock>, mut flags: EnqueueFlag) { 452 if *pcb.sched_info().on_rq.lock_irqsave() == OnRq::Migrating { 453 flags |= EnqueueFlag::ENQUEUE_MIGRATED; 454 } 455 456 if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) { 457 todo!() 458 } 459 460 self.enqueue_task(pcb.clone(), flags); 461 462 *pcb.sched_info().on_rq.lock_irqsave() = OnRq::Queued; 463 } 464 465 /// 检查对应的task是否可以抢占当前运行的task 466 #[allow(clippy::comparison_chain)] 467 pub fn check_preempt_currnet(&mut self, pcb: &Arc<ProcessControlBlock>, flags: WakeupFlags) { 468 if pcb.sched_info().policy() == self.current().sched_info().policy() { 469 match self.current().sched_info().policy() { 470 SchedPolicy::CFS => { 471 CompletelyFairScheduler::check_preempt_currnet(self, pcb, flags) 472 } 473 SchedPolicy::FIFO => todo!(), 474 
SchedPolicy::RT => todo!(), 475 SchedPolicy::IDLE => IdleScheduler::check_preempt_currnet(self, pcb, flags), 476 } 477 } else if pcb.sched_info().policy() < self.current().sched_info().policy() { 478 // 调度优先级更高 479 self.resched_current(); 480 } 481 482 if *self.current().sched_info().on_rq.lock_irqsave() == OnRq::Queued 483 && self.current().flags().contains(ProcessFlags::NEED_SCHEDULE) 484 { 485 self.clock_updata_flags 486 .insert(ClockUpdataFlag::RQCF_REQ_SKIP); 487 } 488 } 489 490 /// 禁用一个任务,将离开队列 491 pub fn deactivate_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) { 492 *pcb.sched_info().on_rq.lock_irqsave() = if flags.contains(DequeueFlag::DEQUEUE_SLEEP) { 493 OnRq::None 494 } else { 495 OnRq::Migrating 496 }; 497 498 self.dequeue_task(pcb, flags); 499 } 500 501 #[inline] 502 pub fn cfs_rq(&self) -> Arc<CfsRunQueue> { 503 self.cfs.clone() 504 } 505 506 /// 更新rq时钟 507 pub fn update_rq_clock(&mut self) { 508 // 需要跳过这次时钟更新 509 if self 510 .clock_updata_flags 511 .contains(ClockUpdataFlag::RQCF_ACT_SKIP) 512 { 513 return; 514 } 515 516 let clock = SchedClock::sched_clock_cpu(self.cpu); 517 if clock < self.clock { 518 return; 519 } 520 521 let delta = clock - self.clock; 522 self.clock += delta; 523 // error!("clock {}", self.clock); 524 self.update_rq_clock_task(delta); 525 } 526 527 /// 更新任务时钟 528 pub fn update_rq_clock_task(&mut self, mut delta: u64) { 529 let mut irq_delta = irq_time_read(self.cpu) - self.prev_irq_time; 530 // if self.cpu == 0 { 531 // error!( 532 // "cpu 0 delta {delta} irq_delta {} irq_time_read(self.cpu) {} self.prev_irq_time {}", 533 // irq_delta, 534 // irq_time_read(self.cpu), 535 // self.prev_irq_time 536 // ); 537 // } 538 compiler_fence(Ordering::SeqCst); 539 540 if irq_delta > delta { 541 irq_delta = delta; 542 } 543 544 self.prev_irq_time += irq_delta; 545 546 delta -= irq_delta; 547 548 // todo: psi? 
549 550 // send_to_default_serial8250_port(format!("\n{delta}\n",).as_bytes()); 551 compiler_fence(Ordering::SeqCst); 552 self.clock_task += delta; 553 compiler_fence(Ordering::SeqCst); 554 // if self.cpu == 0 { 555 // error!("cpu {} clock_task {}", self.cpu, self.clock_task); 556 // } 557 // todo: pelt? 558 } 559 560 /// 计算当前进程中的可执行数量 561 fn calculate_load_fold_active(&mut self, adjust: usize) -> usize { 562 let mut nr_active = self.nr_running - adjust; 563 nr_active += self.nr_uninterruptible; 564 let mut delta = 0; 565 566 if nr_active != self.cala_load_active { 567 delta = nr_active - self.cala_load_active; 568 self.cala_load_active = nr_active; 569 } 570 571 delta 572 } 573 574 /// ## tick计算全局负载 575 pub fn calculate_global_load_tick(&mut self) { 576 if clock() < self.cala_load_update as u64 { 577 // 如果当前时间在上次更新时间之前,则直接返回 578 return; 579 } 580 581 let delta = self.calculate_load_fold_active(0); 582 583 if delta != 0 { 584 CALCULATE_LOAD_TASKS.fetch_add(delta, Ordering::SeqCst); 585 } 586 587 self.cala_load_update += LOAD_FREQ; 588 } 589 590 pub fn add_nr_running(&mut self, nr_running: usize) { 591 let prev = self.nr_running; 592 593 self.nr_running = prev + nr_running; 594 if prev < 2 && self.nr_running >= 2 && !self.overload { 595 self.overload = true; 596 } 597 } 598 599 pub fn sub_nr_running(&mut self, count: usize) { 600 self.nr_running -= count; 601 } 602 603 /// 在运行idle? 
    pub fn sched_idle_rq(&self) -> bool {
        // True when every runnable task is an idle-policy CFS task.
        return unlikely(
            self.nr_running == self.cfs.idle_h_nr_running as usize && self.nr_running > 0,
        );
    }

    /// Task currently running on this rq.
    ///
    /// # Panics
    /// Panics if `set_current` has not been called yet or the task has been
    /// dropped (dead `Weak`).
    #[inline]
    pub fn current(&self) -> Arc<ProcessControlBlock> {
        self.current.upgrade().unwrap()
    }

    /// Record the task now running on this rq.
    #[inline]
    pub fn set_current(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.current = pcb;
    }

    /// Record this rq's idle task.
    #[inline]
    pub fn set_idle(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.idle = pcb;
    }

    /// Task clock (rq clock minus IRQ time).
    #[inline]
    pub fn clock_task(&self) -> u64 {
        self.clock_task
    }

    /// Request a reschedule of this rq's current task: set NEED_SCHEDULE
    /// locally, or send a reschedule IPI when the rq belongs to another CPU.
    pub fn resched_current(&self) {
        let current = self.current();

        // Already marked for rescheduling? Nothing to do.
        if unlikely(current.flags().contains(ProcessFlags::NEED_SCHEDULE)) {
            return;
        }

        let cpu = self.cpu;

        if cpu == smp_get_processor_id().data() as usize {
            // Invariant (disabled debug assert): rq.current should equal
            // ProcessManager::current_pcb() here.
            // Local CPU: just flag the running task for rescheduling.
            ProcessManager::current_pcb()
                .flags()
                .insert(ProcessFlags::NEED_SCHEDULE);
            return;
        }

        // Remote CPU: kick it with a reschedule IPI.
        send_resched_ipi(ProcessorId::new(cpu as u32));
    }

    /// Pick the next task to run, putting `prev` back first if the class
    /// requires it. Falls back to the idle task when CFS has nothing
    /// runnable.
    ///
    /// # Panics
    /// Panics (`todo!`) when tasks of a not-yet-implemented class (RT/FIFO)
    /// are present, and if the idle task has not been set.
    pub fn pick_next_task(&mut self, prev: Arc<ProcessControlBlock>) -> Arc<ProcessControlBlock> {
        // Fast path: only CFS (or lower) tasks are queued, so ask CFS directly.
        if likely(prev.sched_info().policy() >= SchedPolicy::CFS)
            && self.nr_running == self.cfs.h_nr_running as usize
        {
            let p = CompletelyFairScheduler::pick_next_task(self, Some(prev.clone()));

            if let Some(pcb) = p.as_ref() {
                return pcb.clone();
            } else {
                // CFS has nothing to run: put `prev` back into its class…
                match prev.sched_info().policy() {
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::CFS => CompletelyFairScheduler::put_prev_task(self, prev),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(self, prev),
                }
                // …and run the idle task.
                return self.idle.upgrade().unwrap();
            }
        }

        // Slow path (higher classes present) is not implemented yet.
        todo!()
    }
}

bitflags! {
    pub struct SchedFeature:u32 {
        /// Give sleeping tasks only a 50% service deficit: a woken sleeper
        /// gets some service but cannot hog the CPU.
        const GENTLE_FAIR_SLEEPERS = 1 << 0;
        /// Place new tasks ahead so already-running tasks cannot starve them.
        const START_DEBIT = 1 << 1;
        /// Prefer the last wakee: it likely shares cache-hot data with the
        /// task that woke it.
        const NEXT_BUDDY = 1 << 2;
        /// Prefer the last task that ran: it likely still has cache-hot data.
        const LAST_BUDDY = 1 << 3;
        /// Consider a task's buddy cache-hot and avoid migrating it, for
        /// better cache locality.
        const CACHE_HOT_BUDDY = 1 << 4;
        /// Allow wakeups to preempt the current task.
        const WAKEUP_PREEMPTION = 1 << 5;
        /// Discount CPU capacity based on time not spent running tasks.
        const NONTASK_CAPACITY = 1 << 6;
        /// Queue remote wakeups on the target CPU and handle them via the
        /// scheduler IPI, reducing rq-lock contention.
        const TTWU_QUEUE = 1 << 7;
        /// Limit pointless scanning of the LLC domain on wakeup.
        const SIS_UTIL = 1 << 8;
        /// Use IPIs for RT task migration to reduce cross-CPU lock contention.
        const RT_PUSH_IPI = 1 << 9;
        /// Enable estimated CPU utilization for scheduling decisions.
        const UTIL_EST = 1 << 10;
        const UTIL_EST_FASTUP = 1 << 11;
        /// Enable the alternative scheduling period.
        const ALT_PERIOD = 1 << 12;
        /// Enable the base time slice.
        const BASE_SLICE = 1 << 13;
    }

    pub struct EnqueueFlag: u8 {
        const ENQUEUE_WAKEUP = 0x01;
        const ENQUEUE_RESTORE = 0x02;
        const ENQUEUE_MOVE = 0x04;
        const ENQUEUE_NOCLOCK = 0x08;

        const ENQUEUE_MIGRATED = 0x40;

        const ENQUEUE_INITIAL = 0x80;
    }

    pub struct DequeueFlag: u8 {
        const DEQUEUE_SLEEP = 0x01;
        const DEQUEUE_SAVE = 0x02; /* Matches ENQUEUE_RESTORE */
        const DEQUEUE_MOVE = 0x04; /* Matches ENQUEUE_MOVE */
        const DEQUEUE_NOCLOCK = 0x08; /* Matches ENQUEUE_NOCLOCK */
    }

    pub struct WakeupFlags: u8 {
        /* Wake flags. The first three directly map to some SD flag value */
        const WF_EXEC = 0x02; /* Wakeup after exec; maps to SD_BALANCE_EXEC */
        const WF_FORK = 0x04; /* Wakeup after fork; maps to SD_BALANCE_FORK */
        const WF_TTWU = 0x08; /* Wakeup; maps to SD_BALANCE_WAKE */

        const WF_SYNC = 0x10; /* Waker goes to sleep after wakeup */
        const WF_MIGRATED = 0x20; /* Internal use, task got migrated */
        const WF_CURRENT_CPU = 0x40; /* Prefer to move the wakee to the current CPU. */
    }

    pub struct SchedMode: u8 {
        /*
         * Constants for the sched_mode argument of __schedule().
         *
         * The mode argument allows RT enabled kernels to differentiate a
         * preemption from blocking on an 'sleeping' spin/rwlock. Note that
         * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
         * optimize the AND operation out and just check for zero.
         */
        /// The task will not be re-queued during the switch; it must be
        /// woken up manually later.
        const SM_NONE = 0x0;
        /// The current task is being preempted and is re-queued; the timer
        /// tick will schedule it again.
        const SM_PREEMPT = 0x1;
        /// RT-lock related.
        const SM_RTLOCK_WAIT = 0x2;
        /// By default identical to SM_PREEMPT.
        const SM_MASK_PREEMPT = Self::SM_PREEMPT.bits;
    }
}

/// Whether (and how) a task currently sits on a runqueue.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OnRq {
    /// Queued on a runqueue.
    Queued,
    /// Temporarily off-queue while migrating between CPUs.
    Migrating,
    /// Not on any runqueue.
    None,
}

impl ProcessManager {
    /// Per-tick time accounting for the current task, followed by the
    /// scheduler tick.
    pub fn update_process_times(user_tick: bool) {
        let pcb = Self::current_pcb();
        CpuTimeFunc::irqtime_account_process_tick(&pcb, user_tick, 1);

        scheduler_tick();
    }
}

/// Called on every timer tick: updates the rq clock, runs the current
/// scheduling class's tick hook and folds the global load.
pub fn scheduler_tick() {
    fence(Ordering::SeqCst);
    // Index of the current CPU.
    let cpu_idx = smp_get_processor_id().data() as usize;

    // This CPU's runqueue.
    let rq = cpu_rq(cpu_idx);

    let (rq, guard) = rq.self_lock();

    // Task currently running on this rq.
    let current = rq.current();

    // Advance the rq clock.
    rq.update_rq_clock();

    match current.sched_info().policy() {
        SchedPolicy::CFS => CompletelyFairScheduler::tick(rq, current, false),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::RT => todo!(),
        SchedPolicy::IDLE => IdleScheduler::tick(rq, current, false),
    }

    rq.calculate_global_load_tick();

    drop(guard);
    // TODO: load balancing
}

/// Perform a schedule.
///
/// # Panics
/// Panics if `preempt_count` is non-zero (scheduling while atomic).
#[inline]
pub fn schedule(sched_mod: SchedMode) {
    let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
    assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
    __schedule(sched_mod);
}

/// Perform a schedule without checking `preempt_count`.
///
/// Unlike `schedule`, suitable for contexts such as the timer interrupt.
/// Picks the next task and context-switches to it when it differs from the
/// current one.
pub fn __schedule(sched_mod: SchedMode) {
    let cpu = smp_get_processor_id().data() as usize;
    let rq = cpu_rq(cpu);

    let mut prev = rq.current();
    if let ProcessState::Exited(_) = prev.clone().sched_info().inner_lock_read_irqsave().state() {
        // Entered __schedule from exit: rq.current is stale, use the real
        // current pcb.
        prev = ProcessManager::current_pcb();
    }

    // TODO: hrtick_clear(rq);

    let (rq, _guard) = rq.self_lock();

    // Shift flags: promotes a pending RQCF_REQ_SKIP into RQCF_ACT_SKIP so
    // the next clock update is skipped exactly once.
    rq.clock_updata_flags = ClockUpdataFlag::from_bits_truncate(rq.clock_updata_flags.bits() << 1);

    rq.update_rq_clock();
    rq.clock_updata_flags = ClockUpdataFlag::RQCF_UPDATE;

    // (debug dumps of the CFS entity tree removed)

    // A voluntary (non-preempt) switch of a sleeping non-idle task takes it
    // off the runqueue.
    if !sched_mod.contains(SchedMode::SM_MASK_PREEMPT)
        && prev.sched_info().policy() != SchedPolicy::IDLE
        && prev.sched_info().inner_lock_read_irqsave().is_mark_sleep()
    {
        // TODO: signal handling belongs here, cf.
        // https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c?r=&mo=172979&fi=6578#6630
        rq.deactivate_task(
            prev.clone(),
            DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
        );
    }

    let next = rq.pick_next_task(prev.clone());

    prev.flags().remove(ProcessFlags::NEED_SCHEDULE);
    fence(Ordering::SeqCst);
    if likely(!Arc::ptr_eq(&prev, &next)) {
        rq.set_current(Arc::downgrade(&next));

        compiler_fence(Ordering::SeqCst);

        // SAFETY: interrupts are disabled and the rq is locked; prev/next
        // are valid tasks of this CPU.
        unsafe { ProcessManager::switch_process(prev, next) };
    } else {
        // Picked the same task: current_pcb must still be prev.
        assert!(
            Arc::ptr_eq(&ProcessManager::current_pcb(), &prev),
            "{}",
            ProcessManager::current_pcb().basic().name()
        );
    }
}

/// Scheduler part of fork: the child inherits the parent's normal priority
/// and is assigned a scheduling policy accordingly.
///
/// # Errors
/// Returns `EAGAIN_OR_EWOULDBLOCK` for deadline priorities (unsupported).
pub fn sched_fork(pcb: &Arc<ProcessControlBlock>) -> Result<(), SystemError> {
    let mut prio_guard = pcb.sched_info().prio_data.write_irqsave();
    let current = ProcessManager::current_pcb();

    prio_guard.prio = current.sched_info().prio_data.read_irqsave().normal_prio;

    if PrioUtil::dl_prio(prio_guard.prio) {
        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
    } else if PrioUtil::rt_prio(prio_guard.prio) {
        let policy = &pcb.sched_info().sched_policy;
        *policy.write_irqsave() = SchedPolicy::RT;
    } else {
        let policy = &pcb.sched_info().sched_policy;
        *policy.write_irqsave() = SchedPolicy::CFS;
    }

    pcb.sched_info()
        .sched_entity()
        .force_mut()
        .init_entity_runnable_average();

    Ok(())
}

/// Cgroup part of fork: bind the child to the current CPU's runqueue and
/// run its class's fork hook.
pub fn sched_cgroup_fork(pcb: &Arc<ProcessControlBlock>) {
    __set_task_cpu(pcb, smp_get_processor_id());
    match pcb.sched_info().policy() {
        SchedPolicy::RT => todo!(),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::CFS => CompletelyFairScheduler::task_fork(pcb.clone()),
        SchedPolicy::IDLE => todo!(),
    }
}

/// Attach `pcb`'s scheduling entity to `cpu`'s CFS runqueue.
fn __set_task_cpu(pcb: &Arc<ProcessControlBlock>, cpu: ProcessorId) {
    // TODO: Fixme There is not implement group sched;
    let se = pcb.sched_info().sched_entity();
    let rq = cpu_rq(cpu.data() as usize);
    se.force_mut().set_cfs(Arc::downgrade(&rq.cfs));
}

/// One-time scheduler initialization: allocates the per-CPU IRQ-time slots
/// and the per-CPU runqueues. Must run before any `cpu_rq`/`cpu_irq_time`
/// call and before secondary CPUs start scheduling.
#[inline(never)]
pub fn sched_init() {
    // Initialize percpu variables.
    // SAFETY: runs once, single-threaded, before the statics are read.
    unsafe {
        CPU_IRQ_TIME = Some(Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize));
        CPU_IRQ_TIME
            .as_mut()
            .unwrap()
            .resize_with(PerCpu::MAX_CPU_NUM as usize, || Box::leak(Box::default()));

        let mut cpu_runqueue = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize);
        for cpu in 0..PerCpu::MAX_CPU_NUM as usize {
            let rq = Arc::new(CpuRunQueue::new(cpu));
            // Give the CFS rq a back-pointer to its owning rq.
            rq.cfs.force_mut().set_rq(Arc::downgrade(&rq));
            cpu_runqueue.push(rq);
        }

        CPU_RUNQUEUE.init(PerCpuVar::new(cpu_runqueue).unwrap());
    };
}

/// Send a "kick" IPI asking `cpu` to reschedule.
#[inline]
pub fn send_resched_ipi(cpu: ProcessorId) {
    send_ipi(IpiKind::KickCpu, IpiTarget::Specified(cpu));
}