use core::intrinsics::likely;
use core::intrinsics::unlikely;
use core::mem::swap;
use core::sync::atomic::fence;
use core::sync::atomic::{AtomicU64, Ordering};

use crate::libs::rbtree::RBTree;
use crate::libs::spinlock::SpinLock;
use crate::process::ProcessControlBlock;
use crate::process::ProcessFlags;
use crate::sched::clock::ClockUpdataFlag;
use crate::sched::{cpu_rq, SchedFeature, SCHED_FEATURES};
use crate::smp::core::smp_get_processor_id;
use crate::time::jiffies::TICK_NESC;
use crate::time::timer::clock;
use crate::time::NSEC_PER_MSEC;
use alloc::sync::{Arc, Weak};

use super::idle::IdleScheduler;
use super::pelt::{add_positive, sub_positive, SchedulerAvg, UpdateAvgFlags, PELT_MIN_DIVIDER};
use super::{
    CpuRunQueue, DequeueFlag, EnqueueFlag, LoadWeight, OnRq, SchedPolicy, Scheduler, TaskGroup,
    WakeupFlags, SCHED_CAPACITY_SHIFT,
};

/// Minimum preemption granularity for CPU-bound tasks, in nanoseconds.
/// Default is 0.75 ms. (The upstream kernel scales this by
/// 1 + log2(ncpus); no such scaling is visible in this file.)
static SYSCTL_SHCED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
/// Normalized minimum preemption granularity (kept for sysctl-style rescaling).
#[allow(dead_code)]
static NORMALIZED_SYSCTL_SCHED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);

/// Base request slice handed to an entity, in nanoseconds
/// (see `place_entity` / `update_deadline`).
static SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
/// Normalized base slice (kept for sysctl-style rescaling).
#[allow(dead_code)]
static NORMALIZED_SYSCTL_SHCED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);

/// Number of runnable tasks one scheduling period is expected to cover
/// before the period gets stretched (see `CfsRunQueue::sched_period`).
static SCHED_NR_LATENCY: AtomicU64 = AtomicU64::new(8);

/// A scheduling entity: the schedulable unit. It may represent a single
/// process, a process group, a user slice, etc.
#[derive(Debug)]
pub struct FairSchedEntity {
    /// Load weight of this entity.
    pub load: LoadWeight,
    /// Virtual deadline used by EEVDF entity selection.
    pub deadline: u64,
    /// Smallest deadline in the subtree rooted at this entity.
    pub min_deadline: u64,

    /// Whether (and how) the entity currently sits on a run queue.
    pub on_rq: OnRq,
    /// Timestamp when the entity last started executing.
    pub exec_start: u64,
    /// Total accumulated execution time.
    pub sum_exec_runtime: u64,
    /// Virtual runtime (real runtime scaled by weight).
    pub vruntime: u64,
    /// Scheduling lag: weight * (V - v_i), where V is the queue's current
    /// virtual time and v_i is this entity's vruntime.
    pub vlag: i64,
    /// Requested run-time slice.
    pub slice: u64,
    /// Snapshot of `sum_exec_runtime` taken when the entity was last picked.
    pub prev_sum_exec_runtime: u64,

    /// PELT load-tracking averages.
    pub avg: SchedulerAvg,

    /// Parent entity in the group hierarchy.
    parent: Weak<FairSchedEntity>,

    /// Depth in the group hierarchy.
    pub depth: u32,

    /// Weak self-reference so `&self` methods can hand out `Arc`s.
    self_ref: Weak<FairSchedEntity>,

    /// CFS run queue this entity is queued on.
    cfs_rq: Weak<CfsRunQueue>,

    /// Private CFS queue owned by this entity when it represents a group.
    my_cfs_rq: Option<Arc<CfsRunQueue>>,

    /// Runnable weight; for groups this mirrors the child queue's
    /// `h_nr_running` (see `update_runnable`).
    runnable_weight: u64,

    /// Owning process control block.
    pcb: Weak<ProcessControlBlock>,
}

impl FairSchedEntity {
    /// Allocate a new entity with default statistics and wire up the
    /// `self_ref` back-pointer.
    pub fn new() -> Arc<Self> {
        let ret = Arc::new(Self {
            parent: Weak::new(),
            self_ref: Weak::new(),
            pcb: Weak::new(),
            cfs_rq: Weak::new(),
            my_cfs_rq: None,
            on_rq: OnRq::None,
            slice: SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst),
            load: Default::default(),
            deadline: Default::default(),
            min_deadline: Default::default(),
            exec_start: Default::default(),
            sum_exec_runtime: Default::default(),
            vruntime: Default::default(),
            vlag: Default::default(),
            prev_sum_exec_runtime: Default::default(),
            avg: Default::default(),
            depth: Default::default(),
            runnable_weight: Default::default(),
        });

        ret.force_mut().self_ref = Arc::downgrade(&ret);

        ret
    }
}

impl FairSchedEntity {
    /// Upgrade the weak self-reference; panics if the entity is being dropped.
    pub fn self_arc(&self) -> Arc<FairSchedEntity> {
        self.self_ref.upgrade().unwrap()
    }

    /// Whether the entity is on any run queue.
    #[inline]
    pub fn on_rq(&self) -> bool {
        self.on_rq != OnRq::None
    }

    /// Owning PCB; panics if the process is already gone.
    pub fn pcb(&self) -> Arc<ProcessControlBlock> {
        self.pcb.upgrade().unwrap()
    }

    pub fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.pcb = pcb
    }

    /// CFS queue this entity belongs to; panics if it is gone.
    #[inline]
    pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
        self.cfs_rq.upgrade().unwrap()
    }

    pub fn set_cfs(&mut self, cfs: Weak<CfsRunQueue>) {
        self.cfs_rq = cfs;
    }

    pub fn parent(&self) -> Option<Arc<FairSchedEntity>> {
        self.parent.upgrade()
    }

    /// NOTE(review): manufactures `&mut Self` from `&self`, creating aliased
    /// mutable references — undefined behavior by Rust's rules. Soundness
    /// presumably relies on callers holding the run-queue lock; confirm the
    /// locking protocol.
    #[allow(clippy::mut_from_ref)]
    pub fn force_mut(&self) -> &mut Self {
        unsafe { &mut *(self as *const Self as usize as *mut Self) }
    }

    /// Whether this entity represents a task (as opposed to a group).
    #[inline]
    pub fn is_task(&self) -> bool {
        // TODO: scheduling groups are not implemented yet, so every entity
        // is currently a task.
        true
    }

    /// Whether this entity runs under the IDLE policy (or, for a group,
    /// whether its queue is idle).
    #[inline]
    pub fn is_idle(&self) -> bool {
        if self.is_task() {
            return self.pcb().sched_info().policy() == SchedPolicy::IDLE;
        }

        return self.cfs_rq().is_idle();
    }

    /// Clear this entity from the `next` buddy slot of every queue on its
    /// path toward the root, stopping at the first queue whose buddy is not
    /// this entity.
    pub fn clear_buddies(&self) {
        let mut se = self.self_arc();

        Self::for_each_in_group(&mut se, |se| {
            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();

            if let Some(next) = cfs_rq.next.upgrade() {
                if !Arc::ptr_eq(&next, &se) {
                    return (false, true);
                }
            }
            cfs_rq.next = Weak::new();
            return (true, true);
        });
    }

    /// Convert a real-time delta into this entity's weighted virtual-time
    /// delta; entities at nice-0 weight pass through unscaled.
    ///
    /// NOTE(review): the fast path compares `load.weight` against
    /// `NICE_0_LOAD_SHIFT` (which by its name is a shift count) rather than
    /// the nice-0 load value itself — confirm against `LoadWeight`'s
    /// constants.
    pub fn calculate_delta_fair(&self, delta: u64) -> u64 {
        if unlikely(self.load.weight != LoadWeight::NICE_0_LOAD_SHIFT as u64) {
            return self
                .force_mut()
                .load
                .calculate_delta(delta, LoadWeight::NICE_0_LOAD_SHIFT as u64);
        };

        delta
    }

    /// Re-weight a group entity when the group's share differs from the
    /// entity's current weight.
    pub fn update_cfs_group(&self) {
        if self.my_cfs_rq.is_none() {
            return;
        }

        let group_cfs = self.my_cfs_rq.clone().unwrap();

        let shares = group_cfs.task_group().shares;

        if unlikely(self.load.weight != shares) {
            // TODO: reweight
            self.cfs_rq()
                .force_mut()
                .reweight_entity(self.self_arc(), shares);
        }
    }

    /// Walk from `se` up the group hierarchy toward the root, applying `f`
    /// to each entity; `se` is advanced to the topmost parent visited.
    ///
    /// Parameters:
    /// - `se`: starting entity (mutated to point at the last visited one)
    /// - `f`: closure returning `(no_break, should_continue)` —
    ///   `no_break == false` stops the walk; `should_continue == false`
    ///   tells the *caller* of this function to return early.
    ///
    /// Returns:
    /// - `bool`: whether the caller may continue
    /// - `Option<Arc<FairSchedEntity>>`: the entity the walk ended on
    pub fn for_each_in_group(
        se: &mut Arc<FairSchedEntity>,
        mut f: impl FnMut(Arc<FairSchedEntity>) -> (bool, bool),
    ) -> (bool, Option<Arc<FairSchedEntity>>) {
        let mut should_continue;
        let ret;
        // Iterate up to the root: e.g. if task B belongs to group A, B's
        // time allocation depends on both A's weight and B's own weight.
        loop {
            let (no_break, flag) = f(se.clone());
            should_continue = flag;
            if !no_break || !should_continue {
                ret = Some(se.clone());
                break;
            }

            let parent = se.parent();
            if parent.is_none() {
                ret = None;
                break;
            }

            *se = parent.unwrap();
        }

        (should_continue, ret)
    }

    /// Runnable contribution: the on-queue state for a task, the cached
    /// `runnable_weight` for a group.
    pub fn runnable(&self) -> u64 {
        if self.is_task() {
            return self.on_rq as u64;
        } else {
            self.runnable_weight
        }
    }

    /// Propagate a group child queue's pending load-average changes into the
    /// parent queue. Returns `true` if anything was propagated.
    pub fn propagate_entity_load_avg(&mut self) -> bool {
        if self.is_task() {
            return false;
        }

        let binding = self.my_cfs_rq.clone().unwrap();
        let gcfs_rq = binding.force_mut();

        if gcfs_rq.propagate == 0 {
            return false;
        }

        gcfs_rq.propagate = 0;

        let binding = self.cfs_rq();
        let cfs_rq = binding.force_mut();

        cfs_rq.add_task_group_propagate(gcfs_rq.prop_runnable_sum);

        cfs_rq.update_task_group_util(self.self_arc(), gcfs_rq);
        cfs_rq.update_task_group_runnable(self.self_arc(), gcfs_rq);
        cfs_rq.update_task_group_load(self.self_arc(), gcfs_rq);

        return true;
    }

    /// Refresh `runnable_weight` from the owned child queue (groups only).
    pub fn update_runnable(&mut self) {
        if !self.is_task() {
            self.runnable_weight = self.my_cfs_rq.clone().unwrap().h_nr_running;
        }
    }

    /// Seed the PELT averages for a freshly created entity.
    pub fn init_entity_runnable_average(&mut self) {
        self.avg = SchedulerAvg::default();

        if self.is_task() {
            self.avg.load_avg = LoadWeight::scale_load_down(self.load.weight) as usize;
        }
    }
}

/// The CFS run queue. Must be per-CPU.
#[allow(dead_code)]
#[derive(Debug)]
pub struct CfsRunQueue {
    load: LoadWeight,

    /// Number of entities directly on this queue (used for load balancing).
    nr_running: u64,
    /// Hierarchical task count for this CPU (tasks here and in child queues).
    pub h_nr_running: u64,
    /// Accumulated execution clock.
    exec_clock: u64,
    /// Monotonic floor of the queue's virtual time.
    min_vruntime: u64,
    /// Remaining bandwidth budget (see `account_cfs_rq_runtime`).
    runtime_remaining: u64,

    /// Red-black tree of queued entities, keyed by vruntime.
    pub(super) entities: RBTree<u64, Arc<FairSchedEntity>>,

    /// IDLE
idle: usize, 324 325 idle_nr_running: u64, 326 327 pub idle_h_nr_running: u64, 328 329 /// 当前运行的调度实体 330 current: Weak<FairSchedEntity>, 331 /// 下一个调度的实体 332 next: Weak<FairSchedEntity>, 333 /// 最后的调度实体 334 last: Weak<FairSchedEntity>, 335 /// 跳过运行的调度实体 336 skip: Weak<FairSchedEntity>, 337 338 avg_load: i64, 339 avg_vruntime: i64, 340 341 last_update_time_copy: u64, 342 343 pub avg: SchedulerAvg, 344 345 rq: Weak<CpuRunQueue>, 346 /// 拥有此队列的taskgroup 347 task_group: Weak<TaskGroup>, 348 349 pub throttled_clock: u64, 350 pub throttled_clock_pelt: u64, 351 pub throttled_clock_pelt_time: u64, 352 pub throttled_pelt_idle: u64, 353 354 pub throttled: bool, 355 pub throttled_count: u64, 356 357 pub removed: SpinLock<CfsRemoved>, 358 359 pub propagate: isize, 360 pub prop_runnable_sum: isize, 361 } 362 363 #[derive(Debug, Default)] 364 pub struct CfsRemoved { 365 pub nr: u32, 366 pub load_avg: usize, 367 pub util_avg: usize, 368 pub runnable_avg: usize, 369 } 370 371 impl CfsRunQueue { 372 pub fn new() -> Self { 373 Self { 374 load: LoadWeight::default(), 375 nr_running: 0, 376 h_nr_running: 0, 377 exec_clock: 0, 378 min_vruntime: 1 << 20, 379 entities: RBTree::new(), 380 idle: 0, 381 idle_nr_running: 0, 382 idle_h_nr_running: 0, 383 current: Weak::new(), 384 next: Weak::new(), 385 last: Weak::new(), 386 skip: Weak::new(), 387 avg_load: 0, 388 avg_vruntime: 0, 389 last_update_time_copy: 0, 390 avg: SchedulerAvg::default(), 391 rq: Weak::new(), 392 task_group: Weak::new(), 393 throttled_clock: 0, 394 throttled_clock_pelt: 0, 395 throttled_clock_pelt_time: 0, 396 throttled_pelt_idle: 0, 397 throttled: false, 398 throttled_count: 0, 399 removed: SpinLock::new(CfsRemoved::default()), 400 propagate: 0, 401 prop_runnable_sum: 0, 402 runtime_remaining: 0, 403 } 404 } 405 406 #[inline] 407 pub fn rq(&self) -> Arc<CpuRunQueue> { 408 self.rq.upgrade().unwrap() 409 } 410 411 #[inline] 412 pub fn set_rq(&mut self, rq: Weak<CpuRunQueue>) { 413 self.rq = rq; 414 } 415 416 #[inline] 417 
#[allow(clippy::mut_from_ref)] 418 pub fn force_mut(&self) -> &mut Self { 419 unsafe { &mut *(self as *const Self as usize as *mut Self) } 420 } 421 422 #[inline] 423 pub fn is_idle(&self) -> bool { 424 self.idle > 0 425 } 426 427 #[inline] 428 pub fn current(&self) -> Option<Arc<FairSchedEntity>> { 429 self.current.upgrade() 430 } 431 432 #[inline] 433 pub fn set_current(&mut self, curr: Weak<FairSchedEntity>) { 434 self.current = curr 435 } 436 437 #[inline] 438 pub fn next(&self) -> Option<Arc<FairSchedEntity>> { 439 self.next.upgrade() 440 } 441 442 pub fn task_group(&self) -> Arc<TaskGroup> { 443 self.task_group.upgrade().unwrap() 444 } 445 446 #[allow(dead_code)] 447 #[inline] 448 pub const fn bandwidth_used() -> bool { 449 false 450 } 451 452 /// ## 计算调度周期,基本思想是在一个周期内让每个任务都至少运行一次。 453 /// 这样可以确保所有的任务都能够得到执行,而且可以避免某些任务被长时间地阻塞。 454 pub fn sched_period(nr_running: u64) -> u64 { 455 if unlikely(nr_running > SCHED_NR_LATENCY.load(Ordering::SeqCst)) { 456 // 如果当前活跃的任务数量超过了预设的调度延迟任务数量 457 // 调度周期的长度将直接设置为活跃任务数量乘以最小抢占粒度 458 return nr_running * SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst); 459 } else { 460 // 如果活跃任务数量未超过预设的延迟任务数量,那么调度周期的长度将设置为SCHED_NR_LATENCY 461 return SCHED_NR_LATENCY.load(Ordering::SeqCst); 462 } 463 } 464 465 /// ## 计算调度任务的虚拟运行时间片大小 466 /// 467 /// vruntime = runtime / weight 468 #[allow(dead_code)] 469 pub fn sched_vslice(&self, entity: Arc<FairSchedEntity>) -> u64 { 470 let slice = self.sched_slice(entity.clone()); 471 return entity.calculate_delta_fair(slice); 472 } 473 474 /// ## 计算调度任务的实际运行时间片大小 475 #[allow(dead_code)] 476 pub fn sched_slice(&self, mut entity: Arc<FairSchedEntity>) -> u64 { 477 let mut nr_running = self.nr_running; 478 if SCHED_FEATURES.contains(SchedFeature::ALT_PERIOD) { 479 nr_running = self.h_nr_running; 480 } 481 482 // 计算一个调度周期的整个slice 483 let mut slice = Self::sched_period(nr_running + (!entity.on_rq()) as u64); 484 485 // 这一步是循环计算,直到根节点 486 // 比如有任务组 A ,有进程B,B属于A任务组,那么B的时间分配依赖于A组的权重以及B进程自己的权重 487 
        FairSchedEntity::for_each_in_group(&mut entity, |se| {
            if unlikely(!se.on_rq()) {
                se.cfs_rq().force_mut().load.update_load_add(se.load.weight);
            }
            slice = se
                .cfs_rq()
                .force_mut()
                .load
                .calculate_delta(slice, se.load.weight);

            (true, true)
        });

        if SCHED_FEATURES.contains(SchedFeature::BASE_SLICE) {
            // TODO: IDLE?
            // Never hand out less than the minimum granularity.
            let min_gran = SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst);

            slice = min_gran.max(slice)
        }

        slice
    }

    /// ## On a tick, check whether the current task exhausted its ideal
    /// slice and must be preempted; buddy hints are cleared so it is not
    /// immediately re-elected by buddy favouring.
    #[allow(dead_code)]
    pub fn check_preempt_tick(&mut self, curr: Arc<FairSchedEntity>) {
        // Ideal runtime for this entity within the current period.
        let ideal_runtime = self.sched_slice(curr.clone());

        let delta_exec = curr.sum_exec_runtime - curr.prev_sum_exec_runtime;

        if delta_exec > ideal_runtime {
            // Ran longer than its ideal share: force a reschedule.
            self.rq().resched_current();

            self.clear_buddies(&curr);
            return;
        }

        // Below the minimum granularity nothing gets preempted.
        if delta_exec < SYSCTL_SHCED_MIN_GRANULARITY.load(Ordering::SeqCst) {
            return;
        }

        todo!()
    }

    /// Drop buddy hints that point at `se`.
    pub fn clear_buddies(&mut self, se: &Arc<FairSchedEntity>) {
        if let Some(next) = self.next.upgrade() {
            if Arc::ptr_eq(&next, se) {
                se.clear_buddies();
            }
        }
    }

    /// Per-tick handling for the running entity.
    pub fn entity_tick(&mut self, curr: Arc<FairSchedEntity>, queued: bool) {
        // Refresh the current entity's runtime statistics.
        self.update_current();

        self.update_load_avg(&curr, UpdateAvgFlags::UPDATE_TG);

        // Group-scheduling bookkeeping.
        curr.update_cfs_group();

        if queued {
            self.rq().resched_current();
            return;
        }
    }

    /// Account the running entity's elapsed time: advance
    /// `sum_exec_runtime` and the weighted `vruntime`, refresh the deadline,
    /// the queue's `min_vruntime`, and the queue's bandwidth budget.
    pub fn update_current(&mut self) {
        let curr = self.current();
        if unlikely(curr.is_none()) {
            return;
        }

        let now = self.rq().clock_task();
        let curr = curr.unwrap();

        fence(Ordering::SeqCst);
        if unlikely(now <= curr.exec_start) {
            // The clock has not advanced past the last sample; nothing to
            // account yet.
            return;
        }

        fence(Ordering::SeqCst);
        let delta_exec = now - curr.exec_start;

        let curr = curr.force_mut();

        curr.exec_start = now;

        curr.sum_exec_runtime += delta_exec;

        // Weighted conversion of the real delta into virtual runtime.
        curr.vruntime += curr.calculate_delta_fair(delta_exec);
        fence(Ordering::SeqCst);
        self.update_deadline(&curr.self_arc());
        self.update_min_vruntime();

        self.account_cfs_rq_runtime(delta_exec);
    }

    /// Charge `delta_exec` against this queue's remaining runtime budget.
    fn account_cfs_rq_runtime(&mut self, delta_exec: u64) {
        if likely(self.runtime_remaining > delta_exec) {
            self.runtime_remaining -= delta_exec;
            return;
        }

        // FIXME: real bandwidth control is not implemented; just refill a
        // fixed 5000 ms budget for now.
        self.runtime_remaining = 5000 * NSEC_PER_MSEC as u64;

        if likely(self.current().is_some()) && self.nr_running > 1 {
            self.rq().resched_current();
        }
    }

    /// Recompute the entity's virtual deadline; when the old deadline has
    /// been reached, grant a new slice and trigger a reschedule.
    pub fn update_deadline(&mut self, se: &Arc<FairSchedEntity>) {
        if se.vruntime < se.deadline {
            return;
        }

        se.force_mut().slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst);

        // New deadline = vruntime + weighted slice.
        se.force_mut().deadline = se.vruntime + se.calculate_delta_fair(se.slice);

        if self.nr_running > 1 {
            self.rq().resched_current();
            self.clear_buddies(se);
        }
    }

    /// ## Advance the queue's monotonic `min_vruntime` floor.
    pub fn update_min_vruntime(&mut self) {
        let curr = self.current();

        let mut vruntime = self.min_vruntime;

        if curr.is_some() {
            let curr = curr.as_ref().unwrap();
            if curr.on_rq() {
                vruntime = curr.vruntime;
            } else {
                // The current entity is no longer queued: forget it.
                //
                // NOTE(review): the local `curr` Option stays `Some` after
                // this, so the branch below still takes the min() path
                // instead of adopting the leftmost vruntime outright —
                // confirm this is intended.
                self.set_current(Weak::default());
            }
        }

        // The leftmost tree node has the smallest vruntime.
        let leftmost = self.entities.get_first();
        if let Some(leftmost) = leftmost {
            let se = leftmost.1;

            if curr.is_none() {
                vruntime = se.vruntime;
            } else {
                vruntime = vruntime.min(se.vruntime);
            }
        }

        self.min_vruntime = self.__update_min_vruntime(vruntime);
    }

    /// Move `min_vruntime` forward (never backward), keeping the relative
    /// `avg_vruntime` key sum consistent with the new zero point.
    fn __update_min_vruntime(&mut self, vruntime: u64) -> u64 {
        let mut min_vruntime = self.min_vruntime;

        let delta = vruntime as i64 - min_vruntime as i64;
        if delta > 0 {
            self.avg_vruntime -= self.avg_load * delta;
            min_vruntime = vruntime;
        }

        return min_vruntime;
    }

    /// Whether `se` is this queue's current entity.
    pub fn is_curr(&self, se: &Arc<FairSchedEntity>) -> bool {
        if self.current().is_none() {
            false
        } else {
            // Pointer identity against the current entity.
            Arc::ptr_eq(se, self.current().as_ref().unwrap())
        }
    }

    /// Change `se`'s weight to `weight`, keeping its tree position, lag and
    /// load-average contribution consistent.
    pub fn reweight_entity(&mut self, se: Arc<FairSchedEntity>, weight: u64) {
        // Is `se` the running entity?
        let is_curr = self.is_curr(&se);

        // If the entity is on the queue...
        if se.on_rq() {
            if is_curr {
                // ...account its runtime under the old weight first.
                self.update_current();
            } else {
                // ...otherwise take it off the tree while re-keying it.
                self.inner_dequeue_entity(&se);
            }

            // Remove the old weight from the queue totals.
            self.load.update_load_sub(se.load.weight);
        }

        self.dequeue_load_avg(&se);

        if !se.on_rq() {
            // Off queue: rescale the stored lag to the new weight.
            se.force_mut().vlag = se.vlag * se.load.weight as i64 / weight as i64;
        } else {
            self.reweight_eevdf(&se, weight);
        }
        se.force_mut().load.update_load_set(weight);

        // SMP: rebuild load_avg from load_sum under the new weight.
        let divider = se.avg.get_pelt_divider();
        se.force_mut().avg.load_avg = LoadWeight::scale_load_down(se.load.weight) as usize
            * se.avg.load_sum as usize
            / divider;

        self.enqueue_load_avg(se.clone());

        if se.on_rq() {
            self.load.update_load_add(se.load.weight);
            if !is_curr {
                self.inner_enqueue_entity(&se);
            }

            self.update_min_vruntime();
        }
    }

    /// Recompute a queued entity's vruntime and deadline under a new weight
    /// so that its lag relative to the queue average is preserved (EEVDF).
    fn reweight_eevdf(&mut self, se: &Arc<FairSchedEntity>, weight: u64) {
        let old_weight = se.load.weight;
        let avg_vruntime = self.avg_vruntime();
        let mut vlag;
        if avg_vruntime != se.vruntime {
            // lag' = lag * old_weight / new_weight
            vlag = avg_vruntime as i64 - se.vruntime as i64;
            vlag = vlag * old_weight as i64 / weight as i64;
            se.force_mut().vruntime = (avg_vruntime as i64 - vlag) as u64;
        }

        // Rescale the remaining virtual slice the same way.
        let mut vslice = se.deadline as i64 - avg_vruntime as i64;
        vslice = vslice * old_weight as i64 / weight as i64;
        se.force_mut().deadline = avg_vruntime + vslice as u64;
    }

    /// Average virtual time `V` of the queue, including the running entity.
    fn avg_vruntime(&self) -> u64 {
        let curr = self.current();
        let mut avg = self.avg_vruntime;
        let mut load = self.avg_load;

        if let Some(curr) = curr {
            if curr.on_rq() {
                let weight = LoadWeight::scale_load_down(curr.load.weight);
                avg += self.entity_key(&curr) * weight as i64;
                load += weight as i64;
            }
        }

        if load > 0 {
            if avg < 0 {
                // Sign-aware rounding before the integer divide.
                avg -= load - 1;
            }

            avg /= load;
        }

        return self.min_vruntime + avg as u64;
    }

    /// Entity key relative to the queue's `min_vruntime` zero point.
    #[inline]
    pub fn entity_key(&self, se: &Arc<FairSchedEntity>) -> i64 {
        return se.vruntime as i64 - self.min_vruntime as i64;
    }

    /// Fold `se`'s weighted key into the queue's running aggregates.
    pub fn avg_vruntime_add(&mut self, se: &Arc<FairSchedEntity>) {
        let weight = LoadWeight::scale_load_down(se.load.weight);

        let key = self.entity_key(se);

        let avg_vruntime = self.avg_vruntime + key * weight as i64;

        self.avg_vruntime = avg_vruntime;
        self.avg_load += weight as i64;
    }

    /// Remove `se`'s weighted key from the queue's running aggregates.
    pub fn avg_vruntime_sub(&mut self, se: &Arc<FairSchedEntity>) {
        let weight = LoadWeight::scale_load_down(se.load.weight);

        let key = self.entity_key(se);

        let avg_vruntime = self.avg_vruntime - key * weight as i64;

        self.avg_vruntime = avg_vruntime;
        self.avg_load -= weight as i64;
    }

    /// Compute the initial vruntime/deadline for an entity being placed on
    /// the queue.
    fn place_entity(&mut self, se:
Arc<FairSchedEntity>, flags: EnqueueFlag) { 801 let vruntime = self.avg_vruntime(); 802 let mut lag = 0; 803 804 let se = se.force_mut(); 805 se.slice = SYSCTL_SHCED_BASE_SLICE.load(Ordering::SeqCst); 806 807 let mut vslice = se.calculate_delta_fair(se.slice); 808 809 if self.nr_running > 0 { 810 let curr = self.current(); 811 812 lag = se.vlag; 813 814 let mut load = self.avg_load; 815 816 if let Some(curr) = curr { 817 if curr.on_rq() { 818 load += LoadWeight::scale_load_down(curr.load.weight) as i64; 819 } 820 } 821 822 lag *= load + LoadWeight::scale_load_down(se.load.weight) as i64; 823 824 if load == 0 { 825 load = 1; 826 } 827 828 lag /= load; 829 } 830 831 se.vruntime = vruntime - lag as u64; 832 833 if flags.contains(EnqueueFlag::ENQUEUE_INITIAL) { 834 vslice /= 2; 835 } 836 837 se.deadline = se.vruntime + vslice; 838 } 839 840 /// 更新负载均值 841 fn update_load_avg(&mut self, se: &Arc<FairSchedEntity>, flags: UpdateAvgFlags) { 842 let now = self.cfs_rq_clock_pelt(); 843 844 if se.avg.last_update_time > 0 && !flags.contains(UpdateAvgFlags::SKIP_AGE_LOAD) { 845 se.force_mut().update_load_avg(self, now); 846 } 847 848 let mut decayed = self.update_self_load_avg(now); 849 decayed |= se.force_mut().propagate_entity_load_avg() as u32; 850 851 if se.avg.last_update_time > 0 && flags.contains(UpdateAvgFlags::DO_ATTACH) { 852 todo!() 853 } else if flags.contains(UpdateAvgFlags::DO_ATTACH) { 854 self.detach_entity_load_avg(se); 855 } else if decayed > 0 { 856 // cfs_rq_util_change 857 858 todo!() 859 } 860 } 861 862 /// 将实体的负载均值与对应cfs分离 863 fn detach_entity_load_avg(&mut self, se: &Arc<FairSchedEntity>) { 864 self.dequeue_load_avg(se); 865 866 sub_positive(&mut self.avg.util_avg, se.avg.util_avg); 867 sub_positive(&mut (self.avg.util_sum as usize), se.avg.util_sum as usize); 868 self.avg.util_sum = self 869 .avg 870 .util_sum 871 .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64); 872 873 sub_positive(&mut self.avg.runnable_avg, se.avg.runnable_avg); 874 sub_positive( 
875 &mut (self.avg.runnable_sum as usize), 876 se.avg.runnable_sum as usize, 877 ); 878 self.avg.runnable_sum = self 879 .avg 880 .runnable_sum 881 .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64); 882 883 self.propagate = 1; 884 self.prop_runnable_sum += se.avg.load_sum as isize; 885 } 886 887 fn update_self_load_avg(&mut self, now: u64) -> u32 { 888 let mut removed_load = 0; 889 let mut removed_util = 0; 890 let mut removed_runnable = 0; 891 892 let mut decayed = 0; 893 894 if self.removed.lock().nr > 0 { 895 let mut removed_guard = self.removed.lock(); 896 let divider = self.avg.get_pelt_divider(); 897 898 swap::<usize>(&mut removed_guard.util_avg, &mut removed_util); 899 swap::<usize>(&mut removed_guard.load_avg, &mut removed_load); 900 swap::<usize>(&mut removed_guard.runnable_avg, &mut removed_runnable); 901 902 removed_guard.nr = 0; 903 904 let mut r = removed_load; 905 906 sub_positive(&mut self.avg.load_avg, r); 907 sub_positive(&mut (self.avg.load_sum as usize), r * divider); 908 909 self.avg.load_sum = self 910 .avg 911 .load_sum 912 .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64); 913 914 r = removed_util; 915 sub_positive(&mut self.avg.util_avg, r); 916 sub_positive(&mut (self.avg.util_sum as usize), r * divider); 917 self.avg.util_sum = self 918 .avg 919 .util_sum 920 .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64); 921 922 r = removed_runnable; 923 sub_positive(&mut self.avg.runnable_avg, r); 924 sub_positive(&mut (self.avg.runnable_sum as usize), r * divider); 925 self.avg.runnable_sum = self 926 .avg 927 .runnable_sum 928 .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64); 929 930 drop(removed_guard); 931 self.add_task_group_propagate( 932 -(removed_runnable as isize * divider as isize) >> SCHED_CAPACITY_SHIFT, 933 ); 934 935 decayed = 1; 936 } 937 938 decayed |= self.__update_load_avg(now) as u32; 939 940 self.last_update_time_copy = self.avg.last_update_time; 941 942 return decayed; 943 } 944 945 fn __update_load_avg(&mut 
        self, now: u64) -> bool {
        if self.avg.update_load_sum(
            now,
            LoadWeight::scale_load_down(self.load.weight) as u32,
            self.h_nr_running as u32,
            self.current().is_some() as u32,
        ) {
            self.avg.update_load_avg(1);
            return true;
        }

        return false;
    }

    /// Mark that `runnable_sum` must be propagated up to the parent queue.
    fn add_task_group_propagate(&mut self, runnable_sum: isize) {
        self.propagate = 1;
        self.prop_runnable_sum += runnable_sum;
    }

    /// Put `se` on this queue: place it (vruntime/deadline), attach its
    /// load, update counters, and insert it into the tree.
    pub fn enqueue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: EnqueueFlag) {
        let is_curr = self.is_curr(se);

        // The running entity is placed before its runtime is accounted...
        if is_curr {
            self.place_entity(se.clone(), flags);
        }

        self.update_current();

        self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG | UpdateAvgFlags::DO_ATTACH);

        se.force_mut().update_runnable();

        se.update_cfs_group();

        // ...everything else is placed after the clock was advanced.
        if !is_curr {
            self.place_entity(se.clone(), flags);
        }

        self.account_entity_enqueue(se);

        if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
            // Migrated task: restart its exec clock on this CPU.
            se.force_mut().exec_start = 0;
        }

        if !is_curr {
            self.inner_enqueue_entity(se);
        }

        se.force_mut().on_rq = OnRq::Queued;

        if self.nr_running == 1 {
            // The queue just became non-empty (only the entity added above).
            // TODO: throttle
        }
    }

    /// Take `se` off this queue, updating lag and load accounting on the
    /// way out.
    pub fn dequeue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: DequeueFlag) {
        let mut action = UpdateAvgFlags::UPDATE_TG;

        // A task leaving for another CPU takes its load with it.
        if se.is_task() && se.on_rq == OnRq::Migrating {
            action |= UpdateAvgFlags::DO_DETACH;
        }

        self.update_current();

        self.update_load_avg(se, action);

        se.force_mut().update_runnable();

        self.clear_buddies(se);

        self.update_entity_lag(se);

        // The running entity is not in the tree; only dequeue others.
        if let Some(curr) = self.current() {
            if !Arc::ptr_eq(&curr, se) {
                self.inner_dequeue_entity(se);
            }
        } else {
            self.inner_dequeue_entity(se);
        }

        se.force_mut().on_rq = OnRq::None;

        self.account_entity_dequeue(se);

        // return_cfs_rq_runtime

        se.update_cfs_group();

        // Skip min_vruntime maintenance only for save-without-move dequeues.
        if flags & (DequeueFlag::DEQUEUE_SAVE | DequeueFlag::DEQUEUE_MOVE)
            != DequeueFlag::DEQUEUE_SAVE
        {
            self.update_min_vruntime();
        }

        if self.nr_running == 0 {
            self.update_idle_clock_pelt()
        }
    }

    /// Return the previously running entity to the queue.
    pub fn put_prev_entity(&mut self, prev: Arc<FairSchedEntity>) {
        if prev.on_rq() {
            // Account its final stretch of runtime first.
            self.update_current();
        }

        if prev.on_rq() {
            self.inner_enqueue_entity(&prev);
        }

        self.set_current(Weak::default());
    }

    /// Make `se` the running entity: pull it off the tree and snapshot its
    /// runtime so slice accounting starts fresh.
    pub fn set_next_entity(&mut self, se: &Arc<FairSchedEntity>) {
        self.clear_buddies(se);

        if se.on_rq() {
            self.inner_dequeue_entity(se);
            self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG);
            // Stash the deadline in vlag while the entity is off the tree
            // (vlag is not used as lag until the entity is re-placed).
            se.force_mut().vlag = se.deadline as i64;
        }

        self.set_current(Arc::downgrade(se));

        se.force_mut().prev_sum_exec_runtime = se.sum_exec_runtime;
    }

    /// Record the PELT throttle clock when the queue goes idle.
    fn update_idle_clock_pelt(&mut self) {
        let throttled = if unlikely(self.throttled_count > 0) {
            u64::MAX
        } else {
            self.throttled_clock_pelt_time
        };

        self.throttled_clock_pelt = throttled;
    }

    /// Compute `se`'s lag against the queue average, clamped to roughly two
    /// slices (at least one tick) of weighted time.
    fn update_entity_lag(&mut self, se: &Arc<FairSchedEntity>) {
        let lag = self.avg_vruntime() as i64 - se.vruntime as i64;

        let limit = se.calculate_delta_fair((TICK_NESC as u64).max(2 * se.slice)) as i64;

        se.force_mut().vlag = if lag < -limit {
            -limit
        } else if lag > limit {
            limit
        } else {
            lag
        }
    }

    /// Bookkeeping for an entity entering the queue: weights, counters and
    /// the per-CPU task list.
    fn account_entity_enqueue(&mut self, se: &Arc<FairSchedEntity>) {
        self.load.update_load_add(se.load.weight);

        if se.is_task() {
            let rq = self.rq();
            let (rq, _guard) = rq.self_lock();
            // TODO: numa
            rq.cfs_tasks.push_back(se.clone());
        }
        self.nr_running += 1;
        if se.is_idle() {
            self.idle_nr_running += 1;
        }
    }

    /// Bookkeeping for an entity leaving the queue: weights, counters and
    /// the per-CPU task list.
    fn
account_entity_dequeue(&mut self, se: &Arc<FairSchedEntity>) { 1114 self.load.update_load_sub(se.load.weight); 1115 1116 if se.is_task() { 1117 let rq = self.rq(); 1118 let (rq, _guard) = rq.self_lock(); 1119 1120 // TODO:numa 1121 let _ = rq.cfs_tasks.extract_if(|x| Arc::ptr_eq(x, se)); 1122 } 1123 1124 self.nr_running -= 1; 1125 if se.is_idle() { 1126 self.idle_nr_running -= 1; 1127 } 1128 } 1129 1130 pub fn inner_enqueue_entity(&mut self, se: &Arc<FairSchedEntity>) { 1131 self.avg_vruntime_add(se); 1132 se.force_mut().min_deadline = se.deadline; 1133 self.entities.insert(se.vruntime, se.clone()); 1134 // kwarn!( 1135 // "enqueue pcb {:?} cfsrq {:?}", 1136 // se.pcb().pid(), 1137 // self.entities 1138 // .iter() 1139 // .map(|x| (x.0, x.1.pcb().pid())) 1140 // .collect::<Vec<_>>() 1141 // ); 1142 // send_to_default_serial8250_port( 1143 // format!( 1144 // "enqueue pcb {:?} cfsrq {:?}\n", 1145 // se.pcb().pid(), 1146 // self.entities 1147 // .iter() 1148 // .map(|x| (x.0, x.1.pcb().pid())) 1149 // .collect::<Vec<_>>() 1150 // ) 1151 // .as_bytes(), 1152 // ); 1153 } 1154 1155 fn inner_dequeue_entity(&mut self, se: &Arc<FairSchedEntity>) { 1156 // kwarn!( 1157 // "before dequeue pcb {:?} cfsrq {:?}", 1158 // se.pcb().pid(), 1159 // self.entities 1160 // .iter() 1161 // .map(|x| (x.0, x.1.pcb().pid())) 1162 // .collect::<Vec<_>>() 1163 // ); 1164 1165 // send_to_default_serial8250_port( 1166 // format!( 1167 // "before dequeue pcb {:?} cfsrq {:?}\n", 1168 // se.pcb().pid(), 1169 // self.entities 1170 // .iter() 1171 // .map(|x| (x.0, x.1.pcb().pid())) 1172 // .collect::<Vec<_>>() 1173 // ) 1174 // .as_bytes(), 1175 // ); 1176 1177 let mut i = 1; 1178 while let Some(rm) = self.entities.remove(&se.vruntime) { 1179 if Arc::ptr_eq(&rm, se) { 1180 break; 1181 } 1182 rm.force_mut().vruntime += i; 1183 self.entities.insert(rm.vruntime, rm); 1184 1185 i += 1; 1186 } 1187 // send_to_default_serial8250_port( 1188 // format!( 1189 // "after dequeue pcb {:?}(real: {:?}) cfsrq 
{:?}\n", 1190 // se.pcb().pid(), 1191 // remove.pcb().pid(), 1192 // self.entities 1193 // .iter() 1194 // .map(|x| (x.0, x.1.pcb().pid())) 1195 // .collect::<Vec<_>>() 1196 // ) 1197 // .as_bytes(), 1198 // ); 1199 // kwarn!( 1200 // "after dequeue pcb {:?}(real: {:?}) cfsrq {:?}", 1201 // se.pcb().pid(), 1202 // remove.pcb().pid(), 1203 // self.entities 1204 // .iter() 1205 // .map(|x| (x.0, x.1.pcb().pid())) 1206 // .collect::<Vec<_>>() 1207 // ); 1208 self.avg_vruntime_sub(se); 1209 } 1210 1211 pub fn enqueue_load_avg(&mut self, se: Arc<FairSchedEntity>) { 1212 self.avg.load_avg += se.avg.load_avg; 1213 self.avg.load_sum += LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum; 1214 } 1215 1216 pub fn dequeue_load_avg(&mut self, se: &Arc<FairSchedEntity>) { 1217 if self.avg.load_avg > se.avg.load_avg { 1218 self.avg.load_avg -= se.avg.load_avg; 1219 } else { 1220 self.avg.load_avg = 0; 1221 }; 1222 1223 let se_load = LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum; 1224 1225 if self.avg.load_sum > se_load { 1226 self.avg.load_sum -= se_load; 1227 } else { 1228 self.avg.load_sum = 0; 1229 } 1230 1231 self.avg.load_sum = self 1232 .avg 1233 .load_sum 1234 .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64) 1235 } 1236 1237 pub fn update_task_group_util(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) { 1238 let mut delta_sum = gcfs_rq.avg.load_avg as isize - se.avg.load_avg as isize; 1239 let delta_avg = delta_sum; 1240 1241 if delta_avg == 0 { 1242 return; 1243 } 1244 1245 let divider = self.avg.get_pelt_divider(); 1246 1247 let se = se.force_mut(); 1248 se.avg.util_avg = gcfs_rq.avg.util_avg; 1249 let new_sum = se.avg.util_avg * divider; 1250 delta_sum = new_sum as isize - se.avg.util_sum as isize; 1251 1252 se.avg.util_sum = new_sum as u64; 1253 1254 add_positive(&mut (self.avg.util_avg as isize), delta_avg); 1255 add_positive(&mut (self.avg.util_sum as isize), delta_sum); 1256 1257 self.avg.util_sum = self 1258 .avg 1259 
.util_sum 1260 .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64); 1261 } 1262 1263 pub fn update_task_group_runnable(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) { 1264 let mut delta_sum = gcfs_rq.avg.runnable_avg as isize - se.avg.runnable_avg as isize; 1265 let delta_avg = delta_sum; 1266 1267 if delta_avg == 0 { 1268 return; 1269 } 1270 1271 let divider = self.avg.get_pelt_divider(); 1272 1273 let se = se.force_mut(); 1274 se.avg.runnable_avg = gcfs_rq.avg.runnable_avg; 1275 let new_sum = se.avg.runnable_sum * divider as u64; 1276 delta_sum = new_sum as isize - se.avg.runnable_sum as isize; 1277 1278 se.avg.runnable_sum = new_sum; 1279 1280 add_positive(&mut (self.avg.runnable_avg as isize), delta_avg); 1281 add_positive(&mut (self.avg.runnable_sum as isize), delta_sum); 1282 1283 self.avg.runnable_sum = self 1284 .avg 1285 .runnable_sum 1286 .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64); 1287 } 1288 1289 pub fn update_task_group_load(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &mut CfsRunQueue) { 1290 let mut runnable_sum = gcfs_rq.prop_runnable_sum; 1291 1292 let mut load_sum = 0; 1293 1294 if runnable_sum == 0 { 1295 return; 1296 } 1297 1298 gcfs_rq.prop_runnable_sum = 0; 1299 1300 let divider = self.avg.get_pelt_divider(); 1301 1302 if runnable_sum >= 0 { 1303 runnable_sum += se.avg.load_sum as isize; 1304 runnable_sum = runnable_sum.min(divider as isize); 1305 } else { 1306 if LoadWeight::scale_load_down(gcfs_rq.load.weight) > 0 { 1307 load_sum = gcfs_rq.avg.load_sum / LoadWeight::scale_load_down(gcfs_rq.load.weight); 1308 } 1309 1310 runnable_sum = se.avg.load_sum.min(load_sum) as isize; 1311 } 1312 1313 let running_sum = se.avg.util_sum as isize >> SCHED_CAPACITY_SHIFT; 1314 runnable_sum = runnable_sum.max(running_sum); 1315 1316 load_sum = LoadWeight::scale_load_down(se.load.weight) * runnable_sum as u64; 1317 let load_avg = load_sum / divider as u64; 1318 1319 let delta_avg = load_avg as isize - se.avg.load_avg as isize; 
1320 if delta_avg == 0 { 1321 return; 1322 } 1323 1324 let delta_sum = load_sum as isize 1325 - LoadWeight::scale_load_down(se.load.weight) as isize * se.avg.load_sum as isize; 1326 1327 let se = se.force_mut(); 1328 se.avg.load_sum = runnable_sum as u64; 1329 se.avg.load_avg = load_avg as usize; 1330 1331 add_positive(&mut (self.avg.load_avg as isize), delta_avg); 1332 add_positive(&mut (self.avg.util_sum as isize), delta_sum); 1333 1334 self.avg.load_sum = self 1335 .avg 1336 .load_sum 1337 .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64); 1338 } 1339 1340 /// pick下一个运行的task 1341 pub fn pick_next_entity(&self) -> Option<Arc<FairSchedEntity>> { 1342 if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY) 1343 && self.next().is_some() 1344 && self.entity_eligible(&self.next().unwrap()) 1345 { 1346 return self.next(); 1347 } 1348 self.entities.get_first().map(|val| val.1.clone()) 1349 } 1350 1351 pub fn entity_eligible(&self, se: &Arc<FairSchedEntity>) -> bool { 1352 let curr = self.current(); 1353 let mut avg = self.avg_vruntime; 1354 let mut load = self.avg_load; 1355 1356 if let Some(curr) = curr { 1357 if curr.on_rq() { 1358 let weight = LoadWeight::scale_load_down(curr.load.weight); 1359 1360 avg += self.entity_key(&curr) * weight as i64; 1361 load += weight as i64; 1362 } 1363 } 1364 1365 return avg >= self.entity_key(se) * load; 1366 } 1367 } 1368 1369 pub struct CompletelyFairScheduler; 1370 1371 impl CompletelyFairScheduler { 1372 /// 寻找到最近公共组长 1373 fn find_matching_se(se: &mut Arc<FairSchedEntity>, pse: &mut Arc<FairSchedEntity>) { 1374 let mut se_depth = se.depth; 1375 let mut pse_depth = pse.depth; 1376 1377 while se_depth > pse_depth { 1378 se_depth -= 1; 1379 *se = se.parent().unwrap(); 1380 } 1381 1382 while pse_depth > se_depth { 1383 pse_depth -= 1; 1384 *pse = pse.parent().unwrap(); 1385 } 1386 1387 while !Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq()) { 1388 *se = se.parent().unwrap(); 1389 *pse = pse.parent().unwrap(); 1390 } 1391 } 1392 } 1393 1394 
impl Scheduler for CompletelyFairScheduler {
    /// Enqueue `pcb`'s scheduling entity (and, in a group hierarchy, each
    /// ancestor group entity) onto its cfs_rq.
    ///
    /// First pass walks up from the task entity, enqueueing every entity
    /// that is not already on a runqueue; after the first level, the flags
    /// are forced to ENQUEUE_WAKEUP for the ancestors. If the walk stops at
    /// an already-enqueued ancestor, a second pass continues upward only to
    /// refresh load averages and the hierarchical running counts.
    //
    // NOTE(review): the closure's `(bool, bool)` return appears to mean
    // (continue-walking, success) — confirm against
    // `FairSchedEntity::for_each_in_group`.
    fn enqueue(
        rq: &mut CpuRunQueue,
        pcb: Arc<crate::process::ProcessControlBlock>,
        mut flags: EnqueueFlag,
    ) {
        let mut se = pcb.sched_info().sched_entity();
        // Tracks whether this subtree counts toward idle_h_nr_running;
        // starts true only for SCHED_IDLE tasks.
        let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
        let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
            // Stop climbing once an ancestor is already enqueued.
            if se.on_rq() {
                return (false, true);
            }

            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();
            cfs_rq.enqueue_entity(&se, flags);

            // Hierarchical counters: every level above the task sees one
            // more runnable task.
            cfs_rq.h_nr_running += 1;
            cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;

            // Once inside an idle group, everything above counts as idle.
            if cfs_rq.is_idle() {
                idle_h_nr_running = true;
            }

            // TODO: cfs_rq_throttled

            // Ancestors are "woken" by the child becoming runnable.
            flags = EnqueueFlag::ENQUEUE_WAKEUP;

            return (true, true);
        });

        if !should_continue {
            return;
        }

        // Second pass: remaining ancestors were already on a runqueue, so
        // only refresh their PELT averages, group weights and counters.
        if let Some(mut se) = se {
            FairSchedEntity::for_each_in_group(&mut se, |se| {
                let binding = se.cfs_rq();
                let cfs_rq = binding.force_mut();

                cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);

                let se = se.force_mut();
                se.update_runnable();

                se.update_cfs_group();

                cfs_rq.h_nr_running += 1;
                cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;

                if cfs_rq.is_idle() {
                    idle_h_nr_running = true;
                }

                // TODO: cfs_rq_throttled

                return (true, true);
            });
        }

        rq.add_nr_running(1);
    }

    /// Dequeue `pcb`'s entity (and ancestors) from its cfs_rq — the mirror
    /// of `enqueue`. The first pass dequeues level by level; if a level's
    /// queue still carries weight afterwards (other entities remain), the
    /// climb would normally stop there — the group-sleep case is not yet
    /// implemented (`todo!`). A second pass refreshes averages/counters on
    /// ancestors that stay enqueued.
    fn dequeue(
        rq: &mut CpuRunQueue,
        pcb: Arc<crate::process::ProcessControlBlock>,
        mut flags: DequeueFlag,
    ) {
        let mut se = pcb.sched_info().sched_entity();
        let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
        let task_sleep = flags.contains(DequeueFlag::DEQUEUE_SLEEP);
        // Remember whether the rq was "all idle" to detect a transition
        // into the sched-idle state below.
        let was_sched_idle = rq.sched_idle_rq();

        let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();
            cfs_rq.dequeue_entity(&se, flags);

            cfs_rq.h_nr_running -= 1;
            cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;

            if cfs_rq.is_idle() {
                idle_h_nr_running = true;
            }

            // TODO: cfs_rq_throttled

            if cfs_rq.load.weight > 0 {
                let sep = se.parent();

                // Parent group still has runnable children while this task
                // sleeps — unimplemented (Linux would stop and set next
                // buddy here).
                if task_sleep && sep.is_some() {
                    todo!()
                }
            }

            // Ancestors dequeued as a consequence of a sleeping child.
            flags |= DequeueFlag::DEQUEUE_SLEEP;

            return (true, true);
        });

        if !should_continue {
            return;
        }

        // Second pass: update averages/counters on ancestors that remain
        // enqueued because they still have other runnable children.
        if let Some(mut se) = se {
            FairSchedEntity::for_each_in_group(&mut se, |se| {
                let binding = se.cfs_rq();
                let cfs_rq = binding.force_mut();

                cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);

                let se = se.force_mut();
                se.update_runnable();

                se.update_cfs_group();

                cfs_rq.h_nr_running -= 1;
                cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;

                if cfs_rq.is_idle() {
                    idle_h_nr_running = true;
                }

                // TODO: cfs_rq_throttled

                return (true, true);
            });
        }

        rq.sub_nr_running(1);

        // If removing this task left only SCHED_IDLE tasks, trigger an
        // immediate rebalance opportunity.
        if unlikely(!was_sched_idle && rq.sched_idle_rq()) {
            rq.next_balance = clock();
        }
    }

    /// Voluntarily yield the CPU: push the current entity's deadline one
    /// full (virtual) slice into the future so a peer gets picked next.
    fn yield_task(rq: &mut CpuRunQueue) {
        let curr = rq.current();
        let se = curr.sched_info().sched_entity();
        let binding = se.cfs_rq();
        let cfs_rq = binding.force_mut();

        // Nothing to yield to.
        if unlikely(rq.nr_running == 1) {
            return;
        }

        cfs_rq.clear_buddies(&se);

        rq.update_rq_clock();

        cfs_rq.update_current();

        // The clock was just updated; skip the redundant update on the
        // upcoming schedule.
        rq.clock_updata_flags |= ClockUpdataFlag::RQCF_REQ_SKIP;

        // Penalize the yielder by one weight-scaled slice.
        se.force_mut().deadline += se.calculate_delta_fair(se.slice);
    }

    /// Decide whether the newly woken `pcb` should preempt the task
    /// currently running on `rq`.
    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<crate::process::ProcessControlBlock>,
        wake_flags: WakeupFlags,
    ) {
        let curr = rq.current();
        let mut se = curr.sched_info().sched_entity();
        let mut pse = pcb.sched_info().sched_entity();

        // Waking the running task itself — nothing to do.
        if unlikely(Arc::ptr_eq(&se, &pse)) {
            return;
        }

        // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/fair.c#8160

        // Record the woken entity as "next buddy" at every level so the
        // picker prefers it (unless this is a fork wakeup).
        let _next_buddy_mark = if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
            && !wake_flags.contains(WakeupFlags::WF_FORK)
        {
            FairSchedEntity::for_each_in_group(&mut pse, |se| {
                if !se.on_rq() {
                    return (false, true);
                }

                // Never set an idle entity as the buddy.
                if se.is_idle() {
                    return (false, true);
                }

                se.cfs_rq().force_mut().next = Arc::downgrade(&se);

                return (true, true);
            });
            true
        } else {
            false
        };

        // A reschedule is already pending.
        if curr.flags().contains(ProcessFlags::NEED_SCHEDULE) {
            return;
        }

        // A non-idle task always preempts a SCHED_IDLE task.
        if unlikely(curr.sched_info().policy() == SchedPolicy::IDLE)
            && likely(pcb.sched_info().policy() != SchedPolicy::IDLE)
        {
            rq.resched_current();
            return;
        }

        // Only CFS tasks participate in wakeup preemption, and only when
        // the feature is enabled.
        if unlikely(pcb.sched_info().policy() != SchedPolicy::CFS)
            || !SCHED_FEATURES.contains(SchedFeature::WAKEUP_PREEMPTION)
        {
            return;
        }

        // Lift both entities to a common cfs_rq before comparing.
        Self::find_matching_se(&mut se, &mut pse);

        let cse_is_idle = se.is_idle();
        let pse_is_idle = pse.is_idle();

        // Non-idle waker vs idle current: preempt.
        if cse_is_idle && !pse_is_idle {
            rq.resched_current();
            return;
        }

        // Idle waker vs non-idle current: never preempt.
        if cse_is_idle != pse_is_idle {
            return;
        }

        let cfs_rq = se.cfs_rq();
        cfs_rq.force_mut().update_current();

        // Preempt only if the woken entity is what the queue would pick
        // first anyway.
        if let Some((_, pick_se)) = cfs_rq.entities.get_first() {
            if Arc::ptr_eq(pick_se, &pse) {
                rq.resched_current();
                return;
            }
        }
    }

    /// Pick the next runnable CFS task by descending the group hierarchy:
    /// at each level refresh (or drop) the current entity, pick the best
    /// child, and recurse into its private cfs_rq if it is a group.
    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<crate::process::ProcessControlBlock>> {
        let mut cfs_rq = Some(rq.cfs_rq());
        if cfs_rq.as_ref().unwrap().nr_running == 0 {
            return None;
        }

        let mut se;
        loop {
            let cfs = cfs_rq.unwrap();
            let cfs = cfs.force_mut();
            let curr = cfs.current();
            if let Some(curr) = curr {
                if curr.on_rq() {
                    cfs.update_current();
                } else {
                    // Current entity left the queue; clear the slot.
                    cfs.set_current(Weak::default());
                }
            }

            se = cfs.pick_next_entity();
            match se.clone() {
                // A group entity: descend into its own queue.
                Some(val) => cfs_rq = val.my_cfs_rq.clone(),
                None => {
                    break;
                }
            }

            // Reached a leaf (task) entity.
            if cfs_rq.is_none() {
                break;
            }
        }

        se.map(|se| se.pcb())
    }

    /// Scheduler tick: run `entity_tick` for the task's entity and every
    /// ancestor group entity.
    fn tick(_rq: &mut CpuRunQueue, pcb: Arc<crate::process::ProcessControlBlock>, queued: bool) {
        let mut se = pcb.sched_info().sched_entity();

        FairSchedEntity::for_each_in_group(&mut se, |se| {
            // Clone first so the cfs_rq borrow does not conflict with
            // passing `se` into entity_tick.
            let binding = se.clone();
            let binding = binding.cfs_rq();
            let cfs_rq = binding.force_mut();

            cfs_rq.entity_tick(se, queued);
            (true, true)
        });
    }

    /// Place a newly forked task's entity on its cfs_rq with initial
    /// placement, under the runqueue lock of the current CPU.
    fn task_fork(pcb: Arc<ProcessControlBlock>) {
        let rq = cpu_rq(smp_get_processor_id().data() as usize);
        let se = pcb.sched_info().sched_entity();

        let (rq, _guard) = rq.self_lock();

        rq.update_rq_clock();

        let binding = se.cfs_rq();
        let cfs_rq = binding.force_mut();

        if cfs_rq.current().is_some() {
            cfs_rq.update_current();
        }

        // ENQUEUE_INITIAL: first placement of a fresh entity.
        cfs_rq.place_entity(se.clone(), EnqueueFlag::ENQUEUE_INITIAL);
    }

    /// Full pick path used at context-switch time.
    ///
    /// Fast-ish path when `prev` is not a CFS task (or absent): put `prev`
    /// back via its own class, then descend the hierarchy with
    /// `set_next_entity` at each level. Otherwise descend while refreshing
    /// each level's current entity, and finally reconcile the old and new
    /// entity chains (`put_prev_entity`/`set_next_entity`) up to their
    /// common level, mirroring Linux's pick_next_task_fair.
    fn pick_next_task(
        rq: &mut CpuRunQueue,
        prev: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>> {
        let mut cfs_rq = rq.cfs_rq();
        if rq.nr_running == 0 {
            return None;
        }

        if prev.is_none()
            || (prev.is_some() && prev.as_ref().unwrap().sched_info().policy() != SchedPolicy::CFS)
        {
            // `prev` belongs to another class: let that class put it back.
            if let Some(prev) = prev {
                match prev.sched_info().policy() {
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::CFS => todo!(),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(rq, prev),
                }
            }
            let mut se;
            loop {
                match cfs_rq.pick_next_entity() {
                    Some(s) => se = s,
                    None => return None,
                }

                cfs_rq.force_mut().set_next_entity(&se);

                match &se.my_cfs_rq {
                    // Group entity: keep descending.
                    Some(q) => cfs_rq = q.clone(),
                    None => break,
                }
            }

            return Some(se.pcb());
        }

        let prev = prev.unwrap();
        let se = cfs_rq.pick_next_entity();

        if let Some(mut se) = se {
            // Descend the hierarchy, refreshing each level's current
            // entity along the way.
            loop {
                let curr = cfs_rq.current();
                if let Some(current) = curr {
                    if current.on_rq() {
                        cfs_rq.force_mut().update_current()
                    } else {
                        cfs_rq.force_mut().set_current(Weak::default());
                    }
                }

                match cfs_rq.pick_next_entity() {
                    Some(e) => se = e,
                    None => break,
                }

                if let Some(q) = se.my_cfs_rq.clone() {
                    cfs_rq = q;
                } else {
                    break;
                }
            }

            let p = se.pcb();

            if !Arc::ptr_eq(&prev, &p) {
                let mut pse = prev.sched_info().sched_entity();

                // Walk the two chains toward each other: put back prev's
                // entities and set next's entities level by level until
                // both meet on the same cfs_rq.
                while !(Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq())
                    && Arc::ptr_eq(&se.cfs_rq(), &cfs_rq))
                {
                    let se_depth = se.depth;
                    let pse_depth = pse.depth;

                    if se_depth <= pse_depth {
                        pse.cfs_rq().force_mut().put_prev_entity(pse.clone());
                        pse = pse.parent().unwrap();
                    }

                    if se_depth >= pse_depth {
                        se.cfs_rq().force_mut().set_next_entity(&se);
                        se = se.parent().unwrap();
                    }
                }

                // Final hand-off at the common level.
                cfs_rq.force_mut().put_prev_entity(pse);
                cfs_rq.force_mut().set_next_entity(&se);
            }

            return Some(p);
        } else {
            return None;
        }
    }

    /// Put a descheduled CFS task back: `put_prev_entity` for its entity
    /// and every ancestor group entity.
    fn put_prev_task(_rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>) {
        let mut se = prev.sched_info().sched_entity();

        FairSchedEntity::for_each_in_group(&mut se, |se| {
            let cfs = se.cfs_rq();
            cfs.force_mut().put_prev_entity(se);

            return (true, true);
        });
    }
}