use core::intrinsics::likely;
use core::intrinsics::unlikely;
use core::mem::swap;
use core::sync::atomic::fence;
use core::sync::atomic::{AtomicU64, Ordering};

use crate::libs::rbtree::RBTree;
use crate::libs::spinlock::SpinLock;
use crate::process::ProcessControlBlock;
use crate::process::ProcessFlags;
use crate::sched::clock::ClockUpdataFlag;
use crate::sched::{cpu_rq, SchedFeature, SCHED_FEATURES};
use crate::smp::core::smp_get_processor_id;
use crate::time::jiffies::TICK_NESC;
use crate::time::timer::clock;
use crate::time::NSEC_PER_MSEC;
use alloc::sync::{Arc, Weak};

use super::idle::IdleScheduler;
use super::pelt::{add_positive, sub_positive, SchedulerAvg, UpdateAvgFlags, PELT_MIN_DIVIDER};
use super::{
    CpuRunQueue, DequeueFlag, EnqueueFlag, LoadWeight, OnRq, SchedPolicy, Scheduler, TaskGroup,
    WakeupFlags, SCHED_CAPACITY_SHIFT,
};

/// Minimum preemption granularity for CPU-bound tasks, in nanoseconds.
/// The default is 0.75 ms multiplied by (1 + log2(number of CPUs)).
/// It bounds how often CPU-bound tasks may be preempted.
static SYSCTL_SCHED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);
/// Normalized minimum preemption granularity.
#[allow(dead_code)]
static NORMALIZED_SYSCTL_SCHED_MIN_GRANULARITY: AtomicU64 = AtomicU64::new(750000);

/// Base time slice, in nanoseconds.
static SYSCTL_SCHED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);
#[allow(dead_code)]
static NORMALIZED_SYSCTL_SCHED_BASE_SLICE: AtomicU64 = AtomicU64::new(750000);

/// Number of tasks the scheduler aims to service within one latency period.
static SCHED_NR_LATENCY: AtomicU64 = AtomicU64::new(8);

/// A scheduling entity. An entity may be a single process, a process group,
/// a user, or any other unit of CPU-time accounting.
#[derive(Debug)]
pub struct FairSchedEntity {
    /// Load weight
    pub load: LoadWeight,
    pub deadline: u64,
    pub min_deadline: u64,

    /// Whether the entity is on a run queue
    pub on_rq: OnRq,
    /// Time at which the entity last started running
    pub exec_start: u64,
    /// Total execution time
    pub sum_exec_runtime: u64,
    /// Virtual runtime
    pub vruntime: u64,
    /// Scheduling lag of the entity: its weight multiplied by (V - v_i),
    /// where V is the queue's average virtual time and v_i is the entity's
    /// virtual runtime.
    pub vlag: i64,
    /// Time slice
    pub slice: u64,
    /// Value of `sum_exec_runtime` when the current slice started
    pub prev_sum_exec_runtime: u64,

    pub avg: SchedulerAvg,

    /// Parent entity
    parent: Weak<FairSchedEntity>,

    pub depth: u32,

    /// Weak reference to self
    self_ref: Weak<FairSchedEntity>,

    /// The CFS run queue this entity is queued on
    cfs_rq: Weak<CfsRunQueue>,

    /// The private CFS run queue owned by a group entity
    my_cfs_rq: Option<Arc<CfsRunQueue>>,

    runnable_weight: u64,

    pcb: Weak<ProcessControlBlock>,
}

impl FairSchedEntity {
    pub fn new() -> Arc<Self> {
        let ret = Arc::new(Self {
            parent: Weak::new(),
            self_ref: Weak::new(),
            pcb: Weak::new(),
            cfs_rq: Weak::new(),
            my_cfs_rq: None,
            on_rq: OnRq::None,
            slice: SYSCTL_SCHED_BASE_SLICE.load(Ordering::SeqCst),
            load: Default::default(),
            deadline: Default::default(),
            min_deadline: Default::default(),
            exec_start: Default::default(),
            sum_exec_runtime: Default::default(),
            vruntime: Default::default(),
            vlag: Default::default(),
            prev_sum_exec_runtime: Default::default(),
            avg: Default::default(),
            depth: Default::default(),
            runnable_weight: Default::default(),
        });

        ret.force_mut().self_ref = Arc::downgrade(&ret);

        ret
    }
}

impl FairSchedEntity {
    pub fn self_arc(&self) -> Arc<FairSchedEntity> {
        self.self_ref.upgrade().unwrap()
    }

    #[inline]
    pub fn on_rq(&self) -> bool {
        self.on_rq != OnRq::None
    }

    pub fn pcb(&self) -> Arc<ProcessControlBlock> {
        self.pcb.upgrade().unwrap()
    }

    pub fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.pcb = pcb
    }

    #[inline]
    pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
        self.cfs_rq.upgrade().unwrap()
    }

    pub fn set_cfs(&mut self, cfs: Weak<CfsRunQueue>) {
        self.cfs_rq = cfs;
    }

    pub fn parent(&self) -> Option<Arc<FairSchedEntity>> {
        self.parent.upgrade()
    }

    #[allow(clippy::mut_from_ref)]
    pub fn force_mut(&self) -> &mut Self {
        unsafe {
            let p = self as *const Self as usize;
            (p as *mut Self).as_mut().unwrap()
        }
    }

    /// Whether this entity represents a single task (as opposed to a group)
    #[inline]
    pub fn is_task(&self) -> bool {
        // TODO: scheduling groups
        true
    }

    #[inline]
    pub fn is_idle(&self) -> bool {
        if self.is_task() {
            return self.pcb().sched_info().policy() == SchedPolicy::IDLE;
        }

        return self.cfs_rq().is_idle();
    }

    pub fn clear_buddies(&self) {
        let mut se = self.self_arc();

        Self::for_each_in_group(&mut se, |se| {
            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();

            if let Some(next) = cfs_rq.next.upgrade() {
                if !Arc::ptr_eq(&next, &se) {
                    return (false, true);
                }
            }
            cfs_rq.next = Weak::new();
            return (true, true);
        });
    }

    pub fn calculate_delta_fair(&self, delta: u64) -> u64 {
        if unlikely(self.load.weight != LoadWeight::NICE_0_LOAD_SHIFT as u64) {
            return self
                .force_mut()
                .load
                .calculate_delta(delta, LoadWeight::NICE_0_LOAD_SHIFT as u64);
        }

        delta
    }
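    // A worked example of the scaling above (editor's sketch; assumes the
    // conventional nice-0 weight scale of 1024):
    //
    //     delta_fair = delta * NICE_0_LOAD / weight
    //
    //     delta = 2_000_000 ns, weight = 1024 => delta_fair = 2_000_000 ns
    //     delta = 2_000_000 ns, weight = 2048 => delta_fair = 1_000_000 ns
    //
    // A heavier (higher-priority) entity accrues virtual time more slowly, so
    // it stays leftmost in the tree longer and receives more CPU time.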

    /// Update the weight information within the group
    pub fn update_cfs_group(&self) {
        if self.my_cfs_rq.is_none() {
            return;
        }

        let group_cfs = self.my_cfs_rq.clone().unwrap();

        let shares = group_cfs.task_group().shares;

        if unlikely(self.load.weight != shares) {
            // TODO: reweight
            self.cfs_rq()
                .force_mut()
                .reweight_entity(self.self_arc(), shares);
        }
    }

    /// Walk up `se`'s group hierarchy, leaving `se` pointing at its topmost
    /// parent. The closure decides how far the walk goes and whether the
    /// caller itself should return early.
    ///
    /// Parameters:
    /// - `se`: the scheduling entity to start from; mutated in place
    /// - `f`: closure applied to each entity, returning `(no_break, should_continue)`.
    ///   When `no_break` is false the loop exits; when `should_continue` is
    ///   false the caller should return.
    ///
    /// Returns:
    /// - `bool`: whether the caller should continue (false means return early)
    /// - `Option<Arc<FairSchedEntity>>`: the entity `se` finally points to
    pub fn for_each_in_group(
        se: &mut Arc<FairSchedEntity>,
        mut f: impl FnMut(Arc<FairSchedEntity>) -> (bool, bool),
    ) -> (bool, Option<Arc<FairSchedEntity>>) {
        let mut should_continue;
        let ret;
        // Iterate up to the root. For example, if task B belongs to task
        // group A, B's share of CPU time depends on both A's weight and B's
        // own weight.
        loop {
            let (no_break, flag) = f(se.clone());
            should_continue = flag;
            if !no_break || !should_continue {
                ret = Some(se.clone());
                break;
            }

            let parent = se.parent();
            if parent.is_none() {
                ret = None;
                break;
            }

            *se = parent.unwrap();
        }

        (should_continue, ret)
    }

    pub fn runnable(&self) -> u64 {
        if self.is_task() {
            return self.on_rq as u64;
        }
        self.runnable_weight
    }

    /// Propagate load averages from a group's private queue to the entity and
    /// the queue it sits on
    pub fn propagate_entity_load_avg(&mut self) -> bool {
        if self.is_task() {
            return false;
        }

        let binding = self.my_cfs_rq.clone().unwrap();
        let gcfs_rq = binding.force_mut();

        if gcfs_rq.propagate == 0 {
            return false;
        }

        gcfs_rq.propagate = 0;

        let binding = self.cfs_rq();
        let cfs_rq = binding.force_mut();

        cfs_rq.add_task_group_propagate(gcfs_rq.prop_runnable_sum);

        cfs_rq.update_task_group_util(self.self_arc(), gcfs_rq);
        cfs_rq.update_task_group_runnable(self.self_arc(), gcfs_rq);
        cfs_rq.update_task_group_load(self.self_arc(), gcfs_rq);

        return true;
    }

    /// Refresh `runnable_weight` for a group entity
    pub fn update_runnable(&mut self) {
        if !self.is_task() {
            self.runnable_weight = self.my_cfs_rq.clone().unwrap().h_nr_running;
        }
    }

    /// Initialize the entity's runnable average
    pub fn init_entity_runnable_average(&mut self) {
        self.avg = SchedulerAvg::default();

        if self.is_task() {
            self.avg.load_avg = LoadWeight::scale_load_down(self.load.weight) as usize;
        }
    }
}

/// The CFS run queue. Each queue must be per-CPU.
#[allow(dead_code)]
#[derive(Debug)]
pub struct CfsRunQueue {
    load: LoadWeight,

    /// Number of entities queued here, used for load balancing
    nr_running: u64,
    /// Hierarchical task count for this CPU (includes tasks in child groups)
    pub h_nr_running: u64,
    /// Execution clock
    exec_clock: u64,
    /// Minimum virtual runtime
    min_vruntime: u64,
    /// Remaining runtime
    runtime_remaining: u64,

    /// Red-black tree holding the queued entities
    pub(super) entities: RBTree<u64, Arc<FairSchedEntity>>,

    /// IDLE
    idle: usize,

    idle_nr_running: u64,

    pub idle_h_nr_running: u64,

    /// The entity currently running
    current: Weak<FairSchedEntity>,
    /// The entity to run next
    next: Weak<FairSchedEntity>,
    /// The entity that ran last
    last: Weak<FairSchedEntity>,
    /// The entity to skip
    skip: Weak<FairSchedEntity>,

    avg_load: i64,
    avg_vruntime: i64,

    last_update_time_copy: u64,

    pub avg: SchedulerAvg,

    rq: Weak<CpuRunQueue>,
    /// The task group that owns this queue
    task_group: Weak<TaskGroup>,

    pub throttled_clock: u64,
    pub throttled_clock_pelt: u64,
    pub throttled_clock_pelt_time: u64,
    pub throttled_pelt_idle: u64,

    pub throttled: bool,
    pub throttled_count: u64,

    pub removed: SpinLock<CfsRemoved>,

    pub propagate: isize,
    pub prop_runnable_sum: isize,
}

#[derive(Debug, Default)]
pub struct CfsRemoved {
    pub nr: u32,
    pub load_avg: usize,
    pub util_avg: usize,
    pub runnable_avg: usize,
}

impl CfsRunQueue {
    pub fn new() -> Self {
        Self {
            load: LoadWeight::default(),
            nr_running: 0,
            h_nr_running: 0,
            exec_clock: 0,
            min_vruntime: 1 << 20,
            entities: RBTree::new(),
            idle: 0,
            idle_nr_running: 0,
            idle_h_nr_running: 0,
            current: Weak::new(),
            next: Weak::new(),
            last: Weak::new(),
            skip: Weak::new(),
            avg_load: 0,
            avg_vruntime: 0,
            last_update_time_copy: 0,
            avg: SchedulerAvg::default(),
            rq: Weak::new(),
            task_group: Weak::new(),
            throttled_clock: 0,
            throttled_clock_pelt: 0,
            throttled_clock_pelt_time: 0,
            throttled_pelt_idle: 0,
            throttled: false,
            throttled_count: 0,
            removed: SpinLock::new(CfsRemoved::default()),
            propagate: 0,
            prop_runnable_sum: 0,
            runtime_remaining: 0,
        }
    }

    #[inline]
    pub fn rq(&self) -> Arc<CpuRunQueue> {
        self.rq.upgrade().unwrap()
    }

    #[inline]
    pub fn set_rq(&mut self, rq: Weak<CpuRunQueue>) {
        self.rq = rq;
    }

    #[inline]
    #[allow(clippy::mut_from_ref)]
    pub fn force_mut(&self) -> &mut Self {
        unsafe {
            (self as *const Self as usize as *mut Self)
                .as_mut()
                .unwrap()
        }
    }

    #[inline]
    pub fn is_idle(&self) -> bool {
        self.idle > 0
    }

    #[inline]
    pub fn current(&self) -> Option<Arc<FairSchedEntity>> {
        self.current.upgrade()
    }

    #[inline]
    pub fn set_current(&mut self, curr: Weak<FairSchedEntity>) {
        self.current = curr
    }

    #[inline]
    pub fn next(&self) -> Option<Arc<FairSchedEntity>> {
        self.next.upgrade()
    }

    pub fn task_group(&self) -> Arc<TaskGroup> {
        self.task_group.upgrade().unwrap()
    }

    #[allow(dead_code)]
    #[inline]
    pub const fn bandwidth_used() -> bool {
        false
    }

    /// Compute the scheduling period. The basic idea is that every task
    /// should run at least once per period, so no task is starved for long.
    pub fn sched_period(nr_running: u64) -> u64 {
        let nr_latency = SCHED_NR_LATENCY.load(Ordering::SeqCst);
        let min_granularity = SYSCTL_SCHED_MIN_GRANULARITY.load(Ordering::SeqCst);
        if unlikely(nr_running > nr_latency) {
            // More runnable tasks than the latency target: stretch the period
            // to nr_running minimum-granularity slots.
            nr_running * min_granularity
        } else {
            // Otherwise use the default latency period of
            // nr_latency * min_granularity nanoseconds.
            nr_latency * min_granularity
        }
    }
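    // Example with the defaults above (min_granularity = 750_000 ns,
    // nr_latency = 8):
    //
    //     nr_running = 4  => period = 8 * 750_000  = 6 ms (latency default)
    //     nr_running = 16 => period = 16 * 750_000 = 12 ms (stretched)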

    /// Compute an entity's time slice in virtual time: the wall-clock slice
    /// scaled by NICE_0_LOAD / weight.
    #[allow(dead_code)]
    pub fn sched_vslice(&self, entity: Arc<FairSchedEntity>) -> u64 {
        let slice = self.sched_slice(entity.clone());
        return entity.calculate_delta_fair(slice);
    }

    /// Compute an entity's time slice in wall-clock time.
    #[allow(dead_code)]
    pub fn sched_slice(&self, mut entity: Arc<FairSchedEntity>) -> u64 {
        let mut nr_running = self.nr_running;
        if SCHED_FEATURES.contains(SchedFeature::ALT_PERIOD) {
            nr_running = self.h_nr_running;
        }

        // Total length of one scheduling period
        let mut slice = Self::sched_period(nr_running + (!entity.on_rq()) as u64);

        // Iterate up to the root. For example, if task B belongs to task
        // group A, B's share of the period depends on both A's weight and
        // B's own weight.
        FairSchedEntity::for_each_in_group(&mut entity, |se| {
            if unlikely(!se.on_rq()) {
                se.cfs_rq().force_mut().load.update_load_add(se.load.weight);
            }
            slice = se
                .cfs_rq()
                .force_mut()
                .load
                .calculate_delta(slice, se.load.weight);

            (true, true)
        });

        if SCHED_FEATURES.contains(SchedFeature::BASE_SLICE) {
            // TODO: IDLE?
            let min_gran = SYSCTL_SCHED_MIN_GRANULARITY.load(Ordering::SeqCst);

            slice = min_gran.max(slice)
        }

        slice
    }
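    // Example of the hierarchical walk above (editor's sketch): a task of
    // weight 1024 inside group A (weight 2048), where A shares the root queue
    // with group B (weight 2048) and the task shares A's queue with another
    // 1024-weight task, gets
    //
    //     slice = period * (1024 / 2048) * (2048 / 4096) = period / 4
    //
    // i.e. each level scales the slice by this entity's share of its queue's
    // total load.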

    /// On a tick, check whether the current task has outrun its ideal
    /// runtime and needs to be preempted; if so, reschedule and make sure
    /// buddy favours do not immediately re-elect it.
    #[allow(dead_code)]
    pub fn check_preempt_tick(&mut self, curr: Arc<FairSchedEntity>) {
        // The entity's ideal runtime within one period
        let ideal_runtime = self.sched_slice(curr.clone());

        let delta_exec = curr.sum_exec_runtime - curr.prev_sum_exec_runtime;

        if delta_exec > ideal_runtime {
            // The task ran longer than its ideal runtime
            self.rq().resched_current();

            self.clear_buddies(&curr);
            return;
        }

        if delta_exec < SYSCTL_SCHED_MIN_GRANULARITY.load(Ordering::SeqCst) {
            return;
        }

        todo!()
    }

    pub fn clear_buddies(&mut self, se: &Arc<FairSchedEntity>) {
        if let Some(next) = self.next.upgrade() {
            if Arc::ptr_eq(&next, se) {
                se.clear_buddies();
            }
        }
    }

    /// Handle a scheduler tick for the given entity
    pub fn entity_tick(&mut self, curr: Arc<FairSchedEntity>, queued: bool) {
        // Update the current entity's runtime statistics
        self.update_current();

        self.update_load_avg(&curr, UpdateAvgFlags::UPDATE_TG);

        // Update group-scheduling state
        curr.update_cfs_group();

        if queued {
            self.rq().resched_current();
            return;
        }
    }

    /// Update the runtime statistics of the current entity
    pub fn update_current(&mut self) {
        let curr = self.current();
        if unlikely(curr.is_none()) {
            return;
        }

        let now = self.rq().clock_task();
        let curr = curr.unwrap();

        fence(Ordering::SeqCst);
        if unlikely(now <= curr.exec_start) {
            return;
        }

        fence(Ordering::SeqCst);
        let delta_exec = now - curr.exec_start;

        let curr = curr.force_mut();

        curr.exec_start = now;

        curr.sum_exec_runtime += delta_exec;

        // Accrue virtual runtime weighted by the actual runtime
        curr.vruntime += curr.calculate_delta_fair(delta_exec);
        fence(Ordering::SeqCst);
        self.update_deadline(&curr.self_arc());
        self.update_min_vruntime();

        self.account_cfs_rq_runtime(delta_exec);
    }

    /// Account queue runtime and check whether it has expired
    fn account_cfs_rq_runtime(&mut self, delta_exec: u64) {
        if likely(self.runtime_remaining > delta_exec) {
            self.runtime_remaining -= delta_exec;
            return;
        }

        // FIXME: for now, simply hand out a fixed 5000 ms allowance
        self.runtime_remaining = 5000 * NSEC_PER_MSEC as u64;

        if likely(self.current().is_some()) && self.nr_running > 1 {
            self.rq().resched_current();
        }
    }

    /// Refresh the entity's deadline; if the previous one has expired,
    /// reschedule
    pub fn update_deadline(&mut self, se: &Arc<FairSchedEntity>) {
        if se.vruntime < se.deadline {
            return;
        }

        se.force_mut().slice = SYSCTL_SCHED_BASE_SLICE.load(Ordering::SeqCst);

        se.force_mut().deadline = se.vruntime + se.calculate_delta_fair(se.slice);

        if self.nr_running > 1 {
            self.rq().resched_current();
            self.clear_buddies(se);
        }
    }
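    // Deadline arithmetic in the EEVDF sense: the new request ends one
    // weight-scaled slice past the current vruntime,
    //
    //     deadline = vruntime + slice * NICE_0_LOAD / weight
    //
    // so with the 750_000 ns base slice a nice-0 entity gets vruntime +
    // 750_000 while a weight-2048 entity gets vruntime + 375_000: both
    // receive the same wall-clock service per request.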

    /// Update the queue's minimum virtual runtime
    pub fn update_min_vruntime(&mut self) {
        let mut curr = self.current();

        let mut vruntime = self.min_vruntime;

        if let Some(c) = curr.clone() {
            if c.on_rq() {
                vruntime = c.vruntime;
            } else {
                self.set_current(Weak::default());
                curr = None;
            }
        }

        // Consider the entity with the smallest virtual runtime
        let leftmost = self.entities.get_first();
        if let Some(leftmost) = leftmost {
            let se = leftmost.1;

            if curr.is_none() {
                vruntime = se.vruntime;
            } else {
                vruntime = vruntime.min(se.vruntime);
            }
        }

        self.min_vruntime = self.__update_min_vruntime(vruntime);
    }

    fn __update_min_vruntime(&mut self, vruntime: u64) -> u64 {
        let mut min_vruntime = self.min_vruntime;

        // min_vruntime may only move forward; rebase the weighted key sum
        // whenever it advances.
        let delta = vruntime as i64 - min_vruntime as i64;
        if delta > 0 {
            self.avg_vruntime -= self.avg_load * delta;
            min_vruntime = vruntime;
        }

        return min_vruntime;
    }

    /// Whether `se` is the currently running entity
    pub fn is_curr(&self, se: &Arc<FairSchedEntity>) -> bool {
        if self.current().is_none() {
            false
        } else {
            Arc::ptr_eq(se, self.current().as_ref().unwrap())
        }
    }

    /// Change an entity's weight, keeping its load, lag and deadline
    /// consistent
    pub fn reweight_entity(&mut self, se: Arc<FairSchedEntity>, weight: u64) {
        // Whether this is the currently running entity
        let is_curr = self.is_curr(&se);

        // If the entity is queued
        if se.on_rq() {
            if is_curr {
                self.update_current();
            } else {
                // Otherwise dequeue it first
                self.inner_dequeue_entity(&se);
            }

            // Remove its old weight from the queue
            self.load.update_load_sub(se.load.weight);
        }

        self.dequeue_load_avg(&se);

        if !se.on_rq() {
            se.force_mut().vlag = se.vlag * se.load.weight as i64 / weight as i64;
        } else {
            self.reweight_eevdf(&se, weight);
        }
        se.force_mut().load.update_load_set(weight);

        // SMP
        let divider = se.avg.get_pelt_divider();
        se.force_mut().avg.load_avg = LoadWeight::scale_load_down(se.load.weight) as usize
            * se.avg.load_sum as usize
            / divider;

        self.enqueue_load_avg(se.clone());

        if se.on_rq() {
            self.load.update_load_add(se.load.weight);
            if !is_curr {
                self.inner_enqueue_entity(&se);
            }

            self.update_min_vruntime();
        }
    }

    /// Recompute an entity's vruntime and deadline after a weight change,
    /// preserving its virtual lag under the new weight
    fn reweight_eevdf(&mut self, se: &Arc<FairSchedEntity>, weight: u64) {
        let old_weight = se.load.weight;
        let avg_vruntime = self.avg_vruntime();
        let mut vlag;
        if avg_vruntime != se.vruntime {
            vlag = avg_vruntime as i64 - se.vruntime as i64;
            vlag = vlag * old_weight as i64 / weight as i64;
            se.force_mut().vruntime = (avg_vruntime as i64 - vlag) as u64;
        }

        let mut vslice = se.deadline as i64 - avg_vruntime as i64;
        vslice = vslice * old_weight as i64 / weight as i64;
        se.force_mut().deadline = (avg_vruntime as i64 + vslice) as u64;
    }

    /// The queue's average virtual time V, used as the EEVDF reference point
    fn avg_vruntime(&self) -> u64 {
        let curr = self.current();
        let mut avg = self.avg_vruntime;
        let mut load = self.avg_load;

        if let Some(curr) = curr {
            if curr.on_rq() {
                let weight = LoadWeight::scale_load_down(curr.load.weight);
                avg += self.entity_key(&curr) * weight as i64;
                load += weight as i64;
            }
        }

        if load > 0 {
            if avg < 0 {
                // Round towards negative infinity
                avg -= load - 1;
            }

            avg /= load;
        }

        return (self.min_vruntime as i64 + avg) as u64;
    }

    /// The entity's tree key: its vruntime relative to the queue's
    /// min_vruntime
    #[inline]
    pub fn entity_key(&self, se: &Arc<FairSchedEntity>) -> i64 {
        return se.vruntime as i64 - self.min_vruntime as i64;
    }
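    // Bookkeeping example for V (editor's sketch): with min_vruntime =
    // 100_000 and two queued entities (key -2_000, weight 1024) and
    // (key 4_000, weight 2048):
    //
    //     avg_vruntime = -2_000 * 1024 + 4_000 * 2048 = 6_144_000
    //     avg_load     = 1024 + 2048                  = 3_072
    //     V            = 100_000 + 6_144_000 / 3_072  = 102_000
    //
    // Keys are kept relative to min_vruntime so the weighted sum stays within
    // i64 range even though absolute vruntime grows without bound.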

    /// Add an entity's weighted key to the running V bookkeeping
    pub fn avg_vruntime_add(&mut self, se: &Arc<FairSchedEntity>) {
        let weight = LoadWeight::scale_load_down(se.load.weight);

        let key = self.entity_key(se);

        let avg_vruntime = self.avg_vruntime + key * weight as i64;

        self.avg_vruntime = avg_vruntime;
        self.avg_load += weight as i64;
    }

    /// Remove an entity's weighted key from the running V bookkeeping
    pub fn avg_vruntime_sub(&mut self, se: &Arc<FairSchedEntity>) {
        let weight = LoadWeight::scale_load_down(se.load.weight);

        let key = self.entity_key(se);

        let avg_vruntime = self.avg_vruntime - key * weight as i64;

        self.avg_vruntime = avg_vruntime;
        self.avg_load -= weight as i64;
    }

    /// Compute the initial vruntime and deadline for an entity being placed
    /// on the queue
    fn place_entity(&mut self, se: Arc<FairSchedEntity>, flags: EnqueueFlag) {
        let vruntime = self.avg_vruntime();
        let mut lag = 0;

        let se = se.force_mut();
        se.slice = SYSCTL_SCHED_BASE_SLICE.load(Ordering::SeqCst);

        let mut vslice = se.calculate_delta_fair(se.slice);

        if self.nr_running > 0 {
            let curr = self.current();

            lag = se.vlag;

            let mut load = self.avg_load;

            if let Some(curr) = curr {
                if curr.on_rq() {
                    load += LoadWeight::scale_load_down(curr.load.weight) as i64;
                }
            }

            // Inflate the lag so it is preserved once this entity's own
            // weight joins the queue: lag' = lag * (load + weight) / load
            lag *= load + LoadWeight::scale_load_down(se.load.weight) as i64;

            if load == 0 {
                load = 1;
            }

            lag /= load;
        }

        se.vruntime = (vruntime as i64 - lag) as u64;

        if flags.contains(EnqueueFlag::ENQUEUE_INITIAL) {
            // Existing tasks are, on average, halfway through their slice,
            // so start a newly forked task with half a request.
            vslice /= 2;
        }

        se.deadline = se.vruntime + vslice;
    }
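    // Why place_entity() inflates the lag (sketch of the EEVDF argument):
    // inserting a weight-w entity at V - vl shifts the average itself,
    //
    //     V' = (W * V + w * (V - vl)) / (W + w)
    //
    // so the entity would wake with less lag than it went to sleep with.
    // Solving for the placement that leaves exactly vl of lag after insertion
    // gives vl' = vl * (W + w) / W, which is the multiply-then-divide
    // performed above.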

    /// Update the load averages of the entity and this queue
    fn update_load_avg(&mut self, se: &Arc<FairSchedEntity>, flags: UpdateAvgFlags) {
        let now = self.cfs_rq_clock_pelt();

        if se.avg.last_update_time > 0 && !flags.contains(UpdateAvgFlags::SKIP_AGE_LOAD) {
            se.force_mut().update_load_avg(self, now);
        }

        let mut decayed = self.update_self_load_avg(now);
        decayed |= se.force_mut().propagate_entity_load_avg() as u32;

        if se.avg.last_update_time > 0 && flags.contains(UpdateAvgFlags::DO_ATTACH) {
            todo!()
        } else if flags.contains(UpdateAvgFlags::DO_DETACH) {
            self.detach_entity_load_avg(se);
        } else if decayed > 0 {
            // cfs_rq_util_change

            todo!()
        }
    }

    /// Detach the entity's load averages from this queue
    fn detach_entity_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
        self.dequeue_load_avg(se);

        sub_positive(&mut self.avg.util_avg, se.avg.util_avg);
        // `sub_positive(&mut (x as usize), ..)` would only mutate a cast
        // temporary, so subtract on a local copy and write the result back.
        let mut util_sum = self.avg.util_sum as usize;
        sub_positive(&mut util_sum, se.avg.util_sum as usize);
        self.avg.util_sum = util_sum as u64;
        self.avg.util_sum = self
            .avg
            .util_sum
            .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);

        sub_positive(&mut self.avg.runnable_avg, se.avg.runnable_avg);
        let mut runnable_sum = self.avg.runnable_sum as usize;
        sub_positive(&mut runnable_sum, se.avg.runnable_sum as usize);
        self.avg.runnable_sum = runnable_sum as u64;
        self.avg.runnable_sum = self
            .avg
            .runnable_sum
            .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);

        self.propagate = 1;
        self.prop_runnable_sum += se.avg.load_sum as isize;
    }

    fn update_self_load_avg(&mut self, now: u64) -> u32 {
        let mut removed_load = 0;
        let mut removed_util = 0;
        let mut removed_runnable = 0;

        let mut decayed = 0;

        if self.removed.lock().nr > 0 {
            let mut removed_guard = self.removed.lock();
            let divider = self.avg.get_pelt_divider();

            swap::<usize>(&mut removed_guard.util_avg, &mut removed_util);
            swap::<usize>(&mut removed_guard.load_avg, &mut removed_load);
            swap::<usize>(&mut removed_guard.runnable_avg, &mut removed_runnable);

            removed_guard.nr = 0;

            let mut r = removed_load;

            sub_positive(&mut self.avg.load_avg, r);
            // As above: subtract on a local copy and write back, so the
            // update is not lost on a cast temporary.
            let mut load_sum = self.avg.load_sum as usize;
            sub_positive(&mut load_sum, r * divider);
            self.avg.load_sum = load_sum as u64;
            self.avg.load_sum = self
                .avg
                .load_sum
                .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);

            r = removed_util;
            sub_positive(&mut self.avg.util_avg, r);
            let mut util_sum = self.avg.util_sum as usize;
            sub_positive(&mut util_sum, r * divider);
            self.avg.util_sum = util_sum as u64;
            self.avg.util_sum = self
                .avg
                .util_sum
                .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);

            r = removed_runnable;
            sub_positive(&mut self.avg.runnable_avg, r);
            let mut runnable_sum = self.avg.runnable_sum as usize;
            sub_positive(&mut runnable_sum, r * divider);
            self.avg.runnable_sum = runnable_sum as u64;
            self.avg.runnable_sum = self
                .avg
                .runnable_sum
                .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);

            drop(removed_guard);
            self.add_task_group_propagate(
                -(removed_runnable as isize * divider as isize) >> SCHED_CAPACITY_SHIFT,
            );

            decayed = 1;
        }

        decayed |= self.__update_load_avg(now) as u32;

        self.last_update_time_copy = self.avg.last_update_time;

        return decayed;
    }

    fn __update_load_avg(&mut self, now: u64) -> bool {
        if self.avg.update_load_sum(
            now,
            LoadWeight::scale_load_down(self.load.weight) as u32,
            self.h_nr_running as u32,
            self.current().is_some() as u32,
        ) {
            self.avg.update_load_avg(1);
            return true;
        }

        return false;
    }
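    // PELT background for the helpers above (standard PELT behaviour, not
    // DragonOS-specific): the *_sum fields are geometric series sampled in
    // 1024 us periods with a decay factor y chosen so that y^32 = 1/2, i.e. a
    // task's contribution halves after 32 ms of inactivity; the *_avg fields
    // are those sums normalized by the PELT divider.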

    fn add_task_group_propagate(&mut self, runnable_sum: isize) {
        self.propagate = 1;
        self.prop_runnable_sum += runnable_sum;
    }

    /// Enqueue an entity on this queue
    pub fn enqueue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: EnqueueFlag) {
        let is_curr = self.is_curr(se);

        if is_curr {
            self.place_entity(se.clone(), flags);
        }

        self.update_current();

        self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG | UpdateAvgFlags::DO_ATTACH);

        se.force_mut().update_runnable();

        se.update_cfs_group();

        if !is_curr {
            self.place_entity(se.clone(), flags);
        }

        self.account_entity_enqueue(se);

        if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
            se.force_mut().exec_start = 0;
        }

        if !is_curr {
            self.inner_enqueue_entity(se);
        }

        se.force_mut().on_rq = OnRq::Queued;

        if self.nr_running == 1 {
            // The entity just added is the only one queued
            // TODO: throttle
        }
    }

    pub fn dequeue_entity(&mut self, se: &Arc<FairSchedEntity>, flags: DequeueFlag) {
        let mut action = UpdateAvgFlags::UPDATE_TG;

        if se.is_task() && se.on_rq == OnRq::Migrating {
            action |= UpdateAvgFlags::DO_DETACH;
        }

        self.update_current();

        self.update_load_avg(se, action);

        se.force_mut().update_runnable();

        self.clear_buddies(se);

        self.update_entity_lag(se);

        if let Some(curr) = self.current() {
            if !Arc::ptr_eq(&curr, se) {
                self.inner_dequeue_entity(se);
            }
        } else {
            self.inner_dequeue_entity(se);
        }

        se.force_mut().on_rq = OnRq::None;

        self.account_entity_dequeue(se);

        // return_cfs_rq_runtime

        se.update_cfs_group();

        if flags & (DequeueFlag::DEQUEUE_SAVE | DequeueFlag::DEQUEUE_MOVE)
            != DequeueFlag::DEQUEUE_SAVE
        {
            self.update_min_vruntime();
        }

        if self.nr_running == 0 {
            self.update_idle_clock_pelt()
        }
    }

    /// Put the previously running entity back on the queue
    pub fn put_prev_entity(&mut self, prev: Arc<FairSchedEntity>) {
        if prev.on_rq() {
            self.update_current();
        }

        if prev.on_rq() {
            self.inner_enqueue_entity(&prev);
        }

        self.set_current(Weak::default());
    }

    /// Make `se` the current (next running) entity
    pub fn set_next_entity(&mut self, se: &Arc<FairSchedEntity>) {
        self.clear_buddies(se);

        if se.on_rq() {
            self.inner_dequeue_entity(se);
            self.update_load_avg(se, UpdateAvgFlags::UPDATE_TG);
            se.force_mut().vlag = se.deadline as i64;
        }

        self.set_current(Arc::downgrade(se));

        se.force_mut().prev_sum_exec_runtime = se.sum_exec_runtime;
    }

    fn update_idle_clock_pelt(&mut self) {
        let throttled = if unlikely(self.throttled_count > 0) {
            u64::MAX
        } else {
            self.throttled_clock_pelt_time
        };

        self.throttled_clock_pelt = throttled;
    }

    /// Record the entity's virtual lag on dequeue, clamped to a sane bound
    fn update_entity_lag(&mut self, se: &Arc<FairSchedEntity>) {
        let lag = self.avg_vruntime() as i64 - se.vruntime as i64;

        let limit = se.calculate_delta_fair((TICK_NESC as u64).max(2 * se.slice)) as i64;

        se.force_mut().vlag = lag.clamp(-limit, limit);
    }
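    // Example of the clamp above (editor's sketch, assuming a 1 ms tick and
    // the default 750_000 ns slice): for a nice-0 entity,
    //
    //     limit = max(1_000_000, 2 * 750_000) = 1_500_000 ns
    //
    // so a task can bank at most 1.5 ms of virtual lag in either direction
    // across a sleep, which bounds how far it can starve or be starved after
    // waking.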

    fn account_entity_enqueue(&mut self, se: &Arc<FairSchedEntity>) {
        self.load.update_load_add(se.load.weight);

        if se.is_task() {
            let rq = self.rq();
            let (rq, _guard) = rq.self_lock();
            // TODO: NUMA
            rq.cfs_tasks.push_back(se.clone());
        }
        self.nr_running += 1;
        if se.is_idle() {
            self.idle_nr_running += 1;
        }
    }

    fn account_entity_dequeue(&mut self, se: &Arc<FairSchedEntity>) {
        self.load.update_load_sub(se.load.weight);

        if se.is_task() {
            let rq = self.rq();
            let (rq, _guard) = rq.self_lock();

            // TODO: NUMA
            // Consume the iterator so the matching entries are actually
            // removed; `extract_if` is lazy.
            rq.cfs_tasks
                .extract_if(|x| Arc::ptr_eq(x, se))
                .for_each(drop);
        }

        self.nr_running -= 1;
        if se.is_idle() {
            self.idle_nr_running -= 1;
        }
    }

    pub fn inner_enqueue_entity(&mut self, se: &Arc<FairSchedEntity>) {
        self.avg_vruntime_add(se);
        se.force_mut().min_deadline = se.deadline;
        self.entities.insert(se.vruntime, se.clone());
    }

    fn inner_dequeue_entity(&mut self, se: &Arc<FairSchedEntity>) {
        // The tree is keyed by vruntime alone, so entities with identical
        // vruntime collide. Pop entries at this key until `se` surfaces,
        // nudging every other colliding entity's key forward by a nanosecond
        // before reinserting it.
        let mut i = 1;
        while let Some(rm) = self.entities.remove(&se.vruntime) {
            if Arc::ptr_eq(&rm, se) {
                break;
            }
            rm.force_mut().vruntime += i;
            self.entities.insert(rm.vruntime, rm);

            i += 1;
        }
        self.avg_vruntime_sub(se);
    }
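    // Editor's note on the collision workaround above: keying the tree by a
    // compound key would avoid perturbing other entities, e.g. (hypothetical
    // `enqueue_seq` counter bumped on every enqueue):
    //
    //     // entities: RBTree<(u64, u64), Arc<FairSchedEntity>>
    //     // self.entities.insert((se.vruntime, se.enqueue_seq), se.clone());
    //     // self.entities.remove(&(se.vruntime, se.enqueue_seq));
    //
    // at the cost of a slightly larger key; the +1 ns nudges used here are
    // harmless in practice but do reorder ties.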

    pub fn enqueue_load_avg(&mut self, se: Arc<FairSchedEntity>) {
        self.avg.load_avg += se.avg.load_avg;
        self.avg.load_sum += LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;
    }

    pub fn dequeue_load_avg(&mut self, se: &Arc<FairSchedEntity>) {
        if self.avg.load_avg > se.avg.load_avg {
            self.avg.load_avg -= se.avg.load_avg;
        } else {
            self.avg.load_avg = 0;
        };

        let se_load = LoadWeight::scale_load_down(se.load.weight) * se.avg.load_sum;

        if self.avg.load_sum > se_load {
            self.avg.load_sum -= se_load;
        } else {
            self.avg.load_sum = 0;
        }

        self.avg.load_sum = self
            .avg
            .load_sum
            .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
    }

    pub fn update_task_group_util(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
        let mut delta_sum = gcfs_rq.avg.util_avg as isize - se.avg.util_avg as isize;
        let delta_avg = delta_sum;

        if delta_avg == 0 {
            return;
        }

        let divider = self.avg.get_pelt_divider();

        let se = se.force_mut();
        se.avg.util_avg = gcfs_rq.avg.util_avg;
        let new_sum = se.avg.util_avg * divider;
        delta_sum = new_sum as isize - se.avg.util_sum as isize;

        se.avg.util_sum = new_sum as u64;

        // Apply the deltas through a local copy and write back, so the update
        // is not lost on a cast temporary.
        let mut util_avg = self.avg.util_avg as isize;
        add_positive(&mut util_avg, delta_avg);
        self.avg.util_avg = util_avg as usize;

        let mut util_sum = self.avg.util_sum as isize;
        add_positive(&mut util_sum, delta_sum);
        self.avg.util_sum = util_sum as u64;

        self.avg.util_sum = self
            .avg
            .util_sum
            .max((self.avg.util_avg * PELT_MIN_DIVIDER) as u64);
    }

    pub fn update_task_group_runnable(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &CfsRunQueue) {
        let mut delta_sum = gcfs_rq.avg.runnable_avg as isize - se.avg.runnable_avg as isize;
        let delta_avg = delta_sum;

        if delta_avg == 0 {
            return;
        }

        let divider = self.avg.get_pelt_divider();

        let se = se.force_mut();
        se.avg.runnable_avg = gcfs_rq.avg.runnable_avg;
        let new_sum = se.avg.runnable_avg as u64 * divider as u64;
        delta_sum = new_sum as isize - se.avg.runnable_sum as isize;

        se.avg.runnable_sum = new_sum;

        let mut runnable_avg = self.avg.runnable_avg as isize;
        add_positive(&mut runnable_avg, delta_avg);
        self.avg.runnable_avg = runnable_avg as usize;

        let mut runnable_sum = self.avg.runnable_sum as isize;
        add_positive(&mut runnable_sum, delta_sum);
        self.avg.runnable_sum = runnable_sum as u64;

        self.avg.runnable_sum = self
            .avg
            .runnable_sum
            .max((self.avg.runnable_avg * PELT_MIN_DIVIDER) as u64);
    }

    pub fn update_task_group_load(&mut self, se: Arc<FairSchedEntity>, gcfs_rq: &mut CfsRunQueue) {
        let mut runnable_sum = gcfs_rq.prop_runnable_sum;

        let mut load_sum = 0;

        if runnable_sum == 0 {
            return;
        }

        gcfs_rq.prop_runnable_sum = 0;

        let divider = self.avg.get_pelt_divider();

        if runnable_sum >= 0 {
            runnable_sum += se.avg.load_sum as isize;
            runnable_sum = runnable_sum.min(divider as isize);
        } else {
            if LoadWeight::scale_load_down(gcfs_rq.load.weight) > 0 {
                load_sum = gcfs_rq.avg.load_sum / LoadWeight::scale_load_down(gcfs_rq.load.weight);
            }

            runnable_sum = se.avg.load_sum.min(load_sum) as isize;
        }

        let running_sum = se.avg.util_sum as isize >> SCHED_CAPACITY_SHIFT;
        runnable_sum = runnable_sum.max(running_sum);

        load_sum = LoadWeight::scale_load_down(se.load.weight) * runnable_sum as u64;
        let load_avg = load_sum / divider as u64;

        let delta_avg = load_avg as isize - se.avg.load_avg as isize;
        if delta_avg == 0 {
            return;
        }

        let delta_sum = load_sum as isize
            - LoadWeight::scale_load_down(se.load.weight) as isize * se.avg.load_sum as isize;

        let se = se.force_mut();
        se.avg.load_sum = runnable_sum as u64;
        se.avg.load_avg = load_avg as usize;

        let mut cfs_load_avg = self.avg.load_avg as isize;
        add_positive(&mut cfs_load_avg, delta_avg);
        self.avg.load_avg = cfs_load_avg as usize;

        let mut cfs_load_sum = self.avg.load_sum as isize;
        add_positive(&mut cfs_load_sum, delta_sum);
        self.avg.load_sum = cfs_load_sum as u64;

        self.avg.load_sum = self
            .avg
            .load_sum
            .max((self.avg.load_avg * PELT_MIN_DIVIDER) as u64);
    }

    /// Pick the next entity to run
    pub fn pick_next_entity(&self) -> Option<Arc<FairSchedEntity>> {
        if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
            && self.next().is_some()
            && self.entity_eligible(&self.next().unwrap())
        {
            return self.next();
        }
        self.entities.get_first().map(|val| val.1.clone())
    }

    /// EEVDF eligibility: an entity may be picked only if its vruntime does
    /// not exceed the queue's average virtual time V
    pub fn entity_eligible(&self, se: &Arc<FairSchedEntity>) -> bool {
        let curr = self.current();
        let mut avg = self.avg_vruntime;
        let mut load = self.avg_load;

        if let Some(curr) = curr {
            if curr.on_rq() {
                let weight = LoadWeight::scale_load_down(curr.load.weight);

                avg += self.entity_key(&curr) * weight as i64;
                load += weight as i64;
            }
        }

        return avg >= self.entity_key(se) * load;
    }
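    // The comparison above is the division-free form of key(se) <= V:
    //
    //     V - min_vruntime = avg / load
    //     key(se) <= avg / load  <=>  key(se) * load <= avg   (load > 0)
    //
    // Reusing the example from avg_vruntime (avg = 6_144_000, load = 3_072,
    // so V sits 2_000 past min_vruntime): an entity at key 1_000 is eligible,
    // one at key 3_000 is not.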
}

impl Default for CfsRunQueue {
    fn default() -> Self {
        Self::new()
    }
}

pub struct CompletelyFairScheduler;

impl CompletelyFairScheduler {
    /// Walk `se` and `pse` up their hierarchies until they share a cfs_rq,
    /// i.e. find their nearest common ancestor level
    fn find_matching_se(se: &mut Arc<FairSchedEntity>, pse: &mut Arc<FairSchedEntity>) {
        let mut se_depth = se.depth;
        let mut pse_depth = pse.depth;

        while se_depth > pse_depth {
            se_depth -= 1;
            *se = se.parent().unwrap();
        }

        while pse_depth > se_depth {
            pse_depth -= 1;
            *pse = pse.parent().unwrap();
        }

        while !Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq()) {
            *se = se.parent().unwrap();
            *pse = pse.parent().unwrap();
        }
    }
}

impl Scheduler for CompletelyFairScheduler {
    fn enqueue(
        rq: &mut CpuRunQueue,
        pcb: Arc<crate::process::ProcessControlBlock>,
        mut flags: EnqueueFlag,
    ) {
        let mut se = pcb.sched_info().sched_entity();
        let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
        let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
            if se.on_rq() {
                return (false, true);
            }

            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();
            cfs_rq.enqueue_entity(&se, flags);

            cfs_rq.h_nr_running += 1;
            cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;

            if cfs_rq.is_idle() {
                idle_h_nr_running = true;
            }

            // TODO: cfs_rq_throttled

            flags = EnqueueFlag::ENQUEUE_WAKEUP;

            return (true, true);
        });

        if !should_continue {
            return;
        }

        if let Some(mut se) = se {
            FairSchedEntity::for_each_in_group(&mut se, |se| {
                let binding = se.cfs_rq();
                let cfs_rq = binding.force_mut();

                cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);

                let se = se.force_mut();
                se.update_runnable();

                se.update_cfs_group();

                cfs_rq.h_nr_running += 1;
                cfs_rq.idle_h_nr_running += idle_h_nr_running as u64;

                if cfs_rq.is_idle() {
                    idle_h_nr_running = true;
                }

                // TODO: cfs_rq_throttled

                return (true, true);
            });
        }

        rq.add_nr_running(1);
    }

    fn dequeue(
        rq: &mut CpuRunQueue,
        pcb: Arc<crate::process::ProcessControlBlock>,
        mut flags: DequeueFlag,
    ) {
        let mut se = pcb.sched_info().sched_entity();
        let mut idle_h_nr_running = pcb.sched_info().policy() == SchedPolicy::IDLE;
        let task_sleep = flags.contains(DequeueFlag::DEQUEUE_SLEEP);
        let was_sched_idle = rq.sched_idle_rq();

        let (should_continue, se) = FairSchedEntity::for_each_in_group(&mut se, |se| {
            let binding = se.cfs_rq();
            let cfs_rq = binding.force_mut();
            cfs_rq.dequeue_entity(&se, flags);

            cfs_rq.h_nr_running -= 1;
            cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;

            if cfs_rq.is_idle() {
                idle_h_nr_running = true;
            }

            // TODO: cfs_rq_throttled

            if cfs_rq.load.weight > 0 {
                let sep = se.parent();

                if task_sleep && sep.is_some() {
                    todo!()
                }
            }

            flags |= DequeueFlag::DEQUEUE_SLEEP;

            return (true, true);
        });

        if !should_continue {
            return;
        }

        if let Some(mut se) = se {
            FairSchedEntity::for_each_in_group(&mut se, |se| {
                let binding = se.cfs_rq();
                let cfs_rq = binding.force_mut();

                cfs_rq.update_load_avg(&se, UpdateAvgFlags::UPDATE_TG);

                let se = se.force_mut();
                se.update_runnable();

                se.update_cfs_group();

                cfs_rq.h_nr_running -= 1;
                cfs_rq.idle_h_nr_running -= idle_h_nr_running as u64;

                if cfs_rq.is_idle() {
                    idle_h_nr_running = true;
                }

                // TODO: cfs_rq_throttled

                return (true, true);
            });
        }

        rq.sub_nr_running(1);

        if unlikely(!was_sched_idle && rq.sched_idle_rq()) {
            rq.next_balance = clock();
        }
    }

    fn yield_task(rq: &mut CpuRunQueue) {
        let curr = rq.current();
        let se = curr.sched_info().sched_entity();
        let binding = se.cfs_rq();
        let cfs_rq = binding.force_mut();

        if unlikely(rq.nr_running == 1) {
            return;
        }

        cfs_rq.clear_buddies(&se);

        rq.update_rq_clock();

        cfs_rq.update_current();

        rq.clock_updata_flags |= ClockUpdataFlag::RQCF_REQ_SKIP;

        se.force_mut().deadline += se.calculate_delta_fair(se.slice);
    }

    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<crate::process::ProcessControlBlock>,
        wake_flags: WakeupFlags,
    ) {
        let curr = rq.current();
        let mut se = curr.sched_info().sched_entity();
        let mut pse = pcb.sched_info().sched_entity();

        if unlikely(Arc::ptr_eq(&se, &pse)) {
            return;
        }

        // TODO: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/fair.c#8160

        let _next_buddy_mark = if SCHED_FEATURES.contains(SchedFeature::NEXT_BUDDY)
            && !wake_flags.contains(WakeupFlags::WF_FORK)
        {
            FairSchedEntity::for_each_in_group(&mut pse, |se| {
                if !se.on_rq() {
                    return (false, true);
                }

                if se.is_idle() {
                    return (false, true);
                }

                se.cfs_rq().force_mut().next = Arc::downgrade(&se);

                return (true, true);
            });
            true
        } else {
            false
        };

        if curr.flags().contains(ProcessFlags::NEED_SCHEDULE) {
            return;
        }

        if unlikely(curr.sched_info().policy() == SchedPolicy::IDLE)
            && likely(pcb.sched_info().policy() != SchedPolicy::IDLE)
        {
            rq.resched_current();
            return;
        }

        if unlikely(pcb.sched_info().policy() != SchedPolicy::CFS)
            || !SCHED_FEATURES.contains(SchedFeature::WAKEUP_PREEMPTION)
        {
            return;
        }

        Self::find_matching_se(&mut se, &mut pse);

        let cse_is_idle = se.is_idle();
        let pse_is_idle = pse.is_idle();

        if cse_is_idle && !pse_is_idle {
            rq.resched_current();
            return;
        }

        if cse_is_idle != pse_is_idle {
            return;
        }

        let cfs_rq = se.cfs_rq();
        cfs_rq.force_mut().update_current();

        if let Some((_, pick_se)) = cfs_rq.entities.get_first() {
            if Arc::ptr_eq(pick_se, &pse) {
                rq.resched_current();
                return;
            }
        }
    }

    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<crate::process::ProcessControlBlock>> {
        let mut cfs_rq = Some(rq.cfs_rq());
        if cfs_rq.as_ref().unwrap().nr_running == 0 {
            return None;
        }

        let mut se;
        loop {
            let cfs = cfs_rq.unwrap();
            let cfs = cfs.force_mut();
            let curr = cfs.current();
            if let Some(curr) = curr {
                if curr.on_rq() {
                    cfs.update_current();
                } else {
                    cfs.set_current(Weak::default());
                }
            }

            se = cfs.pick_next_entity();
            match se.clone() {
                Some(val) => cfs_rq = val.my_cfs_rq.clone(),
                None => {
                    break;
                }
            }

            if cfs_rq.is_none() {
                break;
            }
        }

        se.map(|se| se.pcb())
    }

    fn tick(_rq: &mut CpuRunQueue, pcb: Arc<crate::process::ProcessControlBlock>, queued: bool) {
        let mut se = pcb.sched_info().sched_entity();

        FairSchedEntity::for_each_in_group(&mut se, |se| {
            let binding = se.clone();
            let binding = binding.cfs_rq();
            let cfs_rq = binding.force_mut();

            cfs_rq.entity_tick(se, queued);
            (true, true)
        });
    }

    fn task_fork(pcb: Arc<ProcessControlBlock>) {
        let rq = cpu_rq(smp_get_processor_id().data() as usize);
        let se = pcb.sched_info().sched_entity();

        let (rq, _guard) = rq.self_lock();

        rq.update_rq_clock();

        let binding = se.cfs_rq();
        let cfs_rq = binding.force_mut();

        if cfs_rq.current().is_some() {
            cfs_rq.update_current();
        }

        cfs_rq.place_entity(se.clone(), EnqueueFlag::ENQUEUE_INITIAL);
    }

    fn pick_next_task(
        rq: &mut CpuRunQueue,
        prev: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>> {
        let mut cfs_rq = rq.cfs_rq();
        if rq.nr_running == 0 {
            return None;
        }

        if prev.is_none()
            || (prev.is_some() && prev.as_ref().unwrap().sched_info().policy() != SchedPolicy::CFS)
        {
            if let Some(prev) = prev {
                match prev.sched_info().policy() {
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::CFS => todo!(),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(rq, prev),
                }
            }
            let mut se;
            loop {
                match cfs_rq.pick_next_entity() {
                    Some(s) => se = s,
                    None => return None,
                }

                cfs_rq.force_mut().set_next_entity(&se);

                match &se.my_cfs_rq {
                    Some(q) => cfs_rq = q.clone(),
                    None => break,
                }
            }

            return Some(se.pcb());
        }

        let prev = prev.unwrap();
        let se = cfs_rq.pick_next_entity();

        if let Some(mut se) = se {
            loop {
                let curr = cfs_rq.current();
                if let Some(current) = curr {
                    if current.on_rq() {
                        cfs_rq.force_mut().update_current()
                    } else {
                        cfs_rq.force_mut().set_current(Weak::default());
                    }
                }

                match cfs_rq.pick_next_entity() {
                    Some(e) => se = e,
                    None => break,
                }

                if let Some(q) = se.my_cfs_rq.clone() {
                    cfs_rq = q;
                } else {
                    break;
                }
            }

            let p = se.pcb();

            if !Arc::ptr_eq(&prev, &p) {
                let mut pse = prev.sched_info().sched_entity();
                while !(Arc::ptr_eq(&se.cfs_rq(), &pse.cfs_rq())
                    && Arc::ptr_eq(&se.cfs_rq(), &cfs_rq))
                {
                    let se_depth = se.depth;
                    let pse_depth = pse.depth;

                    if se_depth <= pse_depth {
                        pse.cfs_rq().force_mut().put_prev_entity(pse.clone());
                        pse = pse.parent().unwrap();
                    }

                    if se_depth >= pse_depth {
                        se.cfs_rq().force_mut().set_next_entity(&se);
                        se = se.parent().unwrap();
                    }
                }

                cfs_rq.force_mut().put_prev_entity(pse);
                cfs_rq.force_mut().set_next_entity(&se);
            }

            return Some(p);
        } else {
            return None;
        }
    }

    fn put_prev_task(_rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>) {
        let mut se = prev.sched_info().sched_entity();

        FairSchedEntity::for_each_in_group(&mut se, |se| {
            let cfs = se.cfs_rq();
            cfs.force_mut().put_prev_entity(se);

            return (true, true);
        });
    }
}
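// Editor's illustrative examples. These mirror the EEVDF formulas used above
// with plain integer arithmetic; they assume the conventional nice-0 weight
// scale of 1024, touch no scheduler state, and only run under the test
// harness, not in the kernel itself.
#[cfg(test)]
mod eevdf_math_examples {
    /// Virtual time accrues inversely to weight:
    /// delta_fair = delta * NICE_0 / weight.
    #[test]
    fn delta_fair_scaling() {
        const NICE_0: u64 = 1024; // assumed nice-0 weight
        let delta = 2_000_000u64; // 2 ms of wall-clock time
        assert_eq!(delta * NICE_0 / 1024, 2_000_000); // nice-0: 1:1
        assert_eq!(delta * NICE_0 / 2048, 1_000_000); // double weight: half rate
    }

    /// Eligibility compares against the weighted average V without dividing:
    /// an entity is eligible iff sum(w_i * key_i) >= key(se) * sum(w_i).
    #[test]
    fn eligibility_cross_multiplication() {
        // Two queued entities as (key, weight), keys relative to min_vruntime.
        let queued = [(-2_000i64, 1024i64), (4_000, 2048)];
        let avg: i64 = queued.iter().map(|(k, w)| k * w).sum();
        let load: i64 = queued.iter().map(|(_, w)| w).sum();
        assert_eq!(avg / load, 2_000); // V sits 2_000 ns past min_vruntime

        assert!(avg >= 1_000 * load); // key 1_000 <= V: eligible
        assert!(avg < 3_000 * load); // key 3_000 > V: not eligible
    }
}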