xref: /DragonOS/kernel/src/smp/cpu/mod.rs (revision 03746da3d9f3ea616cecdb6e581414002075f866)
1 use core::sync::atomic::AtomicU32;
2 
3 use alloc::{sync::Arc, vec::Vec};
4 use log::{debug, error, info};
5 use system_error::SystemError;
6 
7 use crate::{
8     arch::CurrentSMPArch,
9     libs::cpumask::CpuMask,
10     mm::percpu::{PerCpu, PerCpuVar},
11     process::{ProcessControlBlock, ProcessManager},
12     sched::completion::Completion,
13 };
14 
15 use super::{core::smp_get_processor_id, SMPArch};
16 
17 int_like!(ProcessorId, AtomicProcessorId, u32, AtomicU32);
18 
19 impl ProcessorId {
20     pub const INVALID: ProcessorId = ProcessorId::new(u32::MAX);
21 }
22 
23 static mut SMP_CPU_MANAGER: Option<SmpCpuManager> = None;
24 
25 #[inline]
26 pub fn smp_cpu_manager() -> &'static SmpCpuManager {
27     unsafe { SMP_CPU_MANAGER.as_ref().unwrap() }
28 }
29 
/// Hotplug state of a CPU.
///
/// The variants are ordered (`ThresholdBringUp < Offline < Online`) and the
/// hotplug code in this module compares states with `<` / `>=`, so the
/// declaration order is significant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum CpuHpState {
    /// Bring-up threshold.
    ThresholdBringUp = 0,

    /// The CPU is offline.
    Offline = 1,

    /// The CPU is online.
    Online = 2,
}
41 
42 /// Per-Cpu Cpu的热插拔状态
43 pub struct CpuHpCpuState {
44     /// 当前状态
45     state: CpuHpState,
46     /// 目标状态
47     target_state: CpuHpState,
48     /// 指向热插拔的线程的PCB
49     thread: Option<Arc<ProcessControlBlock>>,
50 
51     /// 当前是否为启动流程
52     bringup: bool,
53     /// 启动完成的信号
54     comp_done_up: Completion,
55 }
56 
57 impl CpuHpCpuState {
58     const fn new() -> Self {
59         Self {
60             state: CpuHpState::Offline,
61             target_state: CpuHpState::Offline,
62             thread: None,
63             bringup: false,
64             comp_done_up: Completion::new(),
65         }
66     }
67 
68     #[allow(dead_code)]
69     pub const fn thread(&self) -> &Option<Arc<ProcessControlBlock>> {
70         &self.thread
71     }
72 }
73 
74 pub struct SmpCpuManager {
75     /// 可用的CPU
76     possible_cpus: CpuMask,
77     /// 出现的CPU
78     present_cpus: CpuMask,
79     /// 出现在系统中的CPU的数量
80     present_cnt: AtomicU32,
81     /// 可用的CPU的数量
82     possible_cnt: AtomicU32,
83     /// CPU的状态
84     cpuhp_state: PerCpuVar<CpuHpCpuState>,
85 }
86 
87 impl SmpCpuManager {
88     fn new() -> Self {
89         let possible_cpus = CpuMask::new();
90         let present_cpus = CpuMask::new();
91         let mut data = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize);
92         for i in 0..PerCpu::MAX_CPU_NUM {
93             let mut hpstate = CpuHpCpuState::new();
94             hpstate.thread = Some(ProcessManager::idle_pcb()[i as usize].clone());
95             data.push(hpstate);
96         }
97         let cpuhp_state = PerCpuVar::new(data).unwrap();
98 
99         Self {
100             possible_cpus,
101             present_cpus,
102             cpuhp_state,
103             present_cnt: AtomicU32::new(0),
104             possible_cnt: AtomicU32::new(0),
105         }
106     }
107 
108     /// 设置可用的CPU
109     ///
110     /// # Safety
111     ///
112     /// - 该函数不会检查CPU的有效性,调用者需要保证CPU的有效性。
113     /// - 由于possible_cpus是一个全局变量,且为了性能考虑,并不会加锁
114     ///     访问,因此该函数只能在初始化阶段调用。
115     pub unsafe fn set_possible_cpu(&self, cpu: ProcessorId, value: bool) {
116         // 强制获取mut引用,因为该函数只能在初始化阶段调用
117         let p = (self as *const Self as *mut Self).as_mut().unwrap();
118 
119         if let Some(prev) = p.possible_cpus.set(cpu, value) {
120             if prev != value {
121                 if value {
122                     p.possible_cnt
123                         .fetch_add(1, core::sync::atomic::Ordering::SeqCst);
124                 } else {
125                     p.possible_cnt
126                         .fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
127                 }
128             }
129         }
130     }
131 
132     /// 获取可用的CPU
133     pub fn possible_cpus(&self) -> &CpuMask {
134         &self.possible_cpus
135     }
136 
137     #[allow(dead_code)]
138     pub fn possible_cpus_count(&self) -> u32 {
139         self.possible_cnt.load(core::sync::atomic::Ordering::SeqCst)
140     }
141 
142     pub fn present_cpus_count(&self) -> u32 {
143         self.present_cnt.load(core::sync::atomic::Ordering::SeqCst)
144     }
145 
146     pub unsafe fn set_present_cpu(&self, cpu: ProcessorId, value: bool) {
147         // 强制获取mut引用,因为该函数只能在初始化阶段调用
148         let p = (self as *const Self as *mut Self).as_mut().unwrap();
149 
150         if let Some(prev) = p.present_cpus.set(cpu, value) {
151             if prev != value {
152                 if value {
153                     p.present_cnt
154                         .fetch_add(1, core::sync::atomic::Ordering::SeqCst);
155                 } else {
156                     p.present_cnt
157                         .fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
158                 }
159             }
160         }
161     }
162 
163     /// 获取CPU的状态
164     pub fn cpuhp_state(&self, cpu_id: ProcessorId) -> &CpuHpCpuState {
165         unsafe { self.cpuhp_state.force_get(cpu_id) }
166     }
167 
168     #[allow(clippy::mut_from_ref)]
169     fn cpuhp_state_mut(&self, cpu_id: ProcessorId) -> &mut CpuHpCpuState {
170         unsafe { self.cpuhp_state.force_get_mut(cpu_id) }
171     }
172 
173     /// 设置CPU的状态, 返回旧的状态
174     pub unsafe fn set_cpuhp_state(
175         &self,
176         cpu_id: ProcessorId,
177         target_state: CpuHpState,
178     ) -> CpuHpState {
179         let p = self.cpuhp_state.force_get_mut(cpu_id);
180         let old_state = p.state;
181 
182         let bringup = target_state > p.state;
183         p.target_state = target_state;
184         p.bringup = bringup;
185 
186         return old_state;
187     }
188 
189     pub fn set_online_cpu(&self, cpu_id: ProcessorId) {
190         unsafe { self.set_cpuhp_state(cpu_id, CpuHpState::Online) };
191     }
192 
193     /// 获取出现在系统中的CPU
194     #[allow(dead_code)]
195     pub fn present_cpus(&self) -> &CpuMask {
196         &self.present_cpus
197     }
198 
199     /// 启动bsp以外的CPU
200     pub(super) fn bringup_nonboot_cpus(&self) {
201         for cpu_id in self.present_cpus().iter_cpu() {
202             if cpu_id == smp_get_processor_id() {
203                 continue;
204             }
205 
206             debug!("Bring up CPU {}", cpu_id.data());
207 
208             if let Err(e) = self.cpu_up(cpu_id, CpuHpState::Online) {
209                 error!("Failed to bring up CPU {}: {:?}", cpu_id.data(), e);
210             }
211         }
212 
213         info!("All non-boot CPUs have been brought up");
214     }
215 
216     fn cpu_up(&self, cpu_id: ProcessorId, target_state: CpuHpState) -> Result<(), SystemError> {
217         if !self.possible_cpus().get(cpu_id).unwrap_or(false) {
218             return Err(SystemError::EINVAL);
219         }
220 
221         let cpu_state = self.cpuhp_state(cpu_id).state;
222         debug!(
223             "cpu_up: cpu_id: {}, cpu_state: {:?}, target_state: {:?}",
224             cpu_id.data(),
225             cpu_state,
226             target_state
227         );
228         // 如果CPU的状态已经达到或者超过目标状态,则直接返回
229         if cpu_state >= target_state {
230             return Ok(());
231         }
232 
233         unsafe { self.set_cpuhp_state(cpu_id, target_state) };
234         let cpu_state = self.cpuhp_state(cpu_id).state;
235         if cpu_state > CpuHpState::ThresholdBringUp {
236             self.cpuhp_kick_ap(cpu_id, target_state)?;
237         }
238 
239         return Ok(());
240     }
241 
242     fn cpuhp_kick_ap(
243         &self,
244         cpu_id: ProcessorId,
245         target_state: CpuHpState,
246     ) -> Result<(), SystemError> {
247         let prev_state = unsafe { self.set_cpuhp_state(cpu_id, target_state) };
248         let hpstate = self.cpuhp_state_mut(cpu_id);
249         if let Err(e) = self.do_cpuhp_kick_ap(hpstate) {
250             self.cpuhp_reset_state(hpstate, prev_state);
251             self.do_cpuhp_kick_ap(hpstate).ok();
252 
253             return Err(e);
254         }
255 
256         return Ok(());
257     }
258 
259     fn do_cpuhp_kick_ap(&self, cpu_state: &mut CpuHpCpuState) -> Result<(), SystemError> {
260         let pcb = cpu_state.thread.as_ref().ok_or(SystemError::EINVAL)?;
261         let cpu_id = pcb.sched_info().on_cpu().ok_or(SystemError::EINVAL)?;
262 
263         // todo: 等待CPU启动完成
264 
265         ProcessManager::wakeup(cpu_state.thread.as_ref().unwrap())?;
266 
267         CurrentSMPArch::start_cpu(cpu_id, cpu_state)?;
268         assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
269         self.wait_for_ap_thread(cpu_state, cpu_state.bringup);
270 
271         return Ok(());
272     }
273 
274     fn wait_for_ap_thread(&self, cpu_state: &mut CpuHpCpuState, bringup: bool) {
275         if bringup {
276             cpu_state
277                 .comp_done_up
278                 .wait_for_completion()
279                 .expect("failed to wait ap thread");
280         } else {
281             todo!("wait_for_ap_thread")
282         }
283     }
284 
285     /// 完成AP的启动
286     pub fn complete_ap_thread(&self, bringup: bool) {
287         let cpu_id = smp_get_processor_id();
288         let cpu_state = self.cpuhp_state_mut(cpu_id);
289         if bringup {
290             cpu_state.comp_done_up.complete();
291         } else {
292             todo!("complete_ap_thread")
293         }
294     }
295 
296     fn cpuhp_reset_state(&self, st: &mut CpuHpCpuState, prev_state: CpuHpState) {
297         let bringup = !st.bringup;
298         st.target_state = prev_state;
299 
300         st.bringup = bringup;
301     }
302 }
303 
304 pub fn smp_cpu_manager_init(boot_cpu: ProcessorId) {
305     unsafe {
306         SMP_CPU_MANAGER = Some(SmpCpuManager::new());
307     }
308 
309     unsafe { smp_cpu_manager().set_possible_cpu(boot_cpu, true) };
310     unsafe { smp_cpu_manager().set_present_cpu(boot_cpu, true) };
311 
312     SmpCpuManager::arch_init(boot_cpu);
313 }
314