xref: /DragonOS/kernel/src/smp/cpu/mod.rs (revision 7c28051e8c601312d3d0fd7bcb71bc71450d10c0)
1 use core::sync::atomic::AtomicU32;
2 
3 use alloc::{sync::Arc, vec::Vec};
4 use log::{debug, error, info};
5 use system_error::SystemError;
6 
7 use crate::{
8     arch::CurrentSMPArch,
9     libs::cpumask::CpuMask,
10     mm::percpu::{PerCpu, PerCpuVar},
11     process::{ProcessControlBlock, ProcessManager},
12     sched::completion::Completion,
13 };
14 
15 use super::{core::smp_get_processor_id, SMPArch};
16 
17 int_like!(ProcessorId, AtomicProcessorId, u32, AtomicU32);
18 
19 impl ProcessorId {
20     pub const INVALID: ProcessorId = ProcessorId::new(u32::MAX);
21 }
22 
23 static mut SMP_CPU_MANAGER: Option<SmpCpuManager> = None;
24 
25 #[inline]
26 pub fn smp_cpu_manager() -> &'static SmpCpuManager {
27     unsafe { SMP_CPU_MANAGER.as_ref().unwrap() }
28 }
29 
/// Hotplug state of a CPU.
///
/// The derived ordering follows declaration order, so
/// `ThresholdBringUp < Offline < Online`; the hotplug code compares states
/// with `<` / `>=` and relies on that ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum CpuHpState {
    /// Bring-up threshold.
    ThresholdBringUp = 0,

    /// The CPU is offline.
    Offline = 1,

    /// The CPU is online.
    Online = 2,
}
41 
42 /// Per-Cpu Cpu的热插拔状态
43 pub struct CpuHpCpuState {
44     /// 当前状态
45     state: CpuHpState,
46     /// 目标状态
47     target_state: CpuHpState,
48     /// 指向热插拔的线程的PCB
49     thread: Option<Arc<ProcessControlBlock>>,
50 
51     /// 当前是否为启动流程
52     bringup: bool,
53     /// 启动完成的信号
54     comp_done_up: Completion,
55 }
56 
57 impl CpuHpCpuState {
58     const fn new() -> Self {
59         Self {
60             state: CpuHpState::Offline,
61             target_state: CpuHpState::Offline,
62             thread: None,
63             bringup: false,
64             comp_done_up: Completion::new(),
65         }
66     }
67 
68     #[allow(dead_code)]
69     pub const fn thread(&self) -> &Option<Arc<ProcessControlBlock>> {
70         &self.thread
71     }
72 }
73 
74 pub struct SmpCpuManager {
75     /// 可用的CPU
76     possible_cpus: CpuMask,
77     /// 出现的CPU
78     present_cpus: CpuMask,
79     /// 出现在系统中的CPU的数量
80     present_cnt: AtomicU32,
81     /// 可用的CPU的数量
82     possible_cnt: AtomicU32,
83     /// CPU的状态
84     cpuhp_state: PerCpuVar<CpuHpCpuState>,
85 }
86 
87 impl SmpCpuManager {
88     fn new() -> Self {
89         let possible_cpus = CpuMask::new();
90         let present_cpus = CpuMask::new();
91         let mut data = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize);
92         for i in 0..PerCpu::MAX_CPU_NUM {
93             let mut hpstate = CpuHpCpuState::new();
94             hpstate.thread = Some(ProcessManager::idle_pcb()[i as usize].clone());
95             data.push(hpstate);
96         }
97         let cpuhp_state = PerCpuVar::new(data).unwrap();
98 
99         Self {
100             possible_cpus,
101             present_cpus,
102             cpuhp_state,
103             present_cnt: AtomicU32::new(0),
104             possible_cnt: AtomicU32::new(0),
105         }
106     }
107 
108     /// 设置可用的CPU
109     ///
110     /// # Safety
111     ///
112     /// - 该函数不会检查CPU的有效性,调用者需要保证CPU的有效性。
113     /// - 由于possible_cpus是一个全局变量,且为了性能考虑,并不会加锁
114     ///     访问,因此该函数只能在初始化阶段调用。
115     pub unsafe fn set_possible_cpu(&self, cpu: ProcessorId, value: bool) {
116         // 强制获取mut引用,因为该函数只能在初始化阶段调用
117         let p = (self as *const Self as *mut Self).as_mut().unwrap();
118 
119         if let Some(prev) = p.possible_cpus.set(cpu, value) {
120             if prev != value {
121                 if value {
122                     p.possible_cnt
123                         .fetch_add(1, core::sync::atomic::Ordering::SeqCst);
124                 } else {
125                     p.possible_cnt
126                         .fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
127                 }
128             }
129         }
130     }
131 
132     /// 获取可用的CPU
133     pub fn possible_cpus(&self) -> &CpuMask {
134         &self.possible_cpus
135     }
136 
137     pub fn possible_cpus_count(&self) -> u32 {
138         self.possible_cnt.load(core::sync::atomic::Ordering::SeqCst)
139     }
140 
141     pub fn present_cpus_count(&self) -> u32 {
142         self.present_cnt.load(core::sync::atomic::Ordering::SeqCst)
143     }
144 
145     pub unsafe fn set_present_cpu(&self, cpu: ProcessorId, value: bool) {
146         // 强制获取mut引用,因为该函数只能在初始化阶段调用
147         let p = (self as *const Self as *mut Self).as_mut().unwrap();
148 
149         if let Some(prev) = p.present_cpus.set(cpu, value) {
150             if prev != value {
151                 if value {
152                     p.present_cnt
153                         .fetch_add(1, core::sync::atomic::Ordering::SeqCst);
154                 } else {
155                     p.present_cnt
156                         .fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
157                 }
158             }
159         }
160     }
161 
162     /// 获取CPU的状态
163     pub fn cpuhp_state(&self, cpu_id: ProcessorId) -> &CpuHpCpuState {
164         unsafe { self.cpuhp_state.force_get(cpu_id) }
165     }
166 
167     #[allow(clippy::mut_from_ref)]
168     fn cpuhp_state_mut(&self, cpu_id: ProcessorId) -> &mut CpuHpCpuState {
169         unsafe { self.cpuhp_state.force_get_mut(cpu_id) }
170     }
171 
172     /// 设置CPU的状态, 返回旧的状态
173     pub unsafe fn set_cpuhp_state(
174         &self,
175         cpu_id: ProcessorId,
176         target_state: CpuHpState,
177     ) -> CpuHpState {
178         let p = self.cpuhp_state.force_get_mut(cpu_id);
179         let old_state = p.state;
180 
181         let bringup = target_state > p.state;
182         p.target_state = target_state;
183         p.bringup = bringup;
184 
185         return old_state;
186     }
187 
188     pub fn set_online_cpu(&self, cpu_id: ProcessorId) {
189         unsafe { self.set_cpuhp_state(cpu_id, CpuHpState::Online) };
190     }
191 
192     /// 获取出现在系统中的CPU
193     #[allow(dead_code)]
194     pub fn present_cpus(&self) -> &CpuMask {
195         &self.present_cpus
196     }
197 
198     /// 启动bsp以外的CPU
199     pub(super) fn bringup_nonboot_cpus(&self) {
200         for cpu_id in self.present_cpus().iter_cpu() {
201             if cpu_id == smp_get_processor_id() {
202                 continue;
203             }
204 
205             debug!("Bring up CPU {}", cpu_id.data());
206 
207             if let Err(e) = self.cpu_up(cpu_id, CpuHpState::Online) {
208                 error!("Failed to bring up CPU {}: {:?}", cpu_id.data(), e);
209             }
210         }
211 
212         info!("All non-boot CPUs have been brought up");
213     }
214 
215     fn cpu_up(&self, cpu_id: ProcessorId, target_state: CpuHpState) -> Result<(), SystemError> {
216         if !self.possible_cpus().get(cpu_id).unwrap_or(false) {
217             return Err(SystemError::EINVAL);
218         }
219 
220         let cpu_state = self.cpuhp_state(cpu_id).state;
221         debug!(
222             "cpu_up: cpu_id: {}, cpu_state: {:?}, target_state: {:?}",
223             cpu_id.data(),
224             cpu_state,
225             target_state
226         );
227         // 如果CPU的状态已经达到或者超过目标状态,则直接返回
228         if cpu_state >= target_state {
229             return Ok(());
230         }
231 
232         unsafe { self.set_cpuhp_state(cpu_id, target_state) };
233         let cpu_state = self.cpuhp_state(cpu_id).state;
234         if cpu_state > CpuHpState::ThresholdBringUp {
235             self.cpuhp_kick_ap(cpu_id, target_state)?;
236         }
237 
238         return Ok(());
239     }
240 
241     fn cpuhp_kick_ap(
242         &self,
243         cpu_id: ProcessorId,
244         target_state: CpuHpState,
245     ) -> Result<(), SystemError> {
246         let prev_state = unsafe { self.set_cpuhp_state(cpu_id, target_state) };
247         let hpstate = self.cpuhp_state_mut(cpu_id);
248         if let Err(e) = self.do_cpuhp_kick_ap(hpstate) {
249             self.cpuhp_reset_state(hpstate, prev_state);
250             self.do_cpuhp_kick_ap(hpstate).ok();
251 
252             return Err(e);
253         }
254 
255         return Ok(());
256     }
257 
258     fn do_cpuhp_kick_ap(&self, cpu_state: &mut CpuHpCpuState) -> Result<(), SystemError> {
259         let pcb = cpu_state.thread.as_ref().ok_or(SystemError::EINVAL)?;
260         let cpu_id = pcb.sched_info().on_cpu().ok_or(SystemError::EINVAL)?;
261 
262         // todo: 等待CPU启动完成
263 
264         ProcessManager::wakeup(cpu_state.thread.as_ref().unwrap())?;
265 
266         CurrentSMPArch::start_cpu(cpu_id, cpu_state)?;
267         assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
268         self.wait_for_ap_thread(cpu_state, cpu_state.bringup);
269 
270         return Ok(());
271     }
272 
273     fn wait_for_ap_thread(&self, cpu_state: &mut CpuHpCpuState, bringup: bool) {
274         if bringup {
275             cpu_state
276                 .comp_done_up
277                 .wait_for_completion()
278                 .expect("failed to wait ap thread");
279         } else {
280             todo!("wait_for_ap_thread")
281         }
282     }
283 
284     /// 完成AP的启动
285     pub fn complete_ap_thread(&self, bringup: bool) {
286         let cpu_id = smp_get_processor_id();
287         let cpu_state = self.cpuhp_state_mut(cpu_id);
288         if bringup {
289             cpu_state.comp_done_up.complete();
290         } else {
291             todo!("complete_ap_thread")
292         }
293     }
294 
295     fn cpuhp_reset_state(&self, st: &mut CpuHpCpuState, prev_state: CpuHpState) {
296         let bringup = !st.bringup;
297         st.target_state = prev_state;
298 
299         st.bringup = bringup;
300     }
301 }
302 
303 pub fn smp_cpu_manager_init(boot_cpu: ProcessorId) {
304     unsafe {
305         SMP_CPU_MANAGER = Some(SmpCpuManager::new());
306     }
307 
308     unsafe { smp_cpu_manager().set_possible_cpu(boot_cpu, true) };
309     unsafe { smp_cpu_manager().set_present_cpu(boot_cpu, true) };
310 
311     SmpCpuManager::arch_init(boot_cpu);
312 }
313