use super::{PerfEventOps, Result};
use crate::arch::mm::LockedFrameAllocator;
use crate::arch::MMArch;
use crate::filesystem::vfs::file::PageCache;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::include::bindings::linux_bpf::{
    perf_event_header, perf_event_mmap_page, perf_event_type,
};
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
use crate::mm::page::{page_manager_lock_irqsave, Page};
use crate::mm::{MemoryManagementArch, PhysAddr};
use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use core::mem::size_of;
use system_error::SystemError;

const PAGE_SIZE: usize = MMArch::PAGE_SIZE;

#[derive(Debug)]
pub struct BpfPerfEvent {
    _args: PerfProbeArgs,
    data: SpinLock<BpfPerfEventData>,
}

#[derive(Debug)]
pub struct BpfPerfEventData {
    enabled: bool,
    mmap_page: RingPage,
    page_cache: Arc<PageCache>,
    offset: usize,
}

/// The memory region shared with userspace: one metadata page
/// (`perf_event_mmap_page`) followed by a data region used as a byte
/// ring buffer.
#[derive(Debug)]
pub struct RingPage {
    size: usize,
    ptr: usize,
    data_region_size: usize,
    lost: usize,
    phys_addr: PhysAddr,
}
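// An added compile-time sanity check, illustrating the layout invariant that
// `RingPage::init` relies on: the mmap metadata header must fit entirely
// within the first page.
const _: () = assert!(size_of::<perf_event_mmap_page>() <= PAGE_SIZE);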
impl RingPage {
    pub fn empty() -> Self {
        RingPage {
            ptr: 0,
            size: 0,
            data_region_size: 0,
            lost: 0,
            phys_addr: PhysAddr::new(0),
        }
    }

    pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
        Self::init(start as _, len, phys_addr)
    }

    fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
        assert_eq!(size % PAGE_SIZE, 0);
        assert!(size / PAGE_SIZE >= 2);
        // The first page is filled with the perf_event_mmap_page metadata;
        // the remaining pages form the ring buffer's data region.
        unsafe {
            let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
            perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
            perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
            // Userspace reads sample/lost records starting at data_tail.
            perf_event_mmap_page.data_tail = 0;
            // The kernel writes sample/lost records starting at data_head.
            perf_event_mmap_page.data_head = 0;
        }
        RingPage {
            ptr: ptr as usize,
            size,
            data_region_size: size - PAGE_SIZE,
            lost: 0,
            phys_addr,
        }
    }

    fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
        if (data_head + 1) % self.data_region_size == data_tail {
            // The buffer is full.
            return false;
        }
        let capacity = if data_head >= data_tail {
            self.data_region_size - data_head + data_tail
        } else {
            data_tail - data_head
        };
        // Keep at least one byte free so that data_head == data_tail always
        // means "empty" rather than "full".
        data_size < capacity
    }
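    // Worked example with illustrative numbers: for data_region_size = 4096,
    // data_tail = 100 and data_head = 4000 leave 4096 - 4000 + 100 = 196 free
    // bytes, so a 256-byte sample does not fit and is counted as lost; once
    // the head has wrapped (data_tail = 4000, data_head = 100) the free span
    // is the contiguous gap of 4000 - 100 = 3900 bytes.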
    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
        let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
        // data_head..data_tail (modulo the region size) is the writable span;
        // check whether there is enough space for the whole sample.
        let sample_size = PerfSample::calculate_size(data.len());

        let can_write_sample =
            self.can_write(sample_size, *data_tail as usize, *data_head as usize);
        if !can_write_sample {
            // Not enough space: account for the event as a lost record.
            self.lost += 1;
            Ok(())
        } else {
            // There is room for the sample. If any records were lost before,
            // write a lost record first so userspace learns about the gap.
            let can_write_lost_record = self.can_write(
                size_of::<LostSamples>(),
                *data_tail as usize,
                *data_head as usize,
            );
            if self.lost > 0 && can_write_lost_record {
                let new_data_head = self.write_lost(*data_head as usize)?;
                *data_head = new_data_head as u64;
                self.lost = 0;
                self.write_event(data)
            } else {
                let new_data_head = self.write_sample(data, *data_head as usize)?;
                *data_head = new_data_head as u64;
                Ok(())
            }
        }
    }

    /// Write raw bytes into the data region at `data_head`, wrapping around
    /// the end of the region if necessary.
    ///
    /// Return the new data_head.
    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let data_region_len = self.data_region_size;
        let data_region = &mut self.as_mut_slice()[PAGE_SIZE..];
        let data_len = data.len();
        let end = (data_head + data_len) % data_region_len;
        let start = data_head;
        if start < end {
            data_region[start..end].copy_from_slice(data);
        } else {
            // The write wraps: split it at the end of the region.
            let first_len = data_region_len - start;
            data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
            data_region[0..end].copy_from_slice(&data[first_len..]);
        }
        Ok(end)
    }

    /// Write a sample record (header followed by payload) to the page.
    ///
    /// Return the new data_head.
    fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let perf_sample = PerfSample {
            s_hdr: SampleHeader {
                header: perf_event_header {
                    type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: size_of::<SampleHeader>() as u16 + data.len() as u16,
                },
                size: data.len() as u32,
            },
            value: data,
        };
        let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
        self.write_any(perf_sample.value, new_head)
    }
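    // On-ring layout of one sample record, as written by `write_sample` for a
    // payload of n bytes (field names from the structs above):
    //
    //     perf_event_header { type_: PERF_RECORD_SAMPLE, misc: 0,
    //                         size: size_of::<SampleHeader>() + n }
    //     size:  n as u32
    //     value: n payload bytes
    //
    // Userspace walks the data region record by record using `header.size`;
    // the record only becomes visible once data_head has advanced past it.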
    /// Write a lost record to the page.
    ///
    /// Return the new data_head.
    fn write_lost(&mut self, data_head: usize) -> Result<usize> {
        let lost = LostSamples {
            header: perf_event_header {
                type_: perf_event_type::PERF_RECORD_LOST as u32,
                misc: 0,
                size: size_of::<LostSamples>() as u16,
            },
            id: 0,
            count: self.lost as u64,
        };
        self.write_any(lost.as_bytes(), data_head)
    }

    /// Whether there is unread data in the ring buffer.
    pub fn readable(&self) -> bool {
        if self.ptr == 0 {
            // The ring has not been mapped yet.
            return false;
        }
        let data_tail = unsafe { (*(self.ptr as *const perf_event_mmap_page)).data_tail };
        let data_head = unsafe { (*(self.ptr as *const perf_event_mmap_page)).data_head };
        data_tail != data_head
    }

    pub fn as_slice(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
    }

    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
    }
}
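// A minimal sketch of the ring arithmetic, gated behind cfg(test). It assumes
// the crate's unit tests can be built with `alloc` available; it uses a dummy
// physical address and touches no real page frames.
#[cfg(test)]
mod ring_page_tests {
    use super::*;

    #[test]
    fn can_write_tracks_free_space() {
        // Back the ring with a u64 vector so the buffer is suitably aligned
        // for the perf_event_mmap_page header that `init` writes into page 0.
        let mut buf: Vec<u64> = Vec::new();
        buf.resize(2 * PAGE_SIZE / size_of::<u64>(), 0);
        let ring = RingPage::init(buf.as_mut_ptr() as *mut u8, 2 * PAGE_SIZE, PhysAddr::new(0));
        assert_eq!(ring.data_region_size, PAGE_SIZE);
        // An empty ring accepts anything strictly smaller than the region.
        assert!(ring.can_write(PAGE_SIZE - 1, 0, 0));
        // Filling the region exactly would make head wrap onto tail.
        assert!(!ring.can_write(PAGE_SIZE, 0, 0));
        // data_head + 1 == data_tail marks the ring as full.
        assert!(!ring.can_write(1, 10, 9));
    }
}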
impl BpfPerfEvent {
    pub fn new(args: PerfProbeArgs) -> Self {
        BpfPerfEvent {
            _args: args,
            data: SpinLock::new(BpfPerfEventData {
                enabled: false,
                mmap_page: RingPage::empty(),
                page_cache: PageCache::new(None),
                offset: 0,
            }),
        }
    }

    pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
        let mut data = self.data.lock();
        // Allocate the physical frames backing the mapping.
        let (phy_addr, page_count) =
            unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
                .ok_or(SystemError::ENOSPC)?;
        // Register every frame with the page manager and the page cache so
        // that it can be mapped into userspace.
        let mut page_manager_guard = page_manager_lock_irqsave();
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for i in 0..page_count.data() {
            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
            let paddr = cur_phys.phys_address();
            page_manager_guard.insert(paddr, &page);
            data.page_cache.add_page(i, &page);
            cur_phys = cur_phys.next();
        }
        let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
        // Initialize the ring buffer in the freshly allocated region.
        data.mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
        data.offset = offset;
        Ok(())
    }

    pub fn write_event(&self, data: &[u8]) -> Result<()> {
        let mut inner_data = self.data.lock();
        inner_data.mmap_page.write_event(data)?;
        Ok(())
    }
}

impl Drop for BpfPerfEvent {
    fn drop(&mut self) {
        // Unregister the frames that do_mmap inserted into the page manager.
        let mut page_manager_guard = page_manager_lock_irqsave();
        let data = self.data.lock();
        let phy_addr = data.mmap_page.phys_addr;
        let len = data.mmap_page.size;
        let page_count = PageFrameCount::new(len / PAGE_SIZE);
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for _ in 0..page_count.data() {
            page_manager_guard.remove_page(&cur_phys.phys_address());
            cur_phys = cur_phys.next();
        }
    }
}

impl IndexNode for BpfPerfEvent {
    fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
        self.do_mmap(start, len, offset)
    }

    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support read")
    }

    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support write")
    }

    fn fs(&self) -> Arc<dyn FileSystem> {
        panic!("PerfEventInode does not have a filesystem")
    }

    fn as_any_ref(&self) -> &dyn Any {
        self
    }

    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }

    fn page_cache(&self) -> Option<Arc<PageCache>> {
        Some(self.data.lock().page_cache.clone())
    }
}

impl PerfEventOps for BpfPerfEvent {
    fn enable(&self) -> Result<()> {
        self.data.lock().enabled = true;
        Ok(())
    }

    fn disable(&self) -> Result<()> {
        self.data.lock().enabled = false;
        Ok(())
    }

    fn readable(&self) -> bool {
        self.data.lock().mmap_page.readable()
    }
}

pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}
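// How the pieces above fit together, as a sketch (the perf syscall layer that
// drives this sequence lives outside this file):
//
//     let event = perf_event_open_bpf(args);
//     event.mmap(start, len, offset)?; // allocate frames, init the ring
//     event.enable()?;                 // allow the BPF side to emit samples
//     event.write_event(&sample)?;     // kernel producer: advances data_head
//     event.readable();                // true once data_head != data_tail;
//                                      // userspace consumes, moves data_tail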