1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Facebook
3 #include <linux/sched.h>
4 #include <linux/ptrace.h>
5 #include <stdint.h>
6 #include <stddef.h>
7 #include <stdbool.h>
8 #include <linux/bpf.h>
9 #include <bpf/bpf_helpers.h>
10 
/* Capacities, in bytes, of the fixed-size character buffers declared below. */
#define FUNCTION_NAME_LEN	64	/* Symbol.name */
#define FILE_NAME_LEN		128	/* Symbol.file */
#define TASK_COMM_LEN		16	/* Event.comm */
14 
/*
 * Member offsets, in bytes, applied to user-space object pointers of the
 * traced process before reading the named member. The field name encodes
 * "<object type>_<member>".
 */
typedef struct {
	/* added to the thread-state pointer to read the current frame pointer */
	int PyThreadState_frame;
	/* added to the thread-state pointer to read the creating thread's id */
	int PyThreadState_thread;
	/* added to a frame pointer to read the previous frame */
	int PyFrameObject_back;
	/* added to a frame pointer to read the code-object pointer */
	int PyFrameObject_code;
	/* not referenced by the code visible in this file */
	int PyFrameObject_lineno;
	/* added to a code-object pointer to read the file-name object pointer */
	int PyCodeObject_filename;
	/* added to a code-object pointer to read the function-name object pointer */
	int PyCodeObject_name;
	/* added to a string-object pointer to reach its character data */
	int String_data;
	/* not referenced by the code visible in this file */
	int String_size;
} OffsetConfig;
26 
27 typedef struct {
28 	uintptr_t current_state_addr;
29 	uintptr_t tls_key_addr;
30 	OffsetConfig offsets;
31 	bool use_tls;
32 } PidData;
33 
/* Counters kept in statsmap. */
typedef struct {
	/* incremented once per event that reaches the output stage */
	uint32_t success;
} Stats;
37 
38 typedef struct {
39 	char name[FUNCTION_NAME_LEN];
40 	char file[FILE_NAME_LEN];
41 } Symbol;
42 
43 typedef struct {
44 	uint32_t pid;
45 	uint32_t tid;
46 	char comm[TASK_COMM_LEN];
47 	int32_t kernel_stack_id;
48 	int32_t user_stack_id;
49 	bool thread_current;
50 	bool pthread_match;
51 	bool stack_complete;
52 	int16_t stack_len;
53 	int32_t stack[STACK_MAX_LEN];
54 
55 	int has_meta;
56 	int metadata;
57 	char dummy_safeguard;
58 } Event;
59 
60 
/* Local process-id type; __on_event() stores the pid and tid through it. */
typedef int pid_t;
62 
/* User-space pointers collected by get_frame_data() for a single frame. */
typedef struct {
	void *f_back;		/* PyFrameObject.f_back: the previous frame */
	void *f_code;		/* PyFrameObject.f_code: the PyCodeObject */
	void *co_filename;	/* PyCodeObject.co_filename */
	void *co_name;		/* PyCodeObject.co_name */
} FrameData;
69 
70 #ifdef SUBPROGS
71 __noinline
72 #else
73 __always_inline
74 #endif
get_thread_state(void * tls_base,PidData * pidData)75 static void *get_thread_state(void *tls_base, PidData *pidData)
76 {
77 	void* thread_state;
78 	int key;
79 
80 	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
81 	bpf_probe_read_user(&thread_state, sizeof(thread_state),
82 			    tls_base + 0x310 + key * 0x10 + 0x08);
83 	return thread_state;
84 }
85 
/*
 * Read one Python frame from the traced process.
 *
 * @frame_ptr: user-space address of the frame object
 * @pidData:   supplies the member offsets to apply
 * @frame:     receives f_back and f_code and, when f_code is non-NULL,
 *             co_filename and co_name
 * @symbol:    receives the function name and file name strings
 *
 * Returns false when f_code was read as NULL; *symbol is left untouched in
 * that case. Returns true otherwise, with *symbol fully rewritten.
 */
static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
					   FrameData *frame, Symbol *symbol)
{
	/* read data from PyFrameObject */
	bpf_probe_read_user(&frame->f_back,
			    sizeof(frame->f_back),
			    frame_ptr + pidData->offsets.PyFrameObject_back);
	bpf_probe_read_user(&frame->f_code,
			    sizeof(frame->f_code),
			    frame_ptr + pidData->offsets.PyFrameObject_code);

	/* without a code object there is nothing to name */
	if (!frame->f_code)
		return false;

	/* read data from PyCodeObject */
	bpf_probe_read_user(&frame->co_filename,
			    sizeof(frame->co_filename),
			    frame->f_code + pidData->offsets.PyCodeObject_filename);
	bpf_probe_read_user(&frame->co_name,
			    sizeof(frame->co_name),
			    frame->f_code + pidData->offsets.PyCodeObject_name);

	/*
	 * Callers in this file use *symbol as a hash-map key and reuse the same
	 * buffer for every frame, so all of its bytes are significant.
	 * bpf_probe_read_user_str() does not pad the destination past the
	 * terminating NUL, and a NULL name pointer skips the read entirely.
	 * Clear the buffer first so nothing from the previous frame leaks into
	 * this frame's key.
	 */
	__builtin_memset(symbol, 0, sizeof(*symbol));

	/* read actual names into symbol */
	if (frame->co_filename)
		bpf_probe_read_user_str(&symbol->file,
					sizeof(symbol->file),
					frame->co_filename +
					pidData->offsets.String_data);
	if (frame->co_name)
		bpf_probe_read_user_str(&symbol->name,
					sizeof(symbol->name),
					frame->co_name +
					pidData->offsets.String_data);
	return true;
}
119 
120 struct {
121 	__uint(type, BPF_MAP_TYPE_HASH);
122 	__uint(max_entries, 1);
123 	__type(key, int);
124 	__type(value, PidData);
125 } pidmap SEC(".maps");
126 
127 struct {
128 	__uint(type, BPF_MAP_TYPE_HASH);
129 	__uint(max_entries, 1);
130 	__type(key, int);
131 	__type(value, Event);
132 } eventmap SEC(".maps");
133 
134 struct {
135 	__uint(type, BPF_MAP_TYPE_HASH);
136 	__uint(max_entries, 1);
137 	__type(key, Symbol);
138 	__type(value, int);
139 } symbolmap SEC(".maps");
140 
141 struct {
142 	__uint(type, BPF_MAP_TYPE_ARRAY);
143 	__uint(max_entries, 1);
144 	__type(key, int);
145 	__type(value, Stats);
146 } statsmap SEC(".maps");
147 
148 struct {
149 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
150 	__uint(max_entries, 32);
151 	__uint(key_size, sizeof(int));
152 	__uint(value_size, sizeof(int));
153 } perfmap SEC(".maps");
154 
155 struct {
156 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
157 	__uint(max_entries, 1000);
158 	__uint(key_size, sizeof(int));
159 	__uint(value_size, sizeof(long long) * 127);
160 } stackmap SEC(".maps");
161 
162 #ifdef USE_BPF_LOOP
163 struct process_frame_ctx {
164 	int cur_cpu;
165 	int32_t *symbol_counter;
166 	void *frame_ptr;
167 	FrameData *frame;
168 	PidData *pidData;
169 	Symbol *sym;
170 	Event *event;
171 	bool done;
172 };
173 
/*
 * bpf_loop() callback: record the frame at ctx->frame_ptr as entry @i of the
 * event's stack, then advance ctx->frame_ptr to the previous frame.
 *
 * @i:   iteration index supplied by bpf_loop(); also the stack[] slot
 * @ctx: walk state, see struct process_frame_ctx
 *
 * Returns 1 to stop the loop (symbol lookup failed, with ctx->done set, or
 * @i is out of range) and 0 to continue. A NULL frame pointer or a frame
 * without a code object leaves the state unchanged and returns 0.
 */
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			/*
			 * First sighting: create the entry, then look it up
			 * again to obtain a pointer to the stored value.
			 * NOTE(review): the entry is created with value 0,
			 * not new_symbol_id - confirm this is intended.
			 */
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* keep the bounds check on i visible to the verifier */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		/*
		 * Store the next frame in the shared context. Assigning only
		 * the local copy would make every iteration re-read the same
		 * frame, because each call starts from ctx->frame_ptr.
		 */
		ctx->frame_ptr = frame->f_back;
	}
	return 0;
}
211 #endif /* USE_BPF_LOOP */
212 
213 #ifdef GLOBAL_FUNC
214 __noinline
215 #elif defined(SUBPROGS)
216 static __noinline
217 #else
218 static __always_inline
219 #endif
__on_event(struct bpf_raw_tracepoint_args * ctx)220 int __on_event(struct bpf_raw_tracepoint_args *ctx)
221 {
222 	uint64_t pid_tgid = bpf_get_current_pid_tgid();
223 	pid_t pid = (pid_t)(pid_tgid >> 32);
224 	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
225 	if (!pidData)
226 		return 0;
227 
228 	int zero = 0;
229 	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
230 	if (!event)
231 		return 0;
232 
233 	event->pid = pid;
234 
235 	event->tid = (pid_t)pid_tgid;
236 	bpf_get_current_comm(&event->comm, sizeof(event->comm));
237 
238 	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
239 	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
240 
241 	void* thread_state_current = (void*)0;
242 	bpf_probe_read_user(&thread_state_current,
243 			    sizeof(thread_state_current),
244 			    (void*)(long)pidData->current_state_addr);
245 
246 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
247 	void* tls_base = (void*)task;
248 
249 	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
250 		: thread_state_current;
251 	event->thread_current = thread_state == thread_state_current;
252 
253 	if (pidData->use_tls) {
254 		uint64_t pthread_created;
255 		uint64_t pthread_self;
256 		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
257 				    tls_base + 0x10);
258 
259 		bpf_probe_read_user(&pthread_created,
260 				    sizeof(pthread_created),
261 				    thread_state +
262 				    pidData->offsets.PyThreadState_thread);
263 		event->pthread_match = pthread_created == pthread_self;
264 	} else {
265 		event->pthread_match = 1;
266 	}
267 
268 	if (event->pthread_match || !pidData->use_tls) {
269 		void* frame_ptr;
270 		FrameData frame;
271 		Symbol sym = {};
272 		int cur_cpu = bpf_get_smp_processor_id();
273 
274 		bpf_probe_read_user(&frame_ptr,
275 				    sizeof(frame_ptr),
276 				    thread_state +
277 				    pidData->offsets.PyThreadState_frame);
278 
279 		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
280 		if (symbol_counter == NULL)
281 			return 0;
282 #ifdef USE_BPF_LOOP
283 	struct process_frame_ctx ctx = {
284 		.cur_cpu = cur_cpu,
285 		.symbol_counter = symbol_counter,
286 		.frame_ptr = frame_ptr,
287 		.frame = &frame,
288 		.pidData = pidData,
289 		.sym = &sym,
290 		.event = event,
291 	};
292 
293 	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
294 	if (ctx.done)
295 		return 0;
296 #else
297 #ifdef NO_UNROLL
298 #pragma clang loop unroll(disable)
299 #else
300 #ifdef UNROLL_COUNT
301 #pragma clang loop unroll_count(UNROLL_COUNT)
302 #else
303 #pragma clang loop unroll(full)
304 #endif
305 #endif /* NO_UNROLL */
306 		/* Unwind python stack */
307 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
308 			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
309 				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
310 				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
311 				if (!symbol_id) {
312 					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
313 					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
314 					if (!symbol_id)
315 						return 0;
316 				}
317 				if (*symbol_id == new_symbol_id)
318 					(*symbol_counter)++;
319 				event->stack[i] = *symbol_id;
320 				event->stack_len = i + 1;
321 				frame_ptr = frame.f_back;
322 			}
323 		}
324 #endif /* USE_BPF_LOOP */
325 		event->stack_complete = frame_ptr == NULL;
326 	} else {
327 		event->stack_complete = 1;
328 	}
329 
330 	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
331 	if (stats)
332 		stats->success++;
333 
334 	event->has_meta = 0;
335 	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
336 	return 0;
337 }
338 
339 SEC("raw_tracepoint/kfree_skb")
on_event(struct bpf_raw_tracepoint_args * ctx)340 int on_event(struct bpf_raw_tracepoint_args* ctx)
341 {
342 	int i, ret = 0;
343 	ret |= __on_event(ctx);
344 	ret |= __on_event(ctx);
345 	ret |= __on_event(ctx);
346 	ret |= __on_event(ctx);
347 	ret |= __on_event(ctx);
348 	return ret;
349 }
350 
351 char _license[] SEC("license") = "GPL";
352