1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * KCSAN core runtime.
4  *
5  * Copyright (C) 2019, Google LLC.
6  */
7 
8 #define pr_fmt(fmt) "kcsan: " fmt
9 
10 #include <linux/atomic.h>
11 #include <linux/bug.h>
12 #include <linux/delay.h>
13 #include <linux/export.h>
14 #include <linux/init.h>
15 #include <linux/kernel.h>
16 #include <linux/list.h>
17 #include <linux/moduleparam.h>
18 #include <linux/percpu.h>
19 #include <linux/preempt.h>
20 #include <linux/sched.h>
21 #include <linux/uaccess.h>
22 
23 #include "encoding.h"
24 #include "kcsan.h"
25 #include "permissive.h"
26 
/*
 * Runtime configuration, seeded from the corresponding Kconfig options:
 *
 *   kcsan_early_enable      - consulted by kcsan_init() to enable KCSAN early;
 *   kcsan_udelay_task       - delay used by delay_access() in task context;
 *   kcsan_udelay_interrupt  - delay used by delay_access() outside of tasks;
 *   kcsan_skip_watch        - base value of the per-CPU skip counter, see
 *                             reset_kcsan_skip();
 *   kcsan_interrupt_watcher - if false, kcsan_setup_watchpoint() disables
 *                             interrupts while its watchpoint is set up.
 */
static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);

/*
 * Force the "kcsan." prefix for all parameters defined below, replacing any
 * prefix that may already have been defined.
 */
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kcsan."
/* Permissions: 0 = no sysfs entry, 0644 = root-writable, 0444 = read-only. */
module_param_named(early_enable, kcsan_early_enable, bool, 0);
module_param_named(udelay_task, kcsan_udelay_task, uint, 0644);
module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);
42 
/*
 * With CONFIG_KCSAN_WEAK_MEMORY, kcsan_weak_memory is a runtime-writable
 * parameter that defaults to true. Without it, kcsan_weak_memory is the
 * constant false, so that code testing it can be discarded at compile time.
 */
#ifdef CONFIG_KCSAN_WEAK_MEMORY
static bool kcsan_weak_memory = true;
module_param_named(weak_memory, kcsan_weak_memory, bool, 0644);
#else
#define kcsan_weak_memory false
#endif
49 
/*
 * Global enable switch: read with READ_ONCE() by kcsan_is_enabled(), and set
 * by kcsan_init() if early enabling was requested.
 */
bool kcsan_enabled;
51 
/*
 * Per-CPU kcsan_ctx for interrupts; get_ctx() returns it whenever !in_task().
 *
 * The second initializer of scoped_accesses is NULL: kcsan_begin_scoped_access()
 * tests scoped_accesses.prev for NULL to lazily initialize the list head, and
 * check_access() tests it to decide whether scoped accesses must be checked.
 */
static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
	.scoped_accesses	= {LIST_POISON1, NULL},
};
56 
/*
 * Helper macros to index into adjacent slots, starting from address slot
 * itself, followed by the right and left slots.
 *
 * The purpose is 2-fold:
 *
 *	1. if during insertion the address slot is already occupied, check if
 *	   any adjacent slots are free;
 *	2. accesses that straddle a slot boundary due to size that exceeds a
 *	   slot's range may check adjacent slots if any watchpoint matches.
 *
 * Note that accesses with very large size may still miss a watchpoint; however,
 * given this should be rare, this is a reasonable trade-off to make, since this
 * will avoid:
 *
 *	1. excessive contention between watchpoint checks and setup;
 *	2. larger number of simultaneous watchpoints without sacrificing
 *	   performance.
 *
 * Example: SLOT_IDX values for KCSAN_CHECK_ADJACENT=1, where i is [0, 1, 2]:
 *
 *   slot=0:  [ 1,  2,  0]
 *   slot=9:  [10, 11,  9]
 *   slot=63: [64, 65, 63]
 *
 * Both arguments are parenthesized in the expansion, so that arbitrary
 * expressions (not only plain identifiers or constants) index correctly. Each
 * argument is evaluated exactly once.
 */
#define SLOT_IDX(slot, i) ((slot) + (((i) + KCSAN_CHECK_ADJACENT) % NUM_SLOTS))
83 
/*
 * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary
 * slot (middle) is fine if we assume that races occur rarely. The set of
 * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to
 * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}.
 *
 * Both arguments are parenthesized in the expansion, so that arbitrary
 * expressions may be passed.
 */
#define SLOT_IDX_FAST(slot, i) ((slot) + (i))
91 
/*
 * Watchpoints, with each entry encoded as defined in encoding.h: in order to be
 * able to safely update and access a watchpoint without introducing locking
 * overhead, we encode each watchpoint as a single atomic long. The initial
 * zero-initialized state matches INVALID_WATCHPOINT.
 *
 * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to
 * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path.
 *
 * The BUILD_BUG_ON()s in insert_watchpoint() check at compile time that the
 * SLOT_IDX() values at the edges of the slot range fall inside this array.
 */
static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
102 
/*
 * Instructions to skip watching counter, used in should_watch(). We use a
 * per-CPU counter to avoid excessive contention.
 *
 * Decremented by should_watch() and re-armed by reset_kcsan_skip().
 */
static DEFINE_PER_CPU(long, kcsan_skip);

/*
 * For kcsan_prandom_u32_max(): per-CPU generator state, seeded for every
 * possible CPU in kcsan_init().
 */
static DEFINE_PER_CPU(u32, kcsan_rand_state);
111 
find_watchpoint(unsigned long addr,size_t size,bool expect_write,long * encoded_watchpoint)112 static __always_inline atomic_long_t *find_watchpoint(unsigned long addr,
113 						      size_t size,
114 						      bool expect_write,
115 						      long *encoded_watchpoint)
116 {
117 	const int slot = watchpoint_slot(addr);
118 	const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
119 	atomic_long_t *watchpoint;
120 	unsigned long wp_addr_masked;
121 	size_t wp_size;
122 	bool is_write;
123 	int i;
124 
125 	BUILD_BUG_ON(CONFIG_KCSAN_NUM_WATCHPOINTS < NUM_SLOTS);
126 
127 	for (i = 0; i < NUM_SLOTS; ++i) {
128 		watchpoint = &watchpoints[SLOT_IDX_FAST(slot, i)];
129 		*encoded_watchpoint = atomic_long_read(watchpoint);
130 		if (!decode_watchpoint(*encoded_watchpoint, &wp_addr_masked,
131 				       &wp_size, &is_write))
132 			continue;
133 
134 		if (expect_write && !is_write)
135 			continue;
136 
137 		/* Check if the watchpoint matches the access. */
138 		if (matching_access(wp_addr_masked, wp_size, addr_masked, size))
139 			return watchpoint;
140 	}
141 
142 	return NULL;
143 }
144 
145 static inline atomic_long_t *
insert_watchpoint(unsigned long addr,size_t size,bool is_write)146 insert_watchpoint(unsigned long addr, size_t size, bool is_write)
147 {
148 	const int slot = watchpoint_slot(addr);
149 	const long encoded_watchpoint = encode_watchpoint(addr, size, is_write);
150 	atomic_long_t *watchpoint;
151 	int i;
152 
153 	/* Check slot index logic, ensuring we stay within array bounds. */
154 	BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT);
155 	BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0);
156 	BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1);
157 	BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS);
158 
159 	for (i = 0; i < NUM_SLOTS; ++i) {
160 		long expect_val = INVALID_WATCHPOINT;
161 
162 		/* Try to acquire this slot. */
163 		watchpoint = &watchpoints[SLOT_IDX(slot, i)];
164 		if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint))
165 			return watchpoint;
166 	}
167 
168 	return NULL;
169 }
170 
171 /*
172  * Return true if watchpoint was successfully consumed, false otherwise.
173  *
174  * This may return false if:
175  *
176  *	1. another thread already consumed the watchpoint;
177  *	2. the thread that set up the watchpoint already removed it;
178  *	3. the watchpoint was removed and then re-used.
179  */
180 static __always_inline bool
try_consume_watchpoint(atomic_long_t * watchpoint,long encoded_watchpoint)181 try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
182 {
183 	return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
184 }
185 
186 /* Return true if watchpoint was not touched, false if already consumed. */
consume_watchpoint(atomic_long_t * watchpoint)187 static inline bool consume_watchpoint(atomic_long_t *watchpoint)
188 {
189 	return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT;
190 }
191 
192 /* Remove the watchpoint -- its slot may be reused after. */
remove_watchpoint(atomic_long_t * watchpoint)193 static inline void remove_watchpoint(atomic_long_t *watchpoint)
194 {
195 	atomic_long_set(watchpoint, INVALID_WATCHPOINT);
196 }
197 
get_ctx(void)198 static __always_inline struct kcsan_ctx *get_ctx(void)
199 {
200 	/*
201 	 * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
202 	 * also result in calls that generate warnings in uaccess regions.
203 	 */
204 	return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
205 }
206 
207 static __always_inline void
208 check_access(const volatile void *ptr, size_t size, int type, unsigned long ip);
209 
210 /* Check scoped accesses; never inline because this is a slow-path! */
kcsan_check_scoped_accesses(void)211 static noinline void kcsan_check_scoped_accesses(void)
212 {
213 	struct kcsan_ctx *ctx = get_ctx();
214 	struct kcsan_scoped_access *scoped_access;
215 
216 	if (ctx->disable_scoped)
217 		return;
218 
219 	ctx->disable_scoped++;
220 	list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) {
221 		check_access(scoped_access->ptr, scoped_access->size,
222 			     scoped_access->type, scoped_access->ip);
223 	}
224 	ctx->disable_scoped--;
225 }
226 
227 /* Rules for generic atomic accesses. Called from fast-path. */
228 static __always_inline bool
is_atomic(struct kcsan_ctx * ctx,const volatile void * ptr,size_t size,int type)229 is_atomic(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type)
230 {
231 	if (type & KCSAN_ACCESS_ATOMIC)
232 		return true;
233 
234 	/*
235 	 * Unless explicitly declared atomic, never consider an assertion access
236 	 * as atomic. This allows using them also in atomic regions, such as
237 	 * seqlocks, without implicitly changing their semantics.
238 	 */
239 	if (type & KCSAN_ACCESS_ASSERT)
240 		return false;
241 
242 	if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) &&
243 	    (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) &&
244 	    !(type & KCSAN_ACCESS_COMPOUND) && IS_ALIGNED((unsigned long)ptr, size))
245 		return true; /* Assume aligned writes up to word size are atomic. */
246 
247 	if (ctx->atomic_next > 0) {
248 		/*
249 		 * Because we do not have separate contexts for nested
250 		 * interrupts, in case atomic_next is set, we simply assume that
251 		 * the outer interrupt set atomic_next. In the worst case, we
252 		 * will conservatively consider operations as atomic. This is a
253 		 * reasonable trade-off to make, since this case should be
254 		 * extremely rare; however, even if extremely rare, it could
255 		 * lead to false positives otherwise.
256 		 */
257 		if ((hardirq_count() >> HARDIRQ_SHIFT) < 2)
258 			--ctx->atomic_next; /* in task, or outer interrupt */
259 		return true;
260 	}
261 
262 	return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic;
263 }
264 
265 static __always_inline bool
should_watch(struct kcsan_ctx * ctx,const volatile void * ptr,size_t size,int type)266 should_watch(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type)
267 {
268 	/*
269 	 * Never set up watchpoints when memory operations are atomic.
270 	 *
271 	 * Need to check this first, before kcsan_skip check below: (1) atomics
272 	 * should not count towards skipped instructions, and (2) to actually
273 	 * decrement kcsan_atomic_next for consecutive instruction stream.
274 	 */
275 	if (is_atomic(ctx, ptr, size, type))
276 		return false;
277 
278 	if (this_cpu_dec_return(kcsan_skip) >= 0)
279 		return false;
280 
281 	/*
282 	 * NOTE: If we get here, kcsan_skip must always be reset in slow path
283 	 * via reset_kcsan_skip() to avoid underflow.
284 	 */
285 
286 	/* this operation should be watched */
287 	return true;
288 }
289 
290 /*
291  * Returns a pseudo-random number in interval [0, ep_ro). Simple linear
292  * congruential generator, using constants from "Numerical Recipes".
293  */
kcsan_prandom_u32_max(u32 ep_ro)294 static u32 kcsan_prandom_u32_max(u32 ep_ro)
295 {
296 	u32 state = this_cpu_read(kcsan_rand_state);
297 
298 	state = 1664525 * state + 1013904223;
299 	this_cpu_write(kcsan_rand_state, state);
300 
301 	return state % ep_ro;
302 }
303 
reset_kcsan_skip(void)304 static inline void reset_kcsan_skip(void)
305 {
306 	long skip_count = kcsan_skip_watch -
307 			  (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
308 				   kcsan_prandom_u32_max(kcsan_skip_watch) :
309 				   0);
310 	this_cpu_write(kcsan_skip, skip_count);
311 }
312 
kcsan_is_enabled(struct kcsan_ctx * ctx)313 static __always_inline bool kcsan_is_enabled(struct kcsan_ctx *ctx)
314 {
315 	return READ_ONCE(kcsan_enabled) && !ctx->disable_count;
316 }
317 
318 /* Introduce delay depending on context and configuration. */
delay_access(int type)319 static void delay_access(int type)
320 {
321 	unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt;
322 	/* For certain access types, skew the random delay to be longer. */
323 	unsigned int skew_delay_order =
324 		(type & (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_ASSERT)) ? 1 : 0;
325 
326 	delay -= IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ?
327 			       kcsan_prandom_u32_max(delay >> skew_delay_order) :
328 			       0;
329 	udelay(delay);
330 }
331 
332 /*
333  * Reads the instrumented memory for value change detection; value change
334  * detection is currently done for accesses up to a size of 8 bytes.
335  */
read_instrumented_memory(const volatile void * ptr,size_t size)336 static __always_inline u64 read_instrumented_memory(const volatile void *ptr, size_t size)
337 {
338 	switch (size) {
339 	case 1:  return READ_ONCE(*(const u8 *)ptr);
340 	case 2:  return READ_ONCE(*(const u16 *)ptr);
341 	case 4:  return READ_ONCE(*(const u32 *)ptr);
342 	case 8:  return READ_ONCE(*(const u64 *)ptr);
343 	default: return 0; /* Ignore; we do not diff the values. */
344 	}
345 }
346 
/*
 * Stash @task's IRQ trace state in task->kcsan_save_irqtrace, to be put back
 * by kcsan_restore_irqtrace(). No-op without CONFIG_TRACE_IRQFLAGS.
 */
void kcsan_save_irqtrace(struct task_struct *task)
{
#ifdef CONFIG_TRACE_IRQFLAGS
	task->kcsan_save_irqtrace = task->irqtrace;
#endif
}
353 
/*
 * Put back the IRQ trace state of @task that kcsan_save_irqtrace() stashed in
 * task->kcsan_save_irqtrace. No-op without CONFIG_TRACE_IRQFLAGS.
 */
void kcsan_restore_irqtrace(struct task_struct *task)
{
#ifdef CONFIG_TRACE_IRQFLAGS
	task->irqtrace = task->kcsan_save_irqtrace;
#endif
}
360 
get_kcsan_stack_depth(void)361 static __always_inline int get_kcsan_stack_depth(void)
362 {
363 #ifdef CONFIG_KCSAN_WEAK_MEMORY
364 	return current->kcsan_stack_depth;
365 #else
366 	BUILD_BUG();
367 	return 0;
368 #endif
369 }
370 
add_kcsan_stack_depth(int val)371 static __always_inline void add_kcsan_stack_depth(int val)
372 {
373 #ifdef CONFIG_KCSAN_WEAK_MEMORY
374 	current->kcsan_stack_depth += val;
375 #else
376 	BUILD_BUG();
377 #endif
378 }
379 
get_reorder_access(struct kcsan_ctx * ctx)380 static __always_inline struct kcsan_scoped_access *get_reorder_access(struct kcsan_ctx *ctx)
381 {
382 #ifdef CONFIG_KCSAN_WEAK_MEMORY
383 	return ctx->disable_scoped ? NULL : &ctx->reorder_access;
384 #else
385 	return NULL;
386 #endif
387 }
388 
389 static __always_inline bool
find_reorder_access(struct kcsan_ctx * ctx,const volatile void * ptr,size_t size,int type,unsigned long ip)390 find_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size,
391 		    int type, unsigned long ip)
392 {
393 	struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx);
394 
395 	if (!reorder_access)
396 		return false;
397 
398 	/*
399 	 * Note: If accesses are repeated while reorder_access is identical,
400 	 * never matches the new access, because !(type & KCSAN_ACCESS_SCOPED).
401 	 */
402 	return reorder_access->ptr == ptr && reorder_access->size == size &&
403 	       reorder_access->type == type && reorder_access->ip == ip;
404 }
405 
406 static inline void
set_reorder_access(struct kcsan_ctx * ctx,const volatile void * ptr,size_t size,int type,unsigned long ip)407 set_reorder_access(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size,
408 		   int type, unsigned long ip)
409 {
410 	struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx);
411 
412 	if (!reorder_access || !kcsan_weak_memory)
413 		return;
414 
415 	/*
416 	 * To avoid nested interrupts or scheduler (which share kcsan_ctx)
417 	 * reading an inconsistent reorder_access, ensure that the below has
418 	 * exclusive access to reorder_access by disallowing concurrent use.
419 	 */
420 	ctx->disable_scoped++;
421 	barrier();
422 	reorder_access->ptr		= ptr;
423 	reorder_access->size		= size;
424 	reorder_access->type		= type | KCSAN_ACCESS_SCOPED;
425 	reorder_access->ip		= ip;
426 	reorder_access->stack_depth	= get_kcsan_stack_depth();
427 	barrier();
428 	ctx->disable_scoped--;
429 }
430 
431 /*
432  * Pull everything together: check_access() below contains the performance
433  * critical operations; the fast-path (including check_access) functions should
434  * all be inlinable by the instrumentation functions.
435  *
436  * The slow-path (kcsan_found_watchpoint, kcsan_setup_watchpoint) are
437  * non-inlinable -- note that, we prefix these with "kcsan_" to ensure they can
438  * be filtered from the stacktrace, as well as give them unique names for the
439  * UACCESS whitelist of objtool. Each function uses user_access_save/restore(),
440  * since they do not access any user memory, but instrumentation is still
441  * emitted in UACCESS regions.
442  */
443 
kcsan_found_watchpoint(const volatile void * ptr,size_t size,int type,unsigned long ip,atomic_long_t * watchpoint,long encoded_watchpoint)444 static noinline void kcsan_found_watchpoint(const volatile void *ptr,
445 					    size_t size,
446 					    int type,
447 					    unsigned long ip,
448 					    atomic_long_t *watchpoint,
449 					    long encoded_watchpoint)
450 {
451 	const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
452 	struct kcsan_ctx *ctx = get_ctx();
453 	unsigned long flags;
454 	bool consumed;
455 
456 	/*
457 	 * We know a watchpoint exists. Let's try to keep the race-window
458 	 * between here and finally consuming the watchpoint below as small as
459 	 * possible -- avoid unneccessarily complex code until consumed.
460 	 */
461 
462 	if (!kcsan_is_enabled(ctx))
463 		return;
464 
465 	/*
466 	 * The access_mask check relies on value-change comparison. To avoid
467 	 * reporting a race where e.g. the writer set up the watchpoint, but the
468 	 * reader has access_mask!=0, we have to ignore the found watchpoint.
469 	 *
470 	 * reorder_access is never created from an access with access_mask set.
471 	 */
472 	if (ctx->access_mask && !find_reorder_access(ctx, ptr, size, type, ip))
473 		return;
474 
475 	/*
476 	 * If the other thread does not want to ignore the access, and there was
477 	 * a value change as a result of this thread's operation, we will still
478 	 * generate a report of unknown origin.
479 	 *
480 	 * Use CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n to filter.
481 	 */
482 	if (!is_assert && kcsan_ignore_address(ptr))
483 		return;
484 
485 	/*
486 	 * Consuming the watchpoint must be guarded by kcsan_is_enabled() to
487 	 * avoid erroneously triggering reports if the context is disabled.
488 	 */
489 	consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
490 
491 	/* keep this after try_consume_watchpoint */
492 	flags = user_access_save();
493 
494 	if (consumed) {
495 		kcsan_save_irqtrace(current);
496 		kcsan_report_set_info(ptr, size, type, ip, watchpoint - watchpoints);
497 		kcsan_restore_irqtrace(current);
498 	} else {
499 		/*
500 		 * The other thread may not print any diagnostics, as it has
501 		 * already removed the watchpoint, or another thread consumed
502 		 * the watchpoint before this thread.
503 		 */
504 		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]);
505 	}
506 
507 	if (is_assert)
508 		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
509 	else
510 		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]);
511 
512 	user_access_restore(flags);
513 }
514 
/*
 * Slow-path taken by check_access() when should_watch() selected the access
 * (@ptr, @size, @type, @ip) for watching.
 *
 * Sets up a watchpoint for the access, stalls via delay_access(), and then
 * determines whether a race occurred: either because another access consumed
 * the watchpoint in the meantime, or because the value at @ptr changed while
 * the watchpoint was set up. Races are counted and passed on to the reporting
 * functions; finally the watchpoint is removed again.
 *
 * Always resets the per-CPU skip counter, including on the early exits.
 */
static noinline void
kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned long ip)
{
	const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
	const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
	atomic_long_t *watchpoint;
	u64 old, new, diff;
	enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
	bool interrupt_watcher = kcsan_interrupt_watcher;
	unsigned long ua_flags = user_access_save();
	struct kcsan_ctx *ctx = get_ctx();
	unsigned long access_mask = ctx->access_mask;
	unsigned long irq_flags = 0;
	bool is_reorder_access;

	/*
	 * Always reset kcsan_skip counter in slow-path to avoid underflow; see
	 * should_watch().
	 */
	reset_kcsan_skip();

	if (!kcsan_is_enabled(ctx))
		goto out;

	/*
	 * Check to-ignore addresses after kcsan_is_enabled(), as we may access
	 * memory that is not yet initialized during early boot.
	 */
	if (!is_assert && kcsan_ignore_address(ptr))
		goto out;

	if (!check_encodable((unsigned long)ptr, size)) {
		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_UNENCODABLE_ACCESSES]);
		goto out;
	}

	/*
	 * The local CPU cannot observe reordering of its own accesses, and
	 * therefore we need to take care of 2 cases to avoid false positives:
	 *
	 *	1. Races of the reordered access with interrupts. To avoid, if
	 *	   the current access is reorder_access, disable interrupts.
	 *	2. Avoid races of scoped accesses from nested interrupts (below).
	 */
	is_reorder_access = find_reorder_access(ctx, ptr, size, type, ip);
	if (is_reorder_access)
		interrupt_watcher = false;
	/*
	 * Avoid races of scoped accesses from nested interrupts (or scheduler).
	 * Assume setting up a watchpoint for a non-scoped (normal) access that
	 * also conflicts with a current scoped access. In a nested interrupt,
	 * which shares the context, it would check a conflicting scoped access.
	 * To avoid, disable scoped access checking.
	 */
	ctx->disable_scoped++;

	/*
	 * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's
	 * runtime is entered for every memory access, and potentially useful
	 * information is lost if dirtied by KCSAN.
	 */
	kcsan_save_irqtrace(current);
	if (!interrupt_watcher)
		local_irq_save(irq_flags);

	watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
	if (watchpoint == NULL) {
		/*
		 * Out of capacity: the size of 'watchpoints', and the frequency
		 * with which should_watch() returns true should be tweaked so
		 * that this case happens very rarely.
		 */
		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_NO_CAPACITY]);
		goto out_unlock;
	}

	atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_SETUP_WATCHPOINTS]);
	atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]);

	/*
	 * Read the current value, to later check and infer a race if the data
	 * was modified via a non-instrumented access, e.g. from a device.
	 */
	old = is_reorder_access ? 0 : read_instrumented_memory(ptr, size);

	/*
	 * Delay this thread, to increase probability of observing a racy
	 * conflicting access.
	 */
	delay_access(type);

	/*
	 * Re-read value, and check if it is as expected; if not, we infer a
	 * racy access.
	 */
	if (!is_reorder_access) {
		new = read_instrumented_memory(ptr, size);
	} else {
		/*
		 * Reordered accesses cannot be used for value change detection,
		 * because the memory location may no longer be accessible and
		 * could result in a fault.
		 */
		new = 0;
		access_mask = 0;
	}

	/* Bits that differ between both reads, limited to access_mask if set. */
	diff = old ^ new;
	if (access_mask)
		diff &= access_mask;

	/*
	 * Check if we observed a value change.
	 *
	 * Also check if the data race should be ignored (the rules depend on
	 * non-zero diff); if it is to be ignored, the below rules for
	 * KCSAN_VALUE_CHANGE_MAYBE apply.
	 */
	if (diff && !kcsan_ignore_data_race(size, type, old, new, diff))
		value_change = KCSAN_VALUE_CHANGE_TRUE;

	/* Check if this access raced with another. */
	if (!consume_watchpoint(watchpoint)) {
		/*
		 * Depending on the access type, map a value_change of MAYBE to
		 * TRUE (always report) or FALSE (never report).
		 */
		if (value_change == KCSAN_VALUE_CHANGE_MAYBE) {
			if (access_mask != 0) {
				/*
				 * For access with access_mask, we require a
				 * value-change, as it is likely that races on
				 * ~access_mask bits are expected.
				 */
				value_change = KCSAN_VALUE_CHANGE_FALSE;
			} else if (size > 8 || is_assert) {
				/* Always assume a value-change. */
				value_change = KCSAN_VALUE_CHANGE_TRUE;
			}
		}

		/*
		 * No need to increment 'data_races' counter, as the racing
		 * thread already did.
		 *
		 * Count 'assert_failures' for each failed ASSERT access,
		 * therefore both this thread and the racing thread may
		 * increment this counter.
		 */
		if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE)
			atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);

		kcsan_report_known_origin(ptr, size, type, ip,
					  value_change, watchpoint - watchpoints,
					  old, new, access_mask);
	} else if (value_change == KCSAN_VALUE_CHANGE_TRUE) {
		/* Inferring a race, since the value should not have changed. */

		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN]);
		if (is_assert)
			atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);

		if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) {
			kcsan_report_unknown_origin(ptr, size, type, ip,
						    old, new, access_mask);
		}
	}

	/*
	 * Remove watchpoint; must be after reporting, since the slot may be
	 * reused after this point.
	 */
	remove_watchpoint(watchpoint);
	atomic_long_dec(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]);

out_unlock:
	/* Undo, in reverse order, what was set up before insert_watchpoint(). */
	if (!interrupt_watcher)
		local_irq_restore(irq_flags);
	kcsan_restore_irqtrace(current);
	ctx->disable_scoped--;

	/*
	 * Reordered accesses cannot be used for value change detection,
	 * therefore never consider for reordering if access_mask is set.
	 * ASSERT_EXCLUSIVE are not real accesses, ignore them as well.
	 */
	if (!access_mask && !is_assert)
		set_reorder_access(ctx, ptr, size, type, ip);
out:
	user_access_restore(ua_flags);
}
706 
707 static __always_inline void
check_access(const volatile void * ptr,size_t size,int type,unsigned long ip)708 check_access(const volatile void *ptr, size_t size, int type, unsigned long ip)
709 {
710 	atomic_long_t *watchpoint;
711 	long encoded_watchpoint;
712 
713 	/*
714 	 * Do nothing for 0 sized check; this comparison will be optimized out
715 	 * for constant sized instrumentation (__tsan_{read,write}N).
716 	 */
717 	if (unlikely(size == 0))
718 		return;
719 
720 again:
721 	/*
722 	 * Avoid user_access_save in fast-path: find_watchpoint is safe without
723 	 * user_access_save, as the address that ptr points to is only used to
724 	 * check if a watchpoint exists; ptr is never dereferenced.
725 	 */
726 	watchpoint = find_watchpoint((unsigned long)ptr, size,
727 				     !(type & KCSAN_ACCESS_WRITE),
728 				     &encoded_watchpoint);
729 	/*
730 	 * It is safe to check kcsan_is_enabled() after find_watchpoint in the
731 	 * slow-path, as long as no state changes that cause a race to be
732 	 * detected and reported have occurred until kcsan_is_enabled() is
733 	 * checked.
734 	 */
735 
736 	if (unlikely(watchpoint != NULL))
737 		kcsan_found_watchpoint(ptr, size, type, ip, watchpoint, encoded_watchpoint);
738 	else {
739 		struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */
740 
741 		if (unlikely(should_watch(ctx, ptr, size, type))) {
742 			kcsan_setup_watchpoint(ptr, size, type, ip);
743 			return;
744 		}
745 
746 		if (!(type & KCSAN_ACCESS_SCOPED)) {
747 			struct kcsan_scoped_access *reorder_access = get_reorder_access(ctx);
748 
749 			if (reorder_access) {
750 				/*
751 				 * reorder_access check: simulates reordering of
752 				 * the access after subsequent operations.
753 				 */
754 				ptr = reorder_access->ptr;
755 				type = reorder_access->type;
756 				ip = reorder_access->ip;
757 				/*
758 				 * Upon a nested interrupt, this context's
759 				 * reorder_access can be modified (shared ctx).
760 				 * We know that upon return, reorder_access is
761 				 * always invalidated by setting size to 0 via
762 				 * __tsan_func_exit(). Therefore we must read
763 				 * and check size after the other fields.
764 				 */
765 				barrier();
766 				size = READ_ONCE(reorder_access->size);
767 				if (size)
768 					goto again;
769 			}
770 		}
771 
772 		/*
773 		 * Always checked last, right before returning from runtime;
774 		 * if reorder_access is valid, checked after it was checked.
775 		 */
776 		if (unlikely(ctx->scoped_accesses.prev))
777 			kcsan_check_scoped_accesses();
778 	}
779 }
780 
781 /* === Public interface ===================================================== */
782 
kcsan_init(void)783 void __init kcsan_init(void)
784 {
785 	int cpu;
786 
787 	BUG_ON(!in_task());
788 
789 	for_each_possible_cpu(cpu)
790 		per_cpu(kcsan_rand_state, cpu) = (u32)get_cycles();
791 
792 	/*
793 	 * We are in the init task, and no other tasks should be running;
794 	 * WRITE_ONCE without memory barrier is sufficient.
795 	 */
796 	if (kcsan_early_enable) {
797 		pr_info("enabled early\n");
798 		WRITE_ONCE(kcsan_enabled, true);
799 	}
800 
801 	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) ||
802 	    IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) ||
803 	    IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) ||
804 	    IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
805 		pr_warn("non-strict mode configured - use CONFIG_KCSAN_STRICT=y to see all data races\n");
806 	} else {
807 		pr_info("strict mode configured\n");
808 	}
809 }
810 
811 /* === Exported interface =================================================== */
812 
kcsan_disable_current(void)813 void kcsan_disable_current(void)
814 {
815 	++get_ctx()->disable_count;
816 }
817 EXPORT_SYMBOL(kcsan_disable_current);
818 
kcsan_enable_current(void)819 void kcsan_enable_current(void)
820 {
821 	if (get_ctx()->disable_count-- == 0) {
822 		/*
823 		 * Warn if kcsan_enable_current() calls are unbalanced with
824 		 * kcsan_disable_current() calls, which causes disable_count to
825 		 * become negative and should not happen.
826 		 */
827 		kcsan_disable_current(); /* restore to 0, KCSAN still enabled */
828 		kcsan_disable_current(); /* disable to generate warning */
829 		WARN(1, "Unbalanced %s()", __func__);
830 		kcsan_enable_current();
831 	}
832 }
833 EXPORT_SYMBOL(kcsan_enable_current);
834 
kcsan_enable_current_nowarn(void)835 void kcsan_enable_current_nowarn(void)
836 {
837 	if (get_ctx()->disable_count-- == 0)
838 		kcsan_disable_current();
839 }
840 EXPORT_SYMBOL(kcsan_enable_current_nowarn);
841 
kcsan_nestable_atomic_begin(void)842 void kcsan_nestable_atomic_begin(void)
843 {
844 	/*
845 	 * Do *not* check and warn if we are in a flat atomic region: nestable
846 	 * and flat atomic regions are independent from each other.
847 	 * See include/linux/kcsan.h: struct kcsan_ctx comments for more
848 	 * comments.
849 	 */
850 
851 	++get_ctx()->atomic_nest_count;
852 }
853 EXPORT_SYMBOL(kcsan_nestable_atomic_begin);
854 
kcsan_nestable_atomic_end(void)855 void kcsan_nestable_atomic_end(void)
856 {
857 	if (get_ctx()->atomic_nest_count-- == 0) {
858 		/*
859 		 * Warn if kcsan_nestable_atomic_end() calls are unbalanced with
860 		 * kcsan_nestable_atomic_begin() calls, which causes
861 		 * atomic_nest_count to become negative and should not happen.
862 		 */
863 		kcsan_nestable_atomic_begin(); /* restore to 0 */
864 		kcsan_disable_current(); /* disable to generate warning */
865 		WARN(1, "Unbalanced %s()", __func__);
866 		kcsan_enable_current();
867 	}
868 }
869 EXPORT_SYMBOL(kcsan_nestable_atomic_end);
870 
kcsan_flat_atomic_begin(void)871 void kcsan_flat_atomic_begin(void)
872 {
873 	get_ctx()->in_flat_atomic = true;
874 }
875 EXPORT_SYMBOL(kcsan_flat_atomic_begin);
876 
kcsan_flat_atomic_end(void)877 void kcsan_flat_atomic_end(void)
878 {
879 	get_ctx()->in_flat_atomic = false;
880 }
881 EXPORT_SYMBOL(kcsan_flat_atomic_end);
882 
kcsan_atomic_next(int n)883 void kcsan_atomic_next(int n)
884 {
885 	get_ctx()->atomic_next = n;
886 }
887 EXPORT_SYMBOL(kcsan_atomic_next);
888 
kcsan_set_access_mask(unsigned long mask)889 void kcsan_set_access_mask(unsigned long mask)
890 {
891 	get_ctx()->access_mask = mask;
892 }
893 EXPORT_SYMBOL(kcsan_set_access_mask);
894 
895 struct kcsan_scoped_access *
kcsan_begin_scoped_access(const volatile void * ptr,size_t size,int type,struct kcsan_scoped_access * sa)896 kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
897 			  struct kcsan_scoped_access *sa)
898 {
899 	struct kcsan_ctx *ctx = get_ctx();
900 
901 	check_access(ptr, size, type, _RET_IP_);
902 
903 	ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
904 
905 	INIT_LIST_HEAD(&sa->list);
906 	sa->ptr = ptr;
907 	sa->size = size;
908 	sa->type = type;
909 	sa->ip = _RET_IP_;
910 
911 	if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */
912 		INIT_LIST_HEAD(&ctx->scoped_accesses);
913 	list_add(&sa->list, &ctx->scoped_accesses);
914 
915 	ctx->disable_count--;
916 	return sa;
917 }
918 EXPORT_SYMBOL(kcsan_begin_scoped_access);
919 
kcsan_end_scoped_access(struct kcsan_scoped_access * sa)920 void kcsan_end_scoped_access(struct kcsan_scoped_access *sa)
921 {
922 	struct kcsan_ctx *ctx = get_ctx();
923 
924 	if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__))
925 		return;
926 
927 	ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
928 
929 	list_del(&sa->list);
930 	if (list_empty(&ctx->scoped_accesses))
931 		/*
932 		 * Ensure we do not enter kcsan_check_scoped_accesses()
933 		 * slow-path if unnecessary, and avoids requiring list_empty()
934 		 * in the fast-path (to avoid a READ_ONCE() and potential
935 		 * uaccess warning).
936 		 */
937 		ctx->scoped_accesses.prev = NULL;
938 
939 	ctx->disable_count--;
940 
941 	check_access(sa->ptr, sa->size, sa->type, sa->ip);
942 }
943 EXPORT_SYMBOL(kcsan_end_scoped_access);
944 
__kcsan_check_access(const volatile void * ptr,size_t size,int type)945 void __kcsan_check_access(const volatile void *ptr, size_t size, int type)
946 {
947 	check_access(ptr, size, type, _RET_IP_);
948 }
949 EXPORT_SYMBOL(__kcsan_check_access);
950 
951 #define DEFINE_MEMORY_BARRIER(name, order_before_cond)				\
952 	void __kcsan_##name(void)						\
953 	{									\
954 		struct kcsan_scoped_access *sa = get_reorder_access(get_ctx());	\
955 		if (!sa)							\
956 			return;							\
957 		if (order_before_cond)						\
958 			sa->size = 0;						\
959 	}									\
960 	EXPORT_SYMBOL(__kcsan_##name)
961 
962 DEFINE_MEMORY_BARRIER(mb, true);
963 DEFINE_MEMORY_BARRIER(wmb, sa->type & (KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND));
964 DEFINE_MEMORY_BARRIER(rmb, !(sa->type & KCSAN_ACCESS_WRITE) || (sa->type & KCSAN_ACCESS_COMPOUND));
965 DEFINE_MEMORY_BARRIER(release, true);
966 
967 /*
968  * KCSAN uses the same instrumentation that is emitted by supported compilers
969  * for ThreadSanitizer (TSAN).
970  *
971  * When enabled, the compiler emits instrumentation calls (the functions
972  * prefixed with "__tsan" below) for all loads and stores that it generated;
973  * inline asm is not instrumented.
974  *
975  * Note that, not all supported compiler versions distinguish aligned/unaligned
976  * accesses, but e.g. recent versions of Clang do. We simply alias the unaligned
977  * version to the generic version, which can handle both.
978  */
979 
980 #define DEFINE_TSAN_READ_WRITE(size)                                           \
981 	void __tsan_read##size(void *ptr);                                     \
982 	void __tsan_read##size(void *ptr)                                      \
983 	{                                                                      \
984 		check_access(ptr, size, 0, _RET_IP_);                          \
985 	}                                                                      \
986 	EXPORT_SYMBOL(__tsan_read##size);                                      \
987 	void __tsan_unaligned_read##size(void *ptr)                            \
988 		__alias(__tsan_read##size);                                    \
989 	EXPORT_SYMBOL(__tsan_unaligned_read##size);                            \
990 	void __tsan_write##size(void *ptr);                                    \
991 	void __tsan_write##size(void *ptr)                                     \
992 	{                                                                      \
993 		check_access(ptr, size, KCSAN_ACCESS_WRITE, _RET_IP_);         \
994 	}                                                                      \
995 	EXPORT_SYMBOL(__tsan_write##size);                                     \
996 	void __tsan_unaligned_write##size(void *ptr)                           \
997 		__alias(__tsan_write##size);                                   \
998 	EXPORT_SYMBOL(__tsan_unaligned_write##size);                           \
999 	void __tsan_read_write##size(void *ptr);                               \
1000 	void __tsan_read_write##size(void *ptr)                                \
1001 	{                                                                      \
1002 		check_access(ptr, size,                                        \
1003 			     KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE,       \
1004 			     _RET_IP_);                                        \
1005 	}                                                                      \
1006 	EXPORT_SYMBOL(__tsan_read_write##size);                                \
1007 	void __tsan_unaligned_read_write##size(void *ptr)                      \
1008 		__alias(__tsan_read_write##size);                              \
1009 	EXPORT_SYMBOL(__tsan_unaligned_read_write##size)
1010 
1011 DEFINE_TSAN_READ_WRITE(1);
1012 DEFINE_TSAN_READ_WRITE(2);
1013 DEFINE_TSAN_READ_WRITE(4);
1014 DEFINE_TSAN_READ_WRITE(8);
1015 DEFINE_TSAN_READ_WRITE(16);
1016 
1017 void __tsan_read_range(void *ptr, size_t size);
__tsan_read_range(void * ptr,size_t size)1018 void __tsan_read_range(void *ptr, size_t size)
1019 {
1020 	check_access(ptr, size, 0, _RET_IP_);
1021 }
1022 EXPORT_SYMBOL(__tsan_read_range);
1023 
1024 void __tsan_write_range(void *ptr, size_t size);
__tsan_write_range(void * ptr,size_t size)1025 void __tsan_write_range(void *ptr, size_t size)
1026 {
1027 	check_access(ptr, size, KCSAN_ACCESS_WRITE, _RET_IP_);
1028 }
1029 EXPORT_SYMBOL(__tsan_write_range);
1030 
1031 /*
1032  * Use of explicit volatile is generally disallowed [1], however, volatile is
 * still used in various concurrent contexts, whether in low-level
1034  * synchronization primitives or for legacy reasons.
1035  * [1] https://lwn.net/Articles/233479/
1036  *
1037  * We only consider volatile accesses atomic if they are aligned and would pass
1038  * the size-check of compiletime_assert_rwonce_type().
1039  */
1040 #define DEFINE_TSAN_VOLATILE_READ_WRITE(size)                                  \
1041 	void __tsan_volatile_read##size(void *ptr);                            \
1042 	void __tsan_volatile_read##size(void *ptr)                             \
1043 	{                                                                      \
1044 		const bool is_atomic = size <= sizeof(long long) &&            \
1045 				       IS_ALIGNED((unsigned long)ptr, size);   \
1046 		if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic)      \
1047 			return;                                                \
1048 		check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0,   \
1049 			     _RET_IP_);                                        \
1050 	}                                                                      \
1051 	EXPORT_SYMBOL(__tsan_volatile_read##size);                             \
1052 	void __tsan_unaligned_volatile_read##size(void *ptr)                   \
1053 		__alias(__tsan_volatile_read##size);                           \
1054 	EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size);                   \
1055 	void __tsan_volatile_write##size(void *ptr);                           \
1056 	void __tsan_volatile_write##size(void *ptr)                            \
1057 	{                                                                      \
1058 		const bool is_atomic = size <= sizeof(long long) &&            \
1059 				       IS_ALIGNED((unsigned long)ptr, size);   \
1060 		if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic)      \
1061 			return;                                                \
1062 		check_access(ptr, size,                                        \
1063 			     KCSAN_ACCESS_WRITE |                              \
1064 				     (is_atomic ? KCSAN_ACCESS_ATOMIC : 0),    \
1065 			     _RET_IP_);                                        \
1066 	}                                                                      \
1067 	EXPORT_SYMBOL(__tsan_volatile_write##size);                            \
1068 	void __tsan_unaligned_volatile_write##size(void *ptr)                  \
1069 		__alias(__tsan_volatile_write##size);                          \
1070 	EXPORT_SYMBOL(__tsan_unaligned_volatile_write##size)
1071 
1072 DEFINE_TSAN_VOLATILE_READ_WRITE(1);
1073 DEFINE_TSAN_VOLATILE_READ_WRITE(2);
1074 DEFINE_TSAN_VOLATILE_READ_WRITE(4);
1075 DEFINE_TSAN_VOLATILE_READ_WRITE(8);
1076 DEFINE_TSAN_VOLATILE_READ_WRITE(16);
1077 
1078 /*
 * Function entry and exit are used to determine the validity of reorder_access.
1080  * Reordering of the access ends at the end of the function scope where the
1081  * access happened. This is done for two reasons:
1082  *
1083  *	1. Artificially limits the scope where missing barriers are detected.
1084  *	   This minimizes false positives due to uninstrumented functions that
1085  *	   contain the required barriers but were missed.
1086  *
1087  *	2. Simplifies generating the stack trace of the access.
1088  */
1089 void __tsan_func_entry(void *call_pc);
__tsan_func_entry(void * call_pc)1090 noinline void __tsan_func_entry(void *call_pc)
1091 {
1092 	if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY))
1093 		return;
1094 
1095 	add_kcsan_stack_depth(1);
1096 }
1097 EXPORT_SYMBOL(__tsan_func_entry);
1098 
1099 void __tsan_func_exit(void);
__tsan_func_exit(void)1100 noinline void __tsan_func_exit(void)
1101 {
1102 	struct kcsan_scoped_access *reorder_access;
1103 
1104 	if (!IS_ENABLED(CONFIG_KCSAN_WEAK_MEMORY))
1105 		return;
1106 
1107 	reorder_access = get_reorder_access(get_ctx());
1108 	if (!reorder_access)
1109 		goto out;
1110 
1111 	if (get_kcsan_stack_depth() <= reorder_access->stack_depth) {
1112 		/*
1113 		 * Access check to catch cases where write without a barrier
1114 		 * (supposed release) was last access in function: because
1115 		 * instrumentation is inserted before the real access, a data
1116 		 * race due to the write giving up a c-s would only be caught if
1117 		 * we do the conflicting access after.
1118 		 */
1119 		check_access(reorder_access->ptr, reorder_access->size,
1120 			     reorder_access->type, reorder_access->ip);
1121 		reorder_access->size = 0;
1122 		reorder_access->stack_depth = INT_MIN;
1123 	}
1124 out:
1125 	add_kcsan_stack_depth(-1);
1126 }
1127 EXPORT_SYMBOL(__tsan_func_exit);
1128 
/* Exported __tsan_init() hook; this runtime does nothing in it. */
void __tsan_init(void);
void __tsan_init(void)
{
}
EXPORT_SYMBOL(__tsan_init);
1134 
1135 /*
1136  * Instrumentation for atomic builtins (__atomic_*, __sync_*).
1137  *
1138  * Normal kernel code _should not_ be using them directly, but some
1139  * architectures may implement some or all atomics using the compilers'
1140  * builtins.
1141  *
1142  * Note: If an architecture decides to fully implement atomics using the
1143  * builtins, because they are implicitly instrumented by KCSAN (and KASAN,
1144  * etc.), implementing the ARCH_ATOMIC interface (to get instrumentation via
1145  * atomic-instrumented) is no longer necessary.
1146  *
1147  * TSAN instrumentation replaces atomic accesses with calls to any of the below
1148  * functions, whose job is to also execute the operation itself.
1149  */
1150 
kcsan_atomic_builtin_memorder(int memorder)1151 static __always_inline void kcsan_atomic_builtin_memorder(int memorder)
1152 {
1153 	if (memorder == __ATOMIC_RELEASE ||
1154 	    memorder == __ATOMIC_SEQ_CST ||
1155 	    memorder == __ATOMIC_ACQ_REL)
1156 		__kcsan_release();
1157 }
1158 
/*
 * Generate and export __tsan_atomic<bits>_load() and _store(). Each handles
 * the memory order, checks the access as atomic unless
 * CONFIG_KCSAN_IGNORE_ATOMICS is enabled, and then performs the operation
 * itself with the matching __atomic builtin.
 */
#define DEFINE_TSAN_ATOMIC_LOAD_STORE(bits)                                                        \
	u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder);                      \
	u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder)                       \
	{                                                                                          \
		const int type = KCSAN_ACCESS_ATOMIC;                                              \
												   \
		kcsan_atomic_builtin_memorder(memorder);                                           \
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))                                      \
			check_access(ptr, bits / BITS_PER_BYTE, type, _RET_IP_);                   \
		return __atomic_load_n(ptr, memorder);                                             \
	}                                                                                          \
	EXPORT_SYMBOL(__tsan_atomic##bits##_load);                                                 \
	void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder);                   \
	void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder)                    \
	{                                                                                          \
		const int type = KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC;                         \
												   \
		kcsan_atomic_builtin_memorder(memorder);                                           \
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))                                      \
			check_access(ptr, bits / BITS_PER_BYTE, type, _RET_IP_);                   \
		__atomic_store_n(ptr, v, memorder);                                                \
	}                                                                                          \
	EXPORT_SYMBOL(__tsan_atomic##bits##_store)
1181 
/*
 * Generate and export __tsan_atomic<bits>_<op>(): handles the memory order,
 * checks the access as an atomic compound write unless
 * CONFIG_KCSAN_IGNORE_ATOMICS is enabled, then performs the operation via
 * __atomic_<op><suffix>() and returns its result.
 */
#define DEFINE_TSAN_ATOMIC_RMW(op, bits, suffix)                                                   \
	u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder);                 \
	u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder)                  \
	{                                                                                          \
		const int type = KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE |                      \
				 KCSAN_ACCESS_ATOMIC;                                              \
												   \
		kcsan_atomic_builtin_memorder(memorder);                                           \
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))                                      \
			check_access(ptr, bits / BITS_PER_BYTE, type, _RET_IP_);                   \
		return __atomic_##op##suffix(ptr, v, memorder);                                    \
	}                                                                                          \
	EXPORT_SYMBOL(__tsan_atomic##bits##_##op)
1195 
1196 /*
1197  * Note: CAS operations are always classified as write, even in case they
1198  * fail. We cannot perform check_access() after a write, as it might lead to
1199  * false positives, in cases such as:
1200  *
1201  *	T0: __atomic_compare_exchange_n(&p->flag, &old, 1, ...)
1202  *
1203  *	T1: if (__atomic_load_n(&p->flag, ...)) {
1204  *		modify *p;
1205  *		p->flag = 0;
1206  *	    }
1207  *
1208  * The only downside is that, if there are 3 threads, with one CAS that
1209  * succeeds, another CAS that fails, and an unmarked racing operation, we may
1210  * point at the wrong CAS as the source of the race. However, if we assume that
1211  * all CAS can succeed in some other execution, the data race is still valid.
1212  */
#define DEFINE_TSAN_ATOMIC_CMPXCHG(bits, strength, weak)                                           \
	int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp,          \
							      u##bits val, int mo, int fail_mo);   \
	int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp,          \
							      u##bits val, int mo, int fail_mo)    \
	{                                                                                          \
		/* Classified as a write whether or not the exchange succeeds. */                  \
		const int type = KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE |                      \
				 KCSAN_ACCESS_ATOMIC;                                              \
												   \
		kcsan_atomic_builtin_memorder(mo);                                                 \
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))                                      \
			check_access(ptr, bits / BITS_PER_BYTE, type, _RET_IP_);                   \
		return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo);              \
	}                                                                                          \
	EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_##strength)
1228 
/*
 * Generate and export __tsan_atomic<bits>_compare_exchange_val(): a strong
 * compare-and-exchange that returns a value instead of a success flag. The
 * builtin writes the observed value back into the local 'exp' on failure, so
 * returning 'exp' yields the expected value on success and the observed
 * value otherwise.
 */
#define DEFINE_TSAN_ATOMIC_CMPXCHG_VAL(bits)                                                       \
	u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \
							   int mo, int fail_mo);                   \
	u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \
							   int mo, int fail_mo)                    \
	{                                                                                          \
		const int type = KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE |                      \
				 KCSAN_ACCESS_ATOMIC;                                              \
												   \
		kcsan_atomic_builtin_memorder(mo);                                                 \
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))                                      \
			check_access(ptr, bits / BITS_PER_BYTE, type, _RET_IP_);                   \
		__atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo);                       \
		return exp;                                                                        \
	}                                                                                          \
	EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_val)
1245 
1246 #define DEFINE_TSAN_ATOMIC_OPS(bits)                                                               \
1247 	DEFINE_TSAN_ATOMIC_LOAD_STORE(bits);                                                       \
1248 	DEFINE_TSAN_ATOMIC_RMW(exchange, bits, _n);                                                \
1249 	DEFINE_TSAN_ATOMIC_RMW(fetch_add, bits, );                                                 \
1250 	DEFINE_TSAN_ATOMIC_RMW(fetch_sub, bits, );                                                 \
1251 	DEFINE_TSAN_ATOMIC_RMW(fetch_and, bits, );                                                 \
1252 	DEFINE_TSAN_ATOMIC_RMW(fetch_or, bits, );                                                  \
1253 	DEFINE_TSAN_ATOMIC_RMW(fetch_xor, bits, );                                                 \
1254 	DEFINE_TSAN_ATOMIC_RMW(fetch_nand, bits, );                                                \
1255 	DEFINE_TSAN_ATOMIC_CMPXCHG(bits, strong, 0);                                               \
1256 	DEFINE_TSAN_ATOMIC_CMPXCHG(bits, weak, 1);                                                 \
1257 	DEFINE_TSAN_ATOMIC_CMPXCHG_VAL(bits)
1258 
1259 DEFINE_TSAN_ATOMIC_OPS(8);
1260 DEFINE_TSAN_ATOMIC_OPS(16);
1261 DEFINE_TSAN_ATOMIC_OPS(32);
1262 DEFINE_TSAN_ATOMIC_OPS(64);
1263 
/*
 * Hook for __atomic_thread_fence(): handles the memory order via
 * kcsan_atomic_builtin_memorder(), then issues the real fence.
 */
void __tsan_atomic_thread_fence(int memorder);
void __tsan_atomic_thread_fence(int memorder)
{
	kcsan_atomic_builtin_memorder(memorder);
	__atomic_thread_fence(memorder);
}
EXPORT_SYMBOL(__tsan_atomic_thread_fence);
1271 
1272 /*
1273  * In instrumented files, we emit instrumentation for barriers by mapping the
1274  * kernel barriers to an __atomic_signal_fence(), which is interpreted specially
1275  * and otherwise has no relation to a real __atomic_signal_fence(). No known
1276  * kernel code uses __atomic_signal_fence().
1277  *
1278  * Since fsanitize=thread instrumentation handles __atomic_signal_fence(), which
1279  * are turned into calls to __tsan_atomic_signal_fence(), such instrumentation
1280  * can be disabled via the __no_kcsan function attribute (vs. an explicit call
1281  * which could not). When __no_kcsan is requested, __atomic_signal_fence()
1282  * generates no code.
1283  *
1284  * Note: The result of using __atomic_signal_fence() with KCSAN enabled is
1285  * potentially limiting the compiler's ability to reorder operations; however,
1286  * if barriers were instrumented with explicit calls (without LTO), the compiler
1287  * couldn't optimize much anyway. The result of a hypothetical architecture
1288  * using __atomic_signal_fence() in normal code would be KCSAN false negatives.
1289  */
1290 void __tsan_atomic_signal_fence(int memorder);
__tsan_atomic_signal_fence(int memorder)1291 noinline void __tsan_atomic_signal_fence(int memorder)
1292 {
1293 	switch (memorder) {
1294 	case __KCSAN_BARRIER_TO_SIGNAL_FENCE_mb:
1295 		__kcsan_mb();
1296 		break;
1297 	case __KCSAN_BARRIER_TO_SIGNAL_FENCE_wmb:
1298 		__kcsan_wmb();
1299 		break;
1300 	case __KCSAN_BARRIER_TO_SIGNAL_FENCE_rmb:
1301 		__kcsan_rmb();
1302 		break;
1303 	case __KCSAN_BARRIER_TO_SIGNAL_FENCE_release:
1304 		__kcsan_release();
1305 		break;
1306 	default:
1307 		break;
1308 	}
1309 }
1310 EXPORT_SYMBOL(__tsan_atomic_signal_fence);
1311