/* Atomic operations.  X86 version.
   Copyright (C) 2018-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef _X86_ATOMIC_MACHINE_H
#define _X86_ATOMIC_MACHINE_H 1

#include <stdint.h>
#include <tls.h>			/* For tcbhead_t.  */
#include <libc-pointer-arith.h>		/* For cast_to_integer.  */

#define LOCK_PREFIX "lock;"

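/* With this set to 1, the generic atomic.h wrappers use the compiler's
   __atomic_* builtins for the C11-style operations; the inline
   assembly below backs the older glibc-internal macros.  */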
#define USE_ATOMIC_COMPILER_BUILTINS	1

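/* SEG_REG is the segment register through which the thread control
   block (tcbhead_t) is addressed, and SP_REG names the stack pointer
   used by atomic_full_barrier.  BR_CONSTRAINT and IBR_CONSTRAINT are
   the register and immediate-or-register operand constraints used by
   the inline assembly below.  */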
#ifdef __x86_64__
# define __HAVE_64B_ATOMICS		1
# define SP_REG				"rsp"
# define SEG_REG			"fs"
# define BR_CONSTRAINT			"q"
# define IBR_CONSTRAINT			"iq"
#else
/* Since the Pentium, i386 CPUs have supported 64-bit atomics, but the
   i386 psABI supplement provides only 4-byte alignment for uint64_t
   inside structs, so it is currently not possible to use 64-bit
   atomics on this platform.  */
# define __HAVE_64B_ATOMICS		0
# define SP_REG				"esp"
# define SEG_REG			"gs"
# define BR_CONSTRAINT			"r"
# define IBR_CONSTRAINT			"ir"
#endif
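/* xchg is a native instruction, so atomic exchange does not need to
   be synthesized from a compare-and-swap loop.  */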
#define ATOMIC_EXCHANGE_USES_CAS	0

#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
  __sync_val_compare_and_swap (mem, oldval, newval)
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
  (! __sync_bool_compare_and_swap (mem, oldval, newval))


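/* The "__arch_c_" variants back the catomic_* macros: they test
   tcbhead_t.multiple_threads through the thread segment register and
   jump over the lock prefix while the process is single-threaded,
   avoiding the cost of a locked bus cycle.  The %P5 operand is the
   offset of multiple_threads within the TCB.  */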
#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgb %b2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgw %w2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgl %2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
			 "i" (offsetof (tcbhead_t, multiple_threads)));       \
     ret; })

#ifdef __x86_64__
# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%fs:%P5\n\t"				      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgq %q2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : "q" ((int64_t) cast_to_integer (newval)),	      \
			 "m" (*mem),					      \
			 "0" ((int64_t) cast_to_integer (oldval)),	      \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })
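/* On x86_64 the 64-bit cases are handled inline above and by the
   macros further below, so these two helpers are never reached and
   reduce to trivial stubs.  */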
# define do_exchange_and_add_val_64_acq(pfx, mem, value) 0
# define do_add_val_64_acq(pfx, mem, value) do { } while (0)
#else
/* XXX We do not really need 64-bit compare-and-exchange, at least not
   at the moment.  Using it would cause portability problems, since few
   other 32-bit architectures support such an operation.  So the macros
   below only trigger a link error.  If the operation is ever really
   needed, it could be implemented with cmpxchg8b, which is available
   on Intel Pentium and later CPUs, but NOT on i486.  */
# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret = *(mem);					      \
     __atomic_link_error ();						      \
     ret = (newval);							      \
     ret = (oldval);							      \
     ret; })

# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval)	      \
  ({ __typeof (*mem) ret = *(mem);					      \
     __atomic_link_error ();						      \
     ret = (newval);							      \
     ret = (oldval);							      \
     ret; })

# define do_exchange_and_add_val_64_acq(pfx, mem, value) \
  ({ __typeof (value) __addval = (value);				      \
     __typeof (*mem) __result;						      \
     __typeof (mem) __memp = (mem);					      \
     __typeof (*mem) __tmpval;						      \
     __result = *__memp;						      \
     do									      \
       __tmpval = __result;						      \
     while ((__result = pfx##_compare_and_exchange_val_64_acq		      \
	     (__memp, __result + __addval, __result)) != __tmpval);	      \
     __result; })

# define do_add_val_64_acq(pfx, mem, value) \
  {									      \
    __typeof (value) __addval = (value);				      \
    __typeof (mem) __memp = (mem);					      \
    __typeof (*mem) __oldval = *__memp;					      \
    __typeof (*mem) __tmpval;						      \
    do									      \
      __tmpval = __oldval;						      \
    while ((__oldval = pfx##_compare_and_exchange_val_64_acq		      \
	    (__memp, __oldval + __addval, __oldval)) != __tmpval);	      \
  }
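/* In outline, the two helpers above are plain CAS retry loops,
   roughly:
     old = *mem;
     do { tmp = old; old = CAS (mem, tmp + val, tmp); } while (old != tmp);
   On 32-bit x86 the underlying 64-bit CAS macros only produce a link
   error, so these expansions are never reached at run time.  */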
#endif


/* Note that we need no lock prefix: xchg with a memory operand is
   implicitly locked.  */
#define atomic_exchange_acq(mem, newvalue) \
  ({ __typeof (*mem) result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile ("xchgb %b0, %1"				      \
			 : "=q" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile ("xchgw %w0, %1"				      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile ("xchgl %0, %1"					      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile ("xchgq %q0, %1"				      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" ((int64_t) cast_to_integer (newvalue)),	      \
			   "m" (*mem));					      \
     else								      \
       {								      \
	 result = 0;							      \
	 __atomic_link_error ();					      \
       }								      \
     result; })
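/* Illustrative use (not part of this header): the low-level lock code
   marks a lock as contended with something like
     oldval = atomic_exchange_acq (&futex_word, 2);
   reading the previous state and storing the new one in a single
   locked instruction.  */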


#define __arch_exchange_and_add_body(lock, pfx, mem, value) \
  ({ __typeof (*mem) __result;						      \
     __typeof (value) __addval = (value);				      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "xaddb %b0, %1"				      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "xaddw %w0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "xaddl %0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "xaddq %q0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" ((int64_t) cast_to_integer (__addval)),	      \
			   "m" (*mem),					      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else								      \
       __result = do_exchange_and_add_val_64_acq (pfx, (mem), __addval);      \
     __result; })

#define atomic_exchange_and_add(mem, value) \
  __sync_fetch_and_add (mem, value)

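/* In the conditional-lock prefixes below, %Pn is the immediate operand
   holding offsetof (tcbhead_t, multiple_threads); the operand number
   differs between macros because each asm statement takes a different
   number of operands.  */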
#define __arch_exchange_and_add_cprefix \
  "cmpl $0, %%" SEG_REG ":%P4\n\tje 0f\n\tlock\n0:\t"

#define catomic_exchange_and_add(mem, value) \
  __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c,    \
				mem, value)


#define __arch_add_body(lock, pfx, apfx, mem, value) \
  do {									      \
    if (__builtin_constant_p (value) && (value) == 1)			      \
      pfx##_increment (mem);						      \
    else if (__builtin_constant_p (value) && (value) == -1)		      \
      pfx##_decrement (mem);						      \
    else if (sizeof (*mem) == 1)					      \
      __asm __volatile (lock "addb %b1, %0"				      \
			: "=m" (*mem)					      \
			: IBR_CONSTRAINT (value), "m" (*mem),		      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "addw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "addl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "addq %q1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" ((int64_t) cast_to_integer (value)),	      \
			  "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      do_add_val_64_acq (apfx, (mem), (value));				      \
  } while (0)

#define atomic_add(mem, value) \
  __arch_add_body (LOCK_PREFIX, atomic, __arch, mem, value)

#define __arch_add_cprefix \
  "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"

#define catomic_add(mem, value) \
  __arch_add_body (__arch_add_cprefix, atomic, __arch_c, mem, value)


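/* atomic_add_negative adds VALUE to *MEM and reports whether the
   result is negative; atomic_add_zero reports whether the result is
   zero.  Both read the flag set by the locked add via sets/setz.  */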
#define atomic_add_negative(mem, value) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : IBR_CONSTRAINT (value), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "addq %q2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" ((int64_t) cast_to_integer (value)),	      \
			   "m" (*mem));					      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define atomic_add_zero(mem, value) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : IBR_CONSTRAINT (value), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "addq %q2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" ((int64_t) cast_to_integer (value)),	      \
			   "m" (*mem));					      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define __arch_increment_body(lock, pfx, mem) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "incb %b0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "incw %w0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "incl %0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "incq %q0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      do_add_val_64_acq (pfx, mem, 1);					      \
  } while (0)

#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem)

#define __arch_increment_cprefix \
  "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_increment(mem) \
  __arch_increment_body (__arch_increment_cprefix, __arch_c, mem)


#define atomic_increment_and_test(mem) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "incb %b0; sete %b1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "incw %w0; sete %w1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "incl %0; sete %1"			      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "incq %q0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define __arch_decrement_body(lock, pfx, mem) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "decb %b0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "decw %w0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "decl %0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "decq %q0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      do_add_val_64_acq (pfx, mem, -1);					      \
  } while (0)

#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem)

#define __arch_decrement_cprefix \
  "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_decrement(mem) \
  __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem)


#define atomic_decrement_and_test(mem) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "decb %b0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "decw %w0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "decl %0; sete %1"			      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else								      \
       __asm __volatile (LOCK_PREFIX "decq %q0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     __result; })


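/* atomic_bit_set atomically sets bit BIT in *MEM; atomic_bit_test_set
   sets it and returns the bit's previous value (bts plus setc).  */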
#define atomic_bit_set(mem, bit) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (LOCK_PREFIX "orb %b2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), IBR_CONSTRAINT (1L << (bit)));	      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (LOCK_PREFIX "orw %w2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "ir" (1L << (bit)));		      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (LOCK_PREFIX "orl %2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "ir" (1L << (bit)));		      \
    else if (__builtin_constant_p (bit) && (bit) < 32)			      \
      __asm __volatile (LOCK_PREFIX "orq %2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "i" (1L << (bit)));		      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (LOCK_PREFIX "orq %q2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "r" (1UL << (bit)));		      \
    else								      \
      __atomic_link_error ();						      \
  } while (0)


#define atomic_bit_test_set(mem, bit) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), IBR_CONSTRAINT (bit));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define __arch_and_body(lock, mem, mask) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "andb %b1, %0"				      \
			: "=m" (*mem)					      \
			: IBR_CONSTRAINT (mask), "m" (*mem),		      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "andw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "andl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "andq %q1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      __atomic_link_error ();						      \
  } while (0)

#define __arch_cprefix \
  "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"

#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)

#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)


#define __arch_or_body(lock, mem, mask) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "orb %b1, %0"				      \
			: "=m" (*mem)					      \
			: IBR_CONSTRAINT (mask), "m" (*mem),		      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "orw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "orl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "orq %q1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      __atomic_link_error ();						      \
  } while (0)

#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)

#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)

/* We don't use mfence because it is supposedly slower due to having to
   provide stronger guarantees (e.g., regarding self-modifying code).  */
#define atomic_full_barrier() \
    __asm __volatile (LOCK_PREFIX "orl $0, (%%" SP_REG ")" ::: "memory")
#define atomic_read_barrier() __asm ("" ::: "memory")
#define atomic_write_barrier() __asm ("" ::: "memory")

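/* The pause instruction hints to the CPU that this is a spin-wait
   loop, which saves power and avoids memory-order mis-speculation
   penalties when the loop exits.  */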
#define atomic_spin_nop() __asm ("pause")

#endif /* atomic-machine.h */