1 /* Atomic operations.  PowerPC64 version.
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
/* POWER6 adds a "Mutex Hint" operand to the Load and Reserve
   instructions.  The hint tells the hardware whether to expect further
   updates adjacent to the lock word.

   MUTEX_HINT_ACQ (hint 1) is for acquiring a mutex.
   MUTEX_HINT_REL (hint 0) is for releasing a mutex or for a simple atomic
   operation, where no additional updates adjacent to the lock word are
   expected after the Store Conditional.

   Both macros are meant to be pasted right after the operand list of the
   load-and-reserve instruction inside an asm template.  When neither
   _ARCH_PWR6 nor _ARCH_PWR6X is defined they expand to nothing, so the
   instruction keeps its three-operand form.  */
#if !defined (_ARCH_PWR6) && !defined (_ARCH_PWR6X)
# define MUTEX_HINT_ACQ
# define MUTEX_HINT_REL
#else
# define MUTEX_HINT_ACQ ",1"
# define MUTEX_HINT_REL ",0"
#endif
34 
/* Capability switches advertised by this header.
   NOTE(review): the code that tests these macros is not part of this
   file; confirm the exact meaning of each value with its consumers.  */

/* Presumably: atomic exchange is realized as a compare-and-swap style
   retry loop on this target.  */
#define ATOMIC_EXCHANGE_USES_CAS 1

/* Presumably: the atomic operations are not routed through compiler
   builtins; the hand-written asm macros are used instead.  */
#define USE_ATOMIC_COMPILER_BUILTINS 0

/* 64-bit atomic operations exist; the *_64 macros in this file
   provide them.  */
#define __HAVE_64B_ATOMICS 1
38 
/* 32-bit compare-and-exchange, boolean result.

   Stores NEWVAL into the word at MEM if that word equals OLDVAL.
   Evaluates to zero when the store was performed and to nonzero when the
   comparison failed.  The template ends with __ARCH_ACQ_INSTR, which is
   not defined in this file.

   The powerpc64 form differs from the 32-bit one because subf. does
   64-bit arithmetic while lwarx is a "load word and zero" form that
   clears the high 32 bits of the loaded value.  Register values are
   64 bits wide, including the one holding OLDVAL, and the sign extension
   of that register is unknown.  Its high 32 bits are therefore cleared
   explicitly before the comparison.  */
#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
({ \
  unsigned int __tmp, __tmp2; \
  __asm__ __volatile__ ( \
    /* %1 enters holding OLDVAL; clear its high 32 bits.  */ \
    "   clrldi  %1,%1,32\n" \
    "1:\tlwarx\t%0,0,%2" MUTEX_HINT_ACQ "\n" \
    /* %0 = loaded word - OLDVAL; nonzero means no match.  */ \
    "\tsubf.\t%0,%1,%0\n" \
    "\tbne\t2f\n" \
    "\tstwcx.\t%4,0,%2\n" \
    /* Store conditional failed: start over.  */ \
    "\tbne-\t1b\n" \
    "2:\t" __ARCH_ACQ_INSTR \
    : "=&r" (__tmp), "=r" (__tmp2) \
    : "b" (mem), "1" (oldval), "r" (newval) \
    : "cr0", "memory"); \
  __tmp != 0; \
})
60 
/*
 * Only powerpc64 processors support the Load Doubleword And Reserve
 * Indexed (ldarx) and Store Doubleword Conditional Indexed (stdcx.)
 * instructions, so the 64-bit forms are defined here.
 */

/* 64-bit compare-and-exchange, boolean result.

   Stores NEWVAL into the doubleword at MEM if that doubleword equals
   OLDVAL.  Evaluates to zero when the store was performed and to nonzero
   when the comparison failed.  The template ends with __ARCH_ACQ_INSTR,
   which is not defined in this file.  */
#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \
({ \
  unsigned long __tmp; \
  __asm__ __volatile__ ( \
    "1:\tldarx\t%0,0,%1" MUTEX_HINT_ACQ "\n" \
    /* %0 = loaded doubleword - OLDVAL; nonzero means no match.  */ \
    "\tsubf.\t%0,%2,%0\n" \
    "\tbne\t2f\n" \
    "\tstdcx.\t%3,0,%1\n" \
    /* Store conditional failed: start over.  */ \
    "\tbne-\t1b\n" \
    "2:\t" __ARCH_ACQ_INSTR \
    : "=&r" (__tmp) \
    : "b" (mem), "r" (oldval), "r" (newval) \
    : "cr0", "memory"); \
  __tmp != 0; \
})
81 
/* 64-bit compare-and-exchange, value result.

   Stores NEWVAL into the doubleword at MEM if that doubleword equals
   OLDVAL.  Evaluates to the value loaded from MEM: OLDVAL when the store
   was performed, otherwise the differing value that was found.  MEM is
   evaluated once into __memp.  The template ends with __ARCH_ACQ_INSTR,
   which is not defined in this file.  */
#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ \
    __typeof__ (mem) __memp = (mem); \
    __typeof__ (*(mem)) __tmp; \
    __asm__ __volatile__ ( \
      "1:\tldarx\t%0,0,%1" MUTEX_HINT_ACQ "\n" \
      "\tcmpd\t%0,%2\n" \
      /* Mismatch: leave without storing.  */ \
      "\tbne\t2f\n" \
      "\tstdcx.\t%3,0,%1\n" \
      /* Store conditional failed: start over.  */ \
      "\tbne-\t1b\n" \
      "2:\t" __ARCH_ACQ_INSTR \
      : "=&r" (__tmp) \
      : "b" (__memp), "r" (oldval), "r" (newval) \
      : "cr0", "memory"); \
    __tmp; \
  })
98 
/* 64-bit compare-and-exchange, value result, release form.

   The template begins with __ARCH_REL_INSTR and uses MUTEX_HINT_REL on
   the ldarx.  It then stores NEWVAL into the doubleword at MEM if that
   doubleword equals OLDVAL.  Evaluates to the value loaded from MEM:
   OLDVAL when the store was performed, otherwise the differing value
   that was found.  MEM is evaluated once into __memp.  */
#define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \
  ({ \
    __typeof__ (mem) __memp = (mem); \
    __typeof__ (*(mem)) __tmp; \
    __asm__ __volatile__ ( \
      __ARCH_REL_INSTR "\n" \
      "1:\tldarx\t%0,0,%1" MUTEX_HINT_REL "\n" \
      "\tcmpd\t%0,%2\n" \
      /* Mismatch: leave without storing.  */ \
      "\tbne\t2f\n" \
      "\tstdcx.\t%3,0,%1\n" \
      /* Store conditional failed: start over.  */ \
      "\tbne-\t1b\n" \
      "2:\t" \
      : "=&r" (__tmp) \
      : "b" (__memp), "r" (oldval), "r" (newval) \
      : "cr0", "memory"); \
    __tmp; \
  })
115 
/* Atomically replace the doubleword at MEM with VALUE and evaluate to
   the value that was loaded from MEM before the store.

   The template begins with __ARCH_REL_INSTR and ends with
   __ARCH_ACQ_INSTR, so the ldarx/stdcx. retry loop has a barrier string
   on both sides.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_exchange_64_acq(mem, value) \
    ({ \
      __typeof (*(mem)) __val; \
      __asm __volatile (__ARCH_REL_INSTR "\n" \
			"1:\tldarx\t%0,0,%2" MUTEX_HINT_ACQ "\n" \
			"\tstdcx.\t%3,0,%2\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b\n" \
		  " " __ARCH_ACQ_INSTR \
			: "=&r" (__val), "=m" (*(mem)) \
			: "b" (mem), "r" (value), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
129 
/* Atomically replace the doubleword at MEM with VALUE and evaluate to
   the value that was loaded from MEM before the store.  Release form:
   the template begins with __ARCH_REL_INSTR, uses MUTEX_HINT_REL on the
   ldarx and has no trailing barrier string.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_exchange_64_rel(mem, value) \
    ({ \
      __typeof (*(mem)) __val; \
      __asm __volatile (__ARCH_REL_INSTR "\n" \
			"1:\tldarx\t%0,0,%2" MUTEX_HINT_REL "\n" \
			"\tstdcx.\t%3,0,%2\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b" \
			: "=&r" (__val), "=m" (*(mem)) \
			: "b" (mem), "r" (value), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
142 
/* Atomically add VALUE to the doubleword at MEM and evaluate to the
   value that was loaded from MEM before the addition.  The sum is built
   in __tmp and stored with stdcx.  The template contains no barrier
   string.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_exchange_and_add_64(mem, value) \
    ({ \
      __typeof (*(mem)) __val, __tmp; \
      __asm __volatile ("1:\tldarx\t%0,0,%3\n" \
			"\tadd\t%1,%0,%4\n" \
			"\tstdcx.\t%1,0,%3\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b" \
			: "=&b" (__val), "=&r" (__tmp), "=m" (*(mem)) \
			: "b" (mem), "r" (value), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
155 
/* Atomically add VALUE to the doubleword at MEM and evaluate to the
   value that was loaded from MEM before the addition.  Acquire form:
   the ldarx carries MUTEX_HINT_ACQ and the template ends with
   __ARCH_ACQ_INSTR, which is not defined in this file.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_exchange_and_add_64_acq(mem, value) \
    ({ \
      __typeof (*(mem)) __val, __tmp; \
      __asm __volatile ("1:\tldarx\t%0,0,%3" MUTEX_HINT_ACQ "\n" \
			"\tadd\t%1,%0,%4\n" \
			"\tstdcx.\t%1,0,%3\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b\n" \
			__ARCH_ACQ_INSTR \
			: "=&b" (__val), "=&r" (__tmp), "=m" (*(mem)) \
			: "b" (mem), "r" (value), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
169 
/* Atomically add VALUE to the doubleword at MEM and evaluate to the
   value that was loaded from MEM before the addition.  Release form:
   the template begins with __ARCH_REL_INSTR, uses MUTEX_HINT_REL on the
   ldarx and has no trailing barrier string.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_exchange_and_add_64_rel(mem, value) \
    ({ \
      __typeof (*(mem)) __val, __tmp; \
      __asm __volatile (__ARCH_REL_INSTR "\n" \
			"1:\tldarx\t%0,0,%3" MUTEX_HINT_REL "\n" \
			"\tadd\t%1,%0,%4\n" \
			"\tstdcx.\t%1,0,%3\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b" \
			: "=&b" (__val), "=&r" (__tmp), "=m" (*(mem)) \
			: "b" (mem), "r" (value), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
183 
/* Atomically add 1 to the doubleword at MEM and evaluate to the new
   (incremented) value: the same register is loaded, incremented and
   stored.  The template contains no barrier string.

   NOTE(review): __val uses the "b" constraint, presumably so that it is
   never placed in r0, which addi would treat as the constant zero when
   used as the source register.  Confirm against the Power ISA.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_increment_val_64(mem) \
    ({ \
      __typeof (*(mem)) __val; \
      __asm __volatile ("1:\tldarx\t%0,0,%2\n" \
			"\taddi\t%0,%0,1\n" \
			"\tstdcx.\t%0,0,%2\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b" \
			: "=&b" (__val), "=m" (*(mem)) \
			: "b" (mem), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
196 
/* Atomically subtract 1 from the doubleword at MEM and evaluate to the
   new (decremented) value: the same register is loaded, decremented and
   stored.  The template contains no barrier string.

   NOTE(review): __val uses the "b" constraint, presumably so that it is
   never placed in r0, which subi (an addi form) would treat as the
   constant zero when used as the source register.  Confirm against the
   Power ISA.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_decrement_val_64(mem) \
    ({ \
      __typeof (*(mem)) __val; \
      __asm __volatile ("1:\tldarx\t%0,0,%2\n" \
			"\tsubi\t%0,%0,1\n" \
			"\tstdcx.\t%0,0,%2\n" \
			/* Store conditional failed: start over.  */ \
			"\tbne-\t1b" \
			: "=&b" (__val), "=m" (*(mem)) \
			: "b" (mem), "m" (*(mem)) \
			: "cr0", "memory"); \
      __val; \
    })
209 
/* Atomically decrement the doubleword at MEM if it is greater than zero.
   Evaluates to the value loaded from MEM before any decrement.  When
   that value is zero or negative the store is skipped.  The template
   ends with __ARCH_ACQ_INSTR, which is not defined in this file.

   __val and __tmp take the type of *MEM.  They must be as wide as the
   object: an `int' here would truncate the doubleword loaded by ldarx
   when the result is handed back.

   MEM is parenthesized wherever it is dereferenced, so an argument such
   as `p + 1' designates the intended object instead of expanding to
   `*p + 1'.  MEM appears in three operand expressions; avoid arguments
   with side effects.  */
#define __arch_atomic_decrement_if_positive_64(mem) \
  ({ __typeof (*(mem)) __val, __tmp; \
     __asm __volatile ("1:\tldarx\t%0,0,%3\n" \
		       "\tcmpdi\t0,%0,0\n" \
		       /* Candidate new value; only stored when %0 > 0.  */ \
		       "\taddi\t%1,%0,-1\n" \
		       "\tble\t2f\n" \
		       "\tstdcx.\t%1,0,%3\n" \
		       /* Store conditional failed: start over.  */ \
		       "\tbne-\t1b\n" \
		       "2:\t" __ARCH_ACQ_INSTR \
		       : "=&b" (__val), "=&r" (__tmp), "=m" (*(mem)) \
		       : "b" (mem), "m" (*(mem)) \
		       : "cr0", "memory"); \
     __val; \
  })
224 
/*
 * Barriers.  All powerpc64 processors support the "light weight" sync
 * instruction (lwsync).  It is used for the read barrier and the write
 * barrier, and it is also the release barrier string (__ARCH_REL_INSTR)
 * that the asm templates in this file paste in front of their
 * load-and-reserve loops.
 */
#define __ARCH_REL_INSTR "lwsync"
#define atomic_read_barrier() __asm__ ("lwsync" ::: "memory")
#define atomic_write_barrier() __asm__ ("lwsync" ::: "memory")
234 
235 /*
236  * Include the rest of the atomic ops macros which are common to both
237  * powerpc32 and powerpc64.
238  */
239 #include_next <atomic-machine.h>
240