1/* Thread-local storage handling in the ELF dynamic linker.
2   AArch64 version.
3   Copyright (C) 2011-2022 Free Software Foundation, Inc.
4
5   This file is part of the GNU C Library.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library; if not, see
19   <https://www.gnu.org/licenses/>.  */
20
21#include <sysdep.h>
22#include <tls.h>
23#include "tlsdesc.h"
24
/* Spill and reload of the SIMD&FP registers q0-q31 around a call.
   The registers are handled pairwise: pair N (q<2N>, q<2N+1>) lives at
   [sp + QREGPAIR_BYTES*N] inside a frame of QREG_FRAME_BYTES bytes.  */
#define NSAVEDQREGPAIRS		16
#define QREGPAIR_BYTES		32
#define QREG_FRAME_BYTES	(QREGPAIR_BYTES*NSAVEDQREGPAIRS)

/* The pre-indexed store of q0/q1 allocates the whole frame; the CFA
   adjustment is recorded right after it and the remaining pairs are
   then written into their slots.  */
#define SAVE_Q_REGISTERS					\
	stp	q0, q1, [sp, #-QREG_FRAME_BYTES]!;		\
	cfi_adjust_cfa_offset (QREG_FRAME_BYTES);		\
	stp	q2, q3, [sp, #QREGPAIR_BYTES*1];		\
	stp	q4, q5, [sp, #QREGPAIR_BYTES*2];		\
	stp	q6, q7, [sp, #QREGPAIR_BYTES*3];		\
	stp	q8, q9, [sp, #QREGPAIR_BYTES*4];		\
	stp	q10, q11, [sp, #QREGPAIR_BYTES*5];		\
	stp	q12, q13, [sp, #QREGPAIR_BYTES*6];		\
	stp	q14, q15, [sp, #QREGPAIR_BYTES*7];		\
	stp	q16, q17, [sp, #QREGPAIR_BYTES*8];		\
	stp	q18, q19, [sp, #QREGPAIR_BYTES*9];		\
	stp	q20, q21, [sp, #QREGPAIR_BYTES*10];		\
	stp	q22, q23, [sp, #QREGPAIR_BYTES*11];		\
	stp	q24, q25, [sp, #QREGPAIR_BYTES*12];		\
	stp	q26, q27, [sp, #QREGPAIR_BYTES*13];		\
	stp	q28, q29, [sp, #QREGPAIR_BYTES*14];		\
	stp	q30, q31, [sp, #QREGPAIR_BYTES*15];

/* Mirror image of SAVE_Q_REGISTERS: pairs 1-15 are reloaded first, then
   the post-indexed load of q0/q1 releases the frame and the CFA
   adjustment is undone.  */
#define RESTORE_Q_REGISTERS					\
	ldp	q2, q3, [sp, #QREGPAIR_BYTES*1];		\
	ldp	q4, q5, [sp, #QREGPAIR_BYTES*2];		\
	ldp	q6, q7, [sp, #QREGPAIR_BYTES*3];		\
	ldp	q8, q9, [sp, #QREGPAIR_BYTES*4];		\
	ldp	q10, q11, [sp, #QREGPAIR_BYTES*5];		\
	ldp	q12, q13, [sp, #QREGPAIR_BYTES*6];		\
	ldp	q14, q15, [sp, #QREGPAIR_BYTES*7];		\
	ldp	q16, q17, [sp, #QREGPAIR_BYTES*8];		\
	ldp	q18, q19, [sp, #QREGPAIR_BYTES*9];		\
	ldp	q20, q21, [sp, #QREGPAIR_BYTES*10];		\
	ldp	q22, q23, [sp, #QREGPAIR_BYTES*11];		\
	ldp	q24, q25, [sp, #QREGPAIR_BYTES*12];		\
	ldp	q26, q27, [sp, #QREGPAIR_BYTES*13];		\
	ldp	q28, q29, [sp, #QREGPAIR_BYTES*14];		\
	ldp	q30, q31, [sp, #QREGPAIR_BYTES*15];		\
	ldp	q0, q1, [sp], #QREG_FRAME_BYTES;		\
	cfi_adjust_cfa_offset (-QREG_FRAME_BYTES);
63
64	.text
65
66	/* Compute the thread pointer offset for symbols in the static
67	   TLS block. The offset is the same for all threads.
68	   Prototype:
69	   _dl_tlsdesc_return (tlsdesc *) ;
70	 */
71	.hidden _dl_tlsdesc_return
72	.global	_dl_tlsdesc_return
73	.type	_dl_tlsdesc_return,%function
74	cfi_startproc
75	.align 2
76_dl_tlsdesc_return:
77	BTI_C
78	PTR_ARG (0)
79	ldr	PTR_REG (0), [x0, #PTR_SIZE]
80	RET
81	cfi_endproc
82	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
83
84	/* Handler for undefined weak TLS symbols.
85	   Prototype:
86	   _dl_tlsdesc_undefweak (tlsdesc *);
87
88	   The second word of the descriptor contains the addend.
89	   Return the addend minus the thread pointer. This ensures
90	   that when the caller adds on the thread pointer it gets back
91	   the addend.  */
92
93	.hidden _dl_tlsdesc_undefweak
94	.global	_dl_tlsdesc_undefweak
95	.type	_dl_tlsdesc_undefweak,%function
96	cfi_startproc
97	.align  2
98_dl_tlsdesc_undefweak:
99	BTI_C
100	str	x1, [sp, #-16]!
101	cfi_adjust_cfa_offset (16)
102	PTR_ARG (0)
103	ldr	PTR_REG (0), [x0, #PTR_SIZE]
104	mrs	x1, tpidr_el0
105	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
106	ldr	x1, [sp], #16
107	cfi_adjust_cfa_offset (-16)
108	RET
109	cfi_endproc
110	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
111
#ifdef SHARED
	/* Resolver for dynamic TLS symbols.
	   Prototype:
	   _dl_tlsdesc_dynamic (tlsdesc *) ;

	   In:  x0 = address of the descriptor; its second word points to
		a tlsdesc_dynamic_arg structure.
	   Out: x0 = offset between the thread pointer and the object
		referenced by the argument.

	   x1-x4 serve as scratch and are saved on entry and reloaded on
	   exit.  The slow path additionally preserves x5-x18, x29, x30
	   and q0-q31 around its call.  The condition flags are changed
	   by the compares below.

	   The code implements:

	   ptrdiff_t
	   __attribute__ ((__regparm__ (1)))
	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
	   {
	     struct tlsdesc_dynamic_arg *td = tdp->arg;
	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
	     if (__builtin_expect (td->gen_count <= dtv[0].counter
		&& (dtv[td->tlsinfo.ti_module].pointer.val
		    != TLS_DTV_UNALLOCATED),
		1))
	       return dtv[td->tlsinfo.ti_module].pointer.val
		+ td->tlsinfo.ti_offset
		- __thread_pointer;

	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
	   }
	 */

/* Number of 16-byte slots in the slow-path integer frame: x29/x30 plus
   the seven pairs covering x5-x18.  */
# define NSAVEXREGPAIRS 8

	.hidden	_dl_tlsdesc_dynamic
	.globl	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic, %function
	cfi_startproc
	.p2align 2
_dl_tlsdesc_dynamic:
	BTI_C
	PTR_ARG (0)

	/* The fast path needs only four scratch registers, so spill
	   just x1-x4 into a 32-byte frame.  Everything else is saved
	   later, and only if the slow path is taken.  */
	stp	x1, x2, [sp, #-32]!
	cfi_adjust_cfa_offset (32)
	stp	x3, x4, [sp, #16]
	cfi_rel_offset (x1, 0)
	cfi_rel_offset (x2, 8)
	cfi_rel_offset (x3, 16)
	cfi_rel_offset (x4, 24)

	mrs	x4, tpidr_el0				/* x4 = thread pointer.  */
	ldr	PTR_REG (1), [x0, #TLSDESC_ARG]		/* x1 = td = tdp->arg.  */
	ldr	PTR_REG (0), [x4, #TCBHEAD_DTV]		/* x0 = dtv.  */
	ldr	PTR_REG (3), [x1, #TLSDESC_GEN_COUNT]	/* x3 = td->gen_count.  */
	ldr	PTR_REG (2), [x0, #DTV_COUNTER]		/* x2 = dtv[0].counter.  */
	/* Unsigned test: a gen_count above the dtv counter goes to the
	   slow path.  */
	cmp	PTR_REG (3), PTR_REG (2)
	b.hi	.Ltlsdesc_dynamic_slow
	/* x2 = td->tlsinfo.ti_module, x3 = td->tlsinfo.ti_offset.  */
	ldp	PTR_REG (2), PTR_REG (3), [x1, #TLSDESC_MODID]
	/* x0 = dtv + (ti_module << (PTR_LOG_SIZE + 1)), the address of
	   the module's DTV entry.  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
	ldr	PTR_REG (0), [x0]			/* x0 = val member of the entry.  */
	/* An entry still marked unallocated also takes the slow path.  */
	cmp	PTR_REG (0), #TLS_DTV_UNALLOCATED
	b.eq	.Ltlsdesc_dynamic_slow
	/* Snapshot the unwind state while the 32-byte frame is live; the
	   slow path reinstates it below.  */
	cfi_remember_state
	sub	PTR_REG (3), PTR_REG (3), PTR_REG (4)	/* x3 = ti_offset - tp.  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (3)	/* x0 = val + ti_offset - tp.  */
.Ltlsdesc_dynamic_exit:
	/* Common epilogue for both paths: reload x1-x4, drop the
	   32-byte frame and return with the result in x0.  */
	ldp	x3, x4, [sp, #16]
	ldp	x1, x2, [sp], #32
	cfi_adjust_cfa_offset (-32)
	RET
.Ltlsdesc_dynamic_slow:
	/* Slow path.  __tls_get_addr () has to be called, so every
	   register that the callee may trash must be saved before the
	   call and restored after it.  x1 still holds td here.  */

	/* The directives just above describe the fast-path return;
	   switch back to the remembered state, in which the 32-byte
	   frame with x1-x4 is still on the stack.  */
	cfi_restore_state
# if HAVE_AARCH64_PAC_RET
	/* Return-address signing build: PACIASP precedes the spill of
	   x30, and cfi_window_save records the change of signing state
	   for the unwinder.  */
	PACIASP
	cfi_window_save
# endif
	/* Allocate the integer frame, set up the frame record in
	   x29/x30 and spill x5-x18 into the remaining slots.  */
	stp	x29, x30, [sp, #-16*NSAVEXREGPAIRS]!
	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
	mov	x29, sp
	stp	x5, x6, [sp, #16*1]
	stp	x7, x8, [sp, #16*2]
	stp	x9, x10, [sp, #16*3]
	stp	x11, x12, [sp, #16*4]
	stp	x13, x14, [sp, #16*5]
	stp	x15, x16, [sp, #16*6]
	stp	x17, x18, [sp, #16*7]
	cfi_rel_offset (x5, 16*1)
	cfi_rel_offset (x6, 16*1+8)
	cfi_rel_offset (x7, 16*2)
	cfi_rel_offset (x8, 16*2+8)
	cfi_rel_offset (x9, 16*3)
	cfi_rel_offset (x10, 16*3+8)
	cfi_rel_offset (x11, 16*4)
	cfi_rel_offset (x12, 16*4+8)
	cfi_rel_offset (x13, 16*5)
	cfi_rel_offset (x14, 16*5+8)
	cfi_rel_offset (x15, 16*6)
	cfi_rel_offset (x16, 16*6+8)
	cfi_rel_offset (x17, 16*7)
	cfi_rel_offset (x18, 16*7+8)

	/* The SIMD&FP registers go into their own frame below the
	   integer one.  */
	SAVE_Q_REGISTERS

	/* Pass td as the argument; the C version above passes
	   &td->tlsinfo.  */
	mov	x0, x1
	bl	__tls_get_addr

	/* Turn the returned address into an offset from the thread
	   pointer.  x1 is free to use: the epilogue reloads it.  */
	mrs	x1, tpidr_el0
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)

	RESTORE_Q_REGISTERS

	ldp	x5, x6, [sp, #16*1]
	ldp	x7, x8, [sp, #16*2]
	ldp	x9, x10, [sp, #16*3]
	ldp	x11, x12, [sp, #16*4]
	ldp	x13, x14, [sp, #16*5]
	ldp	x15, x16, [sp, #16*6]
	ldp	x17, x18, [sp, #16*7]

	/* Reload the frame record and release the integer frame.  */
	ldp	x29, x30, [sp], #16*NSAVEXREGPAIRS
	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
	cfi_restore (x29)
	cfi_restore (x30)
# if HAVE_AARCH64_PAC_RET
	/* Counterpart of the PACIASP above, with the matching unwind
	   state change.  */
	AUTIASP
	cfi_window_save
# endif
	/* Finish through the common epilogue, which reloads x1-x4.  */
	b	.Ltlsdesc_dynamic_exit
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, . - _dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif
251