1 /* futex operations for glibc-internal use.  Stub version; do not include
2    this file directly.
3    Copyright (C) 2014-2022 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
14    Lesser General Public License for more details.
15 
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
19 
20 #ifndef STUB_FUTEX_INTERNAL_H
21 #define STUB_FUTEX_INTERNAL_H
22 
23 #include <sys/time.h>
24 #include <stdio.h>
25 #include <stdbool.h>
26 #include <lowlevellock-futex.h>
27 #include <libc-diag.h>
28 
29 /* This file defines futex operations used internally in glibc.  A futex
30    consists of the so-called futex word in userspace, which is of type
31    unsigned int and represents an application-specific condition, and kernel
32    state associated with this particular futex word (e.g., wait queues).  The
33    futex operations we provide are wrappers for the futex syscalls and add
34    glibc-specific error checking of the syscall return value.  We abort on
35    error codes that are caused by bugs in glibc or in the calling application,
36    or when an error code is not known.  We return error codes that can arise
37    in correct executions to the caller.  Each operation calls out exactly the
38    return values that callers need to handle.
39 
40    The private flag must be either FUTEX_PRIVATE or FUTEX_SHARED.
41    FUTEX_PRIVATE is always supported, and the implementation can internally
42    use FUTEX_SHARED when FUTEX_PRIVATE is requested.  FUTEX_SHARED is not
43    necessarily supported (use futex_supports_pshared to detect this).
44 
45    We expect callers to only use these operations if futexes and the
46    specific futex operations being used are supported (e.g., FUTEX_SHARED).
47 
48    Given that waking other threads waiting on a futex involves concurrent
49    accesses to the futex word, you must use atomic operations to access the
50    futex word.
51 
52    Both absolute and relative timeouts can be used.  An absolute timeout
53    expires when the given specific point in time on the specified clock
54    passes, or when it already has passed.  A relative timeout expires when
55    the given duration of time on the CLOCK_MONOTONIC clock passes.
56 
57    Due to POSIX requirements on when synchronization data structures such
58    as mutexes or semaphores can be destroyed and due to the futex design
59    having separate fast/slow paths for wake-ups, we need to consider that
60    futex_wake calls might effectively target a data structure that has been
61    destroyed and reused for another object, or unmapped; thus, some
62    errors or spurious wake-ups can happen in correct executions that would
63    not be possible in a program using just a single futex whose lifetime
64    does not end before the program terminates.  For background, see:
65    https://sourceware.org/ml/libc-alpha/2014-04/msg00075.html
66    https://lkml.org/lkml/2014/11/27/472  */
67 
/* Defined this way for interoperability with lowlevellock.
   FUTEX_PRIVATE must be zero because the initializers for pthread_mutex_t,
   pthread_rwlock_t, and pthread_cond_t initialize the respective field of
   those structures to zero, and we want FUTEX_PRIVATE to be the default.  */
#define FUTEX_PRIVATE LLL_PRIVATE
#define FUTEX_SHARED  LLL_SHARED
/* Enforce the zero requirement described above at compile time, so that a
   lowlevellock definition that breaks it is rejected when this header is
   preprocessed.  */
#if FUTEX_PRIVATE != 0
# error FUTEX_PRIVATE must be equal to 0
#endif
77 
/* If no separate __NR_futex_time64 is defined, make it an alias for
   __NR_futex so that code can name __NR_futex_time64 unconditionally.  */
#ifndef __NR_futex_time64
# define __NR_futex_time64 __NR_futex
#endif
81 
/* Calls __libc_fatal with a fixed error message.  Convenience function for
   concrete implementations of the futex interface.  The function is
   declared noreturn, so a caller may end a control path with it without
   providing a return value afterwards.  */
static __always_inline __attribute__ ((__noreturn__)) void
futex_fatal_error (void)
{
  __libc_fatal ("The futex facility returned an unexpected error code.\n");
}
89 
90 
/* The Linux kernel provides absolute timeouts based on the
92    CLOCK_REALTIME clock and relative timeouts measured against the
93    CLOCK_MONOTONIC clock.
94 
95    We expect a Linux kernel version of 2.6.22 or more recent (since this
96    version, EINTR is not returned on spurious wake-ups anymore).  */
97 
98 /* Returns EINVAL if PSHARED is neither PTHREAD_PROCESS_PRIVATE nor
99    PTHREAD_PROCESS_SHARED; otherwise, returns 0 if PSHARED is supported, and
100    ENOTSUP if not.  */
static __always_inline int
futex_supports_pshared (int pshared)
{
  /* Both process-private and process-shared futexes are accepted here; the
     private case is checked first and marked as the expected one.  */
  if (__glibc_likely (pshared == PTHREAD_PROCESS_PRIVATE)
      || pshared == PTHREAD_PROCESS_SHARED)
    return 0;

  /* Any other value is not a valid PSHARED setting.  */
  return EINVAL;
}
111 
112 /* Atomically wrt other futex operations on the same futex, this blocks iff
113    the value *FUTEX_WORD matches the expected value.  This is
114    semantically equivalent to:
115      l = <get lock associated with futex> (FUTEX_WORD);
116      wait_flag = <get wait_flag associated with futex> (FUTEX_WORD);
117      lock (l);
118      val = atomic_load_relaxed (FUTEX_WORD);
119      if (val != expected) { unlock (l); return EAGAIN; }
120      atomic_store_relaxed (wait_flag, true);
121      unlock (l);
122      // Now block; can time out in futex_time_wait (see below)
123      while (atomic_load_relaxed(wait_flag) && !<spurious wake-up>);
124 
125    Note that no guarantee of a happens-before relation between a woken
126    futex_wait and a futex_wake is documented; however, this does not matter
127    in practice because we have to consider spurious wake-ups (see below),
128    and thus would not be able to reliably reason about which futex_wake woke
129    us.
130 
131    Returns 0 if woken by a futex operation or spuriously.  (Note that due to
132    the POSIX requirements mentioned above, we need to conservatively assume
133    that unrelated futex_wake operations could wake this futex; it is easiest
134    to just be prepared for spurious wake-ups.)
135    Returns EAGAIN if the futex word did not match the expected value.
136    Returns EINTR if waiting was interrupted by a signal.
137 
138    Note that some previous code in glibc assumed the underlying futex
139    operation (e.g., syscall) to start with or include the equivalent of a
140    seq_cst fence; this allows one to avoid an explicit seq_cst fence before
141    a futex_wait call when synchronizing similar to Dekker synchronization.
142    However, we make no such guarantee here.  */
static __always_inline int
futex_wait (unsigned int *futex_word, unsigned int expected, int private)
{
  /* No timeout is supplied (NULL), so the wait itself cannot time out.  */
  int res = lll_futex_timed_wait (futex_word, expected, NULL, private);

  /* Results that can arise in correct executions are handed back to the
     caller as non-negative error numbers: 0, EAGAIN or EINTR.  */
  if (res == 0 || res == -EAGAIN || res == -EINTR)
    return -res;

  /* Everything else is treated as fatal:
       -ETIMEDOUT cannot have happened as we provided no timeout;
       -EFAULT must have been caused by a glibc or application bug;
       -EINVAL is either due to wrong alignment or due to the timeout not
        being normalized, i.e. a glibc or application bug;
       -ENOSYS must have been caused by a glibc bug.
     No other errors are documented at this time, so unknown codes are
     fatal as well.  */
  futex_fatal_error ();
}
165 
166 /* Like futex_wait but does not provide any indication why we stopped waiting.
167    Thus, when this function returns, you have to always check FUTEX_WORD to
168    determine whether you need to continue waiting, and you cannot detect
169    whether the waiting was interrupted by a signal.  Example use:
170      while (atomic_load_relaxed (&futex_word) == 23)
171        futex_wait_simple (&futex_word, 23, FUTEX_PRIVATE);
172    This is common enough to make providing this wrapper worthwhile.  */
static __always_inline void
futex_wait_simple (unsigned int *futex_word, unsigned int expected,
                   int private)
{
  /* The reason for returning (wake-up, value mismatch, signal) is
     deliberately dropped; the caller re-checks the futex word itself.  */
  int wait_result = futex_wait (futex_word, expected, private);
  ignore_value (wait_result);
}
179 
180 /* Check whether the specified clockid is supported by
181    futex_abstimed_wait and futex_abstimed_wait_cancelable.  */
static __always_inline int
futex_abstimed_supported_clockid (clockid_t clockid)
{
  /* Thin forwarder: the decision is made entirely by the lowlevellock
     helper, whose result is returned as an int.  */
  int supported = lll_futex_supported_clockid (clockid);
  return supported;
}
187 
188 /* Atomically wrt other futex operations on the same futex, this unblocks the
189    specified number of processes, or all processes blocked on this futex if
190    there are fewer than the specified number.  Semantically, this is
191    equivalent to:
192      l = <get lock associated with futex> (FUTEX_WORD);
193      lock (l);
194      for (res = 0; PROCESSES_TO_WAKE > 0; PROCESSES_TO_WAKE--, res++) {
195        if (<no process blocked on futex>) break;
196        wf = <get wait_flag of a process blocked on futex> (FUTEX_WORD);
197        // No happens-before guarantee with woken futex_wait (see above)
198        atomic_store_relaxed (wf, 0);
199      }
200      return res;
201 
202    Note that we need to support futex_wake calls to past futexes whose memory
203    has potentially been reused due to POSIX' requirements on synchronization
204    object destruction (see above); therefore, we must not report or abort
205    on most errors.  */
static __always_inline void
futex_wake (unsigned int *futex_word, int processes_to_wake, int private)
{
  int res = lll_futex_wake (futex_word, processes_to_wake, private);

  /* A non-negative result means no error; the number of woken processes is
     ignored.  Two error codes are also tolerated silently:
       -EFAULT could have happened due to memory reuse;
       -EINVAL could be either due to incorrect alignment (a bug in glibc
        or in the application) or due to memory being reused for a PI
        futex.  We cannot distinguish between the two causes, and one of
        them is correct use, so we do not act in this case.  */
  if (res >= 0 || res == -EFAULT || res == -EINVAL)
    return;

  /* -ENOSYS must have been caused by a glibc bug.  No other errors are
     documented at this time, so any remaining code is fatal too.  */
  futex_fatal_error ();
}
228 
/* The operation checks the value of the futex: if the value is 0, it is
   atomically set to the caller's thread ID.  If the futex value is nonzero,
   the operation atomically sets the FUTEX_WAITERS bit, which signals to the
   futex owner that it cannot unlock the futex in user space by atomically
   setting its value to 0.

   If more than one wait operation is issued, the waiters are enqueued in
   descending priority order.

   The ABSTIME argument provides an absolute timeout (measured against the
   CLOCK_REALTIME or CLOCK_MONOTONIC clock).  If ABSTIME is NULL, the
   operation will block indefinitely.

   Returns:

     - 0 if woken by a PI unlock operation or spuriously.
     - EAGAIN if the futex owner thread ID is about to exit, but has not yet
       handled the state cleanup.
     - EDEADLK if the futex is already locked by the caller.
     - ESRCH if the thread ID in the futex does not exist.
     - EINVAL if the state is corrupted or if there is a waiter on the
       futex.
     - ETIMEDOUT if the ABSTIME expires.
*/
int __futex_lock_pi64 (int *futex_word, clockid_t clockid,
		       const struct __timespec64 *abstime, int private);
255 
256 /* Wakes the top priority waiter that called a futex_lock_pi operation on
257    the futex.
258 
259    Returns the same values as futex_lock_pi under those same conditions;
260    additionally, returns EPERM when the caller is not allowed to attach
261    itself to the futex.  */
static __always_inline int
futex_unlock_pi (unsigned int *futex_word, int private)
{
  int res = lll_futex_timed_unlock_pi (futex_word, private);

  /* These results are reported to the caller as non-negative error
     numbers.  -EPERM means the caller is not allowed to attach itself to
     the futex; it is also used to check if PI futexes are supported by the
     kernel.  */
  if (res == 0
      || res == -EAGAIN
      || res == -EINTR
      || res == -ETIMEDOUT
      || res == -ESRCH
      || res == -EDEADLK
      || res == -ENOSYS
      || res == -EPERM)
    return -res;

  /* -EINVAL (either due to wrong alignment or due to the timeout not being
     normalized) and -EFAULT must have been caused by a glibc or application
     bug.  No other errors are documented at this time, so any remaining
     code is fatal as well.  */
  futex_fatal_error ();
}
289 
/* Like futex_wait, but will eventually time out (i.e., stop being blocked)
   after the duration of time provided (i.e., ABSTIME) has passed using the
   clock specified by CLOCKID (currently only CLOCK_REALTIME and
   CLOCK_MONOTONIC, the ones supported by lll_futex_supported_clockid).
   ABSTIME can also equal NULL, in which case this function behaves
   equivalent to futex_wait.

   Returns the same values as futex_wait under those same conditions;
   additionally, returns ETIMEDOUT if the timeout expired.

   The call acts as a cancellation entrypoint.  */
int
__futex_abstimed_wait_cancelable64 (unsigned int* futex_word,
                                    unsigned int expected, clockid_t clockid,
                                    const struct __timespec64* abstime,
                                    int private);
libc_hidden_proto (__futex_abstimed_wait_cancelable64);
307 
/* Timed wait on FUTEX_WORD taking the same arguments as
   __futex_abstimed_wait_cancelable64.
   NOTE(review): presumably this is the variant that does not act as a
   cancellation entrypoint; the definition is not visible from this header,
   so confirm against the implementation.  */
int
__futex_abstimed_wait64 (unsigned int* futex_word, unsigned int expected,
                         clockid_t clockid,
                         const struct __timespec64* abstime,
                         int private);
libc_hidden_proto (__futex_abstimed_wait64);
314 
315 
/* Acquire the lock word *FUTEX, waiting with __futex_abstimed_wait64 against
   CLOCKID/ABSTIME whenever it cannot be taken immediately.  Returns 0 on
   success, or EINVAL, ETIMEDOUT or EOVERFLOW as reported by the wait.  */
static __always_inline int
__futex_clocklock64 (int *futex, clockid_t clockid,
                     const struct __timespec64 *abstime, int private)
{
  /* Fast path: try to switch *FUTEX from 0 to 1.  A zero result from the
     compare-and-exchange means there is nothing more to do; a nonzero
     result is the unexpected case.  */
  if (!__glibc_unlikely (atomic_compare_and_exchange_bool_acq (futex, 1, 0)))
    return 0;

  /* Slow path: keep storing 2 into *FUTEX until the value it replaces is 0.
     NOTE(review): 2 presumably marks the lock as taken with possible
     waiters, as in lowlevellock -- confirm.  */
  while (atomic_exchange_acq (futex, 2) != 0)
    {
      int rc = __futex_abstimed_wait64 ((unsigned int *) futex, 2, clockid,
                                        abstime, private);
      switch (rc)
        {
        case EINVAL:
        case ETIMEDOUT:
        case EOVERFLOW:
          /* These results end the attempt and are passed to the caller.  */
          return rc;

        default:
          /* Any other result: go around and retry the exchange.  */
          break;
        }
    }

  return 0;
}
333 
334 #endif  /* futex-internal.h */
335