1 /* Run a test case in an isolated namespace.
2    Copyright (C) 2018-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #define _FILE_OFFSET_BITS 64
20 
21 #include <array_length.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sched.h>
26 #include <sys/syscall.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <dirent.h>
30 #include <string.h>
31 #include <sys/stat.h>
32 #include <sys/fcntl.h>
33 #include <sys/file.h>
34 #include <sys/wait.h>
35 #include <stdarg.h>
36 #include <sys/sysmacros.h>
37 #include <ctype.h>
38 #include <utime.h>
39 #include <errno.h>
40 #include <error.h>
41 #include <libc-pointer-arith.h>
42 
43 #ifdef __linux__
44 #include <sys/mount.h>
45 #endif
46 
47 #include <support/support.h>
48 #include <support/xunistd.h>
49 #include <support/capture_subprocess.h>
50 #include "check.h"
51 #include "test-driver.h"
52 
53 #ifndef __linux__
/* Stand-in for mount on ports without <sys/mount.h>: reports the
   test through FAIL_UNSUPPORTED instead of mounting anything.  */
int no_mount (void)
{
  FAIL_UNSUPPORTED("mount not supported; port needed");
}
/* Route every mount call in this file to the stub above; the
   arguments are discarded unevaluated.  */
#define mount(s,t,fs,f,d) no_mount()
59 #endif
60 
61 int verbose = 0;
62 
63 /* Running a test in a container is tricky.  There are two main
64    categories of things to do:
65 
66    1. "Once" actions, like setting up the container and doing an
67       install into it.
68 
69    2. "Per-test" actions, like copying in support files and
70       configuring the container.
71 
72 
73    "Once" actions:
74 
75    * mkdir $buildroot/testroot.pristine/
76    * install into it
77      * default glibc install
78      * create /bin for /bin/sh
79      * create $(complocaledir) so localedef tests work with default paths.
80      * install /bin/sh, /bin/echo, and /bin/true.
81    * rsync to $buildroot/testroot.root/
82 
83    "Per-test" actions:
84    * maybe rsync to $buildroot/testroot.root/
85    * copy support files and test binary
86    * chroot/unshare
87    * set up any mounts (like /proc)
88    * run ldconfig
89 
90    Magic files:
91 
92    For test $srcdir/foo/mytest.c we look for $srcdir/foo/mytest.root
93    and, if found...
94 
95    * mytest.root/ is rsync'd into container
96    * mytest.root/preclean.req causes fresh rsync (with delete) before
97      test if present
98    * mytest.root/mytest.script has a list of "commands" to run:
       syntax:
         # comment
         pidns <comment>
         su
         mv FILE FILE
         cp FILE FILE
         rm FILE
         so FILE
         chmod MODE FILE
         cwd PATH
         exec FILE [ARGV0]
         mkdirp MODE DIR
109 
110        variables:
111 	 $B/ build dir, equivalent to $(common-objpfx)
112 	 $S/ source dir, equivalent to $(srcdir)
113 	 $I/ install dir, equivalent to $(prefix)
114 	 $L/ library dir (in container), equivalent to $(libdir)
115 	 $complocaledir/ compiled locale dir, equivalent to $(complocaledir)
116 	 / container's root
117 
118 	 If FILE begins with any of these variables then they will be
119 	 substituted for the described value.
120 
121 	 The goal is to expose as many of the runtime's configured paths
122 	 via variables so they can be used to setup the container environment
123 	 before execution reaches the test.
124 
       details:
         - '#': A comment.
         - 'pidns': Require a separate PID namespace, prints comment if it can't
            (default is a shared pid namespace)
         - 'su': Enables running test as root in the container.
         - 'mv': A minimal move files command.
         - 'cp': A minimal copy files command.
         - 'rm': A minimal remove files command.
         - 'so': Copy the named shared object from the directory of the
            test binary into the container's library directory.
         - 'chmod': A minimal "chmod MODE FILE" command.
         - 'cwd': set test working directory
         - 'exec': change test binary location (may end in /); the
            optional second argument is the value passed as argv[0]
         - 'mkdirp': A minimal "mkdir -p FILE" command.
136 
137    * mytest.root/postclean.req causes fresh rsync (with delete) after
138      test if present
139 
   * mytest.root/ldconfig.run causes ldconfig to be run prior to
     test execution (to set up the initial ld.so.cache).
142 
143    Note that $srcdir/foo/mytest.script may be used instead of a
144    $srcdir/foo/mytest.root/mytest.script in the sysroot template, if
145    there is no other reason for a sysroot.
146 
147    Design goals:
148 
149    * independent of other packages which may not be installed (like
150      rsync or Docker, or even "cp")
151 
152    * Simple, easy to review code (i.e. prefer simple naive code over
153      complex efficient code)
154 
155    * The current implementation is parallel-make-safe, but only in
156      that it uses a lock to prevent parallel access to the testroot.  */
157 
158 
159 /* Utility Functions */
160 
/* Unlink PATH.  A file that is already absent (ENOENT) is accepted
   silently; any other unlink failure is reported with FAIL_EXIT1.  */
void
maybe_xunlink (const char *path)
{
  if (unlink (path) >= 0)
    return;

  /* The file being gone already is exactly what the caller wants.  */
  if (errno == ENOENT)
    return;

  FAIL_EXIT1 ("unlink (\"%s\"): %m", path);
}
169 
/* Create directory PATH with permissions MODE via xmkdir, unless
   stat already reports a directory at PATH.  */
void
maybe_xmkdir (const char *path, mode_t mode)
{
  struct stat sb;
  int already_dir = stat (path, &sb) == 0 && S_ISDIR (sb.st_mode);

  if (!already_dir)
    xmkdir (path, mode);
}
181 
/* Concatenate STR and the following strings (the argument list must
   end with NULL) into a temporary buffer and return it.  The result
   lives in one of 10 buffers that are reused in rotation, so only
   the 10 most recent results are valid; xstrdup () anything that
   must outlive that.  */
static char *
concat (const char *str, ...)
{
  /* Static storage, so these start out NULL/zero.  */
  static char *bufs[10];
  static size_t buflens[10];
  static int bufn = 0;

  va_list measure_ap, copy_ap;
  const char *piece;
  char *out;
  size_t piece_len;
  size_t needed;
  int slot;

  /* Claim the current slot and advance the rotation.  */
  slot = bufn;
  bufn = (bufn + 1) % 10;

  va_start (measure_ap, str);
  va_copy (copy_ap, measure_ap);

  /* First pass: total size including the terminating NUL.  */
  needed = strlen (str) + 1;
  for (piece = va_arg (measure_ap, char *);
       piece != NULL;
       piece = va_arg (measure_ap, char *))
    needed += strlen (piece);
  va_end (measure_ap);

  /* Allocate the slot on first use, grow it when too small.  */
  if (bufs[slot] == NULL)
    {
      bufs[slot] = xmalloc (needed);
      buflens[slot] = needed;
    }
  else if (buflens[slot] < needed)
    {
      bufs[slot] = xrealloc (bufs[slot], needed);
      buflens[slot] = needed;
    }

  /* Second pass: lay the pieces down back to back.  */
  out = bufs[slot];
  piece_len = strlen (str);
  memcpy (out, str, piece_len);
  out += piece_len;
  for (piece = va_arg (copy_ap, char *);
       piece != NULL;
       piece = va_arg (copy_ap, char *))
    {
      piece_len = strlen (piece);
      memcpy (out, piece, piece_len);
      out += piece_len;
    }
  *out = '\0';
  va_end (copy_ap);

  return bufs[slot];
}
233 
234 #ifdef CLONE_NEWNS
/* Join the NUM_WORDS strings in WORDS into one newly allocated
   string, with a single space between consecutive words.  The caller
   owns the result and must free it.  When NUM_WORDS is zero or
   negative the result is an empty string.  */
static char *
concat_words (char **words, int num_words)
{
  size_t len = 1;		/* Terminating NUL.  */
  int i;
  char *rv, *p;

  for (i = 0; i < num_words; i ++)
    {
      len += strlen (words[i]);
      if (i > 0)
	len ++;			/* Separating space.  */
    }

  p = rv = (char *) xmalloc (len);
  /* Keep the result a valid string even when no word is copied.  */
  *p = '\0';

  for (i = 0; i < num_words; i ++)
    {
      if (i > 0)
	p = stpcpy (p, " ");
      p = stpcpy (p, words[i]);
    }

  return rv;
}
260 #endif
261 
262 /* Try to mount SRC onto DEST.  */
263 static void
trymount(const char * src,const char * dest)264 trymount (const char *src, const char *dest)
265 {
266   if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0)
267     FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest);
268 }
269 
/* Special case of trymount for devices like /dev/zero, where a
   device has to be mounted over a file rather than a directory over
   a directory.  Creates (or truncates) NEW_ROOT_PATH/dev/WHICH as the
   mount point and then bind-mounts the host's /dev/WHICH onto it.
   Failure to create the mount point is reported with FAIL_EXIT1.  */
static void
devmount (const char *new_root_path, const char *which)
{
  /* concat results stay valid across the few concat calls made
     below, so the mount point path can be reused.  */
  const char *mount_point = concat (new_root_path, "/dev/", which, NULL);
  int fd;

  fd = open (mount_point, O_CREAT | O_TRUNC | O_RDWR, 0777);
  if (fd < 0)
    FAIL_EXIT1 ("open (\"%s\"): %m", mount_point);
  xclose (fd);

  trymount (concat ("/dev/", which, NULL), mount_point);
}
283 
/* Return 1 if the string A "looks like" an environment variable
   being set, i.e. it starts with one or more alphanumeric or '_'
   characters immediately followed by '='.  Return 0 otherwise,
   including for the empty string and for a name with no '='.  */
static int
is_env_setting (const char *a)
{
  int count_name = 0;

  for (; *a != '\0'; ++a)
    {
      /* <ctype.h> functions require a value representable as
	 unsigned char; plain char may be negative.  */
      unsigned char c = (unsigned char) *a;

      if (isalnum (c) || c == '_')
	++count_name;
      else
	/* The first non-name character decides: it must be '=' and
	   must follow a non-empty name.  */
	return c == '=' && count_name > 0;
    }

  /* Ran out of string without seeing '='.  */
  return 0;
}
303 
/* Break THE_LINE into whitespace-separated words and store pointers
   to them in THE_WORDS, which has room for NWORDS entries.  THE_LINE
   is modified in place: the whitespace character ending each stored
   word is overwritten with NUL.  At most NWORDS words are stored;
   anything after them is ignored.  Entries of THE_WORDS beyond the
   returned count are set to NULL, so callers can test an optional
   trailing word without seeing stale pointers from a previous line.
   Returns the number of words stored.  */
static int
tokenize (char *the_line, char **the_words, int nwords)
{
  int rv = 0;
  int i;

  while (rv < nwords)
    {
      /* Skip leading whitespace, if any.  The casts keep the
	 <ctype.h> calls defined for negative char values.  */
      while (*the_line && isspace ((unsigned char) *the_line))
	++the_line;

      /* End of line?  */
      if (*the_line == 0)
	break;

      /* THE_LINE points to a non-whitespace character, so we have a
	 word.  */
      the_words[rv] = the_line;
      ++rv;

      /* Skip over the word itself.  */
      while (*the_line && ! isspace ((unsigned char) *the_line))
	++the_line;

      /* We now point at the trailing NUL *or* some whitespace.  */
      if (*the_line == 0)
	break;

      /* It was whitespace, terminate the word and keep tokenizing.  */
      *the_line++ = 0;
    }

  /* Clear the slots we did not fill.  */
  for (i = rv; i < nwords; ++i)
    the_words[i] = NULL;

  return rv;
}
343 
344 
345 /* Mini-RSYNC implementation.  Optimize later.      */
346 
347 /* A few routines for an "rsync buffer" which stores the paths we're
348    working on.  We continuously grow and shrink the paths in each
349    buffer so there's lot of re-use.  */
350 
/* A growable path string used by the mini-rsync code below.  We rely
   on "initialized to zero" to set these up: r_setup treats a NULL BUF
   as "nothing allocated yet".  */
typedef struct
{
  /* Storage holding the NUL-terminated path.  */
  char *buf;
  /* Length of the string currently in BUF, not counting the NUL.  */
  size_t len;
  /* Number of bytes allocated for BUF.  */
  size_t size;
} path_buf;

/* The source and destination path buffers that rsync sets up and
   passes to rsync_1.  */
static path_buf spath, dpath;
360 
361 static void
r_setup(char * path,path_buf * pb)362 r_setup (char *path, path_buf * pb)
363 {
364   size_t len = strlen (path);
365   if (pb->buf == NULL || pb->size < len + 1)
366     {
367       /* Round up.  This is an arbitrary number, just to keep from
368 	 reallocing too often.  */
369       size_t sz = ALIGN_UP (len + 1, 512);
370       if (pb->buf == NULL)
371 	pb->buf = (char *) xmalloc (sz);
372       else
373 	pb->buf = (char *) xrealloc (pb->buf, sz);
374       if (pb->buf == NULL)
375 	FAIL_EXIT1 ("Out of memory while rsyncing\n");
376 
377       pb->size = sz;
378     }
379   strcpy (pb->buf, path);
380   pb->len = len;
381 }
382 
383 static void
r_append(const char * path,path_buf * pb)384 r_append (const char *path, path_buf * pb)
385 {
386   size_t len = strlen (path) + pb->len;
387   if (pb->size < len + 1)
388     {
389       /* Round up */
390       size_t sz = ALIGN_UP (len + 1, 512);
391       pb->buf = (char *) xrealloc (pb->buf, sz);
392       if (pb->buf == NULL)
393 	FAIL_EXIT1 ("Out of memory while rsyncing\n");
394 
395       pb->size = sz;
396     }
397   strcpy (pb->buf + pb->len, path);
398   pb->len = len;
399 }
400 
/* Return 1 if lstat succeeds on PATH (so a dangling symlink counts
   as existing), 0 otherwise.  */
static int
file_exists (char *path)
{
  struct stat sb;

  return lstat (path, &sb) == 0 ? 1 : 0;
}
409 
/* Remove PATH and everything below it by running "rm -rf PATH" in a
   child process and waiting for it.  Failure to fork, to exec rm, or
   to wait for the child, as well as a child that does not exit with
   status 0, is reported with FAIL_EXIT1.  */
static void
recursive_remove (char *path)
{
  pid_t child;
  int status;

  child = fork ();

  if (child == -1)
    {
      perror("fork");
      FAIL_EXIT1 ("Unable to fork");
    }

  if (child == 0)
    {
      /* Child.  The variadic list must end with a null *pointer*,
	 hence the cast.  */
      execlp ("rm", "rm", "-rf", path, (char *) NULL);
      FAIL_EXIT1 ("exec rm: %m");
    }

  /* Parent.  Retry when interrupted by a signal; on any other
     failure STATUS would be uninitialized, so do not look at it.  */
  while (waitpid (child, &status, 0) < 0)
    {
      if (errno != EINTR)
	FAIL_EXIT1 ("waitpid: %m");
    }

  /* "rm" would have already printed a suitable error message.  */
  if (! WIFEXITED (status)
      || WEXITSTATUS (status) != 0)
    FAIL_EXIT1 ("exec child returned status: %d", status);
}
437 
/* Copy the contents of file SNAME to DNAME (created or truncated),
   then give DNAME the source's permission bits and its access and
   modification times.  Any failure is reported with FAIL_EXIT1.
   Used for both rsync and the mytest.script "cp" command.  */
static void
copy_one_file (const char *sname, const char *dname)
{
  struct stat src_st;
  struct utimbuf stamps;
  int in_fd;
  int out_fd;

  in_fd = open (sname, O_RDONLY);
  if (in_fd < 0)
    FAIL_EXIT1 ("unable to open %s for reading\n", sname);

  /* The size, mode and timestamps all come from the source.  */
  if (fstat (in_fd, &src_st) < 0)
    FAIL_EXIT1 ("unable to fstat %s\n", sname);

  /* Start with a private mode; the real one is applied after the
     data has been written.  */
  out_fd = open (dname, O_WRONLY | O_TRUNC | O_CREAT, 0600);
  if (out_fd < 0)
    FAIL_EXIT1 ("unable to open %s for writing\n", dname);

  xcopy_file_range (in_fd, 0, out_fd, 0, src_st.st_size, 0);

  xclose (in_fd);
  xclose (out_fd);

  if (chmod (dname, src_st.st_mode & 0777) < 0)
    FAIL_EXIT1 ("chmod %s: %s\n", dname, strerror (errno));

  /* Matching mtimes are what lets need_sync skip this file later.  */
  stamps.actime = src_st.st_atime;
  stamps.modtime = src_st.st_mtime;
  if (utime (dname, &stamps) < 0)
    FAIL_EXIT1 ("utime %s: %s\n", dname, strerror (errno));
}
470 
471 /* We don't check *everything* about the two files to see if a copy is
472    needed, just the minimum to make sure we get the latest copy.  */
473 static int
need_sync(char * ap,char * bp,struct stat * a,struct stat * b)474 need_sync (char *ap, char *bp, struct stat *a, struct stat *b)
475 {
476   if ((a->st_mode & S_IFMT) != (b->st_mode & S_IFMT))
477     return 1;
478 
479   if (S_ISLNK (a->st_mode))
480     {
481       int rv;
482       char *al, *bl;
483 
484       if (a->st_size != b->st_size)
485 	return 1;
486 
487       al = xreadlink (ap);
488       bl = xreadlink (bp);
489       rv = strcmp (al, bl);
490       free (al);
491       free (bl);
492       if (rv == 0)
493 	return 0; /* links are same */
494       return 1; /* links differ */
495     }
496 
497   if (verbose)
498     {
499       if (a->st_size != b->st_size)
500 	printf ("SIZE\n");
501       if ((a->st_mode & 0777) != (b->st_mode & 0777))
502 	printf ("MODE\n");
503       if (a->st_mtime != b->st_mtime)
504 	printf ("TIME\n");
505     }
506 
507   if (a->st_size == b->st_size
508       && ((a->st_mode & 0777) == (b->st_mode & 0777))
509       && a->st_mtime == b->st_mtime)
510     return 0;
511 
512   return 1;
513 }
514 
515 static void
rsync_1(path_buf * src,path_buf * dest,int and_delete,int force_copies)516 rsync_1 (path_buf * src, path_buf * dest, int and_delete, int force_copies)
517 {
518   DIR *dir;
519   struct dirent *de;
520   struct stat s, d;
521 
522   r_append ("/", src);
523   r_append ("/", dest);
524 
525   if (verbose)
526     printf ("sync %s to %s%s%s\n", src->buf, dest->buf,
527 	    and_delete ? " and delete" : "",
528 	    force_copies ? " (forced)" : "");
529 
530   size_t staillen = src->len;
531 
532   size_t dtaillen = dest->len;
533 
534   dir = opendir (src->buf);
535 
536   while ((de = readdir (dir)) != NULL)
537     {
538       if (strcmp (de->d_name, ".") == 0
539 	  || strcmp (de->d_name, "..") == 0)
540 	continue;
541 
542       src->len = staillen;
543       r_append (de->d_name, src);
544       dest->len = dtaillen;
545       r_append (de->d_name, dest);
546 
547       s.st_mode = ~0;
548       d.st_mode = ~0;
549 
550       if (lstat (src->buf, &s) != 0)
551 	FAIL_EXIT1 ("%s obtained by readdir, but stat failed.\n", src->buf);
552 
553       /* It's OK if this one fails, since we know the file might be
554 	 missing.  */
555       lstat (dest->buf, &d);
556 
557       if (! force_copies && ! need_sync (src->buf, dest->buf, &s, &d))
558 	{
559 	  if (S_ISDIR (s.st_mode))
560 	    rsync_1 (src, dest, and_delete, force_copies);
561 	  continue;
562 	}
563 
564       if (d.st_mode != ~0)
565 	switch (d.st_mode & S_IFMT)
566 	  {
567 	  case S_IFDIR:
568 	    if (!S_ISDIR (s.st_mode))
569 	      {
570 		if (verbose)
571 		  printf ("-D %s\n", dest->buf);
572 		recursive_remove (dest->buf);
573 	      }
574 	    break;
575 
576 	  default:
577 	    if (verbose)
578 	      printf ("-F %s\n", dest->buf);
579 	    maybe_xunlink (dest->buf);
580 	    break;
581 	  }
582 
583       switch (s.st_mode & S_IFMT)
584 	{
585 	case S_IFREG:
586 	  if (verbose)
587 	    printf ("+F %s\n", dest->buf);
588 	  copy_one_file (src->buf, dest->buf);
589 	  break;
590 
591 	case S_IFDIR:
592 	  if (verbose)
593 	    printf ("+D %s\n", dest->buf);
594 	  maybe_xmkdir (dest->buf, (s.st_mode & 0777) | 0700);
595 	  rsync_1 (src, dest, and_delete, force_copies);
596 	  break;
597 
598 	case S_IFLNK:
599 	  {
600 	    char *lp;
601 	    if (verbose)
602 	      printf ("+L %s\n", dest->buf);
603 	    lp = xreadlink (src->buf);
604 	    xsymlink (lp, dest->buf);
605 	    free (lp);
606 	    break;
607 	  }
608 
609 	default:
610 	  break;
611 	}
612     }
613 
614   closedir (dir);
615   src->len = staillen;
616   src->buf[staillen] = 0;
617   dest->len = dtaillen;
618   dest->buf[dtaillen] = 0;
619 
620   if (!and_delete)
621     return;
622 
623   /* The rest of this function removes any files/directories in DEST
624      that do not exist in SRC.  This is triggered as part of a
625      preclean or postsclean step.  */
626 
627   dir = opendir (dest->buf);
628 
629   while ((de = readdir (dir)) != NULL)
630     {
631       if (strcmp (de->d_name, ".") == 0
632 	  || strcmp (de->d_name, "..") == 0)
633 	continue;
634 
635       src->len = staillen;
636       r_append (de->d_name, src);
637       dest->len = dtaillen;
638       r_append (de->d_name, dest);
639 
640       s.st_mode = ~0;
641       d.st_mode = ~0;
642 
643       lstat (src->buf, &s);
644 
645       if (lstat (dest->buf, &d) != 0)
646 	FAIL_EXIT1 ("%s obtained by readdir, but stat failed.\n", dest->buf);
647 
648       if (s.st_mode == ~0)
649 	{
650 	  /* dest exists and src doesn't, clean it.  */
651 	  switch (d.st_mode & S_IFMT)
652 	    {
653 	    case S_IFDIR:
654 	      if (!S_ISDIR (s.st_mode))
655 		{
656 		  if (verbose)
657 		    printf ("-D %s\n", dest->buf);
658 		  recursive_remove (dest->buf);
659 		}
660 	      break;
661 
662 	    default:
663 	      if (verbose)
664 		printf ("-F %s\n", dest->buf);
665 	      maybe_xunlink (dest->buf);
666 	      break;
667 	    }
668 	}
669     }
670 
671   closedir (dir);
672 }
673 
674 static void
rsync(char * src,char * dest,int and_delete,int force_copies)675 rsync (char *src, char *dest, int and_delete, int force_copies)
676 {
677   r_setup (src, &spath);
678   r_setup (dest, &dpath);
679 
680   rsync_1 (&spath, &dpath, and_delete, force_copies);
681 }
682 
683 
684 
/* See if we can detect what the user needs to do to get unshare
   support working for us.  Reads a fixed list of kernel settings
   and, for the first one found holding its known "bad" value, prints
   the command that would set it to a working value.  Settings that
   only matter for PID namespaces are skipped unless REQUIRE_PIDNS is
   nonzero.  Files that cannot be opened or parsed are skipped.  */
void
check_for_unshare_hints (int require_pidns)
{
  static const struct {
    const char *path;
    int bad_value, good_value, for_pidns;
  } files[] = {
    /* Default Debian Linux disables user namespaces, but allows a way
       to enable them.  */
    { "/proc/sys/kernel/unprivileged_userns_clone", 0, 1, 0 },
    /* ALT Linux has an alternate way of doing the same.  */
    { "/proc/sys/kernel/userns_restrict", 1, 0, 0 },
    /* Linux kernel >= 4.9 has a configurable limit on the number of
       each namespace.  Some distros set the limit to zero to disable the
       corresponding namespace as a "security policy".  */
    { "/proc/sys/user/max_user_namespaces", 0, 1024, 0 },
    { "/proc/sys/user/max_mnt_namespaces", 0, 1024, 0 },
    { "/proc/sys/user/max_pid_namespaces", 0, 1024, 1 },
  };
  FILE *f;
  size_t i;
  int val;

  for (i = 0; i < array_length (files); i++)
    {
      if (!require_pidns && files[i].for_pidns)
        continue;

      f = fopen (files[i].path, "r");
      if (f == NULL)
        continue;

      /* Sentinel: matches no bad_value, so an unparsable file is
	 skipped.  */
      val = -1;
      if (fscanf (f, "%d", &val) != 1)
	val = -1;
      /* Done with the file whatever it contained; do not leak the
	 stream.  */
      fclose (f);

      if (val != files[i].bad_value)
	continue;

      printf ("To enable test-container, please run this as root:\n");
      printf ("  echo %d > %s\n", files[i].good_value, files[i].path);
      return;
    }
}
728 
729 static void
run_ldconfig(void * x)730 run_ldconfig (void *x __attribute__((unused)))
731 {
732   char *prog = xasprintf ("%s/ldconfig", support_install_rootsbindir);
733   char *args[] = { prog, NULL };
734 
735   execv (args[0], args);
736   FAIL_EXIT1 ("execv: %m");
737 }
738 
739 int
main(int argc,char ** argv)740 main (int argc, char **argv)
741 {
742   pid_t child;
743   char *pristine_root_path;
744   char *new_root_path;
745   char *new_cwd_path;
746   char *new_objdir_path;
747   char *new_srcdir_path;
748   char **new_child_proc;
749   char *new_child_exec;
750   char *command_root;
751   char *command_base;
752   char *command_basename;
753   char *so_base;
754   int do_postclean = 0;
755   bool do_ldconfig = false;
756   char *change_cwd = NULL;
757 
758   int pipes[2];
759   char pid_buf[20];
760 
761   uid_t original_uid;
762   gid_t original_gid;
763   /* If set, the test runs as root instead of the user running the testsuite.  */
764   int be_su = 0;
765   int require_pidns = 0;
766 #ifdef CLONE_NEWNS
767   const char *pidns_comment = NULL;
768 #endif
769   int do_proc_mounts = 0;
770   int UMAP;
771   int GMAP;
772   /* Used for "%lld %lld 1" so need not be large.  */
773   char tmp[100];
774   struct stat st;
775   int lock_fd;
776 
777   setbuf (stdout, NULL);
778 
779   /* The command line we're expecting looks like this:
780      env <set some vars> ld.so <library path> test-binary
781 
782      We need to peel off any "env" or "ld.so" portion of the command
783      line, and keep track of which env vars we should preserve and
784      which we drop.  */
785 
786   if (argc < 2)
787     {
788       fprintf (stderr, "Usage: test-container <program to run> <args...>\n");
789       exit (1);
790     }
791 
792   if (strcmp (argv[1], "-v") == 0)
793     {
794       verbose = 1;
795       ++argv;
796       --argc;
797     }
798 
799   if (strcmp (argv[1], "env") == 0)
800     {
801       ++argv;
802       --argc;
803       while (is_env_setting (argv[1]))
804 	{
805 	  /* If there are variables we do NOT want to propogate, this
806 	     is where the test for them goes.  */
807 	    {
808 	      /* Need to keep these.  Note that putenv stores a
809 	         pointer to our argv.  */
810 	      putenv (argv[1]);
811 	    }
812 	  ++argv;
813 	  --argc;
814 	}
815     }
816 
817   if (strcmp (argv[1], support_objdir_elf_ldso) == 0)
818     {
819       ++argv;
820       --argc;
821       while (argv[1][0] == '-')
822 	{
823 	  if (strcmp (argv[1], "--library-path") == 0)
824 	    {
825 	      ++argv;
826 	      --argc;
827 	    }
828 	  ++argv;
829 	  --argc;
830 	}
831     }
832 
833   pristine_root_path = xstrdup (concat (support_objdir_root,
834 				       "/testroot.pristine", NULL));
835   new_root_path = xstrdup (concat (support_objdir_root,
836 				  "/testroot.root", NULL));
837   new_cwd_path = get_current_dir_name ();
838   new_child_proc = argv + 1;
839   new_child_exec = argv[1];
840 
841   lock_fd = open (concat (pristine_root_path, "/lock.fd", NULL),
842 		 O_CREAT | O_TRUNC | O_RDWR, 0666);
843   if (lock_fd < 0)
844     FAIL_EXIT1 ("Cannot create testroot lock.\n");
845 
846   while (flock (lock_fd, LOCK_EX) != 0)
847     {
848       if (errno != EINTR)
849 	FAIL_EXIT1 ("Cannot lock testroot.\n");
850     }
851 
852   xmkdirp (new_root_path, 0755);
853 
854   /* We look for extra setup info in a subdir in the same spot as the
855      test, with the same name but a ".root" extension.  This is that
856      directory.  We try to look in the source tree if the path we're
857      given refers to the build tree, but we rely on the path to be
858      absolute.  This is what the glibc makefiles do.  */
859   command_root = concat (argv[1], ".root", NULL);
860   if (strncmp (command_root, support_objdir_root,
861 	       strlen (support_objdir_root)) == 0
862       && command_root[strlen (support_objdir_root)] == '/')
863     command_root = concat (support_srcdir_root,
864 			   argv[1] + strlen (support_objdir_root),
865 			   ".root", NULL);
866   command_root = xstrdup (command_root);
867 
868   /* This cuts off the ".root" we appended above.  */
869   command_base = xstrdup (command_root);
870   command_base[strlen (command_base) - 5] = 0;
871 
872   /* This is the basename of the test we're running.  */
873   command_basename = strrchr (command_base, '/');
874   if (command_basename == NULL)
875     command_basename = command_base;
876   else
877     ++command_basename;
878 
879   /* Shared object base directory.  */
880   so_base = xstrdup (argv[1]);
881   if (strrchr (so_base, '/') != NULL)
882     strrchr (so_base, '/')[1] = 0;
883 
884   if (file_exists (concat (command_root, "/postclean.req", NULL)))
885     do_postclean = 1;
886 
887   if (file_exists (concat (command_root, "/ldconfig.run", NULL)))
888     do_ldconfig = true;
889 
890   rsync (pristine_root_path, new_root_path,
891 	 file_exists (concat (command_root, "/preclean.req", NULL)), 0);
892 
893   if (stat (command_root, &st) >= 0
894       && S_ISDIR (st.st_mode))
895     rsync (command_root, new_root_path, 0, 1);
896 
897   new_objdir_path = xstrdup (concat (new_root_path,
898 				    support_objdir_root, NULL));
899   new_srcdir_path = xstrdup (concat (new_root_path,
900 				    support_srcdir_root, NULL));
901 
902   /* new_cwd_path starts with '/' so no "/" needed between the two.  */
903   xmkdirp (concat (new_root_path, new_cwd_path, NULL), 0755);
904   xmkdirp (new_srcdir_path, 0755);
905   xmkdirp (new_objdir_path, 0755);
906 
907   original_uid = getuid ();
908   original_gid = getgid ();
909 
910   /* Handle the cp/mv/rm "script" here.  */
911   {
912     char *the_line = NULL;
913     size_t line_len = 0;
914     char *fname = concat (command_root, "/",
915 			  command_basename, ".script", NULL);
916     char *the_words[3];
917     FILE *f = fopen (fname, "r");
918 
919     if (verbose && f)
920       fprintf (stderr, "running %s\n", fname);
921 
922     if (f == NULL)
923       {
924 	/* Try foo.script instead of foo.root/foo.script, as a shortcut.  */
925 	fname = concat (command_base, ".script", NULL);
926 	f = fopen (fname, "r");
927 	if (verbose && f)
928 	  fprintf (stderr, "running %s\n", fname);
929       }
930 
931     /* Note that we do NOT look for a Makefile-generated foo.script in
932        the build directory.  If that is ever needed, this is the place
933        to add it.  */
934 
935     /* This is where we "interpret" the mini-script which is <test>.script.  */
936     if (f != NULL)
937       {
938 	while (getline (&the_line, &line_len, f) > 0)
939 	  {
940 	    int nt = tokenize (the_line, the_words, 3);
941 	    int i;
942 
943 	    /* Expand variables.  */
944 	    for (i = 1; i < nt; ++i)
945 	      {
946 		if (memcmp (the_words[i], "$B/", 3) == 0)
947 		  the_words[i] = concat (support_objdir_root,
948 					 the_words[i] + 2, NULL);
949 		else if (memcmp (the_words[i], "$S/", 3) == 0)
950 		  the_words[i] = concat (support_srcdir_root,
951 					 the_words[i] + 2, NULL);
952 		else if (memcmp (the_words[i], "$I/", 3) == 0)
953 		  the_words[i] = concat (new_root_path,
954 					 support_install_prefix,
955 					 the_words[i] + 2, NULL);
956 		else if (memcmp (the_words[i], "$L/", 3) == 0)
957 		  the_words[i] = concat (new_root_path,
958 					 support_libdir_prefix,
959 					 the_words[i] + 2, NULL);
960 		else if (memcmp (the_words[i], "$complocaledir/", 15) == 0)
961 		  the_words[i] = concat (new_root_path,
962 					 support_complocaledir_prefix,
963 					 the_words[i] + 14, NULL);
964 		/* "exec" and "cwd" use inside-root paths.  */
965 		else if (strcmp (the_words[0], "exec") != 0
966 			 && strcmp (the_words[0], "cwd") != 0
967 			 && the_words[i][0] == '/')
968 		  the_words[i] = concat (new_root_path,
969 					 the_words[i], NULL);
970 	      }
971 
972 	    if (nt == 3 && the_words[2][strlen (the_words[2]) - 1] == '/')
973 	      {
974 		char *r = strrchr (the_words[1], '/');
975 		if (r)
976 		  the_words[2] = concat (the_words[2], r + 1, NULL);
977 		else
978 		  the_words[2] = concat (the_words[2], the_words[1], NULL);
979 	      }
980 
981 	    /* Run the following commands in the_words[0] with NT number of
982 	       arguments (including the command).  */
983 
984 	    if (nt == 2 && strcmp (the_words[0], "so") == 0)
985 	      {
986 		the_words[2] = concat (new_root_path, support_libdir_prefix,
987 				       "/", the_words[1], NULL);
988 		the_words[1] = concat (so_base, the_words[1], NULL);
989 		copy_one_file (the_words[1], the_words[2]);
990 	      }
991 	    else if (nt == 3 && strcmp (the_words[0], "cp") == 0)
992 	      {
993 		copy_one_file (the_words[1], the_words[2]);
994 	      }
995 	    else if (nt == 3 && strcmp (the_words[0], "mv") == 0)
996 	      {
997 		if (rename (the_words[1], the_words[2]) < 0)
998 		  FAIL_EXIT1 ("rename %s -> %s: %s", the_words[1],
999 			      the_words[2], strerror (errno));
1000 	      }
1001 	    else if (nt == 3 && strcmp (the_words[0], "chmod") == 0)
1002 	      {
1003 		long int m;
1004 		errno = 0;
1005 		m = strtol (the_words[1], NULL, 0);
1006 		TEST_COMPARE (errno, 0);
1007 		if (chmod (the_words[2], m) < 0)
1008 		    FAIL_EXIT1 ("chmod %s: %s\n",
1009 				the_words[2], strerror (errno));
1010 
1011 	      }
1012 	    else if (nt == 2 && strcmp (the_words[0], "rm") == 0)
1013 	      {
1014 		maybe_xunlink (the_words[1]);
1015 	      }
1016 	    else if (nt >= 2 && strcmp (the_words[0], "exec") == 0)
1017 	      {
1018 		/* The first argument is the desired location and name
1019 		   of the test binary as we wish to exec it; we will
1020 		   copy the binary there.  The second (optional)
1021 		   argument is the value to pass as argv[0], it
1022 		   defaults to the same as the first argument.  */
1023 		char *new_exec_path = the_words[1];
1024 
1025 		/* If the new exec path ends with a slash, that's the
1026 		 * directory, and use the old test base name.  */
1027 		if (new_exec_path [strlen(new_exec_path) - 1] == '/')
1028 		    new_exec_path = concat (new_exec_path,
1029 					    basename (new_child_proc[0]),
1030 					    NULL);
1031 
1032 
1033 		/* new_child_proc is in the build tree, so has the
1034 		   same path inside the chroot as outside.  The new
1035 		   exec path is, by definition, relative to the
1036 		   chroot.  */
1037 		copy_one_file (new_child_proc[0],  concat (new_root_path,
1038 							   new_exec_path,
1039 							   NULL));
1040 
1041 		new_child_exec =  xstrdup (new_exec_path);
1042 		if (the_words[2])
1043 		  new_child_proc[0] = xstrdup (the_words[2]);
1044 		else
1045 		  new_child_proc[0] = new_child_exec;
1046 	      }
1047 	    else if (nt == 2 && strcmp (the_words[0], "cwd") == 0)
1048 	      {
1049 		change_cwd = xstrdup (the_words[1]);
1050 	      }
1051 	    else if (nt == 1 && strcmp (the_words[0], "su") == 0)
1052 	      {
1053 		be_su = 1;
1054 	      }
1055 	    else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0)
1056 	      {
1057 		require_pidns = 1;
1058 #ifdef CLONE_NEWNS
1059 		if (nt > 1)
1060 		  pidns_comment = concat_words (the_words + 1, nt - 1);
1061 #endif
1062 	      }
1063 	    else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0)
1064 	      {
1065 		long int m;
1066 		errno = 0;
1067 		m = strtol (the_words[1], NULL, 0);
1068 		TEST_COMPARE (errno, 0);
1069 		xmkdirp (the_words[2], m);
1070 	      }
1071 	    else if (nt > 0 && the_words[0][0] != '#')
1072 	      {
1073 		fprintf (stderr, "\033[31minvalid [%s]\033[0m\n", the_words[0]);
1074 		exit (1);
1075 	      }
1076 	  }
1077 	fclose (f);
1078       }
1079   }
1080 
1081   if (do_postclean)
1082     {
1083       pid_t pc_pid = fork ();
1084 
1085       if (pc_pid < 0)
1086 	{
1087 	  FAIL_EXIT1 ("Can't fork for post-clean");
1088 	}
1089       else if (pc_pid > 0)
1090 	{
1091 	  /* Parent.  */
1092 	  int status;
1093 	  waitpid (pc_pid, &status, 0);
1094 
1095 	  /* Child has exited, we can post-clean the test root.  */
1096 	  printf("running post-clean rsync\n");
1097 	  rsync (pristine_root_path, new_root_path, 1, 0);
1098 
1099 	  if (WIFEXITED (status))
1100 	    exit (WEXITSTATUS (status));
1101 
1102 	  if (WIFSIGNALED (status))
1103 	    {
1104 	      printf ("%%SIGNALLED%%\n");
1105 	      exit (77);
1106 	    }
1107 
1108 	  printf ("%%EXITERROR%%\n");
1109 	  exit (78);
1110 	}
1111 
1112       /* Child continues.  */
1113     }
1114 
1115   /* This is the last point in the program where we're still in the
1116      "normal" namespace.  */
1117 
1118 #ifdef CLONE_NEWNS
1119   /* The unshare here gives us our own spaces and capabilities.  */
1120   if (unshare (CLONE_NEWUSER | CLONE_NEWNS
1121 	       | (require_pidns ? CLONE_NEWPID : 0)) < 0)
1122     {
1123       /* Older kernels may not support all the options, or security
1124 	 policy may block this call.  */
1125       if (errno == EINVAL || errno == EPERM || errno == ENOSPC)
1126 	{
1127 	  int saved_errno = errno;
1128 	  if (errno == EPERM || errno == ENOSPC)
1129 	    check_for_unshare_hints (require_pidns);
1130 	  FAIL_UNSUPPORTED ("unable to unshare user/fs: %s", strerror (saved_errno));
1131 	}
1132       /* We're about to exit anyway, it's "safe" to call unshare again
1133 	 just to see if the CLONE_NEWPID caused the error.  */
1134       else if (require_pidns && unshare (CLONE_NEWUSER | CLONE_NEWNS) >= 0)
1135 	FAIL_EXIT1 ("unable to unshare pid ns: %s : %s", strerror (errno),
1136 		    pidns_comment ? pidns_comment : "required by test");
1137       else
1138 	FAIL_EXIT1 ("unable to unshare user/fs: %s", strerror (errno));
1139     }
1140 #else
1141   /* Some targets may not support unshare at all.  */
1142   FAIL_UNSUPPORTED ("unshare support missing");
1143 #endif
1144 
  /* On some systems, by default, all mounts leak out of the namespace.  */
1146   if (mount ("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
1147     FAIL_EXIT1 ("could not create a private mount namespace\n");
1148 
1149   trymount (support_srcdir_root, new_srcdir_path);
1150   trymount (support_objdir_root, new_objdir_path);
1151 
1152   /* It may not be possible to mount /proc directly.  */
1153   if (! require_pidns)
1154   {
1155     char *new_proc = concat (new_root_path, "/proc", NULL);
1156     xmkdirp (new_proc, 0755);
1157     trymount ("/proc", new_proc);
1158     do_proc_mounts = 1;
1159   }
1160 
1161   xmkdirp (concat (new_root_path, "/dev", NULL), 0755);
1162   devmount (new_root_path, "null");
1163   devmount (new_root_path, "zero");
1164   devmount (new_root_path, "urandom");
1165 
1166   /* We're done with the "old" root, switch to the new one.  */
1167   if (chroot (new_root_path) < 0)
1168     FAIL_EXIT1 ("Can't chroot to %s - ", new_root_path);
1169 
1170   if (chdir (new_cwd_path) < 0)
1171     FAIL_EXIT1 ("Can't cd to new %s - ", new_cwd_path);
1172 
1173   /* This is to pass the "outside" PID to the child, which will be PID
1174      1.  */
1175   if (pipe2 (pipes, O_CLOEXEC) < 0)
1176     FAIL_EXIT1 ("Can't create pid pipe");
1177 
  /* To complete the containerization, we need to fork () at least
     once.  We can't exec, nor can we somehow link the new child to
     our parent.  So we run the child and propagate its exit status
     up.  */
1182   child = fork ();
1183   if (child < 0)
1184     FAIL_EXIT1 ("Unable to fork");
1185   else if (child > 0)
1186     {
1187       /* Parent.  */
1188       int status;
1189 
1190       /* Send the child's "outside" pid to it.  */
1191       write (pipes[1], &child, sizeof(child));
1192       close (pipes[0]);
1193       close (pipes[1]);
1194 
1195       waitpid (child, &status, 0);
1196 
1197       if (WIFEXITED (status))
1198 	exit (WEXITSTATUS (status));
1199 
1200       if (WIFSIGNALED (status))
1201 	{
1202 	  printf ("%%SIGNALLED%%\n");
1203 	  exit (77);
1204 	}
1205 
1206       printf ("%%EXITERROR%%\n");
1207       exit (78);
1208     }
1209 
1210   /* The rest is the child process, which is now PID 1 and "in" the
1211      new root.  */
1212 
1213   if (do_ldconfig)
1214     {
1215       struct support_capture_subprocess result =
1216         support_capture_subprocess (run_ldconfig, NULL);
1217       support_capture_subprocess_check (&result, "execv", 0, sc_allow_none);
1218     }
1219 
1220   /* Get our "outside" pid from our parent.  We use this to help with
1221      debugging from outside the container.  */
1222   read (pipes[0], &child, sizeof(child));
1223   close (pipes[0]);
1224   close (pipes[1]);
1225   sprintf (pid_buf, "%lu", (long unsigned)child);
1226   setenv ("PID_OUTSIDE_CONTAINER", pid_buf, 0);
1227 
1228   maybe_xmkdir ("/tmp", 0755);
1229 
1230   if (require_pidns)
1231     {
      /* Now that we're pid 1 (effectively "root") we can mount /proc.  */
1233       maybe_xmkdir ("/proc", 0777);
1234       if (mount ("proc", "/proc", "proc", 0, NULL) != 0)
1235 	{
	  /* This happens if we're trying to create a nested container,
	     like if the build is running under podman, and we lack
	     privileges.

	     Ideally we would WARN here, but that would just add noise to
	     *every* test-container test, and the ones that care should
	     have their own relevant diagnostics.
1243 
1244 	     FAIL_EXIT1 ("Unable to mount /proc: ");  */
1245 	}
1246       else
1247 	do_proc_mounts = 1;
1248     }
1249 
1250   if (do_proc_mounts)
1251     {
1252       /* We map our original UID to the same UID in the container so we
1253 	 can own our own files normally.  */
1254       UMAP = open ("/proc/self/uid_map", O_WRONLY);
1255       if (UMAP < 0)
1256 	FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
1257 
1258       sprintf (tmp, "%lld %lld 1\n",
1259 	       (long long) (be_su ? 0 : original_uid), (long long) original_uid);
1260       write (UMAP, tmp, strlen (tmp));
1261       xclose (UMAP);
1262 
1263       /* We must disable setgroups () before we can map our groups, else we
1264 	 get EPERM.  */
1265       GMAP = open ("/proc/self/setgroups", O_WRONLY);
1266       if (GMAP >= 0)
1267 	{
1268 	  /* We support kernels old enough to not have this.  */
1269 	  write (GMAP, "deny\n", 5);
1270 	  xclose (GMAP);
1271 	}
1272 
1273       /* We map our original GID to the same GID in the container so we
1274 	 can own our own files normally.  */
1275       GMAP = open ("/proc/self/gid_map", O_WRONLY);
1276       if (GMAP < 0)
1277 	FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
1278 
1279       sprintf (tmp, "%lld %lld 1\n",
1280 	       (long long) (be_su ? 0 : original_gid), (long long) original_gid);
1281       write (GMAP, tmp, strlen (tmp));
1282       xclose (GMAP);
1283     }
1284 
1285   if (change_cwd)
1286     {
1287       if (chdir (change_cwd) < 0)
1288 	FAIL_EXIT1 ("Can't cd to %s inside container - ", change_cwd);
1289     }
1290 
1291   /* Now run the child.  */
1292   execvp (new_child_exec, new_child_proc);
1293 
1294   /* Or don't run the child?  */
1295   FAIL_EXIT1 ("Unable to exec %s: %s\n", new_child_exec, strerror (errno));
1296 
1297   /* Because gcc won't know error () never returns...  */
1298   exit (EXIT_UNSUPPORTED);
1299 }
1300