1 /* String tables for ld.so.cache construction.  Implementation.
2    Copyright (C) 2020-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published
7    by the Free Software Foundation; version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
17 
18 #include <assert.h>
19 #include <error.h>
20 #include <ldconfig.h>
21 #include <libintl.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stringtable.h>
25 
26 static void
stringtable_init(struct stringtable * table)27 stringtable_init (struct stringtable *table)
28 {
29   table->count = 0;
30 
31   /* This needs to be a power of two.  128 is sufficient to keep track
32      of 42 DSOs without resizing (assuming two strings per DSOs).
33      glibc itself comes with more than 20 DSOs, so 64 would likely to
34      be too small.  */
35   table->allocated = 128;
36 
37   table->entries = xcalloc (table->allocated, sizeof (table->entries[0]));
38 }
39 
40 /* 32-bit FNV-1a hash function.  */
41 static uint32_t
fnv1a(const char * string,size_t length)42 fnv1a (const char *string, size_t length)
43 {
44   const unsigned char *p = (const unsigned char *) string;
45   uint32_t hash = 2166136261U;
46   for (size_t i = 0; i < length; ++i)
47     {
48       hash ^= p[i];
49       hash *= 16777619U;
50     }
51   return hash;
52 }
53 
54 /* Double the capacity of the hash table.  */
55 static void
stringtable_rehash(struct stringtable * table)56 stringtable_rehash (struct stringtable *table)
57 {
58   /* This computation cannot overflow because the old total in-memory
59      size of the hash table is larger than the computed value.  */
60   uint32_t new_allocated = table->allocated * 2;
61   struct stringtable_entry **new_entries
62     = xcalloc (new_allocated, sizeof (table->entries[0]));
63 
64   uint32_t mask = new_allocated - 1;
65   for (uint32_t i = 0; i < table->allocated; ++i)
66     for (struct stringtable_entry *e = table->entries[i]; e != NULL; )
67       {
68         struct stringtable_entry *next = e->next;
69         uint32_t hash = fnv1a (e->string, e->length);
70         uint32_t new_index = hash & mask;
71         e->next = new_entries[new_index];
72         new_entries[new_index] = e;
73         e = next;
74       }
75 
76   free (table->entries);
77   table->entries = new_entries;
78   table->allocated = new_allocated;
79 }
80 
81 struct stringtable_entry *
stringtable_add(struct stringtable * table,const char * string)82 stringtable_add (struct stringtable *table, const char *string)
83 {
84   /* Check for a zero-initialized table.  */
85   if (table->allocated == 0)
86     stringtable_init (table);
87 
88   size_t length = strlen (string);
89   if (length > (1U << 30))
90     error (EXIT_FAILURE, 0, _("String table string is too long"));
91   uint32_t hash = fnv1a (string, length);
92 
93   /* Return a previously-existing entry.  */
94   for (struct stringtable_entry *e
95          = table->entries[hash & (table->allocated - 1)];
96        e != NULL; e = e->next)
97     if (e->length == length && memcmp (e->string, string, length) == 0)
98       return e;
99 
100   /* Increase the size of the table if necessary.  Keep utilization
101      below two thirds.  */
102   if (table->count >= (1U << 30))
103     error (EXIT_FAILURE, 0, _("String table has too many entries"));
104   if (table->count * 3 > table->allocated * 2)
105     stringtable_rehash (table);
106 
107   /* Add the new table entry.  */
108   ++table->count;
109   struct stringtable_entry *e
110     = xmalloc (offsetof (struct stringtable_entry, string) + length + 1);
111   uint32_t index = hash & (table->allocated - 1);
112   e->next = table->entries[index];
113   table->entries[index] = e;
114   e->length = length;
115   e->offset = 0;
116   memcpy (e->string, string, length + 1);
117   return e;
118 }
119 
120 /* Sort reversed strings in reverse lexicographic order.  This is used
121    for tail merging.  */
122 static int
finalize_compare(const void * l,const void * r)123 finalize_compare (const void *l, const void *r)
124 {
125   struct stringtable_entry *left = *(struct stringtable_entry **) l;
126   struct stringtable_entry *right = *(struct stringtable_entry **) r;
127   size_t to_compare;
128   if (left->length < right->length)
129     to_compare = left->length;
130   else
131     to_compare = right->length;
132   for (size_t i = 1; i <= to_compare; ++i)
133     {
134       unsigned char lch = left->string[left->length - i];
135       unsigned char rch = right->string[right->length - i];
136       if (lch != rch)
137         return rch - lch;
138     }
139   if (left->length == right->length)
140     return 0;
141   else if (left->length < right->length)
142     /* Longer strings should come first.  */
143     return 1;
144   else
145     return -1;
146 }
147 
148 void
stringtable_finalize(struct stringtable * table,struct stringtable_finalized * result)149 stringtable_finalize (struct stringtable *table,
150                       struct stringtable_finalized *result)
151 {
152   if (table->count == 0)
153     {
154       result->strings = xstrdup ("");
155       result->size = 0;
156       return;
157     }
158 
159   /* Optimize the order of the strings.  */
160   struct stringtable_entry **array = xcalloc (table->count, sizeof (*array));
161   {
162     size_t j = 0;
163     for (uint32_t i = 0; i < table->allocated; ++i)
164       for (struct stringtable_entry *e = table->entries[i]; e != NULL;
165            e = e->next)
166         {
167           array[j] = e;
168           ++j;
169         }
170     assert (j == table->count);
171   }
172   qsort (array, table->count, sizeof (*array), finalize_compare);
173 
174   /* Assign offsets, using tail merging (sharing suffixes) if possible.  */
175   array[0]->offset = 0;
176   for (uint32_t j = 1; j < table->count; ++j)
177     {
178       struct stringtable_entry *previous = array[j - 1];
179       struct stringtable_entry *current = array[j];
180       if (previous->length >= current->length
181           && memcmp (&previous->string[previous->length - current->length],
182                      current->string, current->length) == 0)
183         current->offset = (previous->offset + previous->length
184                            - current->length);
185       else if (__builtin_add_overflow (previous->offset,
186                                        previous->length + 1,
187                                        &current->offset))
188         error (EXIT_FAILURE, 0, _("String table is too large"));
189     }
190 
191   /* Allocate the result string.  */
192   {
193     struct stringtable_entry *last = array[table->count - 1];
194     if (__builtin_add_overflow (last->offset, last->length + 1,
195                                 &result->size))
196       error (EXIT_FAILURE, 0, _("String table is too large"));
197   }
198   /* The strings are copied from the hash table, so the array is no
199      longer needed.  */
200   free (array);
201   result->strings = xcalloc (result->size, 1);
202 
203   /* Copy the strings.  */
204   for (uint32_t i = 0; i < table->allocated; ++i)
205     for (struct stringtable_entry *e = table->entries[i]; e != NULL;
206          e = e->next)
207       if (result->strings[e->offset] == '\0')
208         memcpy (&result->strings[e->offset], e->string, e->length + 1);
209 }
210