/* SPDX-License-Identifier: LGPL-2.1-or-later */ #include #include #include #include #include #include "alloc-util.h" #include "escape.h" #include "extract-word.h" #include "fileio.h" #include "gunicode.h" #include "locale-util.h" #include "macro.h" #include "memory-util.h" #include "string-util.h" #include "strv.h" #include "terminal-util.h" #include "utf8.h" #include "util.h" char* first_word(const char *s, const char *word) { size_t sl, wl; const char *p; assert(s); assert(word); /* Checks if the string starts with the specified word, either * followed by NUL or by whitespace. Returns a pointer to the * NUL or the first character after the whitespace. */ sl = strlen(s); wl = strlen(word); if (sl < wl) return NULL; if (wl == 0) return (char*) s; if (memcmp(s, word, wl) != 0) return NULL; p = s + wl; if (*p == 0) return (char*) p; if (!strchr(WHITESPACE, *p)) return NULL; p += strspn(p, WHITESPACE); return (char*) p; } char *strnappend(const char *s, const char *suffix, size_t b) { size_t a; char *r; if (!s && !suffix) return strdup(""); if (!s) return strndup(suffix, b); if (!suffix) return strdup(s); assert(s); assert(suffix); a = strlen(s); if (b > SIZE_MAX - a) return NULL; r = new(char, a+b+1); if (!r) return NULL; memcpy(r, s, a); memcpy(r+a, suffix, b); r[a+b] = 0; return r; } char *strjoin_real(const char *x, ...) { va_list ap; size_t l = 1; char *r, *p; va_start(ap, x); for (const char *t = x; t; t = va_arg(ap, const char *)) { size_t n; n = strlen(t); if (n > SIZE_MAX - l) { va_end(ap); return NULL; } l += n; } va_end(ap); p = r = new(char, l); if (!r) return NULL; va_start(ap, x); for (const char *t = x; t; t = va_arg(ap, const char *)) p = stpcpy(p, t); va_end(ap); *p = 0; return r; } char *strstrip(char *s) { if (!s) return NULL; /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */ return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE); } char *delete_chars(char *s, const char *bad) { char *f, *t; /* Drops all specified bad characters, regardless where in the string */ if (!s) return NULL; if (!bad) bad = WHITESPACE; for (f = s, t = s; *f; f++) { if (strchr(bad, *f)) continue; *(t++) = *f; } *t = 0; return s; } char *delete_trailing_chars(char *s, const char *bad) { char *c = s; /* Drops all specified bad characters, at the end of the string */ if (!s) return NULL; if (!bad) bad = WHITESPACE; for (char *p = s; *p; p++) if (!strchr(bad, *p)) c = p + 1; *c = 0; return s; } char *truncate_nl(char *s) { assert(s); s[strcspn(s, NEWLINE)] = 0; return s; } char ascii_tolower(char x) { if (x >= 'A' && x <= 'Z') return x - 'A' + 'a'; return x; } char ascii_toupper(char x) { if (x >= 'a' && x <= 'z') return x - 'a' + 'A'; return x; } char *ascii_strlower(char *t) { assert(t); for (char *p = t; *p; p++) *p = ascii_tolower(*p); return t; } char *ascii_strupper(char *t) { assert(t); for (char *p = t; *p; p++) *p = ascii_toupper(*p); return t; } char *ascii_strlower_n(char *t, size_t n) { if (n <= 0) return t; for (size_t i = 0; i < n; i++) t[i] = ascii_tolower(t[i]); return t; } int ascii_strcasecmp_n(const char *a, const char *b, size_t n) { for (; n > 0; a++, b++, n--) { int x, y; x = (int) (uint8_t) ascii_tolower(*a); y = (int) (uint8_t) ascii_tolower(*b); if (x != y) return x - y; } return 0; } int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) { int r; r = ascii_strcasecmp_n(a, b, MIN(n, m)); if (r != 0) return r; return CMP(n, m); } bool chars_intersect(const char *a, const char *b) { /* Returns true if any of the chars in a are in b. */ for (const char *p = a; *p; p++) if (strchr(b, *p)) return true; return false; } bool string_has_cc(const char *p, const char *ok) { assert(p); /* * Check if a string contains control characters. If 'ok' is * non-NULL it may be a string containing additional CCs to be * considered OK. */ for (const char *t = p; *t; t++) { if (ok && strchr(ok, *t)) continue; if (char_is_cc(*t)) return true; } return false; } static int write_ellipsis(char *buf, bool unicode) { if (unicode || is_locale_utf8()) { buf[0] = 0xe2; /* tri-dot ellipsis: … */ buf[1] = 0x80; buf[2] = 0xa6; } else { buf[0] = '.'; buf[1] = '.'; buf[2] = '.'; } return 3; } static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { size_t x, need_space, suffix_len; char *t; assert(s); assert(percent <= 100); assert(new_length != SIZE_MAX); if (old_length <= new_length) return strndup(s, old_length); /* Special case short ellipsations */ switch (new_length) { case 0: return strdup(""); case 1: if (is_locale_utf8()) return strdup("…"); else return strdup("."); case 2: if (!is_locale_utf8()) return strdup(".."); break; default: break; } /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage, * either for the UTF-8 encoded character or for three ASCII characters. */ need_space = is_locale_utf8() ? 1 : 3; t = new(char, new_length+3); if (!t) return NULL; assert(new_length >= need_space); x = ((new_length - need_space) * percent + 50) / 100; assert(x <= new_length - need_space); memcpy(t, s, x); write_ellipsis(t + x, false); suffix_len = new_length - x - need_space; memcpy(t + x + 3, s + old_length - suffix_len, suffix_len); *(t + x + 3 + suffix_len) = '\0'; return t; } char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { size_t x, k, len, len2; const char *i, *j; char *e; int r; /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8 * strings. * * Ellipsation is done in a locale-dependent way: * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...") * 2. Otherwise, a unicode ellipsis is used ("…") * * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or * the current locale is UTF-8. */ assert(s); assert(percent <= 100); if (new_length == SIZE_MAX) return strndup(s, old_length); if (new_length == 0) return strdup(""); /* If no multibyte characters use ascii_ellipsize_mem for speed */ if (ascii_is_valid_n(s, old_length)) return ascii_ellipsize_mem(s, old_length, new_length, percent); x = ((new_length - 1) * percent) / 100; assert(x <= new_length - 1); k = 0; for (i = s; i < s + old_length; i = utf8_next_char(i)) { char32_t c; int w; r = utf8_encoded_to_unichar(i, &c); if (r < 0) return NULL; w = unichar_iswide(c) ? 2 : 1; if (k + w <= x) k += w; else break; } for (j = s + old_length; j > i; ) { char32_t c; int w; const char *jj; jj = utf8_prev_char(j); r = utf8_encoded_to_unichar(jj, &c); if (r < 0) return NULL; w = unichar_iswide(c) ? 2 : 1; if (k + w <= new_length) { k += w; j = jj; } else break; } assert(i <= j); /* we don't actually need to ellipsize */ if (i == j) return memdup_suffix0(s, old_length); /* make space for ellipsis, if possible */ if (j < s + old_length) j = utf8_next_char(j); else if (i > s) i = utf8_prev_char(i); len = i - s; len2 = s + old_length - j; e = new(char, len + 3 + len2 + 1); if (!e) return NULL; /* printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n", old_length, new_length, x, len, len2, k); */ memcpy(e, s, len); write_ellipsis(e + len, true); memcpy(e + len + 3, j, len2); *(e + len + 3 + len2) = '\0'; return e; } char *cellescape(char *buf, size_t len, const char *s) { /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII * characters are copied as they are, everything else is escaped. The result * is different then if escaping and ellipsization was performed in two * separate steps, because each sequence is either stored in full or skipped. * * This function should be used for logging about strings which expected to * be plain ASCII in a safe way. * * An ellipsis will be used if s is too long. It was always placed at the * very end. */ size_t i = 0, last_char_width[4] = {}, k = 0; assert(len > 0); /* at least a terminating NUL */ for (;;) { char four[4]; int w; if (*s == 0) /* terminating NUL detected? then we are done! */ goto done; w = cescape_char(*s, four); if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's * ellipsize at the previous location */ break; /* OK, there was space, let's add this escaped character to the buffer */ memcpy(buf + i, four, w); i += w; /* And remember its width in the ring buffer */ last_char_width[k] = w; k = (k + 1) % 4; s++; } /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4 * characters ideally, but the buffer is shorter than that in the first place take what we can get */ for (size_t j = 0; j < ELEMENTSOF(last_char_width); j++) { if (i + 4 <= len) /* nice, we reached our space goal */ break; k = k == 0 ? 3 : k - 1; if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */ break; assert(i >= last_char_width[k]); i -= last_char_width[k]; } if (i + 4 <= len) /* yay, enough space */ i += write_ellipsis(buf + i, false); else if (i + 3 <= len) { /* only space for ".." */ buf[i++] = '.'; buf[i++] = '.'; } else if (i + 2 <= len) /* only space for a single "." */ buf[i++] = '.'; else assert(i + 1 <= len); done: buf[i] = '\0'; return buf; } char* strshorten(char *s, size_t l) { assert(s); if (strnlen(s, l+1) > l) s[l] = 0; return s; } char *strreplace(const char *text, const char *old_string, const char *new_string) { size_t l, old_len, new_len; char *t, *ret = NULL; const char *f; assert(old_string); assert(new_string); if (!text) return NULL; old_len = strlen(old_string); new_len = strlen(new_string); l = strlen(text); if (!GREEDY_REALLOC(ret, l+1)) return NULL; f = text; t = ret; while (*f) { size_t d, nl; if (!startswith(f, old_string)) { *(t++) = *(f++); continue; } d = t - ret; nl = l - old_len + new_len; if (!GREEDY_REALLOC(ret, nl + 1)) return mfree(ret); l = nl; t = ret + d; t = stpcpy(t, new_string); f += old_len; } *t = 0; return ret; } static void advance_offsets( ssize_t diff, size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */ size_t shift[static 2], size_t size) { if (!offsets) return; assert(shift); if ((size_t) diff < offsets[0]) shift[0] += size; if ((size_t) diff < offsets[1]) shift[1] += size; } char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { const char *begin = NULL; enum { STATE_OTHER, STATE_ESCAPE, STATE_CSI, STATE_CSO, } state = STATE_OTHER; char *obuf = NULL; size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0; FILE *f; assert(ibuf); assert(*ibuf); /* This does three things: * * 1. Replaces TABs by 8 spaces * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences * 4. Strip trailing \r characters (since they would "move the cursor", but have no * other effect). * * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as * are any other special characters. Truncated ANSI sequences are left-as is too. This call is * supposed to suppress the most basic formatting noise, but nothing else. * * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */ isz = _isz ? *_isz : strlen(*ibuf); /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we * created f here and it doesn't leave our scope. */ f = open_memstream_unlocked(&obuf, &osz); if (!f) return NULL; for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) { switch (state) { case STATE_OTHER: if (i >= *ibuf + isz) /* EOT */ break; if (*i == '\r') { n_carriage_returns++; break; } else if (*i == '\n') /* Ignore carriage returns before new line */ n_carriage_returns = 0; for (; n_carriage_returns > 0; n_carriage_returns--) fputc('\r', f); if (*i == '\x1B') state = STATE_ESCAPE; else if (*i == '\t') { fputs(" ", f); advance_offsets(i - *ibuf, highlight, shift, 7); } else fputc(*i, f); break; case STATE_ESCAPE: assert(n_carriage_returns == 0); if (i >= *ibuf + isz) { /* EOT */ fputc('\x1B', f); advance_offsets(i - *ibuf, highlight, shift, 1); break; } else if (*i == '[') { /* ANSI CSI */ state = STATE_CSI; begin = i + 1; } else if (*i == ']') { /* ANSI CSO */ state = STATE_CSO; begin = i + 1; } else { fputc('\x1B', f); fputc(*i, f); advance_offsets(i - *ibuf, highlight, shift, 1); state = STATE_OTHER; } break; case STATE_CSI: assert(n_carriage_returns == 0); if (i >= *ibuf + isz || /* EOT … */ !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */ fputc('\x1B', f); fputc('[', f); advance_offsets(i - *ibuf, highlight, shift, 2); state = STATE_OTHER; i = begin-1; } else if (*i == 'm') state = STATE_OTHER; break; case STATE_CSO: assert(n_carriage_returns == 0); if (i >= *ibuf + isz || /* EOT … */ (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ fputc('\x1B', f); fputc(']', f); advance_offsets(i - *ibuf, highlight, shift, 2); state = STATE_OTHER; i = begin-1; } else if (*i == '\a') state = STATE_OTHER; break; } } if (fflush_and_check(f) < 0) { fclose(f); return mfree(obuf); } fclose(f); free_and_replace(*ibuf, obuf); if (_isz) *_isz = osz; if (highlight) { highlight[0] += shift[0]; highlight[1] += shift[1]; } return *ibuf; } char *strextend_with_separator_internal(char **x, const char *separator, ...) { size_t f, l, l_separator; bool need_separator; char *nr, *p; va_list ap; assert(x); l = f = strlen_ptr(*x); need_separator = !isempty(*x); l_separator = strlen_ptr(separator); va_start(ap, separator); for (;;) { const char *t; size_t n; t = va_arg(ap, const char *); if (!t) break; n = strlen(t); if (need_separator) n += l_separator; if (n >= SIZE_MAX - l) { va_end(ap); return NULL; } l += n; need_separator = true; } va_end(ap); need_separator = !isempty(*x); nr = realloc(*x, GREEDY_ALLOC_ROUND_UP(l+1)); if (!nr) return NULL; *x = nr; p = nr + f; va_start(ap, separator); for (;;) { const char *t; t = va_arg(ap, const char *); if (!t) break; if (need_separator && separator) p = stpcpy(p, separator); p = stpcpy(p, t); need_separator = true; } va_end(ap); assert(p == nr + l); *p = 0; return p; } int strextendf_with_separator(char **x, const char *separator, const char *format, ...) { size_t m, a, l_separator; va_list ap; int l; /* Appends a formatted string to the specified string. Don't use this in inner loops, since then * we'll spend a tonload of time in determining the length of the string passed in, over and over * again. */ assert(x); assert(format); l_separator = isempty(*x) ? 0 : strlen_ptr(separator); /* Let's try to use the allocated buffer, if there's room at the end still. Otherwise let's extend by 64 chars. */ if (*x) { m = strlen(*x); a = MALLOC_SIZEOF_SAFE(*x); assert(a >= m + 1); } else m = a = 0; if (a - m < 17 + l_separator) { /* if there's less than 16 chars space, then enlarge the buffer first */ char *n; if (_unlikely_(l_separator > SIZE_MAX - 64)) /* overflow check #1 */ return -ENOMEM; if (_unlikely_(m > SIZE_MAX - 64 - l_separator)) /* overflow check #2 */ return -ENOMEM; n = realloc(*x, m + 64 + l_separator); if (!n) return -ENOMEM; *x = n; a = MALLOC_SIZEOF_SAFE(*x); } /* Now, let's try to format the string into it */ memcpy_safe(*x + m, separator, l_separator); va_start(ap, format); l = vsnprintf(*x + m + l_separator, a - m - l_separator, format, ap); va_end(ap); assert(l >= 0); if ((size_t) l < a - m - l_separator) { char *n; /* Nice! This worked. We are done. But first, let's return the extra space we don't * need. This should be a cheap operation, since we only lower the allocation size here, * never increase. */ n = realloc(*x, m + (size_t) l + l_separator + 1); if (n) *x = n; } else { char *n; /* Wasn't enough. Then let's allocate exactly what we need. */ if (_unlikely_((size_t) l > SIZE_MAX - (l_separator + 1))) /* overflow check #1 */ goto oom; if (_unlikely_(m > SIZE_MAX - ((size_t) l + l_separator + 1))) /* overflow check #2 */ goto oom; a = m + (size_t) l + l_separator + 1; n = realloc(*x, a); if (!n) goto oom; *x = n; va_start(ap, format); l = vsnprintf(*x + m + l_separator, a - m - l_separator, format, ap); va_end(ap); assert((size_t) l < a - m - l_separator); } return 0; oom: /* truncate the bytes added after the first vsnprintf() attempt again */ (*x)[m] = 0; return -ENOMEM; } char *strrep(const char *s, unsigned n) { char *r, *p; size_t l; assert(s); l = strlen(s); p = r = malloc(l * n + 1); if (!r) return NULL; for (unsigned i = 0; i < n; i++) p = stpcpy(p, s); *p = 0; return r; } int split_pair(const char *s, const char *sep, char **l, char **r) { char *x, *a, *b; assert(s); assert(sep); assert(l); assert(r); if (isempty(sep)) return -EINVAL; x = strstr(s, sep); if (!x) return -EINVAL; a = strndup(s, x - s); if (!a) return -ENOMEM; b = strdup(x + strlen(sep)); if (!b) { free(a); return -ENOMEM; } *l = a; *r = b; return 0; } int free_and_strdup(char **p, const char *s) { char *t; assert(p); /* Replaces a string pointer with a strdup()ed new string, * possibly freeing the old one. */ if (streq_ptr(*p, s)) return 0; if (s) { t = strdup(s); if (!t) return -ENOMEM; } else t = NULL; free(*p); *p = t; return 1; } int free_and_strndup(char **p, const char *s, size_t l) { char *t; assert(p); assert(s || l == 0); /* Replaces a string pointer with a strndup()ed new string, * freeing the old one. */ if (!*p && !s) return 0; if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0')) return 0; if (s) { t = strndup(s, l); if (!t) return -ENOMEM; } else t = NULL; free_and_replace(*p, t); return 1; } bool string_is_safe(const char *p) { if (!p) return false; /* Checks if the specified string contains no quotes or control characters */ for (const char *t = p; *t; t++) { if (*t > 0 && *t < ' ') /* no control characters */ return false; if (strchr(QUOTES "\\\x7f", *t)) return false; } return true; } char* string_erase(char *x) { if (!x) return NULL; /* A delicious drop of snake-oil! To be called on memory where we stored passphrases or so, after we * used them. */ explicit_bzero_safe(x, strlen(x)); return x; } int string_truncate_lines(const char *s, size_t n_lines, char **ret) { const char *p = s, *e = s; bool truncation_applied = false; char *copy; size_t n = 0; assert(s); /* Truncate after the specified number of lines. Returns > 0 if a truncation was applied or == 0 if * there were fewer lines in the string anyway. Trailing newlines on input are ignored, and not * generated either. */ for (;;) { size_t k; k = strcspn(p, "\n"); if (p[k] == 0) { if (k == 0) /* final empty line */ break; if (n >= n_lines) /* above threshold */ break; e = p + k; /* last line to include */ break; } assert(p[k] == '\n'); if (n >= n_lines) break; if (k > 0) e = p + k; p += k + 1; n++; } /* e points after the last character we want to keep */ if (isempty(e)) copy = strdup(s); else { if (!in_charset(e, "\n")) /* We only consider things truncated if we remove something that * isn't a new-line or a series of them */ truncation_applied = true; copy = strndup(s, e - s); } if (!copy) return -ENOMEM; *ret = copy; return truncation_applied; } int string_extract_line(const char *s, size_t i, char **ret) { const char *p = s; size_t c = 0; /* Extract the i'nth line from the specified string. Returns > 0 if there are more lines after that, * and == 0 if we are looking at the last line or already beyond the last line. As special * optimization, if the first line is requested and the string only consists of one line we return * NULL, indicating the input string should be used as is, and avoid a memory allocation for a very * common case. */ for (;;) { const char *q; q = strchr(p, '\n'); if (i == c) { /* The line we are looking for! */ if (q) { char *m; m = strndup(p, q - p); if (!m) return -ENOMEM; *ret = m; return !isempty(q + 1); /* more coming? */ } else { if (p == s) *ret = NULL; /* Just use the input string */ else { char *m; m = strdup(p); if (!m) return -ENOMEM; *ret = m; } return 0; /* The end */ } } if (!q) { char *m; /* No more lines, return empty line */ m = strdup(""); if (!m) return -ENOMEM; *ret = m; return 0; /* The end */ } p = q + 1; c++; } } int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word) { /* In the default mode with no separators specified, we split on whitespace and * don't coalesce separators. */ const ExtractFlags flags = separators ? EXTRACT_DONT_COALESCE_SEPARATORS : 0; const char *found = NULL; for (const char *p = string;;) { _cleanup_free_ char *w = NULL; int r; r = extract_first_word(&p, &w, separators, flags); if (r < 0) return r; if (r == 0) break; found = strv_find(words, w); if (found) break; } if (ret_word) *ret_word = found; return !!found; } bool streq_skip_trailing_chars(const char *s1, const char *s2, const char *ok) { if (!s1 && !s2) return true; if (!s1 || !s2) return false; if (!ok) ok = WHITESPACE; for (; *s1 && *s2; s1++, s2++) if (*s1 != *s2) break; return in_charset(s1, ok) && in_charset(s2, ok); } char *string_replace_char(char *str, char old_char, char new_char) { assert(str); assert(old_char != '\0'); assert(new_char != '\0'); assert(old_char != new_char); for (char *p = strchr(str, old_char); p; p = strchr(p + 1, old_char)) *p = new_char; return str; } size_t strspn_from_end(const char *str, const char *accept) { size_t n = 0; if (isempty(str)) return 0; if (isempty(accept)) return 0; for (const char *p = str + strlen(str); p > str && strchr(accept, p[-1]); p--) n++; return n; }