lib/string_helpers.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Helpers for formatting and printing strings
   4  *
   5  * Copyright 31 August 2008 James Bottomley
   6  * Copyright (C) 2013, Intel Corporation
   7  */
   8 #include <linux/bug.h>
   9 #include <linux/kernel.h>
  10 #include <linux/math64.h>
  11 #include <linux/export.h>
  12 #include <linux/ctype.h>
  13 #include <linux/errno.h>
  14 #include <linux/fs.h>
  15 #include <linux/limits.h>
  16 #include <linux/mm.h>
  17 #include <linux/slab.h>
  18 #include <linux/string.h>
  19 #include <linux/string_helpers.h>
  20
  21 /**
  22  * string_get_size - get the size in the specified units
  23  * @size:       The size to be converted in blocks
  24  * @blk_size:   Size of the block (use 1 for size in bytes)
  25  * @units:      units to use (powers of 1000 or 1024)
  26  * @buf:        buffer to format to
  27  * @len:        length of buffer
  28  *
  29  * This function returns a string formatted to 3 significant figures
  30  * giving the size in the required units.  @buf should have room for
  31  * at least 9 bytes and will always be zero terminated.
  32  *
  33  */
  34 void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
  35                      char *buf, int len)
  36 {
  37         static const char *const units_10[] = {
  38                 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
  39         };
  40         static const char *const units_2[] = {
  41                 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
  42         };
  43         static const char *const *const units_str[] = {
  44                 [STRING_UNITS_10] = units_10,
  45                 [STRING_UNITS_2] = units_2,
  46         };
  47         static const unsigned int divisor[] = {
  48                 [STRING_UNITS_10] = 1000,
  49                 [STRING_UNITS_2] = 1024,
  50         };
  51         static const unsigned int rounding[] = { 500, 50, 5 };
  52         int i = 0, j;
  53         u32 remainder = 0, sf_cap;
  54         char tmp[8];
  55         const char *unit;
  56
  57         tmp[0] = '\0';
  58
  59         if (blk_size == 0)
  60                 size = 0;
  61         if (size == 0)
  62                 goto out;
  63
  64         /* This is Napier's algorithm.  Reduce the original block size to
  65          *
  66          * coefficient * divisor[units]^i
  67          *
  68          * we do the reduction so both coefficients are just under 32 bits so
  69          * that multiplying them together won't overflow 64 bits and we keep
  70          * as much precision as possible in the numbers.
  71          *
  72          * Note: it's safe to throw away the remainders here because all the
  73          * precision is in the coefficients.
  74          */
  75         while (blk_size >> 32) {
  76                 do_div(blk_size, divisor[units]);
  77                 i++;
  78         }
  79
  80         while (size >> 32) {
  81                 do_div(size, divisor[units]);
  82                 i++;
  83         }
  84
  85         /* now perform the actual multiplication keeping i as the sum of the
  86          * two logarithms */
  87         size *= blk_size;
  88
  89         /* and logarithmically reduce it until it's just under the divisor */
  90         while (size >= divisor[units]) {
  91                 remainder = do_div(size, divisor[units]);
  92                 i++;
  93         }
  94
  95         /* work out in j how many digits of precision we need from the
  96          * remainder */
  97         sf_cap = size;
  98         for (j = 0; sf_cap*10 < 1000; j++)
  99                 sf_cap *= 10;
 100
 101         if (units == STRING_UNITS_2) {
 102                 /* express the remainder as a decimal.  It's currently the
 103                  * numerator of a fraction whose denominator is
 104                  * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
 105                 remainder *= 1000;
 106                 remainder >>= 10;
 107         }
 108
 109         /* add a 5 to the digit below what will be printed to ensure
 110          * an arithmetical round up and carry it through to size */
 111         remainder += rounding[j];
 112         if (remainder >= 1000) {
 113                 remainder -= 1000;
 114                 size += 1;
 115         }
 116
 117         if (j) {
 118                 snprintf(tmp, sizeof(tmp), ".%03u", remainder);
 119                 tmp[j+1] = '\0';
 120         }
 121
 122  out:
 123         if (i >= ARRAY_SIZE(units_2))
 124                 unit = "UNK";
 125         else
 126                 unit = units_str[units][i];
 127
 128         snprintf(buf, len, "%u%s %s", (u32)size,
 129                  tmp, unit);
 130 }
 131 EXPORT_SYMBOL(string_get_size);
 132
 133 static bool unescape_space(char **src, char **dst)
 134 {
 135         char *p = *dst, *q = *src;
 136
 137         switch (*q) {
 138         case 'n':
 139                 *p = '\n';
 140                 break;
 141         case 'r':
 142                 *p = '\r';
 143                 break;
 144         case 't':
 145                 *p = '\t';
 146                 break;
 147         case 'v':
 148                 *p = '\v';
 149                 break;
 150         case 'f':
 151                 *p = '\f';
 152                 break;
 153         default:
 154                 return false;
 155         }
 156         *dst += 1;
 157         *src += 1;
 158         return true;
 159 }
 160
 161 static bool unescape_octal(char **src, char **dst)
 162 {
 163         char *p = *dst, *q = *src;
 164         u8 num;
 165
 166         if (isodigit(*q) == 0)
 167                 return false;
 168
 169         num = (*q++) & 7;
 170         while (num < 32 && isodigit(*q) && (q - *src < 3)) {
 171                 num <<= 3;
 172                 num += (*q++) & 7;
 173         }
 174         *p = num;
 175         *dst += 1;
 176         *src = q;
 177         return true;
 178 }
 179
 180 static bool unescape_hex(char **src, char **dst)
 181 {
 182         char *p = *dst, *q = *src;
 183         int digit;
 184         u8 num;
 185
 186         if (*q++ != 'x')
 187                 return false;
 188
 189         num = digit = hex_to_bin(*q++);
 190         if (digit < 0)
 191                 return false;
 192
 193         digit = hex_to_bin(*q);
 194         if (digit >= 0) {
 195                 q++;
 196                 num = (num << 4) | digit;
 197         }
 198         *p = num;
 199         *dst += 1;
 200         *src = q;
 201         return true;
 202 }
 203
 204 static bool unescape_special(char **src, char **dst)
 205 {
 206         char *p = *dst, *q = *src;
 207
 208         switch (*q) {
 209         case '\"':
 210                 *p = '\"';
 211                 break;
 212         case '\\':
 213                 *p = '\\';
 214                 break;
 215         case 'a':
 216                 *p = '\a';
 217                 break;
 218         case 'e':
 219                 *p = '\e';
 220                 break;
 221         default:
 222                 return false;
 223         }
 224         *dst += 1;
 225         *src += 1;
 226         return true;
 227 }
 228
 229 /**
 230  * string_unescape - unquote characters in the given string
 231  * @src:        source buffer (escaped)
 232  * @dst:        destination buffer (unescaped)
 233  * @size:       size of the destination buffer (0 to unlimit)
 234  * @flags:      combination of the flags.
 235  *
 236  * Description:
 237  * The function unquotes characters in the given string.
 238  *
 239  * Because the size of the output will be the same as or less than the size of
 240  * the input, the transformation may be performed in place.
 241  *
 242  * Caller must provide valid source and destination pointers. Be aware that
 243  * destination buffer will always be NULL-terminated. Source string must be
 244  * NULL-terminated as well.  The supported flags are::
 245  *
 246  *      UNESCAPE_SPACE:
 247  *              '\f' - form feed
 248  *              '\n' - new line
 249  *              '\r' - carriage return
 250  *              '\t' - horizontal tab
 251  *              '\v' - vertical tab
 252  *      UNESCAPE_OCTAL:
 253  *              '\NNN' - byte with octal value NNN (1 to 3 digits)
 254  *      UNESCAPE_HEX:
 255  *              '\xHH' - byte with hexadecimal value HH (1 to 2 digits)
 256  *      UNESCAPE_SPECIAL:
 257  *              '\"' - double quote
 258  *              '\\' - backslash
 259  *              '\a' - alert (BEL)
 260  *              '\e' - escape
 261  *      UNESCAPE_ANY:
 262  *              all previous together
 263  *
 264  * Return:
 265  * The amount of the characters processed to the destination buffer excluding
 266  * trailing '\0' is returned.
 267  */
 268 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
 269 {
 270         char *out = dst;
 271
 272         while (*src && --size) {
 273                 if (src[0] == '\\' && src[1] != '\0' && size > 1) {
 274                         src++;
 275                         size--;
 276
 277                         if (flags & UNESCAPE_SPACE &&
 278                                         unescape_space(&src, &out))
 279                                 continue;
 280
 281                         if (flags & UNESCAPE_OCTAL &&
 282                                         unescape_octal(&src, &out))
 283                                 continue;
 284
 285                         if (flags & UNESCAPE_HEX &&
 286                                         unescape_hex(&src, &out))
 287                                 continue;
 288
 289                         if (flags & UNESCAPE_SPECIAL &&
 290                                         unescape_special(&src, &out))
 291                                 continue;
 292
 293                         *out++ = '\\';
 294                 }
 295                 *out++ = *src++;
 296         }
 297         *out = '\0';
 298
 299         return out - dst;
 300 }
 301 EXPORT_SYMBOL(string_unescape);
 302
 303 static bool escape_passthrough(unsigned char c, char **dst, char *end)
 304 {
 305         char *out = *dst;
 306
 307         if (out < end)
 308                 *out = c;
 309         *dst = out + 1;
 310         return true;
 311 }
 312
 313 static bool escape_space(unsigned char c, char **dst, char *end)
 314 {
 315         char *out = *dst;
 316         unsigned char to;
 317
 318         switch (c) {
 319         case '\n':
 320                 to = 'n';
 321                 break;
 322         case '\r':
 323                 to = 'r';
 324                 break;
 325         case '\t':
 326                 to = 't';
 327                 break;
 328         case '\v':
 329                 to = 'v';
 330                 break;
 331         case '\f':
 332                 to = 'f';
 333                 break;
 334         default:
 335                 return false;
 336         }
 337
 338         if (out < end)
 339                 *out = '\\';
 340         ++out;
 341         if (out < end)
 342                 *out = to;
 343         ++out;
 344
 345         *dst = out;
 346         return true;
 347 }
 348
 349 static bool escape_special(unsigned char c, char **dst, char *end)
 350 {
 351         char *out = *dst;
 352         unsigned char to;
 353
 354         switch (c) {
 355         case '\\':
 356                 to = '\\';
 357                 break;
 358         case '\a':
 359                 to = 'a';
 360                 break;
 361         case '\e':
 362                 to = 'e';
 363                 break;
 364         case '"':
 365                 to = '"';
 366                 break;
 367         default:
 368                 return false;
 369         }
 370
 371         if (out < end)
 372                 *out = '\\';
 373         ++out;
 374         if (out < end)
 375                 *out = to;
 376         ++out;
 377
 378         *dst = out;
 379         return true;
 380 }
 381
 382 static bool escape_null(unsigned char c, char **dst, char *end)
 383 {
 384         char *out = *dst;
 385
 386         if (c)
 387                 return false;
 388
 389         if (out < end)
 390                 *out = '\\';
 391         ++out;
 392         if (out < end)
 393                 *out = '0';
 394         ++out;
 395
 396         *dst = out;
 397         return true;
 398 }
 399
 400 static bool escape_octal(unsigned char c, char **dst, char *end)
 401 {
 402         char *out = *dst;
 403
 404         if (out < end)
 405                 *out = '\\';
 406         ++out;
 407         if (out < end)
 408                 *out = ((c >> 6) & 0x07) + '0';
 409         ++out;
 410         if (out < end)
 411                 *out = ((c >> 3) & 0x07) + '0';
 412         ++out;
 413         if (out < end)
 414                 *out = ((c >> 0) & 0x07) + '0';
 415         ++out;
 416
 417         *dst = out;
 418         return true;
 419 }
 420
 421 static bool escape_hex(unsigned char c, char **dst, char *end)
 422 {
 423         char *out = *dst;
 424
 425         if (out < end)
 426                 *out = '\\';
 427         ++out;
 428         if (out < end)
 429                 *out = 'x';
 430         ++out;
 431         if (out < end)
 432                 *out = hex_asc_hi(c);
 433         ++out;
 434         if (out < end)
 435                 *out = hex_asc_lo(c);
 436         ++out;
 437
 438         *dst = out;
 439         return true;
 440 }
 441
 442 /**
 443  * string_escape_mem - quote characters in the given memory buffer
 444  * @src:        source buffer (unescaped)
 445  * @isz:        source buffer size
 446  * @dst:        destination buffer (escaped)
 447  * @osz:        destination buffer size
 448  * @flags:      combination of the flags
 449  * @only:       NULL-terminated string containing characters used to limit
 450  *              the selected escape class. If characters are included in @only
 451  *              that would not normally be escaped by the classes selected
 452  *              in @flags, they will be copied to @dst unescaped.
 453  *
 454  * Description:
 455  * The process of escaping byte buffer includes several parts. They are applied
 456  * in the following sequence.
 457  *
 458  *      1. The character is not matched to the one from @only string and thus
 459  *         must go as-is to the output.
 460  *      2. The character is matched to the printable and ASCII classes, if asked,
 461  *         and in case of match it passes through to the output.
 462  *      3. The character is matched to the printable or ASCII class, if asked,
 463  *         and in case of match it passes through to the output.
 464  *      4. The character is checked if it falls into the class given by @flags.
 465  *         %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
 466  *         character. Note that they actually can't go together, otherwise
 467  *         %ESCAPE_HEX will be ignored.
 468  *
 469  * Caller must provide valid source and destination pointers. Be aware that
 470  * destination buffer will not be NULL-terminated, thus caller have to append
 471  * it if needs. The supported flags are::
 472  *
 473  *      %ESCAPE_SPACE: (special white space, not space itself)
 474  *              '\f' - form feed
 475  *              '\n' - new line
 476  *              '\r' - carriage return
 477  *              '\t' - horizontal tab
 478  *              '\v' - vertical tab
 479  *      %ESCAPE_SPECIAL:
 480  *              '\"' - double quote
 481  *              '\\' - backslash
 482  *              '\a' - alert (BEL)
 483  *              '\e' - escape
 484  *      %ESCAPE_NULL:
 485  *              '\0' - null
 486  *      %ESCAPE_OCTAL:
 487  *              '\NNN' - byte with octal value NNN (3 digits)
 488  *      %ESCAPE_ANY:
 489  *              all previous together
 490  *      %ESCAPE_NP:
 491  *              escape only non-printable characters, checked by isprint()
 492  *      %ESCAPE_ANY_NP:
 493  *              all previous together
 494  *      %ESCAPE_HEX:
 495  *              '\xHH' - byte with hexadecimal value HH (2 digits)
 496  *      %ESCAPE_NA:
 497  *              escape only non-ascii characters, checked by isascii()
 498  *      %ESCAPE_NAP:
 499  *              escape only non-printable or non-ascii characters
 500  *      %ESCAPE_APPEND:
 501  *              append characters from @only to be escaped by the given classes
 502  *
 503  * %ESCAPE_APPEND would help to pass additional characters to the escaped, when
 504  * one of %ESCAPE_NP, %ESCAPE_NA, or %ESCAPE_NAP is provided.
 505  *
 506  * One notable caveat, the %ESCAPE_NAP, %ESCAPE_NP and %ESCAPE_NA have the
 507  * higher priority than the rest of the flags (%ESCAPE_NAP is the highest).
 508  * It doesn't make much sense to use either of them without %ESCAPE_OCTAL
 509  * or %ESCAPE_HEX, because they cover most of the other character classes.
 510  * %ESCAPE_NAP can utilize %ESCAPE_SPACE or %ESCAPE_SPECIAL in addition to
 511  * the above.
 512  *
 513  * Return:
 514  * The total size of the escaped output that would be generated for
 515  * the given input and flags. To check whether the output was
 516  * truncated, compare the return value to osz. There is room left in
 517  * dst for a '\0' terminator if and only if ret < osz.
 518  */
 519 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 520                       unsigned int flags, const char *only)
 521 {
 522         char *p = dst;
 523         char *end = p + osz;
 524         bool is_dict = only && *only;
 525         bool is_append = flags & ESCAPE_APPEND;
 526
 527         while (isz--) {
 528                 unsigned char c = *src++;
 529                 bool in_dict = is_dict && strchr(only, c);
 530
 531                 /*
 532                  * Apply rules in the following sequence:
 533                  *      - the @only string is supplied and does not contain a
 534                  *        character under question
 535                  *      - the character is printable and ASCII, when @flags has
 536                  *        %ESCAPE_NAP bit set
 537                  *      - the character is printable, when @flags has
 538                  *        %ESCAPE_NP bit set
 539                  *      - the character is ASCII, when @flags has
 540                  *        %ESCAPE_NA bit set
 541                  *      - the character doesn't fall into a class of symbols
 542                  *        defined by given @flags
 543                  * In these cases we just pass through a character to the
 544                  * output buffer.
 545                  *
 546                  * When %ESCAPE_APPEND is passed, the characters from @only
 547                  * have been excluded from the %ESCAPE_NAP, %ESCAPE_NP, and
 548                  * %ESCAPE_NA cases.
 549                  */
 550                 if (!(is_append || in_dict) && is_dict &&
 551                                           escape_passthrough(c, &p, end))
 552                         continue;
 553
 554                 if (!(is_append && in_dict) && isascii(c) && isprint(c) &&
 555                     flags & ESCAPE_NAP && escape_passthrough(c, &p, end))
 556                         continue;
 557
 558                 if (!(is_append && in_dict) && isprint(c) &&
 559                     flags & ESCAPE_NP && escape_passthrough(c, &p, end))
 560                         continue;
 561
 562                 if (!(is_append && in_dict) && isascii(c) &&
 563                     flags & ESCAPE_NA && escape_passthrough(c, &p, end))
 564                         continue;
 565
 566                 if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
 567                         continue;
 568
 569                 if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
 570                         continue;
 571
 572                 if (flags & ESCAPE_NULL && escape_null(c, &p, end))
 573                         continue;
 574
 575                 /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
 576                 if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
 577                         continue;
 578
 579                 if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
 580                         continue;
 581
 582                 escape_passthrough(c, &p, end);
 583         }
 584
 585         return p - dst;
 586 }
 587 EXPORT_SYMBOL(string_escape_mem);
 588
 589 /*
 590  * Return an allocated string that has been escaped of special characters
 591  * and double quotes, making it safe to log in quotes.
 592  */
 593 char *kstrdup_quotable(const char *src, gfp_t gfp)
 594 {
 595         size_t slen, dlen;
 596         char *dst;
 597         const int flags = ESCAPE_HEX;
 598         const char esc[] = "\f\n\r\t\v\a\e\\\"";
 599
 600         if (!src)
 601                 return NULL;
 602         slen = strlen(src);
 603
 604         dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
 605         dst = kmalloc(dlen + 1, gfp);
 606         if (!dst)
 607                 return NULL;
 608
 609         WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
 610         dst[dlen] = '\0';
 611
 612         return dst;
 613 }
 614 EXPORT_SYMBOL_GPL(kstrdup_quotable);
 615
 616 /*
 617  * Returns allocated NULL-terminated string containing process
 618  * command line, with inter-argument NULLs replaced with spaces,
 619  * and other special characters escaped.
 620  */
 621 char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
 622 {
 623         char *buffer, *quoted;
 624         int i, res;
 625
 626         buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
 627         if (!buffer)
 628                 return NULL;
 629
 630         res = get_cmdline(task, buffer, PAGE_SIZE - 1);
 631         buffer[res] = '\0';
 632
 633         /* Collapse trailing NULLs, leave res pointing to last non-NULL. */
 634         while (--res >= 0 && buffer[res] == '\0')
 635                 ;
 636
 637         /* Replace inter-argument NULLs. */
 638         for (i = 0; i <= res; i++)
 639                 if (buffer[i] == '\0')
 640                         buffer[i] = ' ';
 641
 642         /* Make sure result is printable. */
 643         quoted = kstrdup_quotable(buffer, gfp);
 644         kfree(buffer);
 645         return quoted;
 646 }
 647 EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);
 648
 649 /*
 650  * Returns allocated NULL-terminated string containing pathname,
 651  * with special characters escaped, able to be safely logged. If
 652  * there is an error, the leading character will be "<".
 653  */
 654 char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
 655 {
 656         char *temp, *pathname;
 657
 658         if (!file)
 659                 return kstrdup("<unknown>", gfp);
 660
 661         /* We add 11 spaces for ' (deleted)' to be appended */
 662         temp = kmalloc(PATH_MAX + 11, GFP_KERNEL);
 663         if (!temp)
 664                 return kstrdup("<no_memory>", gfp);
 665
 666         pathname = file_path(file, temp, PATH_MAX + 11);
 667         if (IS_ERR(pathname))
 668                 pathname = kstrdup("<too_long>", gfp);
 669         else
 670                 pathname = kstrdup_quotable(pathname, gfp);
 671
 672         kfree(temp);
 673         return pathname;
 674 }
 675 EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
 676
 677 /**
 678  * kfree_strarray - free a number of dynamically allocated strings contained
 679  *                  in an array and the array itself
 680  *
 681  * @array: Dynamically allocated array of strings to free.
 682  * @n: Number of strings (starting from the beginning of the array) to free.
 683  *
 684  * Passing a non-NULL @array and @n == 0 as well as NULL @array are valid
 685  * use-cases. If @array is NULL, the function does nothing.
 686  */
 687 void kfree_strarray(char **array, size_t n)
 688 {
 689         unsigned int i;
 690
 691         if (!array)
 692                 return;
 693
 694         for (i = 0; i < n; i++)
 695                 kfree(array[i]);
 696         kfree(array);
 697 }
 698 EXPORT_SYMBOL_GPL(kfree_strarray);
 699
 700 /**
 701  * memcpy_and_pad - Copy one buffer to another with padding
 702  * @dest: Where to copy to
 703  * @dest_len: The destination buffer size
 704  * @src: Where to copy from
 705  * @count: The number of bytes to copy
 706  * @pad: Character to use for padding if space is left in destination.
 707  */
 708 void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
 709                     int pad)
 710 {
 711         if (dest_len > count) {
 712                 memcpy(dest, src, count);
 713                 memset(dest + count, pad,  dest_len - count);
 714         } else {
 715                 memcpy(dest, src, dest_len);
 716         }
 717 }
 718 EXPORT_SYMBOL(memcpy_and_pad);