LCOV - code coverage report
Current view: top level - basic - extract-word.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 133 139 95.7 %
Date: 2019-08-22 15:41:25 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <errno.h>
       4             : #include <stdarg.h>
       5             : #include <stdbool.h>
       6             : #include <stddef.h>
       7             : #include <stdint.h>
       8             : #include <stdlib.h>
       9             : #include <string.h>
      10             : #include <syslog.h>
      11             : 
      12             : #include "alloc-util.h"
      13             : #include "escape.h"
      14             : #include "extract-word.h"
      15             : #include "log.h"
      16             : #include "macro.h"
      17             : #include "string-util.h"
      18             : #include "utf8.h"
      19             : 
      20       14697 : int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
      21       14697 :         _cleanup_free_ char *s = NULL;
      22       14697 :         size_t allocated = 0, sz = 0;
      23             :         char c;
      24             :         int r;
      25             : 
      26       14697 :         char quote = 0;                 /* 0 or ' or " */
      27       14697 :         bool backslash = false;         /* whether we've just seen a backslash */
      28             : 
      29       14697 :         assert(p);
      30       14697 :         assert(ret);
      31             : 
      32             :         /* Bail early if called after last value or with no input */
      33       14697 :         if (!*p)
      34        2573 :                 goto finish;
      35       12124 :         c = **p;
      36             : 
      37       12124 :         if (!separators)
      38        5887 :                 separators = WHITESPACE;
      39             : 
      40             :         /* Parses the first word of a string, and returns it in
      41             :          * *ret. Removes all quotes in the process. When parsing fails
      42             :          * (because of an uneven number of quotes or similar), leaves
      43             :          * the pointer *p at the first invalid character. */
      44             : 
      45       12124 :         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
      46        2196 :                 if (!GREEDY_REALLOC(s, allocated, sz+1))
      47           0 :                         return -ENOMEM;
      48             : 
      49          99 :         for (;; (*p)++, c = **p) {
      50       12223 :                 if (c == 0)
      51          24 :                         goto finish_force_terminate;
      52       12199 :                 else if (strchr(separators, c)) {
      53         105 :                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
      54           6 :                                 (*p)++;
      55           6 :                                 goto finish_force_next;
      56             :                         }
      57             :                 } else {
      58             :                         /* We found a non-blank character, so we will always
      59             :                          * want to return a string (even if it is empty),
      60             :                          * allocate it here. */
      61       12094 :                         if (!GREEDY_REALLOC(s, allocated, sz+1))
      62           0 :                                 return -ENOMEM;
      63       12094 :                         break;
      64             :                 }
      65             :         }
      66             : 
      67         430 :         for (;; (*p)++, c = **p) {
      68       12524 :                 if (backslash) {
      69          91 :                         if (!GREEDY_REALLOC(s, allocated, sz+7))
      70           0 :                                 return -ENOMEM;
      71             : 
      72          91 :                         if (c == 0) {
      73          27 :                                 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
      74           6 :                                     (!quote || flags & EXTRACT_RELAX)) {
      75             :                                         /* If we find an unquoted trailing backslash and we're in
      76             :                                          * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
      77             :                                          * output.
      78             :                                          *
      79             :                                          * Unbalanced quotes will only be allowed in EXTRACT_RELAX
      80             :                                          * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
      81             :                                          */
      82           8 :                                         s[sz++] = '\\';
      83           8 :                                         goto finish_force_terminate;
      84             :                                 }
      85          19 :                                 if (flags & EXTRACT_RELAX)
      86           5 :                                         goto finish_force_terminate;
      87          14 :                                 return -EINVAL;
      88             :                         }
      89             : 
      90          64 :                         if (flags & EXTRACT_CUNESCAPE) {
      91          48 :                                 bool eight_bit = false;
      92             :                                 char32_t u;
      93             : 
      94          48 :                                 r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
      95          48 :                                 if (r < 0) {
      96          22 :                                         if (flags & EXTRACT_CUNESCAPE_RELAX) {
      97          14 :                                                 s[sz++] = '\\';
      98          14 :                                                 s[sz++] = c;
      99             :                                         } else
     100           8 :                                                 return -EINVAL;
     101             :                                 } else {
     102          26 :                                         (*p) += r - 1;
     103             : 
     104          26 :                                         if (eight_bit)
     105           5 :                                                 s[sz++] = u;
     106             :                                         else
     107          21 :                                                 sz += utf8_encode_unichar(s + sz, u);
     108             :                                 }
     109             :                         } else
     110          16 :                                 s[sz++] = c;
     111             : 
     112          56 :                         backslash = false;
     113             : 
     114       12433 :                 } else if (quote) {     /* inside either single or double quotes */
     115        1037 :                         for (;; (*p)++, c = **p) {
     116        1208 :                                 if (c == 0) {
     117          26 :                                         if (flags & EXTRACT_RELAX)
     118           3 :                                                 goto finish_force_terminate;
     119          23 :                                         return -EINVAL;
     120        1182 :                                 } else if (c == quote) {        /* found the end quote */
     121         122 :                                         quote = 0;
     122         122 :                                         break;
     123        1060 :                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
     124          23 :                                         backslash = true;
     125          23 :                                         break;
     126             :                                 } else {
     127        1037 :                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
     128           0 :                                                 return -ENOMEM;
     129             : 
     130        1037 :                                         s[sz++] = c;
     131             :                                 }
     132             :                         }
     133             : 
     134             :                 } else {
     135      192211 :                         for (;; (*p)++, c = **p) {
     136      204473 :                                 if (c == 0)
     137        2726 :                                         goto finish_force_terminate;
     138      201747 :                                 else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_UNQUOTE)) {
     139         161 :                                         quote = c;
     140         161 :                                         break;
     141      201586 :                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
     142          68 :                                         backslash = true;
     143          68 :                                         break;
     144      201518 :                                 } else if (strchr(separators, c)) {
     145        9307 :                                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
     146        1750 :                                                 (*p)++;
     147        1750 :                                                 goto finish_force_next;
     148             :                                         }
     149             :                                         /* Skip additional coalesced separators. */
     150       10276 :                                         for (;; (*p)++, c = **p) {
     151       17833 :                                                 if (c == 0)
     152          44 :                                                         goto finish_force_terminate;
     153       17789 :                                                 if (!strchr(separators, c))
     154        7513 :                                                         break;
     155             :                                         }
     156        7513 :                                         goto finish;
     157             : 
     158             :                                 } else {
     159      192211 :                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
     160           0 :                                                 return -ENOMEM;
     161             : 
     162      192211 :                                         s[sz++] = c;
     163             :                                 }
     164             :                         }
     165             :                 }
     166             :         }
     167             : 
     168        2810 : finish_force_terminate:
     169        2810 :         *p = NULL;
     170       12896 : finish:
     171       12896 :         if (!s) {
     172        2593 :                 *p = NULL;
     173        2593 :                 *ret = NULL;
     174        2593 :                 return 0;
     175             :         }
     176             : 
     177       10303 : finish_force_next:
     178       12059 :         s[sz] = 0;
     179       12059 :         *ret = TAKE_PTR(s);
     180             : 
     181       12059 :         return 1;
     182             : }
     183             : 
     184         141 : int extract_first_word_and_warn(
     185             :                 const char **p,
     186             :                 char **ret,
     187             :                 const char *separators,
     188             :                 ExtractFlags flags,
     189             :                 const char *unit,
     190             :                 const char *filename,
     191             :                 unsigned line,
     192             :                 const char *rvalue) {
     193             : 
     194             :         /* Try to unquote it, if it fails, warn about it and try again
     195             :          * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
     196             :          * backslashes verbatim in invalid escape sequences. */
     197             : 
     198             :         const char *save;
     199             :         int r;
     200             : 
     201         141 :         save = *p;
     202         141 :         r = extract_first_word(p, ret, separators, flags);
     203         141 :         if (r >= 0)
     204         125 :                 return r;
     205             : 
     206          16 :         if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
     207             : 
     208             :                 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
     209          16 :                 *p = save;
     210          16 :                 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
     211          16 :                 if (r >= 0) {
     212             :                         /* It worked this time, hence it must have been an invalid escape sequence. */
     213           9 :                         log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
     214           9 :                         return r;
     215             :                 }
     216             : 
     217             :                 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
     218           7 :                 if (r == -EINVAL)
     219           7 :                         return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
     220             :         }
     221             : 
     222             :         /* Can be any error, report it */
     223           0 :         return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
     224             : }
     225             : 
     226             : /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
     227             :  * an object that undergoes default argument promotion as an argument to va_start).
     228             :  * Let's make sure that ExtractFlags fits into an unsigned int. */
     229             : assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
     230             : 
     231          90 : int extract_many_words(const char **p, const char *separators, unsigned flags, ...) {
     232             :         va_list ap;
     233             :         char **l;
     234          90 :         int n = 0, i, c, r;
     235             : 
     236             :         /* Parses a number of words from a string, stripping any
     237             :          * quotes if necessary. */
     238             : 
     239          90 :         assert(p);
     240             : 
     241             :         /* Count how many words are expected */
     242          90 :         va_start(ap, flags);
     243             :         for (;;) {
     244         599 :                 if (!va_arg(ap, char **))
     245          90 :                         break;
     246         509 :                 n++;
     247             :         }
     248          90 :         va_end(ap);
     249             : 
     250          90 :         if (n <= 0)
     251           1 :                 return 0;
     252             : 
     253             :         /* Read all words into a temporary array */
     254          89 :         l = newa0(char*, n);
     255         482 :         for (c = 0; c < n; c++) {
     256             : 
     257         429 :                 r = extract_first_word(p, &l[c], separators, flags);
     258         429 :                 if (r < 0) {
     259             :                         int j;
     260             : 
     261           4 :                         for (j = 0; j < c; j++)
     262           2 :                                 free(l[j]);
     263             : 
     264           2 :                         return r;
     265             :                 }
     266             : 
     267         427 :                 if (r == 0)
     268          34 :                         break;
     269             :         }
     270             : 
     271             :         /* If we managed to parse all words, return them in the passed
     272             :          * in parameters */
     273          87 :         va_start(ap, flags);
     274         584 :         for (i = 0; i < n; i++) {
     275             :                 char **v;
     276             : 
     277         497 :                 v = va_arg(ap, char **);
     278         497 :                 assert(v);
     279             : 
     280         497 :                 *v = l[i];
     281             :         }
     282          87 :         va_end(ap);
     283             : 
     284          87 :         return c;
     285             : }

Generated by: LCOV version 1.14