LCOV - code coverage report
Current view: top level - basic - locale-util.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 113 160 70.6 %
Date: 2019-08-22 15:41:25 Functions: 10 12 83.3 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <dirent.h>
       4             : #include <errno.h>
       5             : #include <fcntl.h>
       6             : #include <ftw.h>
       7             : #include <langinfo.h>
       8             : #include <libintl.h>
       9             : #include <locale.h>
      10             : #include <stddef.h>
      11             : #include <stdint.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : #include <sys/mman.h>
      15             : #include <sys/stat.h>
      16             : 
      17             : #include "def.h"
      18             : #include "dirent-util.h"
      19             : #include "env-util.h"
      20             : #include "fd-util.h"
      21             : #include "hashmap.h"
      22             : #include "locale-util.h"
      23             : #include "path-util.h"
      24             : #include "set.h"
      25             : #include "string-table.h"
      26             : #include "string-util.h"
      27             : #include "strv.h"
      28             : #include "utf8.h"
      29             : 
      30         851 : static char *normalize_locale(const char *name) {
      31             :         const char *e;
      32             : 
      33             :         /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
      34             :          * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
      35             :          * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
      36             :          * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
      37             :          * that for UTF-8 however, since it's kinda the only charset that matters. */
      38             : 
      39         851 :         e = endswith(name, ".utf8");
      40         851 :         if (e) {
      41         294 :                 _cleanup_free_ char *prefix = NULL;
      42             : 
      43         294 :                 prefix = strndup(name, e - name);
      44         294 :                 if (!prefix)
      45           0 :                         return NULL;
      46             : 
      47         294 :                 return strjoin(prefix, ".UTF-8");
      48             :         }
      49             : 
      50         557 :         e = strstr(name, ".utf8@");
      51         557 :         if (e) {
      52          10 :                 _cleanup_free_ char *prefix = NULL;
      53             : 
      54          10 :                 prefix = strndup(name, e - name);
      55          10 :                 if (!prefix)
      56           0 :                         return NULL;
      57             : 
      58          10 :                 return strjoin(prefix, ".UTF-8@", e + 6);
      59             :         }
      60             : 
      61         547 :         return strdup(name);
      62             : }
      63             : 
      64           1 : static int add_locales_from_archive(Set *locales) {
      65             :         /* Stolen from glibc... */
      66             : 
      67             :         struct locarhead {
      68             :                 uint32_t magic;
      69             :                 /* Serial number.  */
      70             :                 uint32_t serial;
      71             :                 /* Name hash table.  */
      72             :                 uint32_t namehash_offset;
      73             :                 uint32_t namehash_used;
      74             :                 uint32_t namehash_size;
      75             :                 /* String table.  */
      76             :                 uint32_t string_offset;
      77             :                 uint32_t string_used;
      78             :                 uint32_t string_size;
      79             :                 /* Table with locale records.  */
      80             :                 uint32_t locrectab_offset;
      81             :                 uint32_t locrectab_used;
      82             :                 uint32_t locrectab_size;
      83             :                 /* MD5 sum hash table.  */
      84             :                 uint32_t sumhash_offset;
      85             :                 uint32_t sumhash_used;
      86             :                 uint32_t sumhash_size;
      87             :         };
      88             : 
      89             :         struct namehashent {
      90             :                 /* Hash value of the name.  */
      91             :                 uint32_t hashval;
      92             :                 /* Offset of the name in the string table.  */
      93             :                 uint32_t name_offset;
      94             :                 /* Offset of the locale record.  */
      95             :                 uint32_t locrec_offset;
      96             :         };
      97             : 
      98             :         const struct locarhead *h;
      99             :         const struct namehashent *e;
     100           1 :         const void *p = MAP_FAILED;
     101           1 :         _cleanup_close_ int fd = -1;
     102           1 :         size_t sz = 0;
     103             :         struct stat st;
     104             :         size_t i;
     105             :         int r;
     106             : 
     107           1 :         fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
     108           1 :         if (fd < 0)
     109           0 :                 return errno == ENOENT ? 0 : -errno;
     110             : 
     111           1 :         if (fstat(fd, &st) < 0)
     112           0 :                 return -errno;
     113             : 
     114           1 :         if (!S_ISREG(st.st_mode))
     115           0 :                 return -EBADMSG;
     116             : 
     117           1 :         if (st.st_size < (off_t) sizeof(struct locarhead))
     118           0 :                 return -EBADMSG;
     119             : 
     120           1 :         p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
     121           1 :         if (p == MAP_FAILED)
     122           0 :                 return -errno;
     123             : 
     124           1 :         h = (const struct locarhead *) p;
     125           1 :         if (h->magic != 0xde020109 ||
     126           1 :             h->namehash_offset + h->namehash_size > st.st_size ||
     127           1 :             h->string_offset + h->string_size > st.st_size ||
     128           1 :             h->locrectab_offset + h->locrectab_size > st.st_size ||
     129           1 :             h->sumhash_offset + h->sumhash_size > st.st_size) {
     130           0 :                 r = -EBADMSG;
     131           0 :                 goto finish;
     132             :         }
     133             : 
     134           1 :         e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
     135        1368 :         for (i = 0; i < h->namehash_size; i++) {
     136             :                 char *z;
     137             : 
     138        1367 :                 if (e[i].locrec_offset == 0)
     139         552 :                         continue;
     140             : 
     141         815 :                 if (!utf8_is_valid((char*) p + e[i].name_offset))
     142           0 :                         continue;
     143             : 
     144         815 :                 z = normalize_locale((char*) p + e[i].name_offset);
     145         815 :                 if (!z) {
     146           0 :                         r = -ENOMEM;
     147           0 :                         goto finish;
     148             :                 }
     149             : 
     150         815 :                 r = set_consume(locales, z);
     151         815 :                 if (r < 0)
     152           0 :                         goto finish;
     153             :         }
     154             : 
     155           1 :         r = 0;
     156             : 
     157           1 :  finish:
     158           1 :         if (p != MAP_FAILED)
     159           1 :                 munmap((void*) p, sz);
     160             : 
     161           1 :         return r;
     162             : }
     163             : 
     164           1 : static int add_locales_from_libdir (Set *locales) {
     165           1 :         _cleanup_closedir_ DIR *dir = NULL;
     166             :         struct dirent *entry;
     167             :         int r;
     168             : 
     169           1 :         dir = opendir("/usr/lib/locale");
     170           1 :         if (!dir)
     171           0 :                 return errno == ENOENT ? 0 : -errno;
     172             : 
     173          40 :         FOREACH_DIRENT(entry, dir, return -errno) {
     174             :                 char *z;
     175             : 
     176          37 :                 dirent_ensure_type(dir, entry);
     177             : 
     178          37 :                 if (entry->d_type != DT_DIR)
     179           1 :                         continue;
     180             : 
     181          36 :                 z = normalize_locale(entry->d_name);
     182          36 :                 if (!z)
     183           0 :                         return -ENOMEM;
     184             : 
     185          36 :                 r = set_consume(locales, z);
     186          36 :                 if (r < 0 && r != -EEXIST)
     187           0 :                         return r;
     188             :         }
     189             : 
     190           1 :         return 0;
     191             : }
     192             : 
     193           1 : int get_locales(char ***ret) {
     194           1 :         _cleanup_set_free_ Set *locales = NULL;
     195           1 :         _cleanup_strv_free_ char **l = NULL;
     196             :         int r;
     197             : 
     198           1 :         locales = set_new(&string_hash_ops);
     199           1 :         if (!locales)
     200           0 :                 return -ENOMEM;
     201             : 
     202           1 :         r = add_locales_from_archive(locales);
     203           1 :         if (r < 0 && r != -ENOENT)
     204           0 :                 return r;
     205             : 
     206           1 :         r = add_locales_from_libdir(locales);
     207           1 :         if (r < 0)
     208           0 :                 return r;
     209             : 
     210           1 :         l = set_get_strv(locales);
     211           1 :         if (!l)
     212           0 :                 return -ENOMEM;
     213             : 
     214           1 :         r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
     215           1 :         if (r == -ENXIO || r == 0) {
     216             :                 char **a, **b;
     217             : 
     218             :                 /* Filter out non-UTF-8 locales, because it's 2019, by default */
     219         818 :                 for (a = b = l; *a; a++) {
     220             : 
     221         817 :                         if (endswith(*a, "UTF-8") ||
     222         537 :                             strstr(*a, ".UTF-8@"))
     223         290 :                                 *(b++) = *a;
     224             :                         else
     225         527 :                                 free(*a);
     226             :                 }
     227             : 
     228           1 :                 *b = NULL;
     229             : 
     230           0 :         } else if (r < 0)
     231           0 :                 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
     232             : 
     233           1 :         strv_sort(l);
     234             : 
     235           1 :         *ret = TAKE_PTR(l);
     236             : 
     237           1 :         return 0;
     238             : }
     239             : 
     240         299 : bool locale_is_valid(const char *name) {
     241             : 
     242         299 :         if (isempty(name))
     243           1 :                 return false;
     244             : 
     245         298 :         if (strlen(name) >= 128)
     246           0 :                 return false;
     247             : 
     248         298 :         if (!utf8_is_valid(name))
     249           0 :                 return false;
     250             : 
     251         298 :         if (!filename_is_valid(name))
     252           1 :                 return false;
     253             : 
     254         297 :         if (!string_is_safe(name))
     255           1 :                 return false;
     256             : 
     257         296 :         return true;
     258             : }
     259             : 
     260           0 : void init_gettext(void) {
     261           0 :         setlocale(LC_ALL, "");
     262           0 :         textdomain(GETTEXT_PACKAGE);
     263           0 : }
     264             : 
     265         590 : bool is_locale_utf8(void) {
     266             :         const char *set;
     267             :         static int cached_answer = -1;
     268             : 
     269             :         /* Note that we default to 'true' here, since today UTF8 is
     270             :          * pretty much supported everywhere. */
     271             : 
     272         590 :         if (cached_answer >= 0)
     273         583 :                 goto out;
     274             : 
     275           7 :         if (!setlocale(LC_ALL, "")) {
     276           0 :                 cached_answer = true;
     277           0 :                 goto out;
     278             :         }
     279             : 
     280           7 :         set = nl_langinfo(CODESET);
     281           7 :         if (!set) {
     282           0 :                 cached_answer = true;
     283           0 :                 goto out;
     284             :         }
     285             : 
     286           7 :         if (streq(set, "UTF-8")) {
     287           7 :                 cached_answer = true;
     288           7 :                 goto out;
     289             :         }
     290             : 
     291             :         /* For LC_CTYPE=="C" return true, because CTYPE is effectively
     292             :          * unset and everything can do to UTF-8 nowadays. */
     293           0 :         set = setlocale(LC_CTYPE, NULL);
     294           0 :         if (!set) {
     295           0 :                 cached_answer = true;
     296           0 :                 goto out;
     297             :         }
     298             : 
     299             :         /* Check result, but ignore the result if C was set
     300             :          * explicitly. */
     301           0 :         cached_answer =
     302           0 :                 STR_IN_SET(set, "C", "POSIX") &&
     303           0 :                 !getenv("LC_ALL") &&
     304           0 :                 !getenv("LC_CTYPE") &&
     305           0 :                 !getenv("LANG");
     306             : 
     307         590 : out:
     308         590 :         return (bool) cached_answer;
     309             : }
     310             : 
     311           7 : static bool emoji_enabled(void) {
     312             :         static int cached_emoji_enabled = -1;
     313             : 
     314           7 :         if (cached_emoji_enabled < 0) {
     315             :                 int val;
     316             : 
     317           1 :                 val = getenv_bool("SYSTEMD_EMOJI");
     318           1 :                 if (val < 0)
     319           1 :                         cached_emoji_enabled =
     320           2 :                                 is_locale_utf8() &&
     321           1 :                                 !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
     322             :                 else
     323           0 :                         cached_emoji_enabled = val;
     324             :         }
     325             : 
     326           7 :         return cached_emoji_enabled;
     327             : }
     328             : 
     329          33 : const char *special_glyph(SpecialGlyph code) {
     330             : 
     331             :         /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
     332             :          * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
     333             :          * works reasonably well on the Linux console. For details see:
     334             :          *
     335             :          * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
     336             :          */
     337             : 
     338             :         static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
     339             :                 /* ASCII fallback */
     340             :                 [false] = {
     341             :                         [SPECIAL_GLYPH_TREE_VERTICAL]           = "| ",
     342             :                         [SPECIAL_GLYPH_TREE_BRANCH]             = "|-",
     343             :                         [SPECIAL_GLYPH_TREE_RIGHT]              = "`-",
     344             :                         [SPECIAL_GLYPH_TREE_SPACE]              = "  ",
     345             :                         [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = ">",
     346             :                         [SPECIAL_GLYPH_BLACK_CIRCLE]            = "*",
     347             :                         [SPECIAL_GLYPH_BULLET]                  = "*",
     348             :                         [SPECIAL_GLYPH_ARROW]                   = "->",
     349             :                         [SPECIAL_GLYPH_MDASH]                   = "-",
     350             :                         [SPECIAL_GLYPH_ELLIPSIS]                = "...",
     351             :                         [SPECIAL_GLYPH_MU]                      = "u",
     352             :                         [SPECIAL_GLYPH_CHECK_MARK]              = "+",
     353             :                         [SPECIAL_GLYPH_CROSS_MARK]              = "-",
     354             :                         [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = ":-]",
     355             :                         [SPECIAL_GLYPH_HAPPY_SMILEY]            = ":-}",
     356             :                         [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = ":-)",
     357             :                         [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = ":-|",
     358             :                         [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
     359             :                         [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = ":-{️",
     360             :                         [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = ":-[",
     361             :                 },
     362             : 
     363             :                 /* UTF-8 */
     364             :                 [true] = {
     365             :                         [SPECIAL_GLYPH_TREE_VERTICAL]           = "\342\224\202 ",            /* │  */
     366             :                         [SPECIAL_GLYPH_TREE_BRANCH]             = "\342\224\234\342\224\200", /* ├─ */
     367             :                         [SPECIAL_GLYPH_TREE_RIGHT]              = "\342\224\224\342\224\200", /* └─ */
     368             :                         [SPECIAL_GLYPH_TREE_SPACE]              = "  ",                       /*    */
     369             :                         [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = "\342\200\243",             /* ‣ */
     370             :                         [SPECIAL_GLYPH_BLACK_CIRCLE]            = "\342\227\217",             /* ● */
     371             :                         [SPECIAL_GLYPH_BULLET]                  = "\342\200\242",             /* • */
     372             :                         [SPECIAL_GLYPH_ARROW]                   = "\342\206\222",             /* → */
     373             :                         [SPECIAL_GLYPH_MDASH]                   = "\342\200\223",             /* – */
     374             :                         [SPECIAL_GLYPH_ELLIPSIS]                = "\342\200\246",             /* … */
     375             :                         [SPECIAL_GLYPH_MU]                      = "\316\274",                 /* μ */
     376             :                         [SPECIAL_GLYPH_CHECK_MARK]              = "\342\234\223",             /* ✓ */
     377             :                         [SPECIAL_GLYPH_CROSS_MARK]              = "\342\234\227",             /* ✗ */
     378             :                         [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = "\360\237\230\207",         /* 😇 */
     379             :                         [SPECIAL_GLYPH_HAPPY_SMILEY]            = "\360\237\230\200",         /* 😀 */
     380             :                         [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = "\360\237\231\202",         /* 🙂 */
     381             :                         [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = "\360\237\230\220",         /* 😐 */
     382             :                         [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201",         /* 🙁 */
     383             :                         [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = "\360\237\230\250",         /* 😨️️ */
     384             :                         [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = "\360\237\244\242",         /* 🤢 */
     385             :                 },
     386             :         };
     387             : 
     388          33 :         assert(code < _SPECIAL_GLYPH_MAX);
     389             : 
     390          33 :         return draw_table[code >= _SPECIAL_GLYPH_FIRST_SMILEY ? emoji_enabled() : is_locale_utf8()][code];
     391             : }
     392             : 
     393           0 : void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
     394             :         LocaleVariable i;
     395             : 
     396           0 :         if (!l)
     397           0 :                 return;
     398             : 
     399           0 :         for (i = 0; i < _VARIABLE_LC_MAX; i++)
     400           0 :                 l[i] = mfree(l[i]);
     401             : }
     402             : 
     403             : static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
     404             :         [VARIABLE_LANG] = "LANG",
     405             :         [VARIABLE_LANGUAGE] = "LANGUAGE",
     406             :         [VARIABLE_LC_CTYPE] = "LC_CTYPE",
     407             :         [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
     408             :         [VARIABLE_LC_TIME] = "LC_TIME",
     409             :         [VARIABLE_LC_COLLATE] = "LC_COLLATE",
     410             :         [VARIABLE_LC_MONETARY] = "LC_MONETARY",
     411             :         [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
     412             :         [VARIABLE_LC_PAPER] = "LC_PAPER",
     413             :         [VARIABLE_LC_NAME] = "LC_NAME",
     414             :         [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
     415             :         [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
     416             :         [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
     417             :         [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
     418             : };
     419             : 
     420          32 : DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);

Generated by: LCOV version 1.14