Line data Source code
1 : /* gunicode.c - Unicode manipulation functions
2 : *
3 : * Copyright (C) 1999, 2000 Tom Tromey
4 : * Copyright © 2000, 2005 Red Hat, Inc.
5 : */
6 :
7 : #include "gunicode.h"
8 :
9 : #define unichar uint32_t
10 :
/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Steps backwards from @p to the start of the preceding UTF-8 character.
 *
 * @p is allowed to point into the middle of a character. The only test
 * applied to the byte that is returned is that it is not a continuation
 * byte (bit pattern 10xxxxxx); the character itself is not validated.
 *
 * The backwards scan has no lower bound: the caller must guarantee that
 * a non-continuation byte exists somewhere before @p in the same buffer.
 * In particular, @p must not be the first character of the string.
 *
 * Return value: a pointer to the found character.
 **/
char *
utf8_prev_char (const char *p)
{
  /* Always move back at least one byte, then keep going while the byte
   * under the cursor is a continuation byte. */
  do
    p--;
  while ((*p & 0xc0) == 0x80);

  return (char *)p;
}
34 :
35 : struct Interval
36 : {
37 : unichar start, end;
38 : };
39 :
40 : static int
41 319927 : interval_compare (const void *key, const void *elt)
42 : {
43 319927 : unichar c = (unichar) (long) (key);
44 319927 : struct Interval *interval = (struct Interval *)elt;
45 :
46 319927 : if (c < interval->start)
47 300114 : return -1;
48 19813 : if (c > interval->end)
49 15188 : return +1;
50 :
51 4625 : return 0;
52 : }
53 :
/*
 * NOTE:
 *
 * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
 * generated from the Unicode Character Database file
 * extracted/DerivedEastAsianWidth.txt with the gen-iswide-table.py
 * script, invoked as follows:
 *
 *   ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
 *
 * Last updated for Unicode 6.0.
 */
66 :
67 : /**
68 : * g_unichar_iswide:
69 : * @c: a Unicode character
70 : *
71 : * Determines if a character is typically rendered in a double-width
72 : * cell.
73 : *
74 : * Return value: %TRUE if the character is wide
75 : **/
76 : bool
77 54092 : unichar_iswide (unichar c)
78 : {
79 : /* See NOTE earlier for how to update this table. */
80 : static const struct Interval wide[] = {
81 : {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
82 : {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
83 : {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
84 : {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
85 : {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
86 : {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
87 : {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
88 : {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
89 : {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
90 : {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */
91 : {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
92 : };
93 :
94 54092 : if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
95 : interval_compare))
96 4625 : return true;
97 :
98 49467 : return false;
99 : }
100 :
/* 256-entry table indexed by byte value. Judging by the name, each entry
 * is presumably the number of bytes to skip for a UTF-8 sequence that
 * starts with that byte (confirm against the users of this table). */
const char utf8_skip_data[256] = {
  /* 0x00-0x0F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10-0x1F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20-0x2F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x30-0x3F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40-0x4F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x50-0x5F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x60-0x6F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x70-0x7F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80-0x8F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x90-0x9F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xA0-0xAF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xB0-0xBF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0-0xCF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xD0-0xDF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0-0xEF */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xF0-0xFF */ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
|