LCOV - code coverage report
Current view: top level - basic - strbuf.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 73 84 86.9 %
Date: 2019-08-22 15:41:25 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <errno.h>
       4             : #include <stdlib.h>
       5             : #include <string.h>
       6             : 
       7             : #include "alloc-util.h"
       8             : #include "sort-util.h"
       9             : #include "strbuf.h"
      10             : 
      11             : /*
      12             :  * Strbuf stores given strings in a single continuous allocated memory
      13             :  * area. Identical strings are de-duplicated and return the same offset
      14             :  * as the first string stored. If the tail of a string already exists
      15             :  * in the buffer, the tail is returned.
      16             :  *
      17             :  * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
      18             :  * information about the stored strings.
      19             :  *
      20             :  * Example of udev rules:
      21             :  *   $ ./udevadm test .
      22             :  *   ...
      23             :  *   read rules file: /usr/lib/udev/rules.d/99-systemd.rules
      24             :  *   rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
      25             :  *   23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
      26             :  *   ...
      27             :  */
      28             : 
      29           6 : struct strbuf *strbuf_new(void) {
      30             :         struct strbuf *str;
      31             : 
      32           6 :         str = new(struct strbuf, 1);
      33           6 :         if (!str)
      34           0 :                 return NULL;
      35           6 :         *str = (struct strbuf) {
      36           6 :                 .buf = new0(char, 1),
      37           6 :                 .root = new0(struct strbuf_node, 1),
      38             :                 .len = 1,
      39             :                 .nodes_count = 1,
      40             :         };
      41           6 :         if (!str->buf || !str->root) {
      42           0 :                 free(str->buf);
      43           0 :                 free(str->root);
      44           0 :                 return mfree(str);
      45             :         }
      46             : 
      47           6 :         return str;
      48             : }
      49             : 
      50       75663 : static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
      51             :         size_t i;
      52             : 
      53      151320 :         for (i = 0; i < node->children_count; i++)
      54       75657 :                 strbuf_node_cleanup(node->children[i].child);
      55       75663 :         free(node->children);
      56       75663 :         return mfree(node);
      57             : }
      58             : 
      59             : /* clean up trie data, leave only the string buffer */
      60          10 : void strbuf_complete(struct strbuf *str) {
      61          10 :         if (!str)
      62           0 :                 return;
      63          10 :         if (str->root)
      64           6 :                 str->root = strbuf_node_cleanup(str->root);
      65             : }
      66             : 
      67             : /* clean up everything */
      68           6 : void strbuf_cleanup(struct strbuf *str) {
      69           6 :         if (!str)
      70           0 :                 return;
      71             : 
      72           6 :         strbuf_complete(str);
      73           6 :         free(str->buf);
      74           6 :         free(str);
      75             : }
      76             : 
      77     5162551 : static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
      78             :                                const struct strbuf_child_entry *n2) {
      79     5162551 :         return n1->c - n2->c;
      80             : }
      81             : 
      82       75657 : static void bubbleinsert(struct strbuf_node *node,
      83             :                          uint8_t c,
      84             :                          struct strbuf_node *node_child) {
      85             : 
      86       75657 :         struct strbuf_child_entry new = {
      87             :                 .c = c,
      88             :                 .child = node_child,
      89             :         };
      90       75657 :         int left = 0, right = node->children_count;
      91             : 
      92      165167 :         while (right > left) {
      93       89510 :                 int middle = (right + left) / 2 ;
      94       89510 :                 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
      95       42721 :                         left = middle + 1;
      96             :                 else
      97       46789 :                         right = middle;
      98             :         }
      99             : 
     100       75657 :         memmove(node->children + left + 1, node->children + left,
     101       75657 :                 sizeof(struct strbuf_child_entry) * (node->children_count - left));
     102       75657 :         node->children[left] = new;
     103             : 
     104       75657 :         node->children_count++;
     105       75657 : }
     106             : 
     107             : /* add string, return the index/offset into the buffer */
     108      387866 : ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
     109             :         uint8_t c;
     110             :         struct strbuf_node *node;
     111             :         size_t depth;
     112             :         char *buf_new;
     113             :         struct strbuf_child_entry *child;
     114             :         struct strbuf_node *node_child;
     115             :         ssize_t off;
     116             : 
     117      387866 :         if (!str->root)
     118           0 :                 return -EINVAL;
     119             : 
     120             :         /* search string; start from last character to find possibly matching tails */
     121             : 
     122      387866 :         str->in_count++;
     123      387866 :         if (len == 0) {
     124       16955 :                 str->dedup_count++;
     125       16955 :                 return 0;
     126             :         }
     127      370911 :         str->in_len += len;
     128             : 
     129      370911 :         node = str->root;
     130     2267146 :         for (depth = 0; depth <= len; depth++) {
     131             :                 struct strbuf_child_entry search;
     132             : 
     133             :                 /* match against current node */
     134     2267146 :                 off = node->value_off + node->value_len - len;
     135     2267146 :                 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
     136      295254 :                         str->dedup_len += len;
     137      295254 :                         str->dedup_count++;
     138      295254 :                         return off;
     139             :                 }
     140             : 
     141     1971892 :                 c = s[len - 1 - depth];
     142             : 
     143             :                 /* lookup child node */
     144     1971892 :                 search.c = c;
     145     1971892 :                 child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp);
     146     1971892 :                 if (!child)
     147       75657 :                         break;
     148     1896235 :                 node = child->child;
     149             :         }
     150             : 
     151             :         /* add new string */
     152       75657 :         buf_new = realloc(str->buf, str->len + len+1);
     153       75657 :         if (!buf_new)
     154           0 :                 return -ENOMEM;
     155       75657 :         str->buf = buf_new;
     156       75657 :         off = str->len;
     157       75657 :         memcpy(str->buf + off, s, len);
     158       75657 :         str->len += len;
     159       75657 :         str->buf[str->len++] = '\0';
     160             : 
     161             :         /* new node */
     162       75657 :         node_child = new(struct strbuf_node, 1);
     163       75657 :         if (!node_child)
     164           0 :                 return -ENOMEM;
     165       75657 :         *node_child = (struct strbuf_node) {
     166             :                 .value_off = off,
     167             :                 .value_len = len,
     168             :         };
     169             : 
     170             :         /* extend array, add new entry, sort for bisection */
     171       75657 :         child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
     172       75657 :         if (!child) {
     173           0 :                 free(node_child);
     174           0 :                 return -ENOMEM;
     175             :         }
     176             : 
     177       75657 :         str->nodes_count++;
     178             : 
     179       75657 :         node->children = child;
     180       75657 :         bubbleinsert(node, c, node_child);
     181             : 
     182       75657 :         return off;
     183             : }

Generated by: LCOV version 1.14