From b6f33604bd2809d9845e39701089f5e428b73c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Staffan=20Gim=C3=A5ker?= Date: Thu, 16 Feb 2012 15:56:19 +0100 Subject: Compact the trie/mtrie node tables where possible, to reduce memory usage. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Staffan Gimåker --- src/mtrie.cpp | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/trie.cpp | 91 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 215 insertions(+), 10 deletions(-) diff --git a/src/mtrie.cpp b/src/mtrie.cpp index fe9c3d4..fd57f00 100644 --- a/src/mtrie.cpp +++ b/src/mtrie.cpp @@ -72,10 +72,8 @@ bool xs::mtrie_t::add_helper (unsigned char *prefix_, size_t size_, // We are at the node corresponding to the prefix. We are done. if (!size_) { bool result = !pipes; - if (!pipes) { - pipes = new (std::nothrow) pipes_t; - alloc_assert (pipes); - } + if (!pipes) + pipes = new pipes_t; pipes->insert (pipe_); return result; } @@ -187,28 +185,82 @@ void xs::mtrie_t::rm_helper (pipe_t *pipe_, unsigned char **buff_, buffsize_++; next.node->rm_helper (pipe_, buff_, buffsize_, maxbuffsize_, func_, arg_); + + // Prune the node if it was made redundant by the removal if (next.node->is_redundant ()) { delete next.node; next.node = 0; count = 0; --live_nodes; + xs_assert (live_nodes == 0); } return; } // If there are multiple subnodes. + // + // New min non-null character in the node table after the removal + unsigned char new_min = min; + // New max non-null character in the node table after the removal + unsigned char new_max = min + count - 1; for (unsigned short c = 0; c != count; c++) { (*buff_) [buffsize_] = min + c; if (next.table [c]) { next.table [c]->rm_helper (pipe_, buff_, buffsize_ + 1, maxbuffsize_, func_, arg_); + + // Prune redudant nodes from the mtrie if (next.table [c]->is_redundant ()) { delete next.table [c]; next.table [c] = 0; + + xs_assert (live_nodes > 0); --live_nodes; } + else { + if (c + min > new_min) + new_min = c + min; + if (c + min < new_max) + new_max = c + min; + } } } + + xs_assert (count > 1); + + // Compact the node table if possible + if (live_nodes == 1) { + // If there's only one live node in the table we can + // switch to using the more compact single-node + // representation + xs_assert (new_min == new_max); + xs_assert (new_min >= min && new_min < min + count); + mtrie_t *node = next.table [new_min - min]; + xs_assert (node); + free (next.table); + next.node = node; + count = 1; + min = new_min; + } + else if (live_nodes > 1 && (new_min > min || new_max < min + count - 1)) { + xs_assert (new_max - new_min + 1 > 1); + + mtrie_t **old_table = next.table; + xs_assert (new_min > min || new_max < min + count - 1); + xs_assert (new_min >= min); + xs_assert (new_max <= min + count - 1); + xs_assert (new_max - new_min + 1 < count); + + count = new_max - new_min + 1; + next.table = (mtrie_t**) malloc (sizeof (mtrie_t*) * count); + xs_assert (next.table); + + memmove (next.table, old_table + (new_min - min), + sizeof (mtrie_t*) * count); + free (old_table); + + min = new_min; + } } bool xs::mtrie_t::rm (unsigned char *prefix_, size_t size_, pipe_t *pipe_) @@ -245,13 +297,83 @@ bool xs::mtrie_t::rm_helper (unsigned char *prefix_, size_t size_, if (next_node->is_redundant ()) { delete next_node; + xs_assert (count > 0); + if (count == 1) { next.node = 0; count = 0; + --live_nodes; + xs_assert (live_nodes == 0); } - else + else { next.table [c - min] = 0; - --live_nodes; + xs_assert (live_nodes > 1); + --live_nodes; + + // Compact the table if possible + if (live_nodes == 1) { + // If there's only one live node in the table we can + // switch to using the more compact single-node + // representation + mtrie_t *node = 0; + for (unsigned short i = 0; i < count; ++i) { + if (next.table [i]) { + node = next.table [i]; + min = i + min; + break; + } + } + + xs_assert (node); + free (next.table); + next.node = node; + count = 1; + } + else if (c == min) { + // We can compact the table "from the left" + unsigned char new_min = min; + for (unsigned short i = 1; i < count; ++i) { + if (next.table [i]) { + new_min = i + min; + break; + } + } + xs_assert (new_min != min); + + mtrie_t **old_table = next.table; + xs_assert (new_min > min); + xs_assert (count > new_min - min); + + count = count - (new_min - min); + next.table = (mtrie_t**) malloc (sizeof (mtrie_t*) * count); + xs_assert (next.table); + + memmove (next.table, old_table + (new_min - min), + sizeof (mtrie_t*) * count); + free (old_table); + + min = new_min; + } + else if (c == min + count - 1) { + // We can compact the table "from the right" + unsigned short new_count = count; + for (unsigned short i = 1; i < count; ++i) { + if (next.table [count - 1 - i]) { + new_count = count - i; + break; + } + } + xs_assert (new_count != count); + count = new_count; + + mtrie_t **old_table = next.table; + next.table = (mtrie_t**) malloc (sizeof (mtrie_t*) * count); + xs_assert (next.table); + + memmove (next.table, old_table, sizeof (mtrie_t*) * count); + free (old_table); + } + } } return ret; diff --git a/src/trie.cpp b/src/trie.cpp index ea34787..7bb13e3 100644 --- a/src/trie.cpp +++ b/src/trie.cpp @@ -117,16 +117,18 @@ bool xs::trie_t::add (unsigned char *prefix_, size_t size_) if (count == 1) { if (!next.node) { next.node = new (std::nothrow) trie_t; - ++live_nodes; xs_assert (next.node); + ++live_nodes; + xs_assert (live_nodes == 1); } return next.node->add (prefix_ + 1, size_ - 1); } else { if (!next.table [c - min]) { next.table [c - min] = new (std::nothrow) trie_t; - ++live_nodes; xs_assert (next.table [c - min]); + ++live_nodes; + xs_assert (live_nodes > 1); } return next.table [c - min]->add (prefix_ + 1, size_ - 1); } @@ -155,15 +157,96 @@ bool xs::trie_t::rm (unsigned char *prefix_, size_t size_) bool ret = next_node->rm (prefix_ + 1, size_ - 1); + // Prune redundant nodes if (next_node->is_redundant ()) { delete next_node; + xs_assert (count > 0); + if (count == 1) { + // The just pruned node is was the only live node next.node = 0; count = 0; + --live_nodes; + xs_assert (live_nodes == 0); } - else + else { next.table [c - min] = 0; - --live_nodes; + xs_assert (live_nodes > 1); + --live_nodes; + + // Compact the table if possible + if (live_nodes == 1) { + // We can switch to using the more compact single-node + // representation since the table only contains one live node + trie_t *node = 0; + // Since we always compact the table the pruned node must + // either be the left-most or right-most ptr in the node + // table + if (c == min) { + // The pruned node is the left-most node ptr in the + // node table => keep the right-most node + node = next.table [count - 1]; + } + else if (c == min + count - 1) { + // The pruned node is the right-most node ptr in the + // node table => keep the left-most node + node = next.table [0]; + } + + xs_assert (node); + free (next.table); + next.node = node; + count = 1; + } + else if (c == min) { + // We can compact the table "from the left". + // Find the left-most non-null node ptr, which we'll use as + // our new min + unsigned char new_min = min; + for (unsigned short i = 1; i < count; ++i) { + if (next.table [i]) { + new_min = i + min; + break; + } + } + xs_assert (new_min != min); + + trie_t **old_table = next.table; + xs_assert (new_min > min); + xs_assert (count > new_min - min); + + count = count - (new_min - min); + next.table = (trie_t**) malloc (sizeof (trie_t*) * count); + xs_assert (next.table); + + memmove (next.table, old_table + (new_min - min), + sizeof (trie_t*) * count); + free (old_table); + + min = new_min; + } + else if (c == min + count - 1) { + // We can compact the table "from the right". + // Find the right-most non-null node ptr, which we'll use to + // determine the new table size + unsigned short new_count = count; + for (unsigned short i = 1; i < count; ++i) { + if (next.table [count - 1 - i]) { + new_count = count - i; + break; + } + } + xs_assert (new_count != count); + count = new_count; + + trie_t **old_table = next.table; + next.table = (trie_t**) malloc (sizeof (trie_t*) * count); + xs_assert (next.table); + + memmove (next.table, old_table, sizeof (trie_t*) * count); + free (old_table); + } + } } return ret; -- cgit v1.2.3