diff options
Diffstat (limited to 'src/fs/fs_sharetree.c')
-rw-r--r-- | src/fs/fs_sharetree.c | 452 |
1 files changed, 452 insertions, 0 deletions
diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c new file mode 100644 index 0000000..c929428 --- /dev/null +++ b/src/fs/fs_sharetree.c @@ -0,0 +1,452 @@ +/* + This file is part of GNUnet + (C) 2005-2012 Christian Grothoff (and other contributing authors) + + GNUnet is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + GNUnet is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNUnet; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ + +/** + * @file fs/fs_sharetree.c + * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree + * @author LRN + * @author Christian Grothoff + */ +#include "platform.h" +#include "gnunet_fs_service.h" +#include "gnunet_scheduler_lib.h" +#include <pthread.h> + + +/** + * Entry for each unique keyword to track how often + * it occured. Contains the keyword and the counter. + */ +struct KeywordCounter +{ + + /** + * This is a doubly-linked list + */ + struct KeywordCounter *prev; + + /** + * This is a doubly-linked list + */ + struct KeywordCounter *next; + + /** + * Keyword that was found. + */ + const char *value; + + /** + * How many files have this keyword? + */ + unsigned int count; + +}; + + +/** + * Aggregate information we keep for meta data in each directory. + */ +struct MetaCounter +{ + + /** + * This is a doubly-linked list + */ + struct MetaCounter *prev; + + /** + * This is a doubly-linked list + */ + struct MetaCounter *next; + + /** + * Name of the plugin that provided that piece of metadata + */ + const char *plugin_name; + + /** + * MIME-type of the metadata itself + */ + const char *data_mime_type; + + /** + * The actual meta data. + */ + const char *data; + + /** + * Number of bytes in 'data'. + */ + size_t data_size; + + /** + * Type of the data + */ + enum EXTRACTOR_MetaType type; + + /** + * Format of the data + */ + enum EXTRACTOR_MetaFormat format; + + /** + * How many files have meta entries matching this value? + * (type and format do not have to match). + */ + unsigned int count; + +}; + + +/** + * A structure that forms a singly-linked list that serves as a stack + * for metadata-processing function. + */ +struct TrimContext +{ + + /** + * Map from the hash over the keyword to an 'struct KeywordCounter *' + * counter that says how often this keyword was + * encountered in the current directory. + */ + struct GNUNET_CONTAINER_MultiHashMap *keywordcounter; + + /** + * Map from the hash over the metadata to an 'struct MetaCounter *' + * counter that says how often this metadata was + * encountered in the current directory. + */ + struct GNUNET_CONTAINER_MultiHashMap *metacounter; + + /** + * Position we are currently manipulating. + */ + struct GNUNET_FS_ShareTreeItem *pos; + + /** + * Number of times an item has to be found to be moved to the parent. + */ + unsigned int move_threshold; + +}; + + +/** + * Add the given keyword to the keyword statistics tracker. + * + * @param cls the multihashmap we store the keyword counters in + * @param keyword the keyword to count + * @param is_mandatory ignored + * @return always GNUNET_OK + */ +static int +add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory) +{ + struct GNUNET_CONTAINER_MultiHashMap *mcm = cls; + struct KeywordCounter *cnt; + GNUNET_HashCode hc; + size_t klen; + + klen = strlen (keyword) + 1; + GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); + cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc); + if (cnt == NULL) + { + cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen); + cnt->value = (const char *) &cnt[1]; + memcpy (&cnt[1], keyword, klen); + GNUNET_assert (GNUNET_OK == + GNUNET_CONTAINER_multihashmap_put (mcm, + &hc, cnt, + GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY)); + } + cnt->count++; + return GNUNET_OK; +} + + +/** + * Function called on each meta data item. Increments the + * respective counter. + * + * @param cls the container multihashmap to update + * @param plugin_name name of the plugin that produced this value; + * special values can be used (i.e. '<zlib>' for zlib being + * used in the main libextractor library and yielding + * meta data). + * @param type libextractor-type describing the meta data + * @param format basic format information about data + * @param data_mime_type mime-type of data (not of the original file); + * can be NULL (if mime-type is not known) + * @param data actual meta-data found + * @param data_len number of bytes in data + * @return GNUNET_OK to continue extracting / iterating + */ +static int +add_to_meta_counter (void *cls, const char *plugin_name, + enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, const char *data, size_t data_len) +{ + struct GNUNET_CONTAINER_MultiHashMap *map = cls; + GNUNET_HashCode key; + struct MetaCounter *cnt; + + GNUNET_CRYPTO_hash (data, data_len, &key); + cnt = GNUNET_CONTAINER_multihashmap_get (map, &key); + if (cnt == NULL) + { + cnt = GNUNET_malloc (sizeof (struct MetaCounter)); + cnt->data = data; + cnt->data_size = data_len; + cnt->plugin_name = plugin_name; + cnt->type = type; + cnt->format = format; + cnt->data_mime_type = data_mime_type; + GNUNET_assert (GNUNET_OK == + GNUNET_CONTAINER_multihashmap_put (map, + &key, cnt, + GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY)); + } + cnt->count++; + return 0; +} + + +/** + * Remove keywords above the threshold. + * + * @param cls the 'struct TrimContext' with the pos to remove the keywords from + * @param keyword the keyword to check + * @param is_mandatory ignored + * @return always GNUNET_OK + */ +static int +remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory) +{ + struct TrimContext *tc = cls; + struct KeywordCounter *counter; + GNUNET_HashCode hc; + size_t klen; + + klen = strlen (keyword) + 1; + GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); + counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc); + GNUNET_assert (NULL != counter); + if (counter->count < tc->move_threshold) + return GNUNET_OK; + GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri, + counter->value); + return GNUNET_OK; +} + + +/** + * Move "frequent" keywords over to the target ksk uri, free the + * counters. + * + * @param cls the 'struct TrimContext' + * @param key key of the entry + * @param value the 'struct KeywordCounter' + * @return GNUNET_YES (always) + */ +static int +migrate_and_drop_keywords (void *cls, const GNUNET_HashCode * key, void *value) +{ + struct TrimContext *tc = cls; + struct KeywordCounter *counter = value; + + if (counter->count >= tc->move_threshold) + { + if (NULL == tc->pos->ksk_uri) + tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, &counter->value); + else + GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, GNUNET_NO); + } + GNUNET_assert (GNUNET_YES == + GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter, + key, + counter)); + GNUNET_free (counter); + return GNUNET_YES; +} + + +/** + * Copy "frequent" metadata items over to the + * target metadata container, free the counters. + * + * @param cls the 'struct TrimContext' + * @param key key of the entry + * @param value the 'struct KeywordCounter' + * @return GNUNET_YES (always) + */ +static int +migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value) +{ + struct TrimContext *tc = cls; + struct MetaCounter *counter = value; + + if (counter->count >= tc->move_threshold) + { + if (NULL == tc->pos->meta) + tc->pos->meta = GNUNET_CONTAINER_meta_data_create (); + GNUNET_CONTAINER_meta_data_insert (tc->pos->meta, + counter->plugin_name, + counter->type, + counter->format, + counter->data_mime_type, counter->data, + counter->data_size); + } + GNUNET_assert (GNUNET_YES == + GNUNET_CONTAINER_multihashmap_remove (tc->metacounter, + key, + counter)); + GNUNET_free (counter); + return GNUNET_YES; +} + + +/** + * Process a share item tree, moving frequent keywords up and + * copying frequent metadata up. + * + * @param tc trim context with hash maps to use + * @param tree tree to trim + */ +static void +share_tree_trim (struct TrimContext *tc, + struct GNUNET_FS_ShareTreeItem *tree) +{ + struct GNUNET_FS_ShareTreeItem *pos; + unsigned int num_children; + + /* first, trim all children */ + num_children = 0; + for (pos = tree->children_head; NULL != pos; pos = pos->next) + { + share_tree_trim (tc, pos); + num_children++; + } + + /* consider adding filename to directory meta data */ + if (tree->is_directory == GNUNET_YES) + { + const char *user = getenv ("USER"); + if ( (user == NULL) || + (0 != strncasecmp (user, tree->short_filename, strlen(user)))) + { + /* only use filename if it doesn't match $USER */ + if (NULL == tree->meta) + tree->meta = GNUNET_CONTAINER_meta_data_create (); + GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>", + EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", tree->short_filename, + strlen (tree->short_filename) + 1); + } + } + + if (1 >= num_children) + return; /* nothing to trim */ + + /* now, count keywords and meta data in children */ + for (pos = tree->children_head; NULL != pos; pos = pos->next) + { + if (NULL != pos->meta) + GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter, tc->metacounter); + if (NULL != pos->ksk_uri) + GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, tc->keywordcounter); + } + + /* calculate threshold for moving keywords / meta data */ + tc->move_threshold = 1 + (num_children / 2); + + /* remove high-frequency keywords from children */ + for (pos = tree->children_head; NULL != pos; pos = pos->next) + { + tc->pos = pos; + if (NULL != pos->ksk_uri) + { + struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri); + GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy, &remove_high_frequency_keywords, tc); + GNUNET_FS_uri_destroy (ksk_uri_copy); + } + } + + /* add high-frequency meta data and keywords to parent */ + tc->pos = tree; + GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter, + &migrate_and_drop_keywords, + tc); + GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter, + &migrate_and_drop_metadata, + tc); +} + + +/** + * Process a share item tree, moving frequent keywords up and + * copying frequent metadata up. + * + * @param toplevel toplevel directory in the tree, returned by the scanner + */ +void +GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel) +{ + struct TrimContext tc; + + if (toplevel == NULL) + return; + tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); + tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024); + share_tree_trim (&tc, toplevel); + GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter); + GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter); +} + + +/** + * Release memory of a share item tree. + * + * @param toplevel toplevel of the tree to be freed + */ +void +GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel) +{ + struct GNUNET_FS_ShareTreeItem *pos; + + while (NULL != (pos = toplevel->children_head)) + GNUNET_FS_share_tree_free (pos); + if (NULL != toplevel->parent) + GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head, + toplevel->parent->children_tail, + toplevel); + if (NULL != toplevel->meta) + GNUNET_CONTAINER_meta_data_destroy (toplevel->meta); + if (NULL != toplevel->ksk_uri) + GNUNET_FS_uri_destroy (toplevel->ksk_uri); + GNUNET_free_non_null (toplevel->filename); + GNUNET_free_non_null (toplevel->short_filename); + GNUNET_free (toplevel); +} + +/* end fs_sharetree.c */ + |