/*
This file is part of GNUnet
Copyright (C) 2005-2012 GNUnet e.V.
GNUnet is free software: you can redistribute it and/or modify it
under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
GNUnet is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
/**
* @file fs/fs_sharetree.c
* @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
* @author LRN
* @author Christian Grothoff
*/
#include "platform.h"
#include "gnunet_fs_service.h"
#include "gnunet_scheduler_lib.h"
#include
/**
* Entry for each unique keyword to track how often
* it occured. Contains the keyword and the counter.
*/
struct KeywordCounter
{
/**
* This is a doubly-linked list
*/
struct KeywordCounter *prev;
/**
* This is a doubly-linked list
*/
struct KeywordCounter *next;
/**
* Keyword that was found.
*/
const char *value;
/**
* How many files have this keyword?
*/
unsigned int count;
};
/**
* Aggregate information we keep for meta data in each directory.
*/
struct MetaCounter
{
/**
* This is a doubly-linked list
*/
struct MetaCounter *prev;
/**
* This is a doubly-linked list
*/
struct MetaCounter *next;
/**
* Name of the plugin that provided that piece of metadata
*/
const char *plugin_name;
/**
* MIME-type of the metadata itself
*/
const char *data_mime_type;
/**
* The actual meta data.
*/
const char *data;
/**
* Number of bytes in 'data'.
*/
size_t data_size;
/**
* Type of the data
*/
enum EXTRACTOR_MetaType type;
/**
* Format of the data
*/
enum EXTRACTOR_MetaFormat format;
/**
* How many files have meta entries matching this value?
* (type and format do not have to match).
*/
unsigned int count;
};
/**
* A structure that forms a singly-linked list that serves as a stack
* for metadata-processing function.
*/
struct TrimContext
{
/**
* Map from the hash over the keyword to an 'struct KeywordCounter *'
* counter that says how often this keyword was
* encountered in the current directory.
*/
struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
/**
* Map from the hash over the metadata to an 'struct MetaCounter *'
* counter that says how often this metadata was
* encountered in the current directory.
*/
struct GNUNET_CONTAINER_MultiHashMap *metacounter;
/**
* Position we are currently manipulating.
*/
struct GNUNET_FS_ShareTreeItem *pos;
/**
* Number of times an item has to be found to be moved to the parent.
*/
unsigned int move_threshold;
};
/**
* Add the given keyword to the keyword statistics tracker.
*
* @param cls the multihashmap we store the keyword counters in
* @param keyword the keyword to count
* @param is_mandatory ignored
* @return always GNUNET_OK
*/
static int
add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
{
struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
struct KeywordCounter *cnt;
struct GNUNET_HashCode hc;
size_t klen;
klen = strlen (keyword) + 1;
GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
if (cnt == NULL)
{
cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
cnt->value = (const char *) &cnt[1];
GNUNET_memcpy (&cnt[1], keyword, klen);
GNUNET_assert (GNUNET_OK ==
GNUNET_CONTAINER_multihashmap_put (mcm,
&hc, cnt,
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
}
cnt->count++;
return GNUNET_OK;
}
/**
* Function called on each meta data item. Increments the
* respective counter.
*
* @param cls the container multihashmap to update
* @param plugin_name name of the plugin that produced this value;
* special values can be used (i.e. '<zlib>' for zlib being
* used in the main libextractor library and yielding
* meta data).
* @param type libextractor-type describing the meta data
* @param format basic format information about data
* @param data_mime_type mime-type of data (not of the original file);
* can be NULL (if mime-type is not known)
* @param data actual meta-data found
* @param data_len number of bytes in data
* @return 0 to continue extracting / iterating
*/
static int
add_to_meta_counter (void *cls, const char *plugin_name,
enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
const char *data_mime_type, const char *data, size_t data_len)
{
struct GNUNET_CONTAINER_MultiHashMap *map = cls;
struct GNUNET_HashCode key;
struct MetaCounter *cnt;
GNUNET_CRYPTO_hash (data, data_len, &key);
cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
if (NULL == cnt)
{
cnt = GNUNET_new (struct MetaCounter);
cnt->data = data;
cnt->data_size = data_len;
cnt->plugin_name = plugin_name;
cnt->type = type;
cnt->format = format;
cnt->data_mime_type = data_mime_type;
GNUNET_assert (GNUNET_OK ==
GNUNET_CONTAINER_multihashmap_put (map,
&key, cnt,
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
}
cnt->count++;
return 0;
}
/**
* Remove keywords above the threshold.
*
* @param cls the 'struct TrimContext' with the pos to remove the keywords from
* @param keyword the keyword to check
* @param is_mandatory ignored
* @return always GNUNET_OK
*/
static int
remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory)
{
struct TrimContext *tc = cls;
struct KeywordCounter *counter;
struct GNUNET_HashCode hc;
size_t klen;
klen = strlen (keyword) + 1;
GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
GNUNET_assert (NULL != counter);
if (counter->count < tc->move_threshold)
return GNUNET_OK;
GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
counter->value);
return GNUNET_OK;
}
/**
* Move "frequent" keywords over to the target ksk uri, free the
* counters.
*
* @param cls the 'struct TrimContext'
* @param key key of the entry
* @param value the 'struct KeywordCounter'
* @return GNUNET_YES (always)
*/
static int
migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode * key, void *value)
{
struct TrimContext *tc = cls;
struct KeywordCounter *counter = value;
if (counter->count >= tc->move_threshold)
{
if (NULL == tc->pos->ksk_uri)
tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, &counter->value);
else
GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, GNUNET_NO);
}
GNUNET_assert (GNUNET_YES ==
GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
key,
counter));
GNUNET_free (counter);
return GNUNET_YES;
}
/**
* Copy "frequent" metadata items over to the
* target metadata container, free the counters.
*
* @param cls the 'struct TrimContext'
* @param key key of the entry
* @param value the 'struct KeywordCounter'
* @return GNUNET_YES (always)
*/
static int
migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode * key, void *value)
{
struct TrimContext *tc = cls;
struct MetaCounter *counter = value;
if (counter->count >= tc->move_threshold)
{
if (NULL == tc->pos->meta)
tc->pos->meta = GNUNET_CONTAINER_meta_data_create ();
GNUNET_CONTAINER_meta_data_insert (tc->pos->meta,
counter->plugin_name,
counter->type,
counter->format,
counter->data_mime_type, counter->data,
counter->data_size);
}
GNUNET_assert (GNUNET_YES ==
GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
key,
counter));
GNUNET_free (counter);
return GNUNET_YES;
}
/**
* Process a share item tree, moving frequent keywords up and
* copying frequent metadata up.
*
* @param tc trim context with hash maps to use
* @param tree tree to trim
*/
static void
share_tree_trim (struct TrimContext *tc,
struct GNUNET_FS_ShareTreeItem *tree)
{
struct GNUNET_FS_ShareTreeItem *pos;
unsigned int num_children;
/* first, trim all children */
num_children = 0;
for (pos = tree->children_head; NULL != pos; pos = pos->next)
{
share_tree_trim (tc, pos);
num_children++;
}
/* consider adding filename to directory meta data */
if (tree->is_directory == GNUNET_YES)
{
const char *user = getenv ("USER");
if ( (user == NULL) ||
(0 != strncasecmp (user, tree->short_filename, strlen(user))))
{
/* only use filename if it doesn't match $USER */
if (NULL == tree->meta)
tree->meta = GNUNET_CONTAINER_meta_data_create ();
GNUNET_CONTAINER_meta_data_insert (tree->meta, "",
EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
EXTRACTOR_METAFORMAT_UTF8,
"text/plain", tree->short_filename,
strlen (tree->short_filename) + 1);
}
}
if (1 >= num_children)
return; /* nothing to trim */
/* now, count keywords and meta data in children */
for (pos = tree->children_head; NULL != pos; pos = pos->next)
{
if (NULL != pos->meta)
GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter, tc->metacounter);
if (NULL != pos->ksk_uri)
GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, tc->keywordcounter);
}
/* calculate threshold for moving keywords / meta data */
tc->move_threshold = 1 + (num_children / 2);
/* remove high-frequency keywords from children */
for (pos = tree->children_head; NULL != pos; pos = pos->next)
{
tc->pos = pos;
if (NULL != pos->ksk_uri)
{
struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy, &remove_high_frequency_keywords, tc);
GNUNET_FS_uri_destroy (ksk_uri_copy);
}
}
/* add high-frequency meta data and keywords to parent */
tc->pos = tree;
GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
&migrate_and_drop_keywords,
tc);
GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
&migrate_and_drop_metadata,
tc);
}
/**
* Process a share item tree, moving frequent keywords up and
* copying frequent metadata up.
*
* @param toplevel toplevel directory in the tree, returned by the scanner
*/
void
GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
{
struct TrimContext tc;
if (toplevel == NULL)
return;
tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
share_tree_trim (&tc, toplevel);
GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
}
/**
* Release memory of a share item tree.
*
* @param toplevel toplevel of the tree to be freed
*/
void
GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
{
struct GNUNET_FS_ShareTreeItem *pos;
while (NULL != (pos = toplevel->children_head))
GNUNET_FS_share_tree_free (pos);
if (NULL != toplevel->parent)
GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
toplevel->parent->children_tail,
toplevel);
if (NULL != toplevel->meta)
GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
if (NULL != toplevel->ksk_uri)
GNUNET_FS_uri_destroy (toplevel->ksk_uri);
GNUNET_free_non_null (toplevel->filename);
GNUNET_free_non_null (toplevel->short_filename);
GNUNET_free (toplevel);
}
/* end fs_sharetree.c */