aboutsummaryrefslogtreecommitdiff
path: root/src/util/container_meta_data.c
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2010-01-03 21:42:52 +0000
committerChristian Grothoff <christian@grothoff.org>2010-01-03 21:42:52 +0000
commit3c5249af8087b0b1a4f131a3e19a9759639ab5db (patch)
tree697ff622324a7d259dc59da63205f398661a56fa /src/util/container_meta_data.c
parentebd1325d27e2062f3fb8e92766178ab0660c2dac (diff)
adapting main code base to work with libextractor 0.6 API; testcases will follow later
Diffstat (limited to 'src/util/container_meta_data.c')
-rw-r--r--src/util/container_meta_data.c1214
1 files changed, 835 insertions, 379 deletions
diff --git a/src/util/container_meta_data.c b/src/util/container_meta_data.c
index 912ac2684d..e4d8737c89 100644
--- a/src/util/container_meta_data.c
+++ b/src/util/container_meta_data.c
@@ -1,6 +1,6 @@
/*
This file is part of GNUnet.
- (C) 2003, 2004, 2005, 2006, 2008, 2009 Christian Grothoff (and other contributing authors)
+ (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010 Christian Grothoff (and other contributing authors)
GNUnet is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -32,12 +32,46 @@
#include <extractor.h>
#include <zlib.h>
-#define EXTRA_CHECKS ALLOW_EXTRA_CHECKS
-
-struct Item
+/**
+ * Meta data item.
+ */
+struct MetaItem
{
- EXTRACTOR_KeywordType type;
+ /**
+ * This is a linked list.
+ */
+ struct MetaItem *next;
+
+ /**
+ * Name of the extracting plugin.
+ */
+ char *plugin_name;
+
+ /**
+ * Mime-type of data.
+ */
+ char *mime_type;
+
+ /**
+ * The actual meta data.
+ */
char *data;
+
+ /**
+ * Number of bytes in 'data'.
+ */
+ size_t data_size;
+
+ /**
+ * Type of the meta data.
+ */
+ enum EXTRACTOR_MetaType type;
+
+ /**
+ * Format of the meta data.
+ */
+ enum EXTRACTOR_MetaFormat format;
+
};
/**
@@ -45,86 +79,224 @@ struct Item
*/
struct GNUNET_CONTAINER_MetaData
{
- uint32_t itemCount;
- struct Item *items;
+ /**
+ * Linked list of the meta data items.
+ */
+ struct MetaItem *items;
+
+ /**
+ * Complete serialized and compressed buffer of the items.
+ * NULL if we have not computed that buffer yet.
+ */
+ char *sbuf;
+
+ /**
+ * Number of bytes in 'sbuf'. 0 if the buffer is stale.
+ */
+ size_t sbuf_size;
+
+ /**
+ * Number of items in the linked list.
+ */
+ unsigned int item_count;
+
};
+
/**
* Create a fresh struct CONTAINER_MetaData token.
+ *
+ * @return empty meta-data container
*/
struct GNUNET_CONTAINER_MetaData *
GNUNET_CONTAINER_meta_data_create ()
{
- struct GNUNET_CONTAINER_MetaData *ret;
- ret = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData));
- ret->items = NULL;
- ret->itemCount = 0;
- return ret;
+ return GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData));
}
+
+/**
+ * Free a meta data item together with the buffers it owns:
+ * the plugin name, the mime type and the data block (each of
+ * which is released only if it is non-NULL). The item is not
+ * unlinked from any list here; the caller has to fix up the
+ * 'next' pointers before calling this function.
+ *
+ * @param item item to free; is dereferenced and thus must not be NULL
+ */
+static void
+meta_item_free (struct MetaItem *item)
+{
+ GNUNET_free_non_null (item->plugin_name);
+ GNUNET_free_non_null (item->mime_type);
+ GNUNET_free_non_null (item->data);
+ GNUNET_free (item);
+}
+
+
+/**
+ * The meta data has changed, invalidate its serialization
+ * buffer. If a cached buffer exists it is freed, 'sbuf' is
+ * reset to NULL and 'sbuf_size' to 0; if no buffer is cached
+ * this function does nothing.
+ *
+ * @param md meta data that changed
+ */
+static void
+invalidate_sbuf (struct GNUNET_CONTAINER_MetaData *md)
+{
+ if (md->sbuf == NULL)
+ return;
+ GNUNET_free (md->sbuf);
+ md->sbuf = NULL;
+ md->sbuf_size = 0;
+}
+
+
/**
* Free meta data.
+ *
+ * @param md what to free
*/
void
GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md)
{
- int i;
+ struct MetaItem *item;
if (md == NULL)
return;
- for (i = 0; i < md->itemCount; i++)
- GNUNET_free (md->items[i].data);
- GNUNET_array_grow (md->items, md->itemCount, 0);
+ while (NULL != (item = md->items))
+ {
+ md->items = item->next;
+ meta_item_free (item);
+ }
+ GNUNET_free_non_null (md->sbuf);
GNUNET_free (md);
}
+
/**
- * Add the current time as the publication date
- * to the meta-data.
+ * Test if two MDs are equal. We consider them equal if
+ * the meta types, formats and content match (we do not
+ * include the mime types and plugin names in this
+ * consideration).
+ *
+ * @param md1 first value to check
+ * @param md2 other value to check
+ * @return GNUNET_YES if they are equal
*/
-void
-GNUNET_CONTAINER_meta_data_add_publication_date (struct
- GNUNET_CONTAINER_MetaData
- *md)
+int
+GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData
+ *md1,
+ const struct GNUNET_CONTAINER_MetaData
+ *md2)
{
- char *dat;
- struct GNUNET_TIME_Absolute t;
+ struct MetaItem *i;
+ struct MetaItem *j;
+ int found;
- t = GNUNET_TIME_absolute_get ();
- GNUNET_CONTAINER_meta_data_delete (md, EXTRACTOR_PUBLICATION_DATE, NULL);
- dat = GNUNET_STRINGS_absolute_time_to_string (t);
- GNUNET_CONTAINER_meta_data_insert (md, EXTRACTOR_PUBLICATION_DATE, dat);
- GNUNET_free (dat);
+ if (md1 == md2)
+ return GNUNET_YES;
+ if (md1->item_count != md2->item_count)
+ return GNUNET_NO;
+
+ i = md1->items;
+ while (NULL != i)
+ {
+ found = GNUNET_NO;
+ j = md2->items;
+ while (NULL != j)
+ {
+ if ( (i->type == j->type) &&
+ (i->format == j->format) &&
+ (i->data_size == j->data_size) &&
+ (0 == memcmp (i->data,
+ j->data,
+ i->data_size)))
+ {
+ found = GNUNET_YES;
+ break;
+ }
+ j = j->next;
+ }
+ if (found == GNUNET_NO)
+ return GNUNET_NO;
+ i = i->next;
+ }
+ return GNUNET_YES;
}
+
/**
- * Extend metadata.
+ * Extend metadata. Note that the list of meta data items is
+ * sorted by size (largest first).
+ *
+ * @param md metadata to extend
+ * @param plugin_name name of the plugin that produced this value;
+ * special values can be used (e.g. '<zlib>' for zlib being
+ * used in the main libextractor library and yielding
+ * meta data).
+ * @param type libextractor-type describing the meta data
+ * @param format basic format information about data
+ * @param data_mime_type mime-type of data (not of the original file);
+ * can be NULL (if mime-type is not known)
+ * @param data actual meta-data found
+ * @param data_len number of bytes in data
* @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists
+ * data_mime_type and plugin_name are not considered for "exists" checks
*/
int
GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md,
- EXTRACTOR_KeywordType type,
- const char *data)
+ const char *plugin_name,
+ enum EXTRACTOR_MetaType type,
+ enum EXTRACTOR_MetaFormat format,
+ const char *data_mime_type,
+ const char *data,
+ size_t data_len)
{
- uint32_t idx;
+ struct MetaItem *prev;
+ struct MetaItem *pos;
+ struct MetaItem *i;
char *p;
- GNUNET_assert (data != NULL);
- for (idx = 0; idx < md->itemCount; idx++)
+ prev = NULL;
+ pos = md->items;
+ while (NULL != pos)
{
- if ((md->items[idx].type == type) &&
- (0 == strcmp (md->items[idx].data, data)))
- return GNUNET_SYSERR;
+ if (pos->data_size < data_len)
+ break;
+ if ( (pos->type == type) &&
+ (pos->format == format) &&
+ (pos->data_size == data_len) &&
+ (0 == memcmp (pos->data,
+ data,
+ data_len)))
+ {
+ if ( (pos->mime_type == NULL) &&
+ (data_mime_type != NULL) )
+ {
+ pos->mime_type = GNUNET_strdup (data_mime_type);
+ invalidate_sbuf (md);
+ }
+ return GNUNET_SYSERR;
+ }
+ prev = pos;
+ pos = pos->next;
}
- idx = md->itemCount;
- GNUNET_array_grow (md->items, md->itemCount, md->itemCount + 1);
- md->items[idx].type = type;
- md->items[idx].data = p = GNUNET_strdup (data);
-
+ md->item_count++;
+ i = GNUNET_malloc (sizeof (struct MetaItem));
+ i->type = type;
+ i->format = format;
+ i->data_size = data_len;
+ i->next = pos;
+ if (prev == NULL)
+ md->items = i;
+ else
+ prev->next = i;
+ i->mime_type = (data_mime_type == NULL) ? NULL : GNUNET_strdup (data_mime_type);
+ i->plugin_name = (plugin_name == NULL) ? NULL : GNUNET_strdup (plugin_name);
+ i->data = GNUNET_malloc (data_len);
+ memcpy (i->data, data, data_len);
/* change OS native dir separators to unix '/' and others to '_' */
- if (type == EXTRACTOR_FILENAME)
+ if (type == EXTRACTOR_METATYPE_FILENAME)
{
- while (*p != '\0')
+ p = i->data;
+ while ( (*p != '\0') &&
+ (p < i->data + data_len) )
{
if (*p == DIR_SEPARATOR)
*p = '/';
@@ -133,10 +305,11 @@ GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md,
p++;
}
}
-
+ invalidate_sbuf (md);
return GNUNET_OK;
}
+
/**
* Remove an item.
*
@@ -144,36 +317,78 @@ GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md,
* @param type type of the item to remove
* @param data specific value to remove, NULL to remove all
* entries of the given type
+ * @param data_len number of bytes in data
* @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md
*/
int
GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md,
- EXTRACTOR_KeywordType type,
- const char *data)
+ enum EXTRACTOR_MetaType type,
+ const char *data,
+ size_t data_len)
{
- uint32_t idx;
- int ret = GNUNET_SYSERR;
- for (idx = 0; idx < md->itemCount; idx++)
+ struct MetaItem *pos;
+ struct MetaItem *prev;
+
+ prev = NULL;
+ pos = md->items;
+ while (NULL != pos)
{
- if ((md->items[idx].type == type) &&
- ((data == NULL) || (0 == strcmp (md->items[idx].data, data))))
- {
- GNUNET_free (md->items[idx].data);
- md->items[idx] = md->items[md->itemCount - 1];
- GNUNET_array_grow (md->items, md->itemCount, md->itemCount - 1);
- if (data == NULL)
- {
- ret = GNUNET_OK;
- continue;
- }
- return GNUNET_OK;
- }
+ if ( (pos->type == type) &&
+ ( (data == NULL) ||
+ ( (pos->data_size == data_len) &&
+ (0 == memcmp (pos->data,
+ data,
+ data_len))) ) )
+ {
+ if (prev == NULL)
+ md->items = pos->next;
+ else
+ prev->next = pos->next;
+ meta_item_free (pos);
+ md->item_count--;
+ invalidate_sbuf (md);
+ return GNUNET_OK;
+ }
+ prev = pos;
+ pos = pos->next;
}
- return ret;
+ return GNUNET_SYSERR;
}
+
/**
- * Iterate over MD entries, excluding thumbnails.
+ * Add the current time as the publication date
+ * to the meta-data.
+ *
+ * @param md metadata to modify
+ */
+void
+GNUNET_CONTAINER_meta_data_add_publication_date (struct
+ GNUNET_CONTAINER_MetaData
+ *md)
+{
+ char *dat;
+ struct GNUNET_TIME_Absolute t;
+
+ t = GNUNET_TIME_absolute_get ();
+ GNUNET_CONTAINER_meta_data_delete (md,
+ EXTRACTOR_METATYPE_PUBLICATION_DATE,
+ NULL,
+ 0);
+ dat = GNUNET_STRINGS_absolute_time_to_string (t);
+ GNUNET_CONTAINER_meta_data_insert (md,
+ "<gnunet>",
+ EXTRACTOR_METATYPE_PUBLICATION_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ dat,
+ strlen(dat)+1);
+ GNUNET_free (dat);
+}
+
+
+/**
+ * Iterate over MD entries.
*
* @param md metadata to inspect
* @param iter function to call on each entry
@@ -181,51 +396,71 @@ GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md,
* @return number of entries
*/
int
-GNUNET_CONTAINER_meta_data_get_contents (const struct
- GNUNET_CONTAINER_MetaData *md,
- GNUNET_CONTAINER_MetaDataProcessor
- iter, void *iter_cls)
+GNUNET_CONTAINER_meta_data_iterate (const struct
+ GNUNET_CONTAINER_MetaData *md,
+ EXTRACTOR_MetaDataProcessor
+ iter, void *iter_cls)
{
- uint32_t i;
- uint32_t sub;
+ struct MetaItem *pos;
- sub = 0;
- for (i = 0; i < md->itemCount; i++)
+ if (iter == NULL)
+ return md->item_count;
+ pos = md->items;
+ while (NULL != pos)
{
- if (!EXTRACTOR_isBinaryType (md->items[i].type))
- {
- if ((iter != NULL) &&
- (GNUNET_OK != iter (iter_cls,
- md->items[i].type, md->items[i].data)))
- return GNUNET_SYSERR;
- }
- else
- sub++;
+ if (0 != iter (iter_cls,
+ pos->plugin_name,
+ pos->type,
+ pos->format,
+ pos->mime_type,
+ pos->data,
+ pos->data_size))
+ return md->item_count;
+ pos = pos->next;
}
- return (int) (md->itemCount - sub);
+ return md->item_count;
}
+
/**
- * Iterate over MD entries
+ * Get the first MD entry of the given type. Caller
+ * is responsible for freeing the return value.
+ * Also, only meta data items that are strings (0-terminated)
+ * are returned by this function.
*
- * @return number of entries
+ * @param md metadata to inspect
+ * @param type type to look for
+ * @return NULL if no entry was found
*/
char *
GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData
- *md, EXTRACTOR_KeywordType type)
+ *md, enum EXTRACTOR_MetaType type)
{
- uint32_t i;
+ struct MetaItem *pos;
- for (i = 0; i < md->itemCount; i++)
- if (type == md->items[i].type)
- return GNUNET_strdup (md->items[i].data);
+ pos = md->items;
+ while (NULL != pos)
+ {
+ if ( (type == pos->type) &&
+ ( (pos->format == EXTRACTOR_METAFORMAT_UTF8) ||
+ (pos->format == EXTRACTOR_METAFORMAT_C_STRING) ) )
+ return GNUNET_strdup (pos->data);
+ pos = pos->next;
+ }
return NULL;
}
+
/**
- * Iterate over MD entries
+ * Get the first matching MD entry of the given types. Caller is
+ * responsible for freeing the return value. Also, only meta data
+ * items that are strings (0-terminated) are returned by this
+ * function.
*
- * @return number of entries
+ * @param md metadata to inspect
+ * @param ... -1-terminated list of types
+ * @return NULL if we do not have any such entry,
+ * otherwise client is responsible for freeing the value!
*/
char *
GNUNET_CONTAINER_meta_data_get_first_by_types (const struct
@@ -234,13 +469,13 @@ GNUNET_CONTAINER_meta_data_get_first_by_types (const struct
{
char *ret;
va_list args;
- EXTRACTOR_KeywordType type;
+ enum EXTRACTOR_MetaType type;
ret = NULL;
va_start (args, md);
while (1)
{
- type = va_arg (args, EXTRACTOR_KeywordType);
+ type = va_arg (args, enum EXTRACTOR_MetaType);
if (type == -1)
break;
ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type);
@@ -251,6 +486,7 @@ GNUNET_CONTAINER_meta_data_get_first_by_types (const struct
return ret;
}
+
/**
* Get a thumbnail from the meta-data (if present).
*
@@ -264,27 +500,33 @@ GNUNET_CONTAINER_meta_data_get_thumbnail (const struct
GNUNET_CONTAINER_MetaData * md,
unsigned char **thumb)
{
- char *encoded;
- int ret;
- size_t size;
+ struct MetaItem *pos;
+ struct MetaItem *match;
- encoded =
- GNUNET_CONTAINER_meta_data_get_by_type (md, EXTRACTOR_THUMBNAIL_DATA);
- if (encoded == NULL)
- return 0;
- if (strlen (encoded) == 0)
+ match = NULL;
+ pos = md->items;
+ while (NULL != pos)
{
- GNUNET_free (encoded);
- return 0; /* invalid */
+ if ( (0 == strncasecmp ("image/", pos->mime_type,
+ strlen("image/"))) &&
+ (pos->format == EXTRACTOR_METAFORMAT_BINARY) )
+ {
+ if (match == NULL)
+ match = pos;
+ else if ( (match->type != EXTRACTOR_METATYPE_THUMBNAIL) &&
+ (pos->type == EXTRACTOR_METATYPE_THUMBNAIL) )
+ match = pos;
+ }
+ pos = pos->next;
}
- *thumb = NULL;
- ret = EXTRACTOR_binaryDecode (encoded, thumb, &size);
- GNUNET_free (encoded);
- if (ret != 0)
+ if (match == NULL)
return 0;
- return size;
+ *thumb = GNUNET_malloc (match->data_size);
+ memcpy (*thumb, match->data, match->data_size);
+ return match->data_size;
}
+
/**
* Duplicate struct GNUNET_CONTAINER_MetaData.
*
@@ -295,18 +537,66 @@ struct GNUNET_CONTAINER_MetaData *
GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData
*md)
{
- uint32_t i;
struct GNUNET_CONTAINER_MetaData *ret;
+ struct MetaItem *pos;
if (md == NULL)
return NULL;
ret = GNUNET_CONTAINER_meta_data_create ();
- for (i = 0; i < md->itemCount; i++)
- GNUNET_CONTAINER_meta_data_insert (ret, md->items[i].type,
- md->items[i].data);
+ pos = md->items;
+ while (NULL != pos)
+ {
+ GNUNET_CONTAINER_meta_data_insert (ret,
+ pos->plugin_name,
+ pos->type,
+ pos->format,
+ pos->mime_type,
+ pos->data,
+ pos->data_size);
+ pos = pos->next;
+ }
return ret;
}
+
+/**
+ * Add meta data that libextractor finds to our meta data
+ * container. The arguments are forwarded unchanged to
+ * GNUNET_CONTAINER_meta_data_insert; its return value is
+ * deliberately ignored, so a failed insertion (such as an
+ * entry that already exists) does not stop the extraction.
+ *
+ * @param cls closure, our meta data container
+ * @param plugin_name name of the plugin that produced this value;
+ * special values can be used (e.g. '<zlib>' for zlib being
+ * used in the main libextractor library and yielding
+ * meta data).
+ * @param type libextractor-type describing the meta data
+ * @param format basic format information about data
+ * @param data_mime_type mime-type of data (not of the original file);
+ * can be NULL (if mime-type is not known)
+ * @param data actual meta-data found
+ * @param data_len number of bytes in data
+ * @return always 0 to continue extracting
+ */
+static int
+add_to_md(void *cls,
+ const char *plugin_name,
+ enum EXTRACTOR_MetaType type,
+ enum EXTRACTOR_MetaFormat format,
+ const char *data_mime_type,
+ const char *data,
+ size_t data_len)
+{
+ struct GNUNET_CONTAINER_MetaData *md = cls;
+ (void) GNUNET_CONTAINER_meta_data_insert (md,
+ plugin_name,
+ type,
+ format,
+ data_mime_type,
+ data,
+ data_len);
+ return 0;
+}
+
+
/**
* Extract meta-data from a file.
*
@@ -316,37 +606,43 @@ GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData
int
GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData
*md, const char *filename,
- EXTRACTOR_ExtractorList *
+ struct EXTRACTOR_PluginList *
extractors)
{
- EXTRACTOR_KeywordList *head;
- EXTRACTOR_KeywordList *pos;
- int ret;
+ unsigned int old;
if (filename == NULL)
return GNUNET_SYSERR;
if (extractors == NULL)
return 0;
- head = EXTRACTOR_getKeywords (extractors, filename);
- head = EXTRACTOR_removeDuplicateKeywords (head,
- EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN);
- pos = head;
- ret = 0;
- while (pos != NULL)
- {
- if (GNUNET_OK ==
- GNUNET_CONTAINER_meta_data_insert (md, pos->keywordType,
- pos->keyword))
- ret++;
- pos = pos->next;
- }
- EXTRACTOR_freeKeywords (head);
- return ret;
+ old = md->item_count;
+ EXTRACTOR_extract (extractors,
+ filename,
+ NULL, 0,
+ &add_to_md,
+ md);
+ return (int) (md->item_count - old);
}
-static unsigned int
-tryCompression (char *data, unsigned int oldSize)
+/**
+ * Try to compress the given block of data.
+ *
+ * @param data block to compress; the block itself
+ * is left untouched, a smaller compressed
+ * copy (if any) is handed back through
+ * 'result'
+ * @param oldSize number of bytes in data
+ * @param result set to the compressed data
+ * @param newSize set to size of result
+ * @return GNUNET_YES if compression reduced the size,
+ * GNUNET_NO if compression did not help
+ */
+static int
+try_compression (const char *data,
+ size_t oldSize,
+ char **result,
+ size_t *newSize)
{
char *tmp;
uLongf dlen;
@@ -364,62 +660,40 @@ tryCompression (char *data, unsigned int oldSize)
{
if (dlen < oldSize)
{
- memcpy (data, tmp, dlen);
- GNUNET_free (tmp);
- return dlen;
+ *result = tmp;
+ *newSize = dlen;
+ return GNUNET_YES;
}
}
GNUNET_free (tmp);
- return oldSize;
+ return GNUNET_NO;
}
-/**
- * Decompress input, return the decompressed data
- * as output, set outputSize to the number of bytes
- * that were found.
- *
- * @return NULL on error
- */
-static char *
-decompress (const char *input,
- unsigned int inputSize, unsigned int outputSize)
-{
- char *output;
- uLongf olen;
-
- olen = outputSize;
- output = GNUNET_malloc (olen);
- if (Z_OK == uncompress ((Bytef *) output,
- &olen, (const Bytef *) input, inputSize))
- {
- return output;
- }
- else
- {
- GNUNET_free (output);
- return NULL;
- }
-}
/**
* Flag in 'version' that indicates compressed meta-data.
*/
#define HEADER_COMPRESSED 0x80000000
+
/**
* Bits in 'version' that give the version number.
*/
#define HEADER_VERSION_MASK 0x7FFFFFFF
+
+/**
+ * Header for serialized meta data.
+ */
struct MetaDataHeader
{
/**
- * The version of the MD serialization.
- * The highest bit is used to indicate
- * compression.
+ * The version of the MD serialization. The highest bit is used to
+ * indicate compression.
*
- * Version 0 is the current version;
- * Version is 1 for a NULL pointer.
+ * Version 0 is traditional (pre-0.9) meta data (unsupported)
+ * Version is 1 for a NULL pointer
+ * Version 2 is for 0.9.x (and possibly higher)
* Other version numbers are not yet defined.
*/
uint32_t version;
@@ -430,24 +704,57 @@ struct MetaDataHeader
uint32_t entries;
/**
- * Size of the MD (decompressed)
+ * Size of the decompressed meta data.
*/
uint32_t size;
/**
- * This is followed by 'entries' values of type 'uint32_t' that
- * correspond to EXTRACTOR_KeywordTypes. After that, the meta-data
- * keywords follow (0-terminated). The MD block always ends with
- * 0-termination, padding with 0 until a multiple of 8 bytes.
+ * This is followed by 'entries' values of type 'struct MetaDataEntry'
+ * and then by 'entries' plugin names, mime-types and data blocks
+ * as specified in those meta data entries.
+ */
+};
+
+
+/**
+ * Entry of serialized meta data. The serializer writes one
+ * such entry per meta data item and stores every field in
+ * network byte order (via htonl).
+ */
+struct MetaDataEntry
+{
+ /**
+ * Meta data type. Corresponds to an 'enum EXTRACTOR_MetaType'
+ */
+ uint32_t type;
+
+ /**
+ * Meta data format. Corresponds to an 'enum EXTRACTOR_MetaFormat'
*/
+ uint32_t format;
+
+ /**
+ * Number of bytes of meta data (size of the item's data block).
+ */
+ uint32_t data_size;
+
+ /**
+ * Number of bytes in the plugin name including 0-terminator. 0 for NULL.
+ */
+ uint32_t plugin_name_len;
+
+ /**
+ * Number of bytes in the mime type including 0-terminator. 0 for NULL.
+ */
+ uint32_t mime_type_len;
};
+
/**
* Serialize meta-data to target.
*
* @param md metadata to serialize
- * @param target where to write the serialized metadata
+ * @param target where to write the serialized metadata;
+ * *target can be NULL, in which case memory is allocated
* @param max maximum number of bytes available in target
* @param opt is it ok to just write SOME of the
* meta-data to match the size constraint,
@@ -458,149 +765,273 @@ struct MetaDataHeader
*/
ssize_t
GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData
- *md, char *target, size_t max,
+ *md, char **target, size_t max,
enum
GNUNET_CONTAINER_MetaDataSerializationOptions
opt)
{
- struct MetaDataHeader *hdr;
+ struct GNUNET_CONTAINER_MetaData *vmd;
+ struct MetaItem *pos;
+ struct MetaDataHeader *hdr;
+ struct MetaDataEntry *ent;
+ unsigned int i;
+ uint64_t msize;
+ size_t off;
+ char *mdata;
+ char *cdata;
+ size_t mlen;
+ size_t plen;
size_t size;
- size_t pos;
- uint32_t i;
- size_t len;
- uint32_t ic;
+ size_t left;
+ size_t clen;
+ int comp;
if (max < sizeof (struct MetaDataHeader))
return GNUNET_SYSERR; /* far too small */
- ic = md ? md->itemCount : 0;
- hdr = NULL;
- while (1)
+ if (md == NULL)
+ return 0;
+
+ if (md->sbuf != NULL)
{
- size = sizeof (struct MetaDataHeader);
- size += sizeof (uint32_t) * ic;
- for (i = 0; i < ic; i++)
- size += 1 + strlen (md->items[i].data);
- while (size % 8 != 0)
- size++;
- hdr = GNUNET_malloc (size);
- hdr->version = htonl (md == NULL ? 1 : 0);
- hdr->entries = htonl (ic);
- for (i = 0; i < ic; i++)
- ((uint32_t *) & hdr[1])[i] = htonl ((uint32_t) md->items[i].type);
- pos = sizeof (struct MetaDataHeader);
- pos += sizeof (uint32_t) * ic;
- for (i = 0; i < ic; i++)
- {
- len = strlen (md->items[i].data) + 1;
- memcpy (&((char *) hdr)[pos], md->items[i].data, len);
- pos += len;
- }
+ /* try to use serialization cache */
+ if (md->sbuf_size < max)
+ {
+ if (NULL == *target)
+ *target = GNUNET_malloc (md->sbuf_size);
+ memcpy (*target,
+ md->sbuf,
+ md->sbuf_size);
+ return md->sbuf_size;
+ }
+ if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART))
+ return GNUNET_SYSERR; /* can say that this will fail */
+ /* need to compute a partial serialization, sbuf useless ... */
+ }
- hdr->size = htonl (size);
- if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0)
- {
- pos = tryCompression ((char *) &hdr[1],
- size - sizeof (struct MetaDataHeader));
- }
+
+ msize = 0;
+ pos = md->items;
+ while (NULL != pos)
+ {
+ msize += sizeof (struct MetaDataEntry);
+ msize += pos->data_size;
+ if (pos->plugin_name != NULL)
+ msize += strlen (pos->plugin_name) + 1;
+ if (pos->mime_type != NULL)
+ msize += strlen (pos->mime_type) + 1;
+ pos = pos->next;
+ }
+ size = (size_t) msize;
+ if (size != msize)
+ {
+ GNUNET_break (0); /* integer overflow */
+ return GNUNET_SYSERR;
+ }
+ if (size >= GNUNET_MAX_MALLOC_CHECKED)
+ {
+ /* too large to be processed */
+ return GNUNET_SYSERR;
+ }
+ ent = GNUNET_malloc (size);
+ mdata = (char *) &ent[md->item_count];
+ off = size - (md->item_count * sizeof(struct MetaDataEntry));
+ i = 0;
+ pos = md->items;
+ while (NULL != pos)
+ {
+ ent[i].type = htonl ((uint32_t) pos->type);
+ ent[i].format = htonl ((uint32_t) pos->format);
+ ent[i].data_size = htonl ((uint32_t) pos->data_size);
+ if (pos->plugin_name == NULL)
+ plen = 0;
else
- {
- pos = size - sizeof (struct MetaDataHeader);
- }
- if (pos < size - sizeof (struct MetaDataHeader))
- {
- hdr->version = htonl (HEADER_COMPRESSED);
- size = pos + sizeof (struct MetaDataHeader);
- }
- if (size <= max)
- break;
- GNUNET_free (hdr);
- hdr = NULL;
+ plen = strlen (pos->plugin_name) + 1;
+ ent[i].plugin_name_len = htonl ( (uint32_t) plen);
+ if (pos->mime_type == NULL)
+ mlen = 0;
+ else
+ mlen = strlen (pos->mime_type) + 1;
+ ent[i].mime_type_len = htonl ((uint32_t) mlen);
+ off -= pos->data_size;
+ memcpy (&mdata[off], pos->data, pos->data_size);
+ off -= plen;
+ memcpy (&mdata[off], pos->plugin_name, plen);
+ off -= mlen;
+ memcpy (&mdata[off], pos->mime_type, mlen);
+ i++;
+ pos = pos->next;
+ }
+ GNUNET_assert (off == 0);
- if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART) == 0)
- {
- return GNUNET_SYSERR; /* does not fit! */
+ left = size;
+ for (i=0;i<md->item_count;i++)
+ {
+ comp = GNUNET_NO;
+ if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS))
+ comp = try_compression ((const char*) &ent[i],
+ left,
+ &cdata,
+ &clen);
+
+ if ( (md->sbuf == NULL) &&
+ (i == 0) )
+ {
+ /* fill 'sbuf'; this "modifies" md, but since this is only
+ an internal cache we will cast away the 'const' instead
+ of making the API look strange. */
+ vmd = (struct GNUNET_CONTAINER_MetaData*) md;
+ hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader));
+ hdr->entries = htonl (md->item_count);
+ if (GNUNET_YES == comp)
+ {
+ hdr->size = htonl (clen);
+ hdr->version = htonl (2 | HEADER_COMPRESSED);
+ memcpy (&hdr[1],
+ cdata,
+ clen);
+ vmd->sbuf_size = clen + sizeof (struct MetaDataHeader);
+ }
+ else
+ {
+ hdr->size = htonl (left);
+ hdr->version = htonl (2);
+ memcpy (&hdr[1],
+ &ent[0],
+ left);
+ vmd->sbuf_size = left + sizeof (struct MetaDataHeader);
+ }
+ vmd->sbuf = (char*) hdr;
+ }
+
+ if ( ( (left + sizeof (struct MetaDataHeader)) <= max) ||
+ ( (comp == GNUNET_YES) &&
+ (clen <= max)) )
+ {
+ /* success, this now fits! */
+ if (GNUNET_YES == comp)
+ {
+ hdr = (struct MetaDataHeader*) *target;
+ if (hdr == NULL)
+ {
+ hdr = GNUNET_malloc (clen + sizeof (struct MetaDataHeader));
+ *target = (char*) hdr;
+ }
+ hdr->version = htonl (2 | HEADER_COMPRESSED);
+ hdr->entries = htonl (md->item_count - i);
+ hdr->size = htonl (left);
+ memcpy (&target[sizeof(struct MetaDataHeader)],
+ cdata,
+ clen);
+ GNUNET_free (cdata);
+ GNUNET_free (ent);
+ return clen + sizeof (struct MetaDataHeader);
+ }
+ else
+ {
+ hdr = (struct MetaDataHeader*) target;
+ if (hdr == NULL)
+ {
+ hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader));
+ *target = (char*) hdr;
+ }
+ hdr->version = htonl (2);
+ hdr->entries = htonl (md->item_count - i);
+ hdr->size = htonl (left);
+ memcpy (&target[sizeof(struct MetaDataHeader)],
+ &ent[i],
+ left);
+ GNUNET_free (ent);
+ return left + sizeof (struct MetaDataHeader);
+ }
+ }
+
+ if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART))
+ {
+ /* does not fit! */
+ GNUNET_free (ent);
+ return GNUNET_SYSERR;
}
- /* partial serialization ok, try again with less meta-data */
- if (size > 2 * max)
- ic = ic * 2 / 3; /* still far too big, make big reductions */
- else
- ic--; /* small steps, we're close */
+
+ /* next iteration: ignore the corresponding meta data at the
+ end and try again without it */
+ left -= sizeof (struct MetaDataEntry);
+ left -= pos->data_size;
+ if (pos->plugin_name != NULL)
+ left -= strlen (pos->plugin_name) + 1;
+ if (pos->mime_type != NULL)
+ left -= strlen (pos->mime_type) + 1;
}
- GNUNET_assert (size <= max);
- memcpy (target, hdr, size);
- GNUNET_free (hdr);
- /* extra check: deserialize! */
-#if EXTRA_CHECKS
- {
- struct GNUNET_CONTAINER_MetaData *mdx;
- mdx = GNUNET_CONTAINER_meta_data_deserialize (target, size);
- GNUNET_assert (NULL != mdx);
- GNUNET_CONTAINER_meta_data_destroy (mdx);
- }
-#endif
- return size;
+ GNUNET_free (ent);
+
+ /* nothing fit, only write header! */
+ hdr = (struct MetaDataHeader*) target;
+ if (hdr == NULL)
+ {
+ hdr = GNUNET_malloc (sizeof (struct MetaDataHeader));
+ *target = (char*) hdr;
+ }
+ hdr->version = htonl (2);
+ hdr->entries = htonl (0);
+ hdr->size = htonl (0);
+ return sizeof (struct MetaDataHeader);
}
+
/**
- * Estimate (!) the size of the meta-data in
- * serialized form. The estimate MAY be higher
- * than what is strictly needed.
+ * Get the size of the full meta-data in serialized form.
*
* @param md metadata to inspect
- * @param opt is it ok to just write SOME of the
- * meta-data to match the size constraint,
- * possibly discarding some data?
* @return number of bytes needed for serialization, -1 on error
*/
ssize_t
-GNUNET_CONTAINER_meta_data_get_serialized_size (const struct
- GNUNET_CONTAINER_MetaData *
- md,
- enum
- GNUNET_CONTAINER_MetaDataSerializationOptions
- opt)
+GNUNET_CONTAINER_meta_data_get_serialized_size (const struct GNUNET_CONTAINER_MetaData *md)
{
- struct MetaDataHeader *hdr;
- size_t size;
- size_t pos;
- uint32_t i;
- size_t len;
- uint32_t ic;
+ ssize_t ret;
+ char *ptr;
+
+ if (md->sbuf != NULL)
+ return md->sbuf_size;
+ ptr = NULL;
+ ret = GNUNET_CONTAINER_meta_data_serialize (md,
+ &ptr,
+ GNUNET_MAX_MALLOC_CHECKED,
+ GNUNET_CONTAINER_META_DATA_SERIALIZE_FULL);
+ if (ret != -1)
+ GNUNET_free (ptr);
+ return ret;
+}
- ic = md ? md->itemCount : 0;
- size = sizeof (struct MetaDataHeader);
- size += sizeof (uint32_t) * ic;
- for (i = 0; i < ic; i++)
- size += 1 + strlen (md->items[i].data);
- while (size % 8 != 0)
- size++;
- hdr = GNUNET_malloc (size);
- hdr->version = htonl (md == NULL ? 1 : 0);
- hdr->entries = htonl (ic);
- for (i = 0; i < ic; i++)
- ((uint32_t *) & hdr[1])[i] = htonl ((uint32_t) md->items[i].type);
- pos = sizeof (struct MetaDataHeader);
- pos += sizeof (uint32_t) * ic;
- for (i = 0; i < ic; i++)
- {
- len = strlen (md->items[i].data) + 1;
- memcpy (&((char *) hdr)[pos], md->items[i].data, len);
- pos += len;
- }
- if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0)
+
+/**
+ * Decompress input and return the decompressed data
+ * as output. 'outputSize' is the expected size of the
+ * decompressed data and determines the buffer allocated.
+ *
+ * @param input compressed data
+ * @param inputSize number of bytes in input
+ * @param outputSize expected size of the output
+ * @return NULL on error
+ */
+static char *
+decompress (const char *input,
+ size_t inputSize,
+ size_t outputSize)
+{
+ char *output;
+ uLongf olen;
+
+ olen = outputSize;
+ output = GNUNET_malloc (olen);
+ if (Z_OK == uncompress ((Bytef *) output,
+ &olen, (const Bytef *) input, inputSize))
{
- pos =
- tryCompression ((char *) &hdr[1],
- size - sizeof (struct MetaDataHeader));
+ return output;
}
else
{
- pos = size - sizeof (struct MetaDataHeader);
+ GNUNET_free (output);
+ return NULL;
}
- if (pos < size - sizeof (struct MetaDataHeader))
- size = pos + sizeof (struct MetaDataHeader);
- GNUNET_free (hdr);
- return size;
}
@@ -616,41 +1047,57 @@ struct GNUNET_CONTAINER_MetaData *
GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size)
{
struct GNUNET_CONTAINER_MetaData *md;
- const struct MetaDataHeader *hdr;
+ struct MetaDataHeader hdr;
+ struct MetaDataEntry ent;
uint32_t ic;
+ uint32_t i;
char *data;
const char *cdata;
+ uint32_t version;
uint32_t dataSize;
int compressed;
- uint32_t i;
- size_t pos;
- size_t len;
- uint32_t version;
+ size_t left;
+ uint32_t mlen;
+ uint32_t plen;
+ uint32_t dlen;
+ const char *mdata;
+ const char *meta_data;
+ const char *plugin_name;
+ const char *mime_type;
+ enum EXTRACTOR_MetaFormat format;
if (size < sizeof (struct MetaDataHeader))
return NULL;
- hdr = (const struct MetaDataHeader *) input;
- version = ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_VERSION_MASK;
+ memcpy (&hdr,
+ input,
+ sizeof (struct MetaDataHeader));
+ version = ntohl (hdr.version) & HEADER_VERSION_MASK;
+ compressed = (ntohl (hdr.version) & HEADER_COMPRESSED) != 0;
+
if (version == 1)
- return NULL; /* null pointer */
- if (version != 0)
+ return NULL; /* null pointer */
+ if (version != 2)
{
GNUNET_break_op (0); /* unsupported version */
return NULL;
}
- ic = ntohl (MAKE_UNALIGNED (hdr->entries));
- compressed =
- (ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_COMPRESSED) != 0;
+
+ ic = ntohl (hdr.entries);
+ dataSize = ntohl (hdr.size);
+ if ((sizeof (struct MetaDataEntry) * ic) > dataSize)
+ {
+ GNUNET_break_op (0);
+ return NULL;
+ }
+
if (compressed)
{
- dataSize =
- ntohl (MAKE_UNALIGNED (hdr->size)) - sizeof (struct MetaDataHeader);
- if (dataSize > 2 * 1042 * 1024)
+ if (dataSize >= GNUNET_MAX_MALLOC_CHECKED)
{
- GNUNET_break (0);
- return NULL; /* only 2 MB allowed [to make sure we don't blow
- our memory limit because of a mal-formed
- message... ] */
+ /* make sure we don't blow our memory limit because of a mal-formed
+ message... */
+ GNUNET_break_op (0);
+ return NULL;
}
data =
decompress ((const char *) &input[sizeof (struct MetaDataHeader)],
@@ -665,84 +1112,93 @@ GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size)
else
{
data = NULL;
- cdata = (const char *) &hdr[1];
- dataSize = size - sizeof (struct MetaDataHeader);
- if (size != ntohl (MAKE_UNALIGNED (hdr->size)))
+ cdata = (const char *) &input[sizeof (struct MetaDataHeader)];
+ if (dataSize != size - sizeof (struct MetaDataHeader))
{
- GNUNET_break (0);
+ GNUNET_break_op (0);
return NULL;
}
}
- if ((sizeof (uint32_t) * ic + ic) > dataSize)
- {
- GNUNET_break (0);
- goto FAILURE;
- }
- if ((ic > 0) && (cdata[dataSize - 1] != '\0'))
- {
- GNUNET_break (0);
- goto FAILURE;
- }
-
md = GNUNET_CONTAINER_meta_data_create ();
- GNUNET_array_grow (md->items, md->itemCount, ic);
- i = 0;
- pos = sizeof (uint32_t) * ic;
- while ((pos < dataSize) && (i < ic))
+ left = dataSize - ic * sizeof (struct MetaDataEntry);
+ mdata = &cdata[ic * sizeof (struct MetaDataEntry)];
+ for (i=0;i<ic;i++)
{
- len = strlen (&cdata[pos]) + 1;
- md->items[i].type = (EXTRACTOR_KeywordType)
- ntohl (MAKE_UNALIGNED (((const uint32_t *) cdata)[i]));
- md->items[i].data = GNUNET_strdup (&cdata[pos]);
- pos += len;
- i++;
- }
- if (i < ic)
- { /* oops */
- GNUNET_CONTAINER_meta_data_destroy (md);
- goto FAILURE;
- }
- GNUNET_free_non_null (data);
- return md;
-FAILURE:
- GNUNET_free_non_null (data);
- return NULL; /* size too small */
-}
-
-/**
- * Test if two MDs are equal.
- *
- * @param md1 first value to check
- * @param md2 other value to check
- * @return GNUNET_YES if they are equal
- */
-int
-GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData
- *md1,
- const struct GNUNET_CONTAINER_MetaData
- *md2)
-{
- uint32_t i;
- uint32_t j;
- int found;
+ memcpy (&ent,
+ &cdata[i * sizeof(struct MetaDataEntry)],
+ sizeof (struct MetaDataEntry));
+ format = (enum EXTRACTOR_MetaFormat) ntohl (ent.format);
+ if ( (format != EXTRACTOR_METAFORMAT_UTF8) &&
+ (format != EXTRACTOR_METAFORMAT_C_STRING) &&
+ (format != EXTRACTOR_METAFORMAT_BINARY) )
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ dlen = ntohl (ent.data_size);
+ plen = ntohl (ent.plugin_name_len);
+ mlen = ntohl (ent.mime_type_len);
+ if (dlen > left)
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ left -= dlen;
+ meta_data = &mdata[left];
+ if ( (format == EXTRACTOR_METAFORMAT_UTF8) ||
+ (format == EXTRACTOR_METAFORMAT_C_STRING) )
+ {
+ if ( (dlen == 0) ||
+ (mdata[left + dlen - 1] != '\0') )
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ }
+ if (plen > left)
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ left -= plen;
+ if ( (plen > 0) &&
+ (mdata[left + plen - 1] != '\0') )
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ if (plen == 0)
+ plugin_name = NULL;
+ else
+ plugin_name = &mdata[left];
- if (md1->itemCount != md2->itemCount)
- return GNUNET_NO;
- for (i = 0; i < md1->itemCount; i++)
- {
- found = GNUNET_NO;
- for (j = 0; j < md2->itemCount; j++)
- if ((md1->items[i].type == md2->items[j].type) &&
- (0 == strcmp (md1->items[i].data, md2->items[j].data)))
- {
- found = GNUNET_YES;
- break;
- }
- if (found == GNUNET_NO)
- return GNUNET_NO;
+ if (mlen > left)
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ left -= mlen;
+ if ( (mlen > 0) &&
+ (mdata[left + mlen - 1] != '\0') )
+ {
+ GNUNET_break_op (0);
+ break;
+ }
+ if (mlen == 0)
+ mime_type = NULL;
+ else
+ mime_type = &mdata[left];
+ GNUNET_CONTAINER_meta_data_insert (md,
+ plugin_name,
+ (enum EXTRACTOR_MetaType) ntohl (ent.type),
+ format,
+ mime_type,
+ meta_data,
+ dlen);
}
- return GNUNET_YES;
+ GNUNET_free_non_null (data);
+ return md;
}