diff options
author | Christian Grothoff <christian@grothoff.org> | 2010-01-03 21:42:52 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2010-01-03 21:42:52 +0000 |
commit | 3c5249af8087b0b1a4f131a3e19a9759639ab5db (patch) | |
tree | 697ff622324a7d259dc59da63205f398661a56fa /src/util/container_meta_data.c | |
parent | ebd1325d27e2062f3fb8e92766178ab0660c2dac (diff) |
adapting main code base to work with libextractor 0.6 API; testcases will follow later
Diffstat (limited to 'src/util/container_meta_data.c')
-rw-r--r-- | src/util/container_meta_data.c | 1214 |
1 files changed, 835 insertions, 379 deletions
diff --git a/src/util/container_meta_data.c b/src/util/container_meta_data.c index 912ac2684d..e4d8737c89 100644 --- a/src/util/container_meta_data.c +++ b/src/util/container_meta_data.c @@ -1,6 +1,6 @@ /* This file is part of GNUnet. - (C) 2003, 2004, 2005, 2006, 2008, 2009 Christian Grothoff (and other contributing authors) + (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010 Christian Grothoff (and other contributing authors) GNUnet is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -32,12 +32,46 @@ #include <extractor.h> #include <zlib.h> -#define EXTRA_CHECKS ALLOW_EXTRA_CHECKS - -struct Item +/** + * Meta data item. + */ +struct MetaItem { - EXTRACTOR_KeywordType type; + /** + * This is a linked list. + */ + struct MetaItem *next; + + /** + * Name of the extracting plugin. + */ + char *plugin_name; + + /** + * Mime-type of data. + */ + char *mime_type; + + /** + * The actual meta data. + */ char *data; + + /** + * Number of bytes in 'data'. + */ + size_t data_size; + + /** + * Type of the meta data. + */ + enum EXTRACTOR_MetaType type; + + /** + * Format of the meta data. + */ + enum EXTRACTOR_MetaFormat format; + }; /** @@ -45,86 +79,224 @@ struct Item */ struct GNUNET_CONTAINER_MetaData { - uint32_t itemCount; - struct Item *items; + /** + * Linked list of the meta data items. + */ + struct MetaItem *items; + + /** + * Complete serialized and compressed buffer of the items. + * NULL if we have not computed that buffer yet. + */ + char *sbuf; + + /** + * Number of bytes in 'sbuf'. 0 if the buffer is stale. + */ + size_t sbuf_size; + + /** + * Number of items in the linked list. + */ + unsigned int item_count; + }; + /** * Create a fresh struct CONTAINER_MetaData token. + * + * @return empty meta-data container */ struct GNUNET_CONTAINER_MetaData * GNUNET_CONTAINER_meta_data_create () { - struct GNUNET_CONTAINER_MetaData *ret; - ret = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData)); - ret->items = NULL; - ret->itemCount = 0; - return ret; + return GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData)); } + +/** + * Free meta data item. + * + * @param item item to free + */ +static void +meta_item_free (struct MetaItem *item) +{ + GNUNET_free_non_null (item->plugin_name); + GNUNET_free_non_null (item->mime_type); + GNUNET_free_non_null (item->data); + GNUNET_free (item); +} + + +/** + * The meta data has changed, invalidate its serialization + * buffer. + * + * @param md meta data that changed + */ +static void +invalidate_sbuf (struct GNUNET_CONTAINER_MetaData *md) +{ + if (md->sbuf == NULL) + return; + GNUNET_free (md->sbuf); + md->sbuf = NULL; + md->sbuf_size = 0; +} + + /** * Free meta data. + * + * @param md what to free */ void GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md) { - int i; + struct MetaItem *item; if (md == NULL) return; - for (i = 0; i < md->itemCount; i++) - GNUNET_free (md->items[i].data); - GNUNET_array_grow (md->items, md->itemCount, 0); + while (NULL != (item = md->items)) + { + md->items = item->next; + meta_item_free (item); + } + GNUNET_free_non_null (md->sbuf); GNUNET_free (md); } + /** - * Add the current time as the publication date - * to the meta-data. + * Test if two MDs are equal. We consider them equal if + * the meta types, formats and content match (we do not + * include the mime types and plugins names in this + * consideration). + * + * @param md1 first value to check + * @param md2 other value to check + * @return GNUNET_YES if they are equal */ -void -GNUNET_CONTAINER_meta_data_add_publication_date (struct - GNUNET_CONTAINER_MetaData - *md) +int +GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData + *md1, + const struct GNUNET_CONTAINER_MetaData + *md2) { - char *dat; - struct GNUNET_TIME_Absolute t; + struct MetaItem *i; + struct MetaItem *j; + int found; - t = GNUNET_TIME_absolute_get (); - GNUNET_CONTAINER_meta_data_delete (md, EXTRACTOR_PUBLICATION_DATE, NULL); - dat = GNUNET_STRINGS_absolute_time_to_string (t); - GNUNET_CONTAINER_meta_data_insert (md, EXTRACTOR_PUBLICATION_DATE, dat); - GNUNET_free (dat); + if (md1 == md2) + return GNUNET_YES; + if (md1->item_count != md2->item_count) + return GNUNET_NO; + + i = md1->items; + while (NULL != i) + { + found = GNUNET_NO; + j = md2->items; + while (NULL != j) + { + if ( (i->type == j->type) && + (i->format == j->format) && + (i->data_size == j->data_size) && + (0 == memcmp (i->data, + j->data, + i->data_size))) + { + found = GNUNET_YES; + break; + } + j = j->next; + } + if (found == GNUNET_NO) + return GNUNET_NO; + i = i->next; + } + return GNUNET_YES; } + /** - * Extend metadata. + * Extend metadata. Note that the list of meta data items is + * sorted by size (largest first). + * + * @param md metadata to extend + * @param plugin_name name of the plugin that produced this value; + * special values can be used (i.e. '<zlib>' for zlib being + * used in the main libextractor library and yielding + * meta data). + * @param type libextractor-type describing the meta data + * @param format basic format information about data + * @param data_mime_type mime-type of data (not of the original file); + * can be NULL (if mime-type is not known) + * @param data actual meta-data found + * @param data_len number of bytes in data * @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists + * data_mime_type and plugin_name are not considered for "exists" checks */ int GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, - EXTRACTOR_KeywordType type, - const char *data) + const char *plugin_name, + enum EXTRACTOR_MetaType type, + enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, + const char *data, + size_t data_len) { - uint32_t idx; + struct MetaItem *prev; + struct MetaItem *pos; + struct MetaItem *i; char *p; - GNUNET_assert (data != NULL); - for (idx = 0; idx < md->itemCount; idx++) + prev = NULL; + pos = md->items; + while (NULL != pos) { - if ((md->items[idx].type == type) && - (0 == strcmp (md->items[idx].data, data))) - return GNUNET_SYSERR; + if (pos->data_size < data_len) + break; + if ( (pos->type == type) && + (pos->format == format) && + (pos->data_size == data_len) && + (0 == memcmp (pos->data, + data, + data_len))) + { + if ( (pos->mime_type == NULL) && + (data_mime_type != NULL) ) + { + pos->mime_type = GNUNET_strdup (data_mime_type); + invalidate_sbuf (md); + } + return GNUNET_SYSERR; + } + prev = pos; + pos = pos->next; } - idx = md->itemCount; - GNUNET_array_grow (md->items, md->itemCount, md->itemCount + 1); - md->items[idx].type = type; - md->items[idx].data = p = GNUNET_strdup (data); - + md->item_count++; + i = GNUNET_malloc (sizeof (struct MetaItem)); + i->type = type; + i->format = format; + i->data_size = data_len; + i->next = pos; + if (prev == NULL) + md->items = i; + else + prev->next = i; + i->mime_type = (data_mime_type == NULL) ? NULL : GNUNET_strdup (data_mime_type); + i->plugin_name = (plugin_name == NULL) ? NULL : GNUNET_strdup (plugin_name); + i->data = GNUNET_malloc (data_len); + memcpy (i->data, data, data_len); /* change OS native dir separators to unix '/' and others to '_' */ - if (type == EXTRACTOR_FILENAME) + if (type == EXTRACTOR_METATYPE_FILENAME) { - while (*p != '\0') + p = i->data; + while ( (*p != '\0') && + (p < i->data + data_len) ) { if (*p == DIR_SEPARATOR) *p = '/'; @@ -133,10 +305,11 @@ GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, p++; } } - + invalidate_sbuf (md); return GNUNET_OK; } + /** * Remove an item. * @@ -144,36 +317,78 @@ GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, * @param type type of the item to remove * @param data specific value to remove, NULL to remove all * entries of the given type + * @param data_len number of bytes in data * @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md */ int GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md, - EXTRACTOR_KeywordType type, - const char *data) + enum EXTRACTOR_MetaType type, + const char *data, + size_t data_len) { - uint32_t idx; - int ret = GNUNET_SYSERR; - for (idx = 0; idx < md->itemCount; idx++) + struct MetaItem *pos; + struct MetaItem *prev; + + prev = NULL; + pos = md->items; + while (NULL != pos) { - if ((md->items[idx].type == type) && - ((data == NULL) || (0 == strcmp (md->items[idx].data, data)))) - { - GNUNET_free (md->items[idx].data); - md->items[idx] = md->items[md->itemCount - 1]; - GNUNET_array_grow (md->items, md->itemCount, md->itemCount - 1); - if (data == NULL) - { - ret = GNUNET_OK; - continue; - } - return GNUNET_OK; - } + if ( (pos->type == type) && + ( (data == NULL) || + ( (pos->data_size == data_len) && + (0 == memcmp (pos->data, + data, + data_len))) ) ) + { + if (prev == NULL) + md->items = pos->next; + else + prev->next = pos->next; + meta_item_free (pos); + md->item_count--; + invalidate_sbuf (md); + return GNUNET_OK; + } + prev = pos; + pos = pos->next; } - return ret; + return GNUNET_SYSERR; } + /** - * Iterate over MD entries, excluding thumbnails. + * Add the current time as the publication date + * to the meta-data. + * + * @param md metadata to modify + */ +void +GNUNET_CONTAINER_meta_data_add_publication_date (struct + GNUNET_CONTAINER_MetaData + *md) +{ + char *dat; + struct GNUNET_TIME_Absolute t; + + t = GNUNET_TIME_absolute_get (); + GNUNET_CONTAINER_meta_data_delete (md, + EXTRACTOR_METATYPE_PUBLICATION_DATE, + NULL, + 0); + dat = GNUNET_STRINGS_absolute_time_to_string (t); + GNUNET_CONTAINER_meta_data_insert (md, + "<gnunet>", + EXTRACTOR_METATYPE_PUBLICATION_DATE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + dat, + strlen(dat)+1); + GNUNET_free (dat); +} + + +/** + * Iterate over MD entries. * * @param md metadata to inspect * @param iter function to call on each entry @@ -181,51 +396,71 @@ GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md, * @return number of entries */ int -GNUNET_CONTAINER_meta_data_get_contents (const struct - GNUNET_CONTAINER_MetaData *md, - GNUNET_CONTAINER_MetaDataProcessor - iter, void *iter_cls) +GNUNET_CONTAINER_meta_data_iterate (const struct + GNUNET_CONTAINER_MetaData *md, + EXTRACTOR_MetaDataProcessor + iter, void *iter_cls) { - uint32_t i; - uint32_t sub; + struct MetaItem *pos; - sub = 0; - for (i = 0; i < md->itemCount; i++) + if (iter == NULL) + return md->item_count; + pos = md->items; + while (NULL != pos) { - if (!EXTRACTOR_isBinaryType (md->items[i].type)) - { - if ((iter != NULL) && - (GNUNET_OK != iter (iter_cls, - md->items[i].type, md->items[i].data))) - return GNUNET_SYSERR; - } - else - sub++; + if (0 != iter (iter_cls, + pos->plugin_name, + pos->type, + pos->format, + pos->mime_type, + pos->data, + pos->data_size)) + return md->item_count; + pos = pos->next; } - return (int) (md->itemCount - sub); + return md->item_count; } + /** - * Iterate over MD entries + * Get the first MD entry of the given type. Caller + * is responsible for freeing the return value. + * Also, only meta data items that are strings (0-terminated) + * are returned by this function. * - * @return number of entries + * @param md metadata to inspect + * @param type type to look for + * @return NULL if no entry was found */ char * GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData - *md, EXTRACTOR_KeywordType type) + *md, enum EXTRACTOR_MetaType type) { - uint32_t i; + struct MetaItem *pos; - for (i = 0; i < md->itemCount; i++) - if (type == md->items[i].type) - return GNUNET_strdup (md->items[i].data); + pos = md->items; + while (NULL != pos) + { + if ( (type == pos->type) && + ( (pos->format == EXTRACTOR_METAFORMAT_UTF8) || + (pos->format == EXTRACTOR_METAFORMAT_C_STRING) ) ) + return GNUNET_strdup (pos->data); + pos = pos->next; + } return NULL; } + /** - * Iterate over MD entries + * Get the first matching MD entry of the given types. Caller is + * responsible for freeing the return value. Also, only meta data + * items that are strings (0-terminated) are returned by this + * function. * - * @return number of entries + * @param md metadata to inspect + * @param ... -1-terminated list of types + * @return NULL if we do not have any such entry, + * otherwise client is responsible for freeing the value! */ char * GNUNET_CONTAINER_meta_data_get_first_by_types (const struct @@ -234,13 +469,13 @@ GNUNET_CONTAINER_meta_data_get_first_by_types (const struct { char *ret; va_list args; - EXTRACTOR_KeywordType type; + enum EXTRACTOR_MetaType type; ret = NULL; va_start (args, md); while (1) { - type = va_arg (args, EXTRACTOR_KeywordType); + type = va_arg (args, enum EXTRACTOR_MetaType); if (type == -1) break; ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type); @@ -251,6 +486,7 @@ GNUNET_CONTAINER_meta_data_get_first_by_types (const struct return ret; } + /** * Get a thumbnail from the meta-data (if present). * @@ -264,27 +500,33 @@ GNUNET_CONTAINER_meta_data_get_thumbnail (const struct GNUNET_CONTAINER_MetaData * md, unsigned char **thumb) { - char *encoded; - int ret; - size_t size; + struct MetaItem *pos; + struct MetaItem *match; - encoded = - GNUNET_CONTAINER_meta_data_get_by_type (md, EXTRACTOR_THUMBNAIL_DATA); - if (encoded == NULL) - return 0; - if (strlen (encoded) == 0) + match = NULL; + pos = md->items; + while (NULL != pos) { - GNUNET_free (encoded); - return 0; /* invalid */ + if ( (0 == strncasecmp ("image/", pos->mime_type, + strlen("image/"))) && + (pos->format == EXTRACTOR_METAFORMAT_BINARY) ) + { + if (match == NULL) + match = pos; + else if ( (match->type != EXTRACTOR_METATYPE_THUMBNAIL) && + (pos->type == EXTRACTOR_METATYPE_THUMBNAIL) ) + match = pos; + } + pos = pos->next; } - *thumb = NULL; - ret = EXTRACTOR_binaryDecode (encoded, thumb, &size); - GNUNET_free (encoded); - if (ret != 0) + if (match == NULL) return 0; - return size; + *thumb = GNUNET_malloc (match->data_size); + memcpy (*thumb, match->data, match->data_size); + return match->data_size; } + /** * Duplicate struct GNUNET_CONTAINER_MetaData. * @@ -295,18 +537,66 @@ struct GNUNET_CONTAINER_MetaData * GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData *md) { - uint32_t i; struct GNUNET_CONTAINER_MetaData *ret; + struct MetaItem *pos; if (md == NULL) return NULL; ret = GNUNET_CONTAINER_meta_data_create (); - for (i = 0; i < md->itemCount; i++) - GNUNET_CONTAINER_meta_data_insert (ret, md->items[i].type, - md->items[i].data); + pos = md->items; + while (NULL != pos) + { + GNUNET_CONTAINER_meta_data_insert (ret, + pos->plugin_name, + pos->type, + pos->format, + pos->mime_type, + pos->data, + pos->data_size); + pos = pos->next; + } return ret; } + +/** + * Add meta data that libextractor finds to our meta data + * container. + * + * @param cls closure, our meta data container + * @param plugin_name name of the plugin that produced this value; + * special values can be used (i.e. '<zlib>' for zlib being + * used in the main libextractor library and yielding + * meta data). + * @param type libextractor-type describing the meta data + * @param format basic format information about data + * @param data_mime_type mime-type of data (not of the original file); + * can be NULL (if mime-type is not known) + * @param data actual meta-data found + * @param data_len number of bytes in data + * @return always 0 to continue extracting + */ +static int +add_to_md(void *cls, + const char *plugin_name, + enum EXTRACTOR_MetaType type, + enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, + const char *data, + size_t data_len) +{ + struct GNUNET_CONTAINER_MetaData *md = cls; + (void) GNUNET_CONTAINER_meta_data_insert (md, + plugin_name, + type, + format, + data_mime_type, + data, + data_len); + return 0; +} + + /** * Extract meta-data from a file. * @@ -316,37 +606,43 @@ GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData int GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData *md, const char *filename, - EXTRACTOR_ExtractorList * + struct EXTRACTOR_PluginList * extractors) { - EXTRACTOR_KeywordList *head; - EXTRACTOR_KeywordList *pos; - int ret; + unsigned int old; if (filename == NULL) return GNUNET_SYSERR; if (extractors == NULL) return 0; - head = EXTRACTOR_getKeywords (extractors, filename); - head = EXTRACTOR_removeDuplicateKeywords (head, - EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN); - pos = head; - ret = 0; - while (pos != NULL) - { - if (GNUNET_OK == - GNUNET_CONTAINER_meta_data_insert (md, pos->keywordType, - pos->keyword)) - ret++; - pos = pos->next; - } - EXTRACTOR_freeKeywords (head); - return ret; + old = md->item_count; + EXTRACTOR_extract (extractors, + filename, + NULL, 0, + &add_to_md, + md); + return (int) (md->item_count - old); } -static unsigned int -tryCompression (char *data, unsigned int oldSize) +/** + * Try to compress the given block of data. + * + * @param data block to compress; if compression + * resulted in a smaller block, the first + * bytes of data are updated to the compressed + * data + * @param oldSize number of bytes in data + * @param result set to the compressed data + * @param newSize set to size of result + * @return GNUNET_YES if compression reduce the size, + * GNUNET_NO if compression did not help + */ +static int +try_compression (const char *data, + size_t oldSize, + char **result, + size_t *newSize) { char *tmp; uLongf dlen; @@ -364,62 +660,40 @@ tryCompression (char *data, unsigned int oldSize) { if (dlen < oldSize) { - memcpy (data, tmp, dlen); - GNUNET_free (tmp); - return dlen; + *result = tmp; + *newSize = dlen; + return GNUNET_YES; } } GNUNET_free (tmp); - return oldSize; + return GNUNET_NO; } -/** - * Decompress input, return the decompressed data - * as output, set outputSize to the number of bytes - * that were found. - * - * @return NULL on error - */ -static char * -decompress (const char *input, - unsigned int inputSize, unsigned int outputSize) -{ - char *output; - uLongf olen; - - olen = outputSize; - output = GNUNET_malloc (olen); - if (Z_OK == uncompress ((Bytef *) output, - &olen, (const Bytef *) input, inputSize)) - { - return output; - } - else - { - GNUNET_free (output); - return NULL; - } -} /** * Flag in 'version' that indicates compressed meta-data. */ #define HEADER_COMPRESSED 0x80000000 + /** * Bits in 'version' that give the version number. */ #define HEADER_VERSION_MASK 0x7FFFFFFF + +/** + * Header for serialized meta data. + */ struct MetaDataHeader { /** - * The version of the MD serialization. - * The highest bit is used to indicate - * compression. + * The version of the MD serialization. The highest bit is used to + * indicate compression. * - * Version 0 is the current version; - * Version is 1 for a NULL pointer. + * Version 0 is traditional (pre-0.9) meta data (unsupported) + * Version is 1 for a NULL pointer + * Version 2 is for 0.9.x (and possibly higher) * Other version numbers are not yet defined. */ uint32_t version; @@ -430,24 +704,57 @@ struct MetaDataHeader uint32_t entries; /** - * Size of the MD (decompressed) + * Size of the decompressed meta data. */ uint32_t size; /** - * This is followed by 'entries' values of type 'uint32_t' that - * correspond to EXTRACTOR_KeywordTypes. After that, the meta-data - * keywords follow (0-terminated). The MD block always ends with - * 0-termination, padding with 0 until a multiple of 8 bytes. + * This is followed by 'entries' values of type 'struct MetaDataEntry' + * and then by 'entry' plugin names, mime-types and data blocks + * as specified in those meta data entries. + */ +}; + + +/** + * Entry of serialized meta data. + */ +struct MetaDataEntry +{ + /** + * Meta data type. Corresponds to an 'enum EXTRACTOR_MetaType' + */ + uint32_t type; + + /** + * Meta data format. Corresponds to an 'enum EXTRACTOR_MetaFormat' */ + uint32_t format; + + /** + * Number of bytes of meta data. + */ + uint32_t data_size; + + /** + * Number of bytes in the plugin name including 0-terminator. 0 for NULL. + */ + uint32_t plugin_name_len; + + /** + * Number of bytes in the mime type including 0-terminator. 0 for NULL. + */ + uint32_t mime_type_len; }; + /** * Serialize meta-data to target. * * @param md metadata to serialize - * @param target where to write the serialized metadata + * @param target where to write the serialized metadata; + * *target can be NULL, in which case memory is allocated * @param max maximum number of bytes available in target * @param opt is it ok to just write SOME of the * meta-data to match the size constraint, @@ -458,149 +765,273 @@ struct MetaDataHeader */ ssize_t GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData - *md, char *target, size_t max, + *md, char **target, size_t max, enum GNUNET_CONTAINER_MetaDataSerializationOptions opt) { - struct MetaDataHeader *hdr; + struct GNUNET_CONTAINER_MetaData *vmd; + struct MetaItem *pos; + struct MetaDataHeader *hdr; + struct MetaDataEntry *ent; + unsigned int i; + uint64_t msize; + size_t off; + char *mdata; + char *cdata; + size_t mlen; + size_t plen; size_t size; - size_t pos; - uint32_t i; - size_t len; - uint32_t ic; + size_t left; + size_t clen; + int comp; if (max < sizeof (struct MetaDataHeader)) return GNUNET_SYSERR; /* far too small */ - ic = md ? md->itemCount : 0; - hdr = NULL; - while (1) + if (md == NULL) + return 0; + + if (md->sbuf != NULL) { - size = sizeof (struct MetaDataHeader); - size += sizeof (uint32_t) * ic; - for (i = 0; i < ic; i++) - size += 1 + strlen (md->items[i].data); - while (size % 8 != 0) - size++; - hdr = GNUNET_malloc (size); - hdr->version = htonl (md == NULL ? 1 : 0); - hdr->entries = htonl (ic); - for (i = 0; i < ic; i++) - ((uint32_t *) & hdr[1])[i] = htonl ((uint32_t) md->items[i].type); - pos = sizeof (struct MetaDataHeader); - pos += sizeof (uint32_t) * ic; - for (i = 0; i < ic; i++) - { - len = strlen (md->items[i].data) + 1; - memcpy (&((char *) hdr)[pos], md->items[i].data, len); - pos += len; - } + /* try to use serialization cache */ + if (md->sbuf_size < max) + { + if (NULL == *target) + *target = GNUNET_malloc (md->sbuf_size); + memcpy (*target, + md->sbuf, + md->sbuf_size); + return md->sbuf_size; + } + if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART)) + return GNUNET_SYSERR; /* can say that this will fail */ + /* need to compute a partial serialization, sbuf useless ... */ + } - hdr->size = htonl (size); - if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0) - { - pos = tryCompression ((char *) &hdr[1], - size - sizeof (struct MetaDataHeader)); - } + + msize = 0; + pos = md->items; + while (NULL != pos) + { + msize += sizeof (struct MetaDataEntry); + msize += pos->data_size; + if (pos->plugin_name != NULL) + msize += strlen (pos->plugin_name) + 1; + if (pos->mime_type != NULL) + msize += strlen (pos->mime_type) + 1; + pos = pos->next; + } + size = (size_t) msize; + if (size != msize) + { + GNUNET_break (0); /* integer overflow */ + return GNUNET_SYSERR; + } + if (size >= GNUNET_MAX_MALLOC_CHECKED) + { + /* too large to be processed */ + return GNUNET_SYSERR; + } + ent = GNUNET_malloc (size); + mdata = (char *) &ent[md->item_count]; + off = size - (md->item_count * sizeof(struct MetaDataEntry)); + i = 0; + pos = md->items; + while (NULL != pos) + { + ent[i].type = htonl ((uint32_t) pos->type); + ent[i].format = htonl ((uint32_t) pos->format); + ent[i].data_size = htonl ((uint32_t) pos->data_size); + if (pos->plugin_name == NULL) + plen = 0; else - { - pos = size - sizeof (struct MetaDataHeader); - } - if (pos < size - sizeof (struct MetaDataHeader)) - { - hdr->version = htonl (HEADER_COMPRESSED); - size = pos + sizeof (struct MetaDataHeader); - } - if (size <= max) - break; - GNUNET_free (hdr); - hdr = NULL; + plen = strlen (pos->plugin_name) + 1; + ent[i].plugin_name_len = htonl ( (uint32_t) plen); + if (pos->mime_type == NULL) + mlen = 0; + else + mlen = strlen (pos->mime_type) + 1; + ent[i].mime_type_len = htonl ((uint32_t) mlen); + off -= pos->data_size; + memcpy (&mdata[off], pos->data, pos->data_size); + off -= plen; + memcpy (&mdata[off], pos->plugin_name, plen); + off -= mlen; + memcpy (&mdata[off], pos->mime_type, mlen); + i++; + pos = pos->next; + } + GNUNET_assert (off == 0); - if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART) == 0) - { - return GNUNET_SYSERR; /* does not fit! */ + left = size; + for (i=0;i<md->item_count;i++) + { + comp = GNUNET_NO; + if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS)) + comp = try_compression ((const char*) &ent[i], + left, + &cdata, + &clen); + + if ( (md->sbuf == NULL) && + (i == 0) ) + { + /* fill 'sbuf'; this "modifies" md, but since this is only + an internal cache we will cast away the 'const' instead + of making the API look strange. */ + vmd = (struct GNUNET_CONTAINER_MetaData*) md; + hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader)); + hdr->entries = htonl (md->item_count); + if (GNUNET_YES == comp) + { + hdr->size = htonl (clen); + hdr->version = htonl (2 | HEADER_COMPRESSED); + memcpy (&hdr[1], + cdata, + clen); + vmd->sbuf_size = clen + sizeof (struct MetaDataHeader); + } + else + { + hdr->size = htonl (left); + hdr->version = htonl (2); + memcpy (&hdr[1], + &ent[0], + left); + vmd->sbuf_size = left + sizeof (struct MetaDataHeader); + } + vmd->sbuf = (char*) hdr; + } + + if ( ( (left + sizeof (struct MetaDataHeader)) <= max) || + ( (comp == GNUNET_YES) && + (clen <= max)) ) + { + /* success, this now fits! */ + if (GNUNET_YES == comp) + { + hdr = (struct MetaDataHeader*) *target; + if (hdr == NULL) + { + hdr = GNUNET_malloc (clen + sizeof (struct MetaDataHeader)); + *target = (char*) hdr; + } + hdr->version = htonl (2 | HEADER_COMPRESSED); + hdr->entries = htonl (md->item_count - i); + hdr->size = htonl (left); + memcpy (&target[sizeof(struct MetaDataHeader)], + cdata, + clen); + GNUNET_free (cdata); + GNUNET_free (ent); + return clen + sizeof (struct MetaDataHeader); + } + else + { + hdr = (struct MetaDataHeader*) target; + if (hdr == NULL) + { + hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader)); + *target = (char*) hdr; + } + hdr->version = htonl (2); + hdr->entries = htonl (md->item_count - i); + hdr->size = htonl (left); + memcpy (&target[sizeof(struct MetaDataHeader)], + &ent[i], + left); + GNUNET_free (ent); + return left + sizeof (struct MetaDataHeader); + } + } + + if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART)) + { + /* does not fit! */ + GNUNET_free (ent); + return GNUNET_SYSERR; } - /* partial serialization ok, try again with less meta-data */ - if (size > 2 * max) - ic = ic * 2 / 3; /* still far too big, make big reductions */ - else - ic--; /* small steps, we're close */ + + /* next iteration: ignore the corresponding meta data at the + end and try again without it */ + left -= sizeof (struct MetaDataEntry); + left -= pos->data_size; + if (pos->plugin_name != NULL) + left -= strlen (pos->plugin_name) + 1; + if (pos->mime_type != NULL) + left -= strlen (pos->mime_type) + 1; } - GNUNET_assert (size <= max); - memcpy (target, hdr, size); - GNUNET_free (hdr); - /* extra check: deserialize! */ -#if EXTRA_CHECKS - { - struct GNUNET_CONTAINER_MetaData *mdx; - mdx = GNUNET_CONTAINER_meta_data_deserialize (target, size); - GNUNET_assert (NULL != mdx); - GNUNET_CONTAINER_meta_data_destroy (mdx); - } -#endif - return size; + GNUNET_free (ent); + + /* nothing fit, only write header! */ + hdr = (struct MetaDataHeader*) target; + if (hdr == NULL) + { + hdr = GNUNET_malloc (sizeof (struct MetaDataHeader)); + *target = (char*) hdr; + } + hdr->version = htonl (2); + hdr->entries = htonl (0); + hdr->size = htonl (0); + return sizeof (struct MetaDataHeader); } + /** - * Estimate (!) the size of the meta-data in - * serialized form. The estimate MAY be higher - * than what is strictly needed. + * Get the size of the full meta-data in serialized form. * * @param md metadata to inspect - * @param opt is it ok to just write SOME of the - * meta-data to match the size constraint, - * possibly discarding some data? * @return number of bytes needed for serialization, -1 on error */ ssize_t -GNUNET_CONTAINER_meta_data_get_serialized_size (const struct - GNUNET_CONTAINER_MetaData * - md, - enum - GNUNET_CONTAINER_MetaDataSerializationOptions - opt) +GNUNET_CONTAINER_meta_data_get_serialized_size (const struct GNUNET_CONTAINER_MetaData *md) { - struct MetaDataHeader *hdr; - size_t size; - size_t pos; - uint32_t i; - size_t len; - uint32_t ic; + ssize_t ret; + char *ptr; + + if (md->sbuf != NULL) + return md->sbuf_size; + ptr = NULL; + ret = GNUNET_CONTAINER_meta_data_serialize (md, + &ptr, + GNUNET_MAX_MALLOC_CHECKED, + GNUNET_CONTAINER_META_DATA_SERIALIZE_FULL); + if (ret != -1) + GNUNET_free (ptr); + return ret; +} - ic = md ? md->itemCount : 0; - size = sizeof (struct MetaDataHeader); - size += sizeof (uint32_t) * ic; - for (i = 0; i < ic; i++) - size += 1 + strlen (md->items[i].data); - while (size % 8 != 0) - size++; - hdr = GNUNET_malloc (size); - hdr->version = htonl (md == NULL ? 1 : 0); - hdr->entries = htonl (ic); - for (i = 0; i < ic; i++) - ((uint32_t *) & hdr[1])[i] = htonl ((uint32_t) md->items[i].type); - pos = sizeof (struct MetaDataHeader); - pos += sizeof (uint32_t) * ic; - for (i = 0; i < ic; i++) - { - len = strlen (md->items[i].data) + 1; - memcpy (&((char *) hdr)[pos], md->items[i].data, len); - pos += len; - } - if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0) + +/** + * Decompress input, return the decompressed data + * as output, set outputSize to the number of bytes + * that were found. + * + * @param input compressed data + * @param inputSize number of bytes in input + * @param outputSize expected size of the output + * @return NULL on error + */ +static char * +decompress (const char *input, + size_t inputSize, + size_t outputSize) +{ + char *output; + uLongf olen; + + olen = outputSize; + output = GNUNET_malloc (olen); + if (Z_OK == uncompress ((Bytef *) output, + &olen, (const Bytef *) input, inputSize)) { - pos = - tryCompression ((char *) &hdr[1], - size - sizeof (struct MetaDataHeader)); + return output; } else { - pos = size - sizeof (struct MetaDataHeader); + GNUNET_free (output); + return NULL; } - if (pos < size - sizeof (struct MetaDataHeader)) - size = pos + sizeof (struct MetaDataHeader); - GNUNET_free (hdr); - return size; } @@ -616,41 +1047,57 @@ struct GNUNET_CONTAINER_MetaData * GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size) { struct GNUNET_CONTAINER_MetaData *md; - const struct MetaDataHeader *hdr; + struct MetaDataHeader hdr; + struct MetaDataEntry ent; uint32_t ic; + uint32_t i; char *data; const char *cdata; + uint32_t version; uint32_t dataSize; int compressed; - uint32_t i; - size_t pos; - size_t len; - uint32_t version; + size_t left; + uint32_t mlen; + uint32_t plen; + uint32_t dlen; + const char *mdata; + const char *meta_data; + const char *plugin_name; + const char *mime_type; + enum EXTRACTOR_MetaFormat format; if (size < sizeof (struct MetaDataHeader)) return NULL; - hdr = (const struct MetaDataHeader *) input; - version = ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_VERSION_MASK; + memcpy (&hdr, + input, + sizeof (struct MetaDataHeader)); + version = ntohl (hdr.version) & HEADER_VERSION_MASK; + compressed = (ntohl (hdr.version) & HEADER_COMPRESSED) != 0; + if (version == 1) - return NULL; /* null pointer */ - if (version != 0) + return NULL; /* null pointer */ + if (version != 2) { GNUNET_break_op (0); /* unsupported version */ return NULL; } - ic = ntohl (MAKE_UNALIGNED (hdr->entries)); - compressed = - (ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_COMPRESSED) != 0; + + ic = ntohl (hdr.entries); + dataSize = ntohl (hdr.size); + if ((sizeof (struct MetaDataEntry) * ic) > dataSize) + { + GNUNET_break_op (0); + return NULL; + } + if (compressed) { - dataSize = - ntohl (MAKE_UNALIGNED (hdr->size)) - sizeof (struct MetaDataHeader); - if (dataSize > 2 * 1042 * 1024) + if (dataSize >= GNUNET_MAX_MALLOC_CHECKED) { - GNUNET_break (0); - return NULL; /* only 2 MB allowed [to make sure we don't blow - our memory limit because of a mal-formed - message... ] */ + /* make sure we don't blow our memory limit because of a mal-formed + message... */ + GNUNET_break_op (0); + return NULL; } data = decompress ((const char *) &input[sizeof (struct MetaDataHeader)], @@ -665,84 +1112,93 @@ GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size) else { data = NULL; - cdata = (const char *) &hdr[1]; - dataSize = size - sizeof (struct MetaDataHeader); - if (size != ntohl (MAKE_UNALIGNED (hdr->size))) + cdata = (const char *) &input[sizeof (struct MetaDataHeader)]; + if (dataSize != size - sizeof (struct MetaDataHeader)) { - GNUNET_break (0); + GNUNET_break_op (0); return NULL; } } - if ((sizeof (uint32_t) * ic + ic) > dataSize) - { - GNUNET_break (0); - goto FAILURE; - } - if ((ic > 0) && (cdata[dataSize - 1] != '\0')) - { - GNUNET_break (0); - goto FAILURE; - } - md = GNUNET_CONTAINER_meta_data_create (); - GNUNET_array_grow (md->items, md->itemCount, ic); - i = 0; - pos = sizeof (uint32_t) * ic; - while ((pos < dataSize) && (i < ic)) + left = dataSize - ic * sizeof (struct MetaDataEntry); + mdata = &cdata[ic * sizeof (struct MetaDataEntry)]; + for (i=0;i<ic;i++) { - len = strlen (&cdata[pos]) + 1; - md->items[i].type = (EXTRACTOR_KeywordType) - ntohl (MAKE_UNALIGNED (((const uint32_t *) cdata)[i])); - md->items[i].data = GNUNET_strdup (&cdata[pos]); - pos += len; - i++; - } - if (i < ic) - { /* oops */ - GNUNET_CONTAINER_meta_data_destroy (md); - goto FAILURE; - } - GNUNET_free_non_null (data); - return md; -FAILURE: - GNUNET_free_non_null (data); - return NULL; /* size too small */ -} - -/** - * Test if two MDs are equal. - * - * @param md1 first value to check - * @param md2 other value to check - * @return GNUNET_YES if they are equal - */ -int -GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData - *md1, - const struct GNUNET_CONTAINER_MetaData - *md2) -{ - uint32_t i; - uint32_t j; - int found; + memcpy (&ent, + &cdata[i * sizeof(struct MetaDataEntry)], + sizeof (struct MetaDataEntry)); + format = (enum EXTRACTOR_MetaFormat) ntohl (ent.format); + if ( (format != EXTRACTOR_METAFORMAT_UTF8) && + (format != EXTRACTOR_METAFORMAT_C_STRING) && + (format != EXTRACTOR_METAFORMAT_BINARY) ) + { + GNUNET_break_op (0); + break; + } + dlen = ntohl (ent.data_size); + plen = ntohl (ent.plugin_name_len); + mlen = ntohl (ent.mime_type_len); + if (dlen > left) + { + GNUNET_break_op (0); + break; + } + left -= dlen; + meta_data = &mdata[left]; + if ( (format == EXTRACTOR_METAFORMAT_UTF8) || + (format == EXTRACTOR_METAFORMAT_C_STRING) ) + { + if ( (dlen == 0) || + (mdata[left + dlen - 1] != '\0') ) + { + GNUNET_break_op (0); + break; + } + } + if (plen > left) + { + GNUNET_break_op (0); + break; + } + left -= plen; + if ( (plen > 0) && + (mdata[left + plen - 1] != '\0') ) + { + GNUNET_break_op (0); + break; + } + if (plen == 0) + plugin_name = NULL; + else + plugin_name = &mdata[left]; - if (md1->itemCount != md2->itemCount) - return GNUNET_NO; - for (i = 0; i < md1->itemCount; i++) - { - found = GNUNET_NO; - for (j = 0; j < md2->itemCount; j++) - if ((md1->items[i].type == md2->items[j].type) && - (0 == strcmp (md1->items[i].data, md2->items[j].data))) - { - found = GNUNET_YES; - break; - } - if (found == GNUNET_NO) - return GNUNET_NO; + if (mlen > left) + { + GNUNET_break_op (0); + break; + } + left -= mlen; + if ( (mlen > 0) && + (mdata[left + mlen - 1] != '\0') ) + { + GNUNET_break_op (0); + break; + } + if (mlen == 0) + mime_type = NULL; + else + mime_type = &mdata[left]; + GNUNET_CONTAINER_meta_data_insert (md, + plugin_name, + (enum EXTRACTOR_MetaType) ntohl (ent.type), + format, + mime_type, + meta_data, + dlen); } - return GNUNET_YES; + GNUNET_free_non_null (data); + return md; } |