diff options
author | grothoff <grothoff@140774ce-b5e7-0310-ab8b-a85725594a96> | 2012-01-28 22:42:15 +0000 |
---|---|---|
committer | grothoff <grothoff@140774ce-b5e7-0310-ab8b-a85725594a96> | 2012-01-28 22:42:15 +0000 |
commit | 4804078bd7eb1d2720289e311b9be439c81858a5 (patch) | |
tree | 3d3b583ffba1f91d7a610a1815ded00ed0e6811b | |
parent | 1dd3e0b8929282c6d710ebb3611e7c5076efa670 (diff) |
-cleaning up dirmetascan code, still failing for me
git-svn-id: https://gnunet.org/svn/gnunet@19502 140774ce-b5e7-0310-ab8b-a85725594a96
-rw-r--r-- | src/fs/Makefile.am | 1 | ||||
-rw-r--r-- | src/fs/fs_dirmetascan.c | 1495 | ||||
-rw-r--r-- | src/fs/fs_sharetree.c | 6 | ||||
-rw-r--r-- | src/fs/gnunet-publish.c | 96 |
4 files changed, 492 insertions, 1106 deletions
diff --git a/src/fs/Makefile.am b/src/fs/Makefile.am index c4dfec0cae..fe593b394f 100644 --- a/src/fs/Makefile.am +++ b/src/fs/Makefile.am @@ -36,6 +36,7 @@ libgnunetfs_la_SOURCES = \ fs_misc.c \ fs_namespace.c \ fs_search.c \ + fs_sharetree.c \ fs_tree.c fs_tree.h \ fs_unindex.c \ fs_uri.c diff --git a/src/fs/fs_dirmetascan.c b/src/fs/fs_dirmetascan.c index 11313d7503..4c995a72a6 100644 --- a/src/fs/fs_dirmetascan.c +++ b/src/fs/fs_dirmetascan.c @@ -18,862 +18,471 @@ Boston, MA 02111-1307, USA. */ +/** + * @file fs/fs_dirmetascan.c + * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem' + * from an on-disk directory for publishing + * @author LRN + * @author Christian Grothoff + */ #include "platform.h" #include "gnunet_fs_service.h" #include "gnunet_scheduler_lib.h" #include <pthread.h> + /** - * Entry for each unique keyword to track how often - * it occured. Contains the keyword and the counter. + * An opaque structure a pointer to which is returned to the + * caller to be used to control the scanner. */ -struct KeywordCounter +struct GNUNET_FS_DirScanner { /** - * Keyword that was found. - */ - const char *value; - - /** - * How many files have this keyword? - */ - unsigned int count; - - /** - * This is a doubly-linked list - */ - struct KeywordCounter *prev; - - /** - * This is a doubly-linked list + * A thread object for the scanner thread. */ - struct KeywordCounter *next; -}; +#if WINDOWS + HANDLE thread; +#else + pthread_t thread; +#endif -/** - * Aggregate information we keep for meta data in each directory. - */ -struct MetaCounter -{ /** - * The actual meta data. + * Expanded filename (as given by the scan initiator). + * The scanner thread stores a copy here, and frees it when it finishes. */ - const char *data; + char *filename_expanded; /** - * Number of bytes in 'data'. + * List of libextractor plugins to use for extracting. + * Initialized when the scan starts, removed when it finishes. */ - size_t data_size; - + struct EXTRACTOR_PluginList *plugins; + /** - * Name of the plugin that provided that piece of metadata + * A pipe transfer signals to the scanner. */ - const char *plugin_name; + struct GNUNET_DISK_PipeHandle *stop_pipe; /** - * Type of the data + * A pipe end to read signals from. */ - enum EXTRACTOR_MetaType type; + const struct GNUNET_DISK_FileHandle *stop_read; /** - * Format of the data + * A pipe end to read signals from. */ - enum EXTRACTOR_MetaFormat format; - + const struct GNUNET_DISK_FileHandle *stop_write; + /** - * MIME-type of the metadata itself + * The pipe that is used to read progress messages. Only closed + * after the scanner thread is finished. */ - const char *data_mime_type; + struct GNUNET_DISK_PipeHandle *progress_pipe; /** - * How many files have meta entries matching this value? - * (type and format do not have to match). + * The end of the pipe that is used to read progress messages. */ - unsigned int count; + const struct GNUNET_DISK_FileHandle *progress_read; /** - * This is a doubly-linked list + * Handle of the pipe end into which the progress messages are written + * The initiator MUST keep it alive until the scanner thread is finished. */ - struct MetaCounter *prev; - + const struct GNUNET_DISK_FileHandle *progress_write; + /** - * This is a doubly-linked list + * The function that will be called every time there's a progress + * message. */ - struct MetaCounter *next; -}; - -struct AddDirContext; - -/** - * A structure used to hold a pointer to the tree item that is being - * processed. - * Needed to avoid changing the context for every recursive call. - */ -struct AddDirStack -{ + GNUNET_FS_DirScannerProgressCallback progress_callback; + /** - * Context pointer + * A closure for progress_callback. */ - struct AddDirContext *adc; - + void *progress_callback_cls; + /** - * Parent directory + * A task for reading progress messages from the scanner. */ - struct GNUNET_FS_ShareTreeItem *parent; -}; + GNUNET_SCHEDULER_TaskIdentifier progress_read_task; -/** - * Execution context for 'add_dir' - * Owned by the initiator thread. - */ -struct AddDirContext -{ /** * After the scan is finished, it will contain a pointer to the * top-level directory entry in the directory tree built by the - * scanner. + * scanner. Must only be manipulated by the thread for the + * duration of the thread's runtime. */ struct GNUNET_FS_ShareTreeItem *toplevel; /** - * Expanded filename (as given by the scan initiator). - * The scanner thread stores a copy here, and frees it when it finishes. - */ - char *filename_expanded; - - /** - * A pipe end to read signals from. - * Owned by the initiator thread. - */ - const struct GNUNET_DISK_FileHandle *stop_read; - - /** * 1 if the scanner should stop, 0 otherwise. Set in response * to communication errors or when the initiator wants the scanning * process to stop. */ - char do_stop; - - /** - * Handle of the pipe end into which the progress messages are written - * The pipe is owned by the initiator thread, and there's no way to - * close this end without having access to the pipe, so it won't - * be closed by the scanner thread. - * The initiator MUST keep it alive until the scanner thread is finished. - */ - const struct GNUNET_DISK_FileHandle *progress_write; + int do_stop; - - /** - * List of libextractor plugins to use for extracting. - * Initialized when the scan starts, removed when it finishes. - */ - struct EXTRACTOR_PluginList *plugins; }; -/** - * An opaque structure a pointer to which is returned to the - * caller to be used to control the scanner. - */ -struct GNUNET_FS_DirScanner -{ - /** - * A pipe end to read signals from. - * Owned by the initiator thread. - */ - const struct GNUNET_DISK_FileHandle *stop_write; - - /** - * A pipe transfer signals to the scanner. - * Owned by the initiator thread. - */ - struct GNUNET_DISK_PipeHandle *stop_pipe; - - /** - * A thread object for the scanner thread. - * Owned by the initiator thread. - */ -#if WINDOWS - HANDLE thread; -#else - pthread_t thread; -#endif - - /** - * A task for reading progress messages from the scanner. - */ - GNUNET_SCHEDULER_TaskIdentifier progress_read_task; - - /** - * The end of the pipe that is used to read progress messages. - */ - const struct GNUNET_DISK_FileHandle *progress_read; - - /** - * The pipe that is used to read progress messages. - * Owned (along with both of its ends) by the initiator thread. - * Only closed after the scanner thread is finished. - */ - struct GNUNET_DISK_PipeHandle *progress_pipe; - - /** - * The function that will be called every time there's a progress - * message. - */ - GNUNET_FS_DirScannerProgressCallback progress_callback; - /** - * A closure for progress_callback. - */ - void *cls; - - /** - * A pointer to the context of the scanner. - * Owned by the initiator thread. - * Initiator thread shouldn't touch it until the scanner thread - * is finished. - */ - struct AddDirContext *adc; -}; /** - * A structure that forms a singly-linked list that serves as a stack - * for metadata-processing function. + * Abort the scan. + * + * @param ds directory scanner structure */ -struct ProcessMetadataStackItem +void +GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds) { - /** - * A pointer to metadata-processing context. - * The same in every stack item. - */ - struct GNUNET_FS_ProcessMetadataContext *ctx; - - /** - * This is a singly-linked list. A pointer to its end is kept, and - * this pointer is used to walk it backwards. - */ - struct ProcessMetadataStackItem *parent; + static char c = 1; - /** - * Map from the hash over the keyword to an 'struct KeywordCounter *' - * counter that says how often this keyword was - * encountered in the current directory. - */ - struct GNUNET_CONTAINER_MultiHashMap *keywordcounter; + /* signal shutdown to other thread */ + (void) GNUNET_DISK_file_write (ds->stop_write, &c, 1); + GNUNET_DISK_pipe_close_end (ds->stop_pipe, GNUNET_DISK_PIPE_END_WRITE); - /** - * Map from the hash over the metadata to an 'struct MetaCounter *' - * counter that says how often this metadata was - * encountered in the current directory. - */ - struct GNUNET_CONTAINER_MultiHashMap *metacounter; - - /** - * Number of files in the current directory. - */ - unsigned int dir_entry_count; - - /** - * Keywords to exclude from using for KSK since they'll be associated - * with the parent as well. NULL for nothing blocked. - */ - struct GNUNET_FS_Uri *exclude_ksk; + /* stop reading from progress */ + if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK) + { + GNUNET_SCHEDULER_cancel (ds->progress_read_task); + ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; + } + GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ); - /** - * A share tree item that is being processed. - */ - struct GNUNET_FS_ShareTreeItem *item; + /* wait for other thread to terminate */ +#if WINDOWS + WaitForSingleObject (ds->thread, INFINITE); + CloseHandle (ds->thread); +#else + pthread_join (ds->thread, NULL); + pthread_detach (ds->thread); +#endif - /** - * Set to GNUNET_YES to indicate that the directory pointer by 'item' - * was processed, and we should move on to the next. - * Otherwise the directory will be recursed into. - */ - int end_directory; + /* free resources */ + GNUNET_DISK_pipe_close (ds->stop_pipe); + GNUNET_DISK_pipe_close (ds->progress_pipe); + if (NULL != ds->toplevel) + GNUNET_FS_share_tree_free (ds->toplevel); + if (NULL != ds->plugins) + EXTRACTOR_plugin_remove_all (ds->plugins); + GNUNET_free (ds); +} -}; /** - * The structure to keep the state of metadata processing + * Obtain the result of the scan after the scan has signalled + * completion. Must not be called prior to completion. The 'ds' is + * freed as part of this call. + * + * @param ds directory scanner structure + * @return the results of the scan (a directory tree) */ -struct GNUNET_FS_ProcessMetadataContext +struct GNUNET_FS_ShareTreeItem * +GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds) { - /** - * The top of the stack. - */ - struct ProcessMetadataStackItem *stack; - - /** - * Callback to invoke when processing is finished - */ - GNUNET_SCHEDULER_Task cb; - - /** - * Closure for 'cb' - */ - void *cls; - - /** - * Toplevel directory item of the tree to process. - */ - struct GNUNET_FS_ShareTreeItem *toplevel; -}; + struct GNUNET_FS_ShareTreeItem *result; -/** - * Called every now and then by the scanner. - * Checks the synchronization privitive. - * Returns 1 if the scanner should stop, 0 otherwise. - */ -static int -should_stop (struct AddDirContext *adc) -{ - errno = 0; - char c; - if (GNUNET_DISK_file_read_non_blocking (adc->stop_read, &c, 1) == 1 - || errno != EAGAIN) - { - adc->do_stop = 1; - } - return adc->do_stop; + /* check that we're actually done */ + GNUNET_assert (GNUNET_SCHEDULER_NO_TASK == ds->progress_read_task); + /* preserve result */ + result = ds->toplevel; + ds->toplevel = NULL; + GNUNET_FS_directory_scan_abort (ds); + return result; } + /** - * Write progress message. - * Format is: - * "reason", "filename length", "filename", "directory flag" - * If filename is NULL, filename is not written, and its length - * is written as 0, and nothing else is written. It signals the initiator - * thread that the scanner is finished, and that it can now join its thread. + * Write 'size' bytes from 'buf' into 'out'. * - * Also checks if the initiator thread wants the scanner to stop, - * Returns 1 to stop scanning (if the signal was received, or - * if the pipe was broken somehow), 0 otherwise. + * @param in pipe to write to + * @param buf buffer with data to write + * @param size number of bytes to write + * @return GNUNET_OK on success, GNUNET_SYSERR on error */ static int -write_progress (struct AddDirContext *adc, const char *filename, - char is_directory, enum GNUNET_FS_DirScannerProgressUpdateReason reason) +write_all (const struct GNUNET_DISK_FileHandle *out, + const void *buf, + size_t size) { - size_t filename_len; + const char *cbuf = buf; + size_t total; ssize_t wr; - size_t total_write; - if ((adc->do_stop || should_stop (adc)) && reason != GNUNET_FS_DIRSCANNER_ASKED_TO_STOP - && reason != GNUNET_FS_DIRSCANNER_FINISHED) - return 1; - total_write = 0; - wr = 1; - while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (reason)) - { - wr = GNUNET_DISK_file_write_blocking (adc->progress_write, - &((char *)&reason)[total_write], sizeof (reason) - total_write); - if (wr > 0) - total_write += wr; - } - if (sizeof (reason) != total_write) - return adc->do_stop = 1; - if (filename) - filename_len = strlen (filename) + 1; - else - filename_len = 0; - total_write = 0; - wr = 1; - while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (size_t)) + + total = 0; + do { - wr = GNUNET_DISK_file_write_blocking (adc->progress_write, - &((char *)&filename_len)[total_write], sizeof (size_t) - total_write); + wr = GNUNET_DISK_file_write (out, + &cbuf[total], + size - total); if (wr > 0) - total_write += wr; - } - if (sizeof (size_t) != total_write) - return adc->do_stop = 1; - if (filename) - { - total_write = 0; - wr = 1; - while ((wr > 0 || errno == EAGAIN) && total_write < filename_len) - { - wr = GNUNET_DISK_file_write_blocking (adc->progress_write, - &((char *)filename)[total_write], filename_len - total_write); - if (wr > 0) - total_write += wr; - } - if (filename_len != total_write) - return adc->do_stop = 1; - total_write = 0; - wr = 1; - while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (char)) - { - wr = GNUNET_DISK_file_write_blocking (adc->progress_write, - &((char *)&is_directory)[total_write], sizeof (char) - total_write); - if (wr > 0) - total_write += wr; - } - if (sizeof (char) != total_write) - return adc->do_stop = 1; - } - return 0; + total += wr; + } while ( (wr > 0) && (total < size) ); + return (total == size) ? GNUNET_OK : GNUNET_SYSERR; } -/** - * Add the given keyword to the - * keyword statistics tracker. - * - * @param cls closure (user-defined) - * @param keyword the keyword to count - * @param is_mandatory ignored - * @return always GNUNET_OK - */ -static int -add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory) -{ - struct GNUNET_CONTAINER_MultiHashMap *mcm = cls; - struct KeywordCounter *cnt, *first_cnt; - GNUNET_HashCode hc; - size_t klen; - - klen = strlen (keyword) + 1; - GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); - /* Since the map might contain multiple values per keyword, we only - * store one value, and attach all other to it, forming a linked list. - * Somewhat easier than retrieving multiple items via callback. - */ - first_cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc); - for (cnt = first_cnt; cnt && strcmp (cnt->value, keyword) != 0; cnt = cnt->next); - if (cnt == NULL) - { - cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen); - cnt->value = (const char *) &cnt[1]; - memcpy (&cnt[1], keyword, klen); - if (first_cnt != NULL) - { - if (first_cnt->prev != NULL) - { - first_cnt->prev->next = cnt; - cnt->prev = first_cnt->prev; - } - first_cnt->prev = cnt; - cnt->next = first_cnt; - } - else - GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt, - GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE); - } - cnt->count++; - return GNUNET_OK; -} /** - * Type of a function that libextractor calls for each - * meta data item found. + * Write progress message. * - * @param cls the container multihashmap to update - * @param plugin_name name of the plugin that produced this value; - * special values can be used (i.e. '<zlib>' for zlib being - * used in the main libextractor library and yielding - * meta data). - * @param type libextractor-type describing the meta data - * @param format basic format information about data - * @param data_mime_type mime-type of data (not of the original file); - * can be NULL (if mime-type is not known) - * @param data actual meta-data found - * @param data_len number of bytes in data - * @return GNUNET_OK to continue extracting / iterating + * @param ds + * @param filename name of the file to transmit, never NULL + * @param is_directory GNUNET_YES for directory, GNUNET_NO for file, GNUNET_SYSERR for neither + * @param reason reason for the progress call + * @return GNUNET_SYSERR to stop scanning (the pipe was broken somehow) */ static int -add_to_meta_counter (void *cls, const char *plugin_name, - enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, - const char *data_mime_type, const char *data, size_t data_len) -{ - struct GNUNET_CONTAINER_MultiHashMap *map = cls; - GNUNET_HashCode key; - struct MetaCounter *cnt, *first_cnt; - - GNUNET_CRYPTO_hash (data, data_len, &key); - first_cnt = GNUNET_CONTAINER_multihashmap_get (map, &key); - for (cnt = first_cnt; cnt - && cnt->data_size != data_len - && memcmp (cnt->data, data, cnt->data_size) != 0; cnt = cnt->next); - if (cnt == NULL) - { - cnt = GNUNET_malloc (sizeof (struct MetaCounter)); - cnt->data = data; - cnt->data_size = data_len; - cnt->plugin_name = plugin_name; - cnt->type = type; - cnt->format = format; - cnt->data_mime_type = data_mime_type; - - if (first_cnt != NULL) - { - if (first_cnt->prev != NULL) - { - first_cnt->prev->next = cnt; - cnt->prev = first_cnt->prev; - } - first_cnt->prev = cnt; - cnt->next = first_cnt; - } - else - GNUNET_CONTAINER_multihashmap_put (map, &key, cnt, - GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE); - } - cnt->count++; - return 0; -} - -/** - * Allocates a struct GNUNET_FS_ShareTreeItem and adds it to its parent. - */ -static struct GNUNET_FS_ShareTreeItem * -make_item (struct GNUNET_FS_ShareTreeItem *parent) +write_progress (struct GNUNET_FS_DirScanner *ds, + const char *filename, + int is_directory, + enum GNUNET_FS_DirScannerProgressUpdateReason reason) { - struct GNUNET_FS_ShareTreeItem *item; - item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem)); - - item->parent = parent; - if (parent) - GNUNET_CONTAINER_DLL_insert (parent->children_head, parent->children_tail, - item); - return item; + size_t slen; + + slen = strlen (filename) + 1; + if ( (GNUNET_OK != + write_all (ds->progress_write, + &reason, + sizeof (reason))) || + (GNUNET_OK != + write_all (ds->progress_write, + &slen, + sizeof (slen))) || + (GNUNET_OK != + write_all (ds->progress_write, + filename, + slen)) || + (GNUNET_OK != + write_all (ds->progress_write, + &is_directory, + sizeof (is_directory))) ) + return GNUNET_SYSERR; + return GNUNET_OK; } -/** - * Extract metadata from a file and add it to the share tree - * - * @param ads context to modify - * @param filename name of the file to process - */ -static void -extract_file (struct AddDirStack *ads, const char *filename) -{ - struct GNUNET_FS_ShareTreeItem *item; - const char *short_fn; - - item = make_item (ads->parent); - - GNUNET_DISK_file_size (filename, &item->file_size, GNUNET_YES); - item->is_directory = GNUNET_NO; - - item->meta = GNUNET_CONTAINER_meta_data_create (); - GNUNET_FS_meta_data_extract_from_file (item->meta, filename, - ads->adc->plugins); - GNUNET_CONTAINER_meta_data_delete (item->meta, EXTRACTOR_METATYPE_FILENAME, - NULL, 0); - short_fn = GNUNET_STRINGS_get_short_name (filename); - - item->filename = GNUNET_strdup (filename); - item->short_filename = GNUNET_strdup (short_fn); - - GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>", - EXTRACTOR_METATYPE_FILENAME, - EXTRACTOR_METAFORMAT_UTF8, "text/plain", - short_fn, strlen (short_fn) + 1); - if (ads->parent == NULL) - { - /* we're finished with the scan, make sure caller gets the top-level - * directory pointer - */ - ads->adc->toplevel = item; - } -} /** - * Remove the keyword from the ksk URI. - * - * @param cls the ksk uri - * @param keyword the word to remove - * @param is_mandatory ignored - * @return always GNUNET_OK + * Called every now and then by the scanner thread to check + * if we're being aborted. + * + * @param ds scanner context + * @return GNUNET_OK to continue, GNUNET_SYSERR to stop */ static int -remove_keyword (void *cls, const char *keyword, int is_mandatory) +test_thread_stop (struct GNUNET_FS_DirScanner *ds) { - struct GNUNET_FS_Uri *ksk = cls; + char c; - GNUNET_FS_uri_ksk_remove_keyword (ksk, keyword); + if ( (GNUNET_DISK_file_read_non_blocking (ds->stop_read, &c, 1) == 1) || + (EAGAIN != errno) ) + return GNUNET_SYSERR; return GNUNET_OK; } + /** - * Remove keywords from current directory's children, if they are - * in the exluded keywords list of that directory. + * Function called to (recursively) add all of the files in the + * directory to the tree. Called by the directory scanner to initiate + * the scan. Does NOT yet add any metadata. * - * @param cls the ksk uri - * @param keyword the word to remove - * @param is_mandatory ignored - * @return always GNUNET_OK + * @param ds directory scanner context to use + * @param filename file or directory to scan + * @param dst where to store the resulting share tree item + * @return GNUNET_OK on success, GNUNET_SYSERR on error */ static int -remove_keywords (struct ProcessMetadataStackItem *stack, struct GNUNET_FS_ShareTreeItem *dir) -{ - struct GNUNET_FS_ShareTreeItem *item; +preprocess_file (struct GNUNET_FS_DirScanner *ds, + const char *filename, + struct GNUNET_FS_ShareTreeItem **dst); - for (item = dir->children_head; item; item = item->next) - { - if (stack->exclude_ksk != NULL) - GNUNET_FS_uri_ksk_get_keywords (stack->exclude_ksk, &remove_keyword, item->ksk_uri); - } - return GNUNET_OK; -} /** - * Context passed to 'migrate_and_drop'. + * Closure for the 'scan_callback' */ -struct KeywordProcessContext +struct RecursionContext { /** - * All the keywords we migrated to the parent. + * Global scanner context. */ - struct GNUNET_FS_Uri *ksk; - - /** - * How often does a keyword have to occur to be - * migrated to the parent? - */ - unsigned int threshold; -}; + struct GNUNET_FS_DirScanner *ds; -/** - * Context passed to 'migrate_and_drop'. - */ -struct MetaProcessContext -{ /** - * All the metadata we copy to the parent. + * Parent to add the files to. */ - struct GNUNET_CONTAINER_MetaData *meta; + struct GNUNET_FS_ShareTreeItem *parent; /** - * How often does a metadata have to occur to be - * migrated to the parent? + * Flag to set to GNUNET_YES on serious errors. */ - unsigned int threshold; + int stop; }; /** - * Move "frequent" keywords over to the - * target ksk uri, free the counters. + * Function called by the directory iterator to (recursively) add all + * of the files in the directory to the tree. Called by the directory + * scanner to initiate the scan. Does NOT yet add any metadata. * + * @param cls the 'struct RecursionContext' + * @param filename file or directory to scan + * @return GNUNET_OK on success, GNUNET_SYSERR on error */ static int -migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value) +scan_callback (void *cls, + const char *filename) { - struct KeywordProcessContext *kpc = cls; - struct KeywordCounter *counter = value; + struct RecursionContext *rc = cls; + struct GNUNET_FS_ShareTreeItem *chld; - if (counter->count >= kpc->threshold && counter->count > 1) + if (GNUNET_OK != + preprocess_file (rc->ds, + filename, + &chld)) { - GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO); + rc->stop = GNUNET_YES; + return GNUNET_SYSERR; } - GNUNET_free (counter); - return GNUNET_YES; + chld->parent = rc->parent; + GNUNET_CONTAINER_DLL_insert (rc->parent->children_head, + rc->parent->children_tail, + chld); + return GNUNET_OK; } -/** - * Copy "frequent" metadata items over to the - * target metadata container, free the counters. - * - */ -static int -migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value) -{ - struct MetaProcessContext *mpc = cls; - struct MetaCounter *counter = value; - if (counter->count >= mpc->threshold && counter->count > 1) - { - GNUNET_CONTAINER_meta_data_insert (mpc->meta, - counter->plugin_name, - counter->type, - counter->format, - counter->data_mime_type, counter->data, - counter->data_size); - } - GNUNET_free (counter); - return GNUNET_YES; -} /** - * Go over the collected keywords from all entries in the - * directory and push common keywords up one level (by - * adding it to the returned struct). Do the same for metadata. - * Destroys keywordcounter and metacoutner for current directory. + * Function called to (recursively) add all of the files in the + * directory to the tree. Called by the directory scanner to initiate + * the scan. Does NOT yet add any metadata. * - * @param adc collection of child meta data - * @param exclude_ksk pointer to where moveable keywords will be stored - * @param copy_meta pointer to where copyable metadata will be stored - */ -static void -process_keywords_and_metadata (struct ProcessMetadataStackItem *stack, - struct GNUNET_FS_Uri **exclude_ksk, - struct GNUNET_CONTAINER_MetaData **copy_meta) -{ - struct KeywordProcessContext kpc; - struct MetaProcessContext mpc; - struct GNUNET_CONTAINER_MetaData *tmp; - - /* Surprisingly, it's impossible to create a ksk with 0 keywords directly. - * But we can create one from an empty metadata set - */ - tmp = GNUNET_CONTAINER_meta_data_create (); - kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp); - GNUNET_CONTAINER_meta_data_destroy (tmp); - mpc.meta = GNUNET_CONTAINER_meta_data_create (); - - kpc.threshold = mpc.threshold = (stack->dir_entry_count + 1) / 2; /* 50% */ - - GNUNET_CONTAINER_multihashmap_iterate (stack->keywordcounter, - &migrate_and_drop, &kpc); - GNUNET_CONTAINER_multihashmap_iterate (stack->metacounter, - &migrate_and_drop_metadata, &mpc); - - GNUNET_CONTAINER_multihashmap_destroy (stack->keywordcounter); - GNUNET_CONTAINER_multihashmap_destroy (stack->metacounter); - *exclude_ksk = kpc.ksk; - *copy_meta = mpc.meta; -} - -/** - * Function called by the directory iterator to - * (recursively) add all of the files in the - * directory to the tree. - * Called by the directory scanner to initiate the - * scan. - * TODO: find a way to make it non-recursive. - * - * @param cls the 'struct AddDirStack *' we're in + * @param ds directory scanner context to use * @param filename file or directory to scan + * @param dst where to store the resulting share tree item + * @return GNUNET_OK on success, GNUNET_SYSERR on error */ static int -scan_directory (void *cls, const char *filename) +preprocess_file (struct GNUNET_FS_DirScanner *ds, + const char *filename, + struct GNUNET_FS_ShareTreeItem **dst) { - struct AddDirStack *ads = cls, recurse_ads; - struct AddDirContext *adc = ads->adc; - struct stat sbuf; struct GNUNET_FS_ShareTreeItem *item; - const char *short_fn; - int do_stop = 0; - - /* Wrap up fast */ - if (adc->do_stop) - return GNUNET_SYSERR; + struct stat sbuf; - /* If the file doesn't exist (or is not statable for any other reason, - * skip it, and report it. - */ if (0 != STAT (filename, &sbuf)) { - (void) write_progress (adc, filename, S_ISDIR (sbuf.st_mode), - GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST); + /* If the file doesn't exist (or is not stat-able for any other reason) + skip it (but report it), but do continue. */ + if (GNUNET_OK != + write_progress (ds, filename, GNUNET_SYSERR, + GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST)) + return GNUNET_SYSERR; return GNUNET_OK; } /* Report the progress */ - do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode), - GNUNET_FS_DIRSCANNER_NEW_FILE); - if (do_stop) - { - /* We were asked to stop, acknowledge that and return */ - (void) write_progress (adc, filename, S_ISDIR (sbuf.st_mode), - GNUNET_FS_DIRSCANNER_ASKED_TO_STOP); + if (GNUNET_OK != + write_progress (ds, + filename, + S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO, + GNUNET_FS_DIRSCANNER_FILE_START)) return GNUNET_SYSERR; - } - - if (!S_ISDIR (sbuf.st_mode)) - extract_file (ads, filename); - else - { - item = make_item (ads->parent); - item->meta = GNUNET_CONTAINER_meta_data_create (); - - item->is_directory = GNUNET_YES; - - recurse_ads.adc = adc; - recurse_ads.parent = item; - - /* recurse into directory */ - GNUNET_DISK_directory_scan (filename, &scan_directory, &recurse_ads); - - short_fn = GNUNET_STRINGS_get_short_name (filename); - - item->filename = GNUNET_strdup (filename); - item->short_filename = GNUNET_strdup (short_fn); - - if (ads->parent == NULL) + item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem)); + item->meta = GNUNET_CONTAINER_meta_data_create (); + item->filename = GNUNET_strdup (filename); + item->short_filename = GNUNET_strdup (GNUNET_STRINGS_get_short_name (filename)); + item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO; + item->file_size = (uint64_t) sbuf.st_size; + if (item->is_directory) + { + struct RecursionContext rc; + + rc.parent = item; + rc.ds = ds; + rc.stop = GNUNET_NO; + GNUNET_DISK_directory_scan (filename, + &scan_callback, + &rc); + if ( (rc.stop == GNUNET_YES) || + (GNUNET_OK != + test_thread_stop (ds)) ) { - /* we're finished with the scan, make sure caller gets the top-level - * directory pointer - */ - adc->toplevel = item; + GNUNET_FS_share_tree_free (item); + return GNUNET_SYSERR; } } - return GNUNET_OK; -} - -/** - * Signals the scanner to finish the scan as fast as possible. - * Does not block. - * Can close the pipe if asked to, but that is only used by the - * internal call to this function during cleanup. The client - * must understand the consequences of closing the pipe too early. - * - * @param ds directory scanner structure - * @param close_pipe GNUNET_YES to close - */ -void -GNUNET_FS_directory_scan_finish (struct GNUNET_FS_DirScanner *ds, - int close_pipe) -{ - char c = 1; - GNUNET_DISK_file_write (ds->stop_write, &c, 1); - - if (close_pipe) + /* Report the progress */ + if (GNUNET_OK != + write_progress (ds, + filename, + S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO, + GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED)) { - if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK) - { - GNUNET_SCHEDULER_cancel (ds->progress_read_task); - ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; - } - GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ); - ds->progress_read = NULL; + GNUNET_FS_share_tree_free (item); + return GNUNET_SYSERR; } + *dst = item; + return GNUNET_OK; } + /** - * Signals the scanner thread to finish (in case it isn't finishing - * already) and joins the scanner thread. Closes the pipes, frees the - * scanner contexts (both of them), returns the results of the scan. - * Results are valid (and have to be freed) even if the scanner had - * an error or was rushed to finish prematurely. - * Blocks until the scanner is finished. + * Extract metadata from files. * - * @param ds directory scanner structure - * @return the results of the scan (a directory tree) + * @param ds directory scanner context + * @param item entry we are processing + * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors */ -struct GNUNET_FS_ShareTreeItem * -GNUNET_FS_directory_scan_cleanup (struct GNUNET_FS_DirScanner *ds) -{ - struct GNUNET_FS_ShareTreeItem *result; - - GNUNET_FS_directory_scan_finish (ds, GNUNET_YES); -#if WINDOWS - WaitForSingleObject (ds->thread, INFINITE); - CloseHandle (ds->thread); -#else - pthread_join (ds->thread, NULL); - pthread_detach (ds->thread); -#endif +static int +extract_files (struct GNUNET_FS_DirScanner *ds, + struct GNUNET_FS_ShareTreeItem *item) +{ + if (item->is_directory) + { + /* for directories, we simply only descent, no extraction, no + progress reporting */ + struct GNUNET_FS_ShareTreeItem *pos; + + for (pos = item->children_head; NULL != pos; pos = pos->next) + if (GNUNET_OK != + extract_files (ds, pos)) + return GNUNET_SYSERR; + return GNUNET_OK; + } + + /* this is the expensive operation, *afterwards* we'll check for aborts */ + GNUNET_FS_meta_data_extract_from_file (item->meta, + item->filename, + ds->plugins); + + /* having full filenames is too dangerous; always make sure we clean them up */ + GNUNET_CONTAINER_meta_data_delete (item->meta, + EXTRACTOR_METATYPE_FILENAME, + NULL, 0); + GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>", + EXTRACTOR_METATYPE_FILENAME, + EXTRACTOR_METAFORMAT_UTF8, "text/plain", + item->short_filename, + strlen (item->short_filename) + 1); + /* check for abort */ + if (GNUNET_OK != + test_thread_stop (ds)) + return GNUNET_SYSERR; - GNUNET_DISK_pipe_close (ds->stop_pipe); - GNUNET_DISK_pipe_close (ds->progress_pipe); - result = ds->adc->toplevel; - GNUNET_free (ds->adc); - GNUNET_free (ds); - return result; + /* Report the progress */ + if (GNUNET_OK != + write_progress (ds, + item->filename, + GNUNET_NO, + GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED)) + return GNUNET_SYSERR; + return GNUNET_OK; } + /** * The function from which the scanner thread starts + * + * @param cls the 'struct GNUNET_FS_DirScanner' + * @return 0/NULL */ #if WINDOWS DWORD @@ -882,19 +491,58 @@ static void * #endif run_directory_scan_thread (void *cls) { - struct AddDirContext *adc = cls; - struct AddDirStack ads; - ads.adc = adc; - ads.parent = NULL; - scan_directory (&ads, adc->filename_expanded); - GNUNET_free (adc->filename_expanded); - if (adc->plugins != NULL) - EXTRACTOR_plugin_remove_all (adc->plugins); - /* Tell the initiator that we're finished, it can now join the thread */ - write_progress (adc, NULL, 0, GNUNET_FS_DIRSCANNER_FINISHED); + struct GNUNET_FS_DirScanner *ds = cls; + + if (GNUNET_OK != preprocess_file (ds, + ds->filename_expanded, + &ds->toplevel)) + { + (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR); + return 0; + } + if (GNUNET_OK != + write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_ALL_COUNTED)) + return 0; + if (GNUNET_OK != + extract_files (ds, ds->toplevel)) + { + (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR); + return 0; + } + (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_FINISHED); return 0; } + +/** + * Read 'size' bytes from 'in' into 'buf'. + * + * @param in pipe to read from + * @param buf buffer to read to + * @param size number of bytes to read + * @return GNUNET_OK on success, GNUNET_SYSERR on error + */ +static int +read_all (const struct GNUNET_DISK_FileHandle *in, + char *buf, + size_t size) +{ + size_t total; + ssize_t rd; + + total = 0; + do + { + rd = GNUNET_DISK_file_read (in, + &buf[total], + size - total); + if (rd > 0) + total += rd; + } while ( (rd > 0) && (total < size) ); + return (total == size) ? GNUNET_OK : GNUNET_SYSERR; +} + + /** * Called every time there is data to read from the scanner. * Calls the scanner progress handler. @@ -905,124 +553,69 @@ run_directory_scan_thread (void *cls) static void read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) { - struct GNUNET_FS_DirScanner *ds; - int end_it = 0; + struct GNUNET_FS_DirScanner *ds = cls; enum GNUNET_FS_DirScannerProgressUpdateReason reason; - ssize_t rd; - ssize_t total_read; - size_t filename_len; - char is_directory; + int is_directory; char *filename; - ds = cls; - ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; - - if (!(tc->reason & GNUNET_SCHEDULER_REASON_READ_READY)) + if (! (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY)) { - ds->progress_callback (ds->cls, ds, NULL, 0, GNUNET_FS_DIRSCANNER_SHUTDOWN); + ds->progress_read_task + = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, + ds->progress_read, &read_progress_task, + ds); return; } /* Read one message. If message is malformed or can't be read, end the scanner */ - total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &reason, sizeof (reason)); - while (rd > 0 && total_read < sizeof (reason)) - { - rd = GNUNET_DISK_file_read (ds->progress_read, - &((char *) &reason)[total_read], - sizeof (reason) - total_read); - if (rd > 0) - total_read += rd; - } - if (total_read != sizeof (reason) - || reason <= GNUNET_FS_DIRSCANNER_FIRST - || reason >= GNUNET_FS_DIRSCANNER_LAST) - { - end_it = 1; - reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; - } - - if (!end_it) - { - total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &filename_len, - sizeof (size_t)); - while (rd > 0 && total_read < sizeof (size_t)) - { - rd = GNUNET_DISK_file_read (ds->progress_read, - &((char *) &filename_len)[total_read], - sizeof (size_t) - total_read); - if (rd > 0) - total_read += rd; - } - if (rd != sizeof (size_t)) - { - end_it = 1; - reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; - } - } - if (!end_it) - { - if (filename_len == 0) - end_it = 1; - else if (filename_len > PATH_MAX) - { - end_it = 1; - reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; - } - } - if (!end_it) - { - filename = GNUNET_malloc (filename_len); - total_read = rd = GNUNET_DISK_file_read (ds->progress_read, filename, - filename_len); - while (rd > 0 && total_read < filename_len) - { - rd = GNUNET_DISK_file_read (ds->progress_read, &filename[total_read], - filename_len - total_read); - if (rd > 0) - total_read += rd; - } - if (rd != filename_len) - { - GNUNET_free (filename); - reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; - end_it = 1; - } - } - if (!end_it && filename_len > 0) - { - total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &is_directory, - sizeof (char)); - while (rd > 0 && total_read < sizeof (char)) - { - rd = GNUNET_DISK_file_read (ds->progress_read, &(&is_directory)[total_read], - sizeof (char) - total_read); - if (rd > 0) - total_read += rd; - } - if (rd != sizeof (char)) - { - GNUNET_free (filename); - reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; - end_it = 1; - } - } - if (!end_it) - { - end_it = ds->progress_callback (ds->cls, ds, (const char *) filename, is_directory, reason); - GNUNET_free (filename); - if (!end_it) - { - ds->progress_read_task = GNUNET_SCHEDULER_add_read_file ( - GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task, - cls); - } - } - else - { - ds->progress_callback (ds->cls, ds, NULL, 0, reason); + filename = NULL; + if ( (GNUNET_OK != + read_all (ds->progress_read, + (char*) &reason, + sizeof (reason))) || + (reason < GNUNET_FS_DIRSCANNER_FILE_START) || + (reason > GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) || + (GNUNET_OK != + read_all (ds->progress_read, + (char*) &filename_len, + sizeof (size_t))) || + (filename_len == 0) || + (filename_len > PATH_MAX) || + (GNUNET_OK != + read_all (ds->progress_read, + filename = GNUNET_malloc (filename_len), + filename_len)) || + (filename[filename_len-1] != '\0') || + (GNUNET_OK != + read_all (ds->progress_read, + (char*) &is_directory, + sizeof (is_directory))) ) + { + /* IPC error, complain, signal client and stop reading + from the pipe */ + GNUNET_break (0); + ds->progress_callback (ds->progress_callback_cls, ds, + NULL, GNUNET_SYSERR, + GNUNET_FS_DIRSCANNER_INTERNAL_ERROR); + GNUNET_free_non_null (filename); + return; } + /* schedule task to keep reading (done here in case client calls + abort or something similar) */ + ds->progress_read_task + = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, + ds->progress_read, + &read_progress_task, ds); + + /* read successfully, notify client about progress */ + ds->progress_callback (ds->progress_callback_cls, + ds, + filename, + is_directory, + reason); + GNUNET_free (filename); } @@ -1033,275 +626,89 @@ read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) * @param GNUNET_YES to not to run libextractor on files (only build a tree) * @param ex if not NULL, must be a list of extra plugins for extractor * @param cb the callback to call when there are scanning progress messages - * @param cls closure for 'cb' + * @param cb_cls closure for 'cb' * @return directory scanner object to be used for controlling the scanner */ struct GNUNET_FS_DirScanner * GNUNET_FS_directory_scan_start (const char *filename, - int disable_extractor, const char *ex, - GNUNET_FS_DirScannerProgressCallback cb, void *cls) + int disable_extractor, const char *ex, + GNUNET_FS_DirScannerProgressCallback cb, + void *cb_cls) { struct stat sbuf; - struct AddDirContext *adc; char *filename_expanded; struct GNUNET_FS_DirScanner *ds; struct GNUNET_DISK_PipeHandle *progress_pipe; + struct GNUNET_DISK_PipeHandle *stop_pipe; int ok; - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "Starting to scan directory `%s'\n", - filename); if (0 != STAT (filename, &sbuf)) return NULL; - - /* scan_directory() is guaranteed to be given expanded filenames, - * so expand we will! - */ filename_expanded = GNUNET_STRINGS_filename_expand (filename); - if (filename_expanded == NULL) + if (NULL == filename_expanded) return NULL; - + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Starting to scan directory `%s'\n", + filename_expanded); progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); - if (progress_pipe == NULL) + if (NULL == progress_pipe) { GNUNET_free (filename_expanded); return NULL; } - - adc = GNUNET_malloc (sizeof (struct AddDirContext)); - - ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner)); - - ds->adc = adc; - - ds->stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); - if (ds->stop_pipe == NULL) + stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); + if (NULL == stop_pipe) { - GNUNET_free (adc); - GNUNET_free (ds); - GNUNET_free (filename_expanded); GNUNET_DISK_pipe_close (progress_pipe); + GNUNET_free (filename_expanded); return NULL; } + + ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner)); + ds->progress_callback = cb; + ds->progress_callback_cls = cb_cls; + ds->stop_pipe = stop_pipe; ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe, - GNUNET_DISK_PIPE_END_WRITE); - adc->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe, - GNUNET_DISK_PIPE_END_READ); - - adc->plugins = NULL; - if (!disable_extractor) + GNUNET_DISK_PIPE_END_WRITE); + ds->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe, + GNUNET_DISK_PIPE_END_READ); + ds->progress_pipe = progress_pipe; + ds->progress_write = GNUNET_DISK_pipe_handle (progress_pipe, + GNUNET_DISK_PIPE_END_WRITE); + ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe, + GNUNET_DISK_PIPE_END_READ); + ds->filename_expanded = filename_expanded; + if (! disable_extractor) { - adc->plugins = EXTRACTOR_plugin_add_defaults ( - EXTRACTOR_OPTION_DEFAULT_POLICY); - if (ex && strlen (ex) > 0) - adc->plugins = EXTRACTOR_plugin_add_config (adc->plugins, ex, - EXTRACTOR_OPTION_DEFAULT_POLICY); + ds->plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY); + if ( (NULL != ex) && strlen (ex) > 0) + ds->plugins = EXTRACTOR_plugin_add_config (ds->plugins, ex, + EXTRACTOR_OPTION_DEFAULT_POLICY); } - - adc->filename_expanded = filename_expanded; - adc->progress_write = GNUNET_DISK_pipe_handle (progress_pipe, - GNUNET_DISK_PIPE_END_WRITE); - - - ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe, - GNUNET_DISK_PIPE_END_READ); - - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "Creating thread to scan directory `%s'\n", - filename); - #if WINDOWS ds->thread = CreateThread (NULL, 0, - (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, (LPVOID) adc, - 0, NULL); - ok = ds->thread != NULL; + (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, + (LPVOID) ds, 0, NULL); + ok = (ds->thread != NULL); #else - ok = !pthread_create (&ds->thread, NULL, &run_directory_scan_thread, - (void *) adc); + ok = (0 == pthread_create (&ds->thread, NULL, + &run_directory_scan_thread, ds)); #endif if (!ok) { - GNUNET_free (adc); + EXTRACTOR_plugin_remove_all (ds->plugins); GNUNET_free (filename_expanded); + GNUNET_DISK_pipe_close (stop_pipe); GNUNET_DISK_pipe_close (progress_pipe); GNUNET_free (ds); return NULL; } - - ds->progress_callback = cb; - ds->cls = cls; - ds->adc = adc; - ds->progress_pipe = progress_pipe; - - ds->progress_read_task = GNUNET_SCHEDULER_add_read_file ( - GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task, - ds); - + ds->progress_read_task + = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, + ds->progress_read, + &read_progress_task, ds); return ds; } -/** - * Task that post-processes the share item tree. - * This processing has to be done in the main thread, because - * it requires access to libgcrypt's hashing functions, and - * libgcrypt is not thread-safe without some special magic. - * - * @param cls top of the stack - * @param tc task context - */ -static void -trim_share_tree_task (void *cls, - const struct GNUNET_SCHEDULER_TaskContext *tc) -{ - struct ProcessMetadataStackItem *stack = cls; - struct ProcessMetadataStackItem *next = stack; - /* FIXME: figure out what to do when tc says we're shutting down */ - - /* item == NULL means that we've just finished going over the children of - * current directory. - */ - if (stack->item == NULL) - { - if (stack->parent->item != NULL) - { - /* end of a directory */ - struct GNUNET_FS_Uri *ksk; - - /* use keyword and metadata counters to create lists of keywords to move - * and metadata to copy. - */ - process_keywords_and_metadata (stack, &stack->parent->exclude_ksk, &stack->parent->item->meta); - - /* create keywords from metadata (copies all text-metadata as keywords, - * AND parses the directory name we've just added, producing even more - * keywords. - * then merge these keywords with the ones moved from children. - */ - ksk = GNUNET_FS_uri_ksk_create_from_meta_data (stack->parent->item->meta); - stack->parent->item->ksk_uri = GNUNET_FS_uri_ksk_merge (ksk, stack->parent->exclude_ksk); - GNUNET_FS_uri_destroy (ksk); - - /* remove moved keywords from children (complete the move) */ - remove_keywords (stack->parent, stack->parent->item); - GNUNET_FS_uri_destroy (stack->parent->exclude_ksk); - - /* go up the stack */ - next = stack->parent; - GNUNET_free (stack); - next->end_directory = GNUNET_YES; - } - else - { - /* we've just finished processing the toplevel directory */ - struct GNUNET_FS_ProcessMetadataContext *ctx = stack->ctx; - next = NULL; - GNUNET_SCHEDULER_add_continuation (ctx->cb, ctx->cls, - GNUNET_SCHEDULER_REASON_PREREQ_DONE); - GNUNET_free (stack->parent); - GNUNET_free (stack); - GNUNET_free (ctx); - } - } - else if (stack->item->is_directory - && !stack->end_directory - && stack->item->children_head != NULL) - { - /* recurse into subdirectory */ - next = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); - next->ctx = stack->ctx; - next->item = stack->item->children_head; - next->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); - next->metacounter = GNUNET_CONTAINER_multihashmap_create (1024); - next->dir_entry_count = 0; - next->parent = stack; - } - else - { - /* process a child entry (a file or a directory) and move to the next one*/ - if (stack->item->is_directory) - stack->end_directory = GNUNET_NO; - if (stack->ctx->toplevel->is_directory) - { - stack->dir_entry_count++; - GNUNET_CONTAINER_meta_data_iterate (stack->item->meta, &add_to_meta_counter, stack->metacounter); - - if (stack->item->is_directory) - { - char *user = getenv ("USER"); - if ((user == NULL) || (0 != strncasecmp (user, stack->item->short_filename, strlen(user)))) - { - /* only use filename if it doesn't match $USER */ - GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>", - EXTRACTOR_METATYPE_FILENAME, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", stack->item->short_filename, - strlen (stack->item->short_filename) + 1); - GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>", - EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", stack->item->short_filename, - strlen (stack->item->short_filename) + 1); - } - } - } - stack->item->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (stack->item->meta); - if (stack->ctx->toplevel->is_directory) - { - GNUNET_FS_uri_ksk_get_keywords (stack->item->ksk_uri, &add_to_keyword_counter, stack->keywordcounter); - } - stack->item = stack->item->next; - } - /* Call this task again later, if there are more entries to process */ - if (next) - GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, next, - GNUNET_SCHEDULER_REASON_PREREQ_DONE); -} - -/** - * Process a share item tree, moving frequent keywords up and - * copying frequent metadata up. - * - * @param toplevel toplevel directory in the tree, returned by the scanner - * @param cb called after processing is done - * @param cls closure for 'cb' - */ -struct GNUNET_FS_ProcessMetadataContext * -GNUNET_FS_trim_share_tree (struct GNUNET_FS_ShareTreeItem *toplevel, - GNUNET_SCHEDULER_Task cb, void *cls) -{ - struct GNUNET_FS_ProcessMetadataContext *ret; - - if (toplevel == NULL) - { - struct GNUNET_SCHEDULER_TaskContext tc; - tc.reason = GNUNET_SCHEDULER_REASON_PREREQ_DONE; - cb (cls, &tc); - return NULL; - } - - ret = GNUNET_malloc (sizeof (struct GNUNET_FS_ProcessMetadataContext)); - ret->toplevel = toplevel; - ret->stack = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); - ret->stack->ctx = ret; - ret->stack->item = toplevel; - if (ret->stack->ctx->toplevel->is_directory) - { - ret->stack->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); - ret->stack->metacounter = GNUNET_CONTAINER_multihashmap_create (1024); - } - - ret->stack->dir_entry_count = 0; - ret->stack->end_directory = GNUNET_NO; - - /* dummy stack entry that tells us we're at the top of the stack */ - ret->stack->parent = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); - ret->stack->parent->ctx = ret; - - ret->cb = cb; - ret->cls = cls; - - GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, ret->stack, - GNUNET_SCHEDULER_REASON_PREREQ_DONE); - return ret; -} +/* end of fs_dirmetascan.c */ diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c index 396415bc6a..6c9642b9bd 100644 --- a/src/fs/fs_sharetree.c +++ b/src/fs/fs_sharetree.c @@ -421,8 +421,10 @@ GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel) GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head, toplevel->parent->children_tail, toplevel); - GNUNET_CONTAINER_meta_data_destroy (toplevel->meta); - GNUNET_FS_uri_destroy (toplevel->ksk_uri); + if (NULL != toplevel->meta) + GNUNET_CONTAINER_meta_data_destroy (toplevel->meta); + if (NULL != toplevel->ksk_uri) + GNUNET_FS_uri_destroy (toplevel->ksk_uri); GNUNET_free_non_null (toplevel->filename); GNUNET_free_non_null (toplevel->short_filename); GNUNET_free (toplevel); diff --git a/src/fs/gnunet-publish.c b/src/fs/gnunet-publish.c index 98f39b821b..33cba499eb 100644 --- a/src/fs/gnunet-publish.c +++ b/src/fs/gnunet-publish.c @@ -68,12 +68,8 @@ static GNUNET_SCHEDULER_TaskIdentifier kill_task; static struct GNUNET_FS_DirScanner *ds; -static struct GNUNET_FS_ShareTreeItem * directory_scan_intermediary_result; - static struct GNUNET_FS_ShareTreeItem * directory_scan_result; -static struct GNUNET_FS_ProcessMetadataContext *pmc; - static struct GNUNET_FS_Namespace *namespace; /** @@ -378,21 +374,17 @@ get_file_information (struct GNUNET_FS_ShareTreeItem *item) item->ksk_uri, item->meta, !do_insert, &bo); } - GNUNET_CONTAINER_meta_data_destroy (item->meta); - GNUNET_FS_uri_destroy (item->ksk_uri); - GNUNET_free (item->short_filename); - GNUNET_free (item->filename); - GNUNET_free (item); return fi; } + static void -directory_trim_complete (void *cls, - const struct GNUNET_SCHEDULER_TaskContext *tc) +directory_trim_complete () { struct GNUNET_FS_FileInformation *fi; - directory_scan_result = directory_scan_intermediary_result; + fi = get_file_information (directory_scan_result); + GNUNET_FS_share_tree_free (directory_scan_result); directory_scan_result = NULL; if (fi == NULL) { @@ -425,7 +417,8 @@ directory_trim_complete (void *cls, } } -static int + +static void directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds, const char *filename, int is_directory, @@ -433,64 +426,47 @@ directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds, { switch (reason) { - case GNUNET_FS_DIRSCANNER_NEW_FILE: - if (filename != NULL) - { - if (is_directory) - FPRINTF (stdout, _("Scanning directory `%s'.\n"), filename); - else - FPRINTF (stdout, _("Scanning file `%s'.\n"), filename); - } + case GNUNET_FS_DIRSCANNER_FILE_START: + if (is_directory) + FPRINTF (stdout, _("Scanning directory `%s'.\n"), filename); + else + FPRINTF (stdout, _("Scanning file `%s'.\n"), filename); break; - case GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST: - if (filename != NULL) - { - FPRINTF (stdout, - _("Failed to scan `%s', because it does not exist.\n"), - filename); - } + case GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED: + if (is_directory) + FPRINTF (stdout, _("Done scanning directory `%s'.\n"), filename); break; - case GNUNET_FS_DIRSCANNER_ASKED_TO_STOP: - if (filename != NULL) - { - FPRINTF (stdout, - _("Scanner was about to scan `%s', but is now stopping.\n"), - filename); - } - else - FPRINTF (stdout, "%s", _("Scanner is stopping.\n")); + case GNUNET_FS_DIRSCANNER_ALL_COUNTED: + FPRINTF (stdout, "%s", _("Preprocessing complete.\n")); + break; + case GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED: + FPRINTF (stdout, _("Extracting meta data from file `%s' complete.\n"), filename); break; - case GNUNET_FS_DIRSCANNER_SHUTDOWN: - FPRINTF (stdout, "%s", _("Client is shutting down.\n")); + case GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST: + FPRINTF (stdout, + _("There was trouble processing file `%s', skipping it.\n"), + filename); break; case GNUNET_FS_DIRSCANNER_FINISHED: FPRINTF (stdout, "%s", _("Scanner has finished.\n")); + directory_scan_result = GNUNET_FS_directory_scan_get_result (ds); + ds = NULL; + GNUNET_FS_share_tree_trim (directory_scan_result); + directory_trim_complete (); break; - case GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR: - FPRINTF (stdout, "%s", - _("There was a failure communicating with the scanner.\n")); + case GNUNET_FS_DIRSCANNER_INTERNAL_ERROR: + FPRINTF (stdout, "%s", _("Internal error scanning directory.\n")); + GNUNET_FS_directory_scan_abort (ds); + ds = NULL; + if (namespace != NULL) + GNUNET_FS_namespace_delete (namespace, GNUNET_NO); + GNUNET_FS_stop (ctx); + ret = 1; break; default: - FPRINTF (stdout, _("Got unknown scanner update with filename `%s'.\n"), - filename); + GNUNET_assert (0); break; } - if ((filename == NULL && GNUNET_FS_DIRSCANNER_FINISHED) - || reason == GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR - || reason == GNUNET_FS_DIRSCANNER_SHUTDOWN) - { - /* Any of this causes us to try to clean up the scanner */ - directory_scan_intermediary_result = GNUNET_FS_directory_scan_cleanup (ds); - pmc = GNUNET_FS_trim_share_tree (directory_scan_intermediary_result, - &directory_trim_complete, NULL); - - ds = NULL; - /* FIXME: change the tree processor to be able to free untrimmed trees - * right here instead of waiting for trimming to complete, if we need to - * cancel everything. - */ - } - return 0; } |