aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author grothoff <grothoff@140774ce-b5e7-0310-ab8b-a85725594a96> 2012-01-28 22:42:15 +0000
committer grothoff <grothoff@140774ce-b5e7-0310-ab8b-a85725594a96> 2012-01-28 22:42:15 +0000
commit 4804078bd7eb1d2720289e311b9be439c81858a5 (patch)
tree 3d3b583ffba1f91d7a610a1815ded00ed0e6811b
parent 1dd3e0b8929282c6d710ebb3611e7c5076efa670 (diff)
-cleaning up dirmetascan code, still failing for me
git-svn-id: https://gnunet.org/svn/gnunet@19502 140774ce-b5e7-0310-ab8b-a85725594a96
-rw-r--r-- src/fs/Makefile.am 1
-rw-r--r-- src/fs/fs_dirmetascan.c 1495
-rw-r--r-- src/fs/fs_sharetree.c 6
-rw-r--r-- src/fs/gnunet-publish.c 96
4 files changed, 492 insertions, 1106 deletions
diff --git a/src/fs/Makefile.am b/src/fs/Makefile.am
index c4dfec0cae..fe593b394f 100644
--- a/src/fs/Makefile.am
+++ b/src/fs/Makefile.am
@@ -36,6 +36,7 @@ libgnunetfs_la_SOURCES = \
fs_misc.c \
fs_namespace.c \
fs_search.c \
+ fs_sharetree.c \
fs_tree.c fs_tree.h \
fs_unindex.c \
fs_uri.c
diff --git a/src/fs/fs_dirmetascan.c b/src/fs/fs_dirmetascan.c
index 11313d7503..4c995a72a6 100644
--- a/src/fs/fs_dirmetascan.c
+++ b/src/fs/fs_dirmetascan.c
@@ -18,862 +18,471 @@
Boston, MA 02111-1307, USA.
*/
+/**
+ * @file fs/fs_dirmetascan.c
+ * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem'
+ * from an on-disk directory for publishing
+ * @author LRN
+ * @author Christian Grothoff
+ */
#include "platform.h"
#include "gnunet_fs_service.h"
#include "gnunet_scheduler_lib.h"
#include <pthread.h>
+
/**
- * Entry for each unique keyword to track how often
- * it occured. Contains the keyword and the counter.
+ * An opaque structure a pointer to which is returned to the
+ * caller to be used to control the scanner.
*/
-struct KeywordCounter
+struct GNUNET_FS_DirScanner
{
/**
- * Keyword that was found.
- */
- const char *value;
-
- /**
- * How many files have this keyword?
- */
- unsigned int count;
-
- /**
- * This is a doubly-linked list
- */
- struct KeywordCounter *prev;
-
- /**
- * This is a doubly-linked list
+ * A thread object for the scanner thread.
*/
- struct KeywordCounter *next;
-};
+#if WINDOWS
+ HANDLE thread;
+#else
+ pthread_t thread;
+#endif
-/**
- * Aggregate information we keep for meta data in each directory.
- */
-struct MetaCounter
-{
/**
- * The actual meta data.
+ * Expanded filename (as given by the scan initiator).
+ * The scanner thread stores a copy here, and frees it when it finishes.
*/
- const char *data;
+ char *filename_expanded;
/**
- * Number of bytes in 'data'.
+ * List of libextractor plugins to use for extracting.
+ * Initialized when the scan starts, removed when it finishes.
*/
- size_t data_size;
-
+ struct EXTRACTOR_PluginList *plugins;
+
/**
- * Name of the plugin that provided that piece of metadata
+ * A pipe to transfer signals to the scanner.
*/
- const char *plugin_name;
+ struct GNUNET_DISK_PipeHandle *stop_pipe;
/**
- * Type of the data
+ * A pipe end to read signals from.
*/
- enum EXTRACTOR_MetaType type;
+ const struct GNUNET_DISK_FileHandle *stop_read;
/**
- * Format of the data
+ * A pipe end to write signals to.
*/
- enum EXTRACTOR_MetaFormat format;
-
+ const struct GNUNET_DISK_FileHandle *stop_write;
+
/**
- * MIME-type of the metadata itself
+ * The pipe that is used to read progress messages. Only closed
+ * after the scanner thread is finished.
*/
- const char *data_mime_type;
+ struct GNUNET_DISK_PipeHandle *progress_pipe;
/**
- * How many files have meta entries matching this value?
- * (type and format do not have to match).
+ * The end of the pipe that is used to read progress messages.
*/
- unsigned int count;
+ const struct GNUNET_DISK_FileHandle *progress_read;
/**
- * This is a doubly-linked list
+ * Handle of the pipe end into which the progress messages are written.
+ * The initiator MUST keep it alive until the scanner thread is finished.
*/
- struct MetaCounter *prev;
-
+ const struct GNUNET_DISK_FileHandle *progress_write;
+
/**
- * This is a doubly-linked list
+ * The function that will be called every time there's a progress
+ * message.
*/
- struct MetaCounter *next;
-};
-
-struct AddDirContext;
-
-/**
- * A structure used to hold a pointer to the tree item that is being
- * processed.
- * Needed to avoid changing the context for every recursive call.
- */
-struct AddDirStack
-{
+ GNUNET_FS_DirScannerProgressCallback progress_callback;
+
/**
- * Context pointer
+ * A closure for progress_callback.
*/
- struct AddDirContext *adc;
-
+ void *progress_callback_cls;
+
/**
- * Parent directory
+ * A task for reading progress messages from the scanner.
*/
- struct GNUNET_FS_ShareTreeItem *parent;
-};
+ GNUNET_SCHEDULER_TaskIdentifier progress_read_task;
-/**
- * Execution context for 'add_dir'
- * Owned by the initiator thread.
- */
-struct AddDirContext
-{
/**
* After the scan is finished, it will contain a pointer to the
* top-level directory entry in the directory tree built by the
- * scanner.
+ * scanner. Must only be manipulated by the thread for the
+ * duration of the thread's runtime.
*/
struct GNUNET_FS_ShareTreeItem *toplevel;
/**
- * Expanded filename (as given by the scan initiator).
- * The scanner thread stores a copy here, and frees it when it finishes.
- */
- char *filename_expanded;
-
- /**
- * A pipe end to read signals from.
- * Owned by the initiator thread.
- */
- const struct GNUNET_DISK_FileHandle *stop_read;
-
- /**
* 1 if the scanner should stop, 0 otherwise. Set in response
* to communication errors or when the initiator wants the scanning
* process to stop.
*/
- char do_stop;
-
- /**
- * Handle of the pipe end into which the progress messages are written
- * The pipe is owned by the initiator thread, and there's no way to
- * close this end without having access to the pipe, so it won't
- * be closed by the scanner thread.
- * The initiator MUST keep it alive until the scanner thread is finished.
- */
- const struct GNUNET_DISK_FileHandle *progress_write;
+ int do_stop;
-
- /**
- * List of libextractor plugins to use for extracting.
- * Initialized when the scan starts, removed when it finishes.
- */
- struct EXTRACTOR_PluginList *plugins;
};
-/**
- * An opaque structure a pointer to which is returned to the
- * caller to be used to control the scanner.
- */
-struct GNUNET_FS_DirScanner
-{
- /**
- * A pipe end to read signals from.
- * Owned by the initiator thread.
- */
- const struct GNUNET_DISK_FileHandle *stop_write;
-
- /**
- * A pipe transfer signals to the scanner.
- * Owned by the initiator thread.
- */
- struct GNUNET_DISK_PipeHandle *stop_pipe;
-
- /**
- * A thread object for the scanner thread.
- * Owned by the initiator thread.
- */
-#if WINDOWS
- HANDLE thread;
-#else
- pthread_t thread;
-#endif
-
- /**
- * A task for reading progress messages from the scanner.
- */
- GNUNET_SCHEDULER_TaskIdentifier progress_read_task;
-
- /**
- * The end of the pipe that is used to read progress messages.
- */
- const struct GNUNET_DISK_FileHandle *progress_read;
-
- /**
- * The pipe that is used to read progress messages.
- * Owned (along with both of its ends) by the initiator thread.
- * Only closed after the scanner thread is finished.
- */
- struct GNUNET_DISK_PipeHandle *progress_pipe;
-
- /**
- * The function that will be called every time there's a progress
- * message.
- */
- GNUNET_FS_DirScannerProgressCallback progress_callback;
- /**
- * A closure for progress_callback.
- */
- void *cls;
-
- /**
- * A pointer to the context of the scanner.
- * Owned by the initiator thread.
- * Initiator thread shouldn't touch it until the scanner thread
- * is finished.
- */
- struct AddDirContext *adc;
-};
/**
- * A structure that forms a singly-linked list that serves as a stack
- * for metadata-processing function.
+ * Abort the scan.
+ *
+ * @param ds directory scanner structure
*/
-struct ProcessMetadataStackItem
+void
+GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds)
{
- /**
- * A pointer to metadata-processing context.
- * The same in every stack item.
- */
- struct GNUNET_FS_ProcessMetadataContext *ctx;
-
- /**
- * This is a singly-linked list. A pointer to its end is kept, and
- * this pointer is used to walk it backwards.
- */
- struct ProcessMetadataStackItem *parent;
+ static char c = 1;
- /**
- * Map from the hash over the keyword to an 'struct KeywordCounter *'
- * counter that says how often this keyword was
- * encountered in the current directory.
- */
- struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
+ /* signal shutdown to other thread */
+ (void) GNUNET_DISK_file_write (ds->stop_write, &c, 1);
+ GNUNET_DISK_pipe_close_end (ds->stop_pipe, GNUNET_DISK_PIPE_END_WRITE);
- /**
- * Map from the hash over the metadata to an 'struct MetaCounter *'
- * counter that says how often this metadata was
- * encountered in the current directory.
- */
- struct GNUNET_CONTAINER_MultiHashMap *metacounter;
-
- /**
- * Number of files in the current directory.
- */
- unsigned int dir_entry_count;
-
- /**
- * Keywords to exclude from using for KSK since they'll be associated
- * with the parent as well. NULL for nothing blocked.
- */
- struct GNUNET_FS_Uri *exclude_ksk;
+ /* stop reading from progress */
+ if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK)
+ {
+ GNUNET_SCHEDULER_cancel (ds->progress_read_task);
+ ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
+ }
+ GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ);
- /**
- * A share tree item that is being processed.
- */
- struct GNUNET_FS_ShareTreeItem *item;
+ /* wait for other thread to terminate */
+#if WINDOWS
+ WaitForSingleObject (ds->thread, INFINITE);
+ CloseHandle (ds->thread);
+#else
+ pthread_join (ds->thread, NULL);
+ pthread_detach (ds->thread);
+#endif
- /**
- * Set to GNUNET_YES to indicate that the directory pointer by 'item'
- * was processed, and we should move on to the next.
- * Otherwise the directory will be recursed into.
- */
- int end_directory;
+ /* free resources */
+ GNUNET_DISK_pipe_close (ds->stop_pipe);
+ GNUNET_DISK_pipe_close (ds->progress_pipe);
+ if (NULL != ds->toplevel)
+ GNUNET_FS_share_tree_free (ds->toplevel);
+ if (NULL != ds->plugins)
+ EXTRACTOR_plugin_remove_all (ds->plugins);
+ GNUNET_free (ds);
+}
-};
/**
- * The structure to keep the state of metadata processing
+ * Obtain the result of the scan after the scan has signalled
+ * completion. Must not be called prior to completion. The 'ds' is
+ * freed as part of this call.
+ *
+ * @param ds directory scanner structure
+ * @return the results of the scan (a directory tree)
*/
-struct GNUNET_FS_ProcessMetadataContext
+struct GNUNET_FS_ShareTreeItem *
+GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds)
{
- /**
- * The top of the stack.
- */
- struct ProcessMetadataStackItem *stack;
-
- /**
- * Callback to invoke when processing is finished
- */
- GNUNET_SCHEDULER_Task cb;
-
- /**
- * Closure for 'cb'
- */
- void *cls;
-
- /**
- * Toplevel directory item of the tree to process.
- */
- struct GNUNET_FS_ShareTreeItem *toplevel;
-};
+ struct GNUNET_FS_ShareTreeItem *result;
-/**
- * Called every now and then by the scanner.
- * Checks the synchronization privitive.
- * Returns 1 if the scanner should stop, 0 otherwise.
- */
-static int
-should_stop (struct AddDirContext *adc)
-{
- errno = 0;
- char c;
- if (GNUNET_DISK_file_read_non_blocking (adc->stop_read, &c, 1) == 1
- || errno != EAGAIN)
- {
- adc->do_stop = 1;
- }
- return adc->do_stop;
+ /* check that we're actually done */
+ GNUNET_assert (GNUNET_SCHEDULER_NO_TASK == ds->progress_read_task);
+ /* preserve result */
+ result = ds->toplevel;
+ ds->toplevel = NULL;
+ GNUNET_FS_directory_scan_abort (ds);
+ return result;
}
+
/**
- * Write progress message.
- * Format is:
- * "reason", "filename length", "filename", "directory flag"
- * If filename is NULL, filename is not written, and its length
- * is written as 0, and nothing else is written. It signals the initiator
- * thread that the scanner is finished, and that it can now join its thread.
+ * Write 'size' bytes from 'buf' into 'out'.
*
- * Also checks if the initiator thread wants the scanner to stop,
- * Returns 1 to stop scanning (if the signal was received, or
- * if the pipe was broken somehow), 0 otherwise.
+ * @param out pipe to write to
+ * @param buf buffer with data to write
+ * @param size number of bytes to write
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
*/
static int
-write_progress (struct AddDirContext *adc, const char *filename,
- char is_directory, enum GNUNET_FS_DirScannerProgressUpdateReason reason)
+write_all (const struct GNUNET_DISK_FileHandle *out,
+ const void *buf,
+ size_t size)
{
- size_t filename_len;
+ const char *cbuf = buf;
+ size_t total;
ssize_t wr;
- size_t total_write;
- if ((adc->do_stop || should_stop (adc)) && reason != GNUNET_FS_DIRSCANNER_ASKED_TO_STOP
- && reason != GNUNET_FS_DIRSCANNER_FINISHED)
- return 1;
- total_write = 0;
- wr = 1;
- while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (reason))
- {
- wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
- &((char *)&reason)[total_write], sizeof (reason) - total_write);
- if (wr > 0)
- total_write += wr;
- }
- if (sizeof (reason) != total_write)
- return adc->do_stop = 1;
- if (filename)
- filename_len = strlen (filename) + 1;
- else
- filename_len = 0;
- total_write = 0;
- wr = 1;
- while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (size_t))
+
+ total = 0;
+ do
{
- wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
- &((char *)&filename_len)[total_write], sizeof (size_t) - total_write);
+ wr = GNUNET_DISK_file_write (out,
+ &cbuf[total],
+ size - total);
if (wr > 0)
- total_write += wr;
- }
- if (sizeof (size_t) != total_write)
- return adc->do_stop = 1;
- if (filename)
- {
- total_write = 0;
- wr = 1;
- while ((wr > 0 || errno == EAGAIN) && total_write < filename_len)
- {
- wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
- &((char *)filename)[total_write], filename_len - total_write);
- if (wr > 0)
- total_write += wr;
- }
- if (filename_len != total_write)
- return adc->do_stop = 1;
- total_write = 0;
- wr = 1;
- while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (char))
- {
- wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
- &((char *)&is_directory)[total_write], sizeof (char) - total_write);
- if (wr > 0)
- total_write += wr;
- }
- if (sizeof (char) != total_write)
- return adc->do_stop = 1;
- }
- return 0;
+ total += wr;
+ } while ( (wr > 0) && (total < size) );
+ return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
}
-/**
- * Add the given keyword to the
- * keyword statistics tracker.
- *
- * @param cls closure (user-defined)
- * @param keyword the keyword to count
- * @param is_mandatory ignored
- * @return always GNUNET_OK
- */
-static int
-add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
-{
- struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
- struct KeywordCounter *cnt, *first_cnt;
- GNUNET_HashCode hc;
- size_t klen;
-
- klen = strlen (keyword) + 1;
- GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
- /* Since the map might contain multiple values per keyword, we only
- * store one value, and attach all other to it, forming a linked list.
- * Somewhat easier than retrieving multiple items via callback.
- */
- first_cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
- for (cnt = first_cnt; cnt && strcmp (cnt->value, keyword) != 0; cnt = cnt->next);
- if (cnt == NULL)
- {
- cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
- cnt->value = (const char *) &cnt[1];
- memcpy (&cnt[1], keyword, klen);
- if (first_cnt != NULL)
- {
- if (first_cnt->prev != NULL)
- {
- first_cnt->prev->next = cnt;
- cnt->prev = first_cnt->prev;
- }
- first_cnt->prev = cnt;
- cnt->next = first_cnt;
- }
- else
- GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt,
- GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
- }
- cnt->count++;
- return GNUNET_OK;
-}
/**
- * Type of a function that libextractor calls for each
- * meta data item found.
+ * Write progress message.
*
- * @param cls the container multihashmap to update
- * @param plugin_name name of the plugin that produced this value;
- * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
- * used in the main libextractor library and yielding
- * meta data).
- * @param type libextractor-type describing the meta data
- * @param format basic format information about data
- * @param data_mime_type mime-type of data (not of the original file);
- * can be NULL (if mime-type is not known)
- * @param data actual meta-data found
- * @param data_len number of bytes in data
- * @return GNUNET_OK to continue extracting / iterating
+ * @param ds directory scanner context whose progress pipe is written to
+ * @param filename name of the file to transmit, never NULL
+ * @param is_directory GNUNET_YES for directory, GNUNET_NO for file, GNUNET_SYSERR for neither
+ * @param reason reason for the progress call
+ * @return GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
*/
static int
-add_to_meta_counter (void *cls, const char *plugin_name,
- enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
- const char *data_mime_type, const char *data, size_t data_len)
-{
- struct GNUNET_CONTAINER_MultiHashMap *map = cls;
- GNUNET_HashCode key;
- struct MetaCounter *cnt, *first_cnt;
-
- GNUNET_CRYPTO_hash (data, data_len, &key);
- first_cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
- for (cnt = first_cnt; cnt
- && cnt->data_size != data_len
- && memcmp (cnt->data, data, cnt->data_size) != 0; cnt = cnt->next);
- if (cnt == NULL)
- {
- cnt = GNUNET_malloc (sizeof (struct MetaCounter));
- cnt->data = data;
- cnt->data_size = data_len;
- cnt->plugin_name = plugin_name;
- cnt->type = type;
- cnt->format = format;
- cnt->data_mime_type = data_mime_type;
-
- if (first_cnt != NULL)
- {
- if (first_cnt->prev != NULL)
- {
- first_cnt->prev->next = cnt;
- cnt->prev = first_cnt->prev;
- }
- first_cnt->prev = cnt;
- cnt->next = first_cnt;
- }
- else
- GNUNET_CONTAINER_multihashmap_put (map, &key, cnt,
- GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
- }
- cnt->count++;
- return 0;
-}
-
-/**
- * Allocates a struct GNUNET_FS_ShareTreeItem and adds it to its parent.
- */
-static struct GNUNET_FS_ShareTreeItem *
-make_item (struct GNUNET_FS_ShareTreeItem *parent)
+write_progress (struct GNUNET_FS_DirScanner *ds,
+ const char *filename,
+ int is_directory,
+ enum GNUNET_FS_DirScannerProgressUpdateReason reason)
{
- struct GNUNET_FS_ShareTreeItem *item;
- item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem));
-
- item->parent = parent;
- if (parent)
- GNUNET_CONTAINER_DLL_insert (parent->children_head, parent->children_tail,
- item);
- return item;
+ size_t slen;
+
+ slen = strlen (filename) + 1;
+ if ( (GNUNET_OK !=
+ write_all (ds->progress_write,
+ &reason,
+ sizeof (reason))) ||
+ (GNUNET_OK !=
+ write_all (ds->progress_write,
+ &slen,
+ sizeof (slen))) ||
+ (GNUNET_OK !=
+ write_all (ds->progress_write,
+ filename,
+ slen)) ||
+ (GNUNET_OK !=
+ write_all (ds->progress_write,
+ &is_directory,
+ sizeof (is_directory))) )
+ return GNUNET_SYSERR;
+ return GNUNET_OK;
}
-/**
- * Extract metadata from a file and add it to the share tree
- *
- * @param ads context to modify
- * @param filename name of the file to process
- */
-static void
-extract_file (struct AddDirStack *ads, const char *filename)
-{
- struct GNUNET_FS_ShareTreeItem *item;
- const char *short_fn;
-
- item = make_item (ads->parent);
-
- GNUNET_DISK_file_size (filename, &item->file_size, GNUNET_YES);
- item->is_directory = GNUNET_NO;
-
- item->meta = GNUNET_CONTAINER_meta_data_create ();
- GNUNET_FS_meta_data_extract_from_file (item->meta, filename,
- ads->adc->plugins);
- GNUNET_CONTAINER_meta_data_delete (item->meta, EXTRACTOR_METATYPE_FILENAME,
- NULL, 0);
- short_fn = GNUNET_STRINGS_get_short_name (filename);
-
- item->filename = GNUNET_strdup (filename);
- item->short_filename = GNUNET_strdup (short_fn);
-
- GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>",
- EXTRACTOR_METATYPE_FILENAME,
- EXTRACTOR_METAFORMAT_UTF8, "text/plain",
- short_fn, strlen (short_fn) + 1);
- if (ads->parent == NULL)
- {
- /* we're finished with the scan, make sure caller gets the top-level
- * directory pointer
- */
- ads->adc->toplevel = item;
- }
-}
/**
- * Remove the keyword from the ksk URI.
- *
- * @param cls the ksk uri
- * @param keyword the word to remove
- * @param is_mandatory ignored
- * @return always GNUNET_OK
+ * Called every now and then by the scanner thread to check
+ * if we're being aborted.
+ *
+ * @param ds scanner context
+ * @return GNUNET_OK to continue, GNUNET_SYSERR to stop
*/
static int
-remove_keyword (void *cls, const char *keyword, int is_mandatory)
+test_thread_stop (struct GNUNET_FS_DirScanner *ds)
{
- struct GNUNET_FS_Uri *ksk = cls;
+ char c;
- GNUNET_FS_uri_ksk_remove_keyword (ksk, keyword);
+ if ( (GNUNET_DISK_file_read_non_blocking (ds->stop_read, &c, 1) == 1) ||
+ (EAGAIN != errno) )
+ return GNUNET_SYSERR;
return GNUNET_OK;
}
+
/**
- * Remove keywords from current directory's children, if they are
- * in the exluded keywords list of that directory.
+ * Function called to (recursively) add all of the files in the
+ * directory to the tree. Called by the directory scanner to initiate
+ * the scan. Does NOT yet add any metadata.
*
- * @param cls the ksk uri
- * @param keyword the word to remove
- * @param is_mandatory ignored
- * @return always GNUNET_OK
+ * @param ds directory scanner context to use
+ * @param filename file or directory to scan
+ * @param dst where to store the resulting share tree item
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
*/
static int
-remove_keywords (struct ProcessMetadataStackItem *stack, struct GNUNET_FS_ShareTreeItem *dir)
-{
- struct GNUNET_FS_ShareTreeItem *item;
+preprocess_file (struct GNUNET_FS_DirScanner *ds,
+ const char *filename,
+ struct GNUNET_FS_ShareTreeItem **dst);
- for (item = dir->children_head; item; item = item->next)
- {
- if (stack->exclude_ksk != NULL)
- GNUNET_FS_uri_ksk_get_keywords (stack->exclude_ksk, &remove_keyword, item->ksk_uri);
- }
- return GNUNET_OK;
-}
/**
- * Context passed to 'migrate_and_drop'.
+ * Closure for the 'scan_callback'
*/
-struct KeywordProcessContext
+struct RecursionContext
{
/**
- * All the keywords we migrated to the parent.
+ * Global scanner context.
*/
- struct GNUNET_FS_Uri *ksk;
-
- /**
- * How often does a keyword have to occur to be
- * migrated to the parent?
- */
- unsigned int threshold;
-};
+ struct GNUNET_FS_DirScanner *ds;
-/**
- * Context passed to 'migrate_and_drop'.
- */
-struct MetaProcessContext
-{
/**
- * All the metadata we copy to the parent.
+ * Parent to add the files to.
*/
- struct GNUNET_CONTAINER_MetaData *meta;
+ struct GNUNET_FS_ShareTreeItem *parent;
/**
- * How often does a metadata have to occur to be
- * migrated to the parent?
+ * Flag to set to GNUNET_YES on serious errors.
*/
- unsigned int threshold;
+ int stop;
};
/**
- * Move "frequent" keywords over to the
- * target ksk uri, free the counters.
+ * Function called by the directory iterator to (recursively) add all
+ * of the files in the directory to the tree. Called by the directory
+ * scanner to initiate the scan. Does NOT yet add any metadata.
*
+ * @param cls the 'struct RecursionContext'
+ * @param filename file or directory to scan
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
*/
static int
-migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value)
+scan_callback (void *cls,
+ const char *filename)
{
- struct KeywordProcessContext *kpc = cls;
- struct KeywordCounter *counter = value;
+ struct RecursionContext *rc = cls;
+ struct GNUNET_FS_ShareTreeItem *chld;
- if (counter->count >= kpc->threshold && counter->count > 1)
+ if (GNUNET_OK !=
+ preprocess_file (rc->ds,
+ filename,
+ &chld))
{
- GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO);
+ rc->stop = GNUNET_YES;
+ return GNUNET_SYSERR;
}
- GNUNET_free (counter);
- return GNUNET_YES;
+ chld->parent = rc->parent;
+ GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
+ rc->parent->children_tail,
+ chld);
+ return GNUNET_OK;
}
-/**
- * Copy "frequent" metadata items over to the
- * target metadata container, free the counters.
- *
- */
-static int
-migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value)
-{
- struct MetaProcessContext *mpc = cls;
- struct MetaCounter *counter = value;
- if (counter->count >= mpc->threshold && counter->count > 1)
- {
- GNUNET_CONTAINER_meta_data_insert (mpc->meta,
- counter->plugin_name,
- counter->type,
- counter->format,
- counter->data_mime_type, counter->data,
- counter->data_size);
- }
- GNUNET_free (counter);
- return GNUNET_YES;
-}
/**
- * Go over the collected keywords from all entries in the
- * directory and push common keywords up one level (by
- * adding it to the returned struct). Do the same for metadata.
- * Destroys keywordcounter and metacoutner for current directory.
+ * Function called to (recursively) add all of the files in the
+ * directory to the tree. Called by the directory scanner to initiate
+ * the scan. Does NOT yet add any metadata.
*
- * @param adc collection of child meta data
- * @param exclude_ksk pointer to where moveable keywords will be stored
- * @param copy_meta pointer to where copyable metadata will be stored
- */
-static void
-process_keywords_and_metadata (struct ProcessMetadataStackItem *stack,
- struct GNUNET_FS_Uri **exclude_ksk,
- struct GNUNET_CONTAINER_MetaData **copy_meta)
-{
- struct KeywordProcessContext kpc;
- struct MetaProcessContext mpc;
- struct GNUNET_CONTAINER_MetaData *tmp;
-
- /* Surprisingly, it's impossible to create a ksk with 0 keywords directly.
- * But we can create one from an empty metadata set
- */
- tmp = GNUNET_CONTAINER_meta_data_create ();
- kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp);
- GNUNET_CONTAINER_meta_data_destroy (tmp);
- mpc.meta = GNUNET_CONTAINER_meta_data_create ();
-
- kpc.threshold = mpc.threshold = (stack->dir_entry_count + 1) / 2; /* 50% */
-
- GNUNET_CONTAINER_multihashmap_iterate (stack->keywordcounter,
- &migrate_and_drop, &kpc);
- GNUNET_CONTAINER_multihashmap_iterate (stack->metacounter,
- &migrate_and_drop_metadata, &mpc);
-
- GNUNET_CONTAINER_multihashmap_destroy (stack->keywordcounter);
- GNUNET_CONTAINER_multihashmap_destroy (stack->metacounter);
- *exclude_ksk = kpc.ksk;
- *copy_meta = mpc.meta;
-}
-
-/**
- * Function called by the directory iterator to
- * (recursively) add all of the files in the
- * directory to the tree.
- * Called by the directory scanner to initiate the
- * scan.
- * TODO: find a way to make it non-recursive.
- *
- * @param cls the 'struct AddDirStack *' we're in
+ * @param ds directory scanner context to use
* @param filename file or directory to scan
+ * @param dst where to store the resulting share tree item
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
*/
static int
-scan_directory (void *cls, const char *filename)
+preprocess_file (struct GNUNET_FS_DirScanner *ds,
+ const char *filename,
+ struct GNUNET_FS_ShareTreeItem **dst)
{
- struct AddDirStack *ads = cls, recurse_ads;
- struct AddDirContext *adc = ads->adc;
- struct stat sbuf;
struct GNUNET_FS_ShareTreeItem *item;
- const char *short_fn;
- int do_stop = 0;
-
- /* Wrap up fast */
- if (adc->do_stop)
- return GNUNET_SYSERR;
+ struct stat sbuf;
- /* If the file doesn't exist (or is not statable for any other reason,
- * skip it, and report it.
- */
if (0 != STAT (filename, &sbuf))
{
- (void) write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
- GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST);
+ /* If the file doesn't exist (or is not stat-able for any other reason)
+ skip it (but report it), but do continue. */
+ if (GNUNET_OK !=
+ write_progress (ds, filename, GNUNET_SYSERR,
+ GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST))
+ return GNUNET_SYSERR;
return GNUNET_OK;
}
/* Report the progress */
- do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
- GNUNET_FS_DIRSCANNER_NEW_FILE);
- if (do_stop)
- {
- /* We were asked to stop, acknowledge that and return */
- (void) write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
- GNUNET_FS_DIRSCANNER_ASKED_TO_STOP);
+ if (GNUNET_OK !=
+ write_progress (ds,
+ filename,
+ S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO,
+ GNUNET_FS_DIRSCANNER_FILE_START))
return GNUNET_SYSERR;
- }
-
- if (!S_ISDIR (sbuf.st_mode))
- extract_file (ads, filename);
- else
- {
- item = make_item (ads->parent);
- item->meta = GNUNET_CONTAINER_meta_data_create ();
-
- item->is_directory = GNUNET_YES;
-
- recurse_ads.adc = adc;
- recurse_ads.parent = item;
-
- /* recurse into directory */
- GNUNET_DISK_directory_scan (filename, &scan_directory, &recurse_ads);
-
- short_fn = GNUNET_STRINGS_get_short_name (filename);
-
- item->filename = GNUNET_strdup (filename);
- item->short_filename = GNUNET_strdup (short_fn);
-
- if (ads->parent == NULL)
+ item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem));
+ item->meta = GNUNET_CONTAINER_meta_data_create ();
+ item->filename = GNUNET_strdup (filename);
+ item->short_filename = GNUNET_strdup (GNUNET_STRINGS_get_short_name (filename));
+ item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
+ item->file_size = (uint64_t) sbuf.st_size;
+ if (item->is_directory)
+ {
+ struct RecursionContext rc;
+
+ rc.parent = item;
+ rc.ds = ds;
+ rc.stop = GNUNET_NO;
+ GNUNET_DISK_directory_scan (filename,
+ &scan_callback,
+ &rc);
+ if ( (rc.stop == GNUNET_YES) ||
+ (GNUNET_OK !=
+ test_thread_stop (ds)) )
{
- /* we're finished with the scan, make sure caller gets the top-level
- * directory pointer
- */
- adc->toplevel = item;
+ GNUNET_FS_share_tree_free (item);
+ return GNUNET_SYSERR;
}
}
- return GNUNET_OK;
-}
-
-/**
- * Signals the scanner to finish the scan as fast as possible.
- * Does not block.
- * Can close the pipe if asked to, but that is only used by the
- * internal call to this function during cleanup. The client
- * must understand the consequences of closing the pipe too early.
- *
- * @param ds directory scanner structure
- * @param close_pipe GNUNET_YES to close
- */
-void
-GNUNET_FS_directory_scan_finish (struct GNUNET_FS_DirScanner *ds,
- int close_pipe)
-{
- char c = 1;
- GNUNET_DISK_file_write (ds->stop_write, &c, 1);
-
- if (close_pipe)
+ /* Report the progress */
+ if (GNUNET_OK !=
+ write_progress (ds,
+ filename,
+ S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO,
+ GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED))
{
- if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK)
- {
- GNUNET_SCHEDULER_cancel (ds->progress_read_task);
- ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
- }
- GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ);
- ds->progress_read = NULL;
+ GNUNET_FS_share_tree_free (item);
+ return GNUNET_SYSERR;
}
+ *dst = item;
+ return GNUNET_OK;
}
+
/**
- * Signals the scanner thread to finish (in case it isn't finishing
- * already) and joins the scanner thread. Closes the pipes, frees the
- * scanner contexts (both of them), returns the results of the scan.
- * Results are valid (and have to be freed) even if the scanner had
- * an error or was rushed to finish prematurely.
- * Blocks until the scanner is finished.
+ * Extract metadata from files.
*
- * @param ds directory scanner structure
- * @return the results of the scan (a directory tree)
+ * @param ds directory scanner context
+ * @param item entry we are processing
+ * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors
*/
-struct GNUNET_FS_ShareTreeItem *
-GNUNET_FS_directory_scan_cleanup (struct GNUNET_FS_DirScanner *ds)
-{
- struct GNUNET_FS_ShareTreeItem *result;
-
- GNUNET_FS_directory_scan_finish (ds, GNUNET_YES);
-#if WINDOWS
- WaitForSingleObject (ds->thread, INFINITE);
- CloseHandle (ds->thread);
-#else
- pthread_join (ds->thread, NULL);
- pthread_detach (ds->thread);
-#endif
+static int
+extract_files (struct GNUNET_FS_DirScanner *ds,
+ struct GNUNET_FS_ShareTreeItem *item)
+{
+ if (item->is_directory)
+ {
+ /* for directories, we simply only descend, no extraction, no
+ progress reporting */
+ struct GNUNET_FS_ShareTreeItem *pos;
+
+ for (pos = item->children_head; NULL != pos; pos = pos->next)
+ if (GNUNET_OK !=
+ extract_files (ds, pos))
+ return GNUNET_SYSERR;
+ return GNUNET_OK;
+ }
+
+ /* this is the expensive operation, *afterwards* we'll check for aborts */
+ GNUNET_FS_meta_data_extract_from_file (item->meta,
+ item->filename,
+ ds->plugins);
+
+ /* having full filenames is too dangerous; always make sure we clean them up */
+ GNUNET_CONTAINER_meta_data_delete (item->meta,
+ EXTRACTOR_METATYPE_FILENAME,
+ NULL, 0);
+ GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>",
+ EXTRACTOR_METATYPE_FILENAME,
+ EXTRACTOR_METAFORMAT_UTF8, "text/plain",
+ item->short_filename,
+ strlen (item->short_filename) + 1);
+ /* check for abort */
+ if (GNUNET_OK !=
+ test_thread_stop (ds))
+ return GNUNET_SYSERR;
- GNUNET_DISK_pipe_close (ds->stop_pipe);
- GNUNET_DISK_pipe_close (ds->progress_pipe);
- result = ds->adc->toplevel;
- GNUNET_free (ds->adc);
- GNUNET_free (ds);
- return result;
+ /* Report the progress */
+ if (GNUNET_OK !=
+ write_progress (ds,
+ item->filename,
+ GNUNET_NO,
+ GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED))
+ return GNUNET_SYSERR;
+ return GNUNET_OK;
}
+
/**
* The function from which the scanner thread starts
+ *
+ * @param cls the 'struct GNUNET_FS_DirScanner'
+ * @return 0/NULL
*/
#if WINDOWS
DWORD
@@ -882,19 +491,58 @@ static void *
#endif
run_directory_scan_thread (void *cls)
{
- struct AddDirContext *adc = cls;
- struct AddDirStack ads;
- ads.adc = adc;
- ads.parent = NULL;
- scan_directory (&ads, adc->filename_expanded);
- GNUNET_free (adc->filename_expanded);
- if (adc->plugins != NULL)
- EXTRACTOR_plugin_remove_all (adc->plugins);
- /* Tell the initiator that we're finished, it can now join the thread */
- write_progress (adc, NULL, 0, GNUNET_FS_DIRSCANNER_FINISHED);
+ struct GNUNET_FS_DirScanner *ds = cls;
+
+ if (GNUNET_OK != preprocess_file (ds,
+ ds->filename_expanded,
+ &ds->toplevel))
+ {
+ (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
+ return 0;
+ }
+ if (GNUNET_OK !=
+ write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_ALL_COUNTED))
+ return 0;
+ if (GNUNET_OK !=
+ extract_files (ds, ds->toplevel))
+ {
+ (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
+ return 0;
+ }
+ (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_FINISHED);
return 0;
}
+
+/**
+ * Read 'size' bytes from 'in' into 'buf'.
+ *
+ * @param in pipe to read from
+ * @param buf buffer to read to
+ * @param size number of bytes to read
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
+ */
+static int
+read_all (const struct GNUNET_DISK_FileHandle *in,
+ char *buf,
+ size_t size)
+{
+ size_t total;
+ ssize_t rd;
+
+ total = 0;
+ do
+ {
+ rd = GNUNET_DISK_file_read (in,
+ &buf[total],
+ size - total);
+ if (rd > 0)
+ total += rd;
+ } while ( (rd > 0) && (total < size) );
+ return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
+}
+
+
/**
* Called every time there is data to read from the scanner.
* Calls the scanner progress handler.
@@ -905,124 +553,69 @@ run_directory_scan_thread (void *cls)
static void
read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
{
- struct GNUNET_FS_DirScanner *ds;
- int end_it = 0;
+ struct GNUNET_FS_DirScanner *ds = cls;
enum GNUNET_FS_DirScannerProgressUpdateReason reason;
- ssize_t rd;
- ssize_t total_read;
-
size_t filename_len;
- char is_directory;
+ int is_directory;
char *filename;
- ds = cls;
-
ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
-
- if (!(tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
+ if (! (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
{
- ds->progress_callback (ds->cls, ds, NULL, 0, GNUNET_FS_DIRSCANNER_SHUTDOWN);
+ ds->progress_read_task
+ = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
+ ds->progress_read, &read_progress_task,
+ ds);
return;
}
/* Read one message. If message is malformed or can't be read, end the scanner */
- total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &reason, sizeof (reason));
- while (rd > 0 && total_read < sizeof (reason))
- {
- rd = GNUNET_DISK_file_read (ds->progress_read,
- &((char *) &reason)[total_read],
- sizeof (reason) - total_read);
- if (rd > 0)
- total_read += rd;
- }
- if (total_read != sizeof (reason)
- || reason <= GNUNET_FS_DIRSCANNER_FIRST
- || reason >= GNUNET_FS_DIRSCANNER_LAST)
- {
- end_it = 1;
- reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR;
- }
-
- if (!end_it)
- {
- total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &filename_len,
- sizeof (size_t));
- while (rd > 0 && total_read < sizeof (size_t))
- {
- rd = GNUNET_DISK_file_read (ds->progress_read,
- &((char *) &filename_len)[total_read],
- sizeof (size_t) - total_read);
- if (rd > 0)
- total_read += rd;
- }
- if (rd != sizeof (size_t))
- {
- end_it = 1;
- reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR;
- }
- }
- if (!end_it)
- {
- if (filename_len == 0)
- end_it = 1;
- else if (filename_len > PATH_MAX)
- {
- end_it = 1;
- reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR;
- }
- }
- if (!end_it)
- {
- filename = GNUNET_malloc (filename_len);
- total_read = rd = GNUNET_DISK_file_read (ds->progress_read, filename,
- filename_len);
- while (rd > 0 && total_read < filename_len)
- {
- rd = GNUNET_DISK_file_read (ds->progress_read, &filename[total_read],
- filename_len - total_read);
- if (rd > 0)
- total_read += rd;
- }
- if (rd != filename_len)
- {
- GNUNET_free (filename);
- reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR;
- end_it = 1;
- }
- }
- if (!end_it && filename_len > 0)
- {
- total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &is_directory,
- sizeof (char));
- while (rd > 0 && total_read < sizeof (char))
- {
- rd = GNUNET_DISK_file_read (ds->progress_read, &(&is_directory)[total_read],
- sizeof (char) - total_read);
- if (rd > 0)
- total_read += rd;
- }
- if (rd != sizeof (char))
- {
- GNUNET_free (filename);
- reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR;
- end_it = 1;
- }
- }
- if (!end_it)
- {
- end_it = ds->progress_callback (ds->cls, ds, (const char *) filename, is_directory, reason);
- GNUNET_free (filename);
- if (!end_it)
- {
- ds->progress_read_task = GNUNET_SCHEDULER_add_read_file (
- GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task,
- cls);
- }
- }
- else
- {
- ds->progress_callback (ds->cls, ds, NULL, 0, reason);
+ filename = NULL;
+ if ( (GNUNET_OK !=
+ read_all (ds->progress_read,
+ (char*) &reason,
+ sizeof (reason))) ||
+ (reason < GNUNET_FS_DIRSCANNER_FILE_START) ||
+ (reason > GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) ||
+ (GNUNET_OK !=
+ read_all (ds->progress_read,
+ (char*) &filename_len,
+ sizeof (size_t))) ||
+ (filename_len == 0) ||
+ (filename_len > PATH_MAX) ||
+ (GNUNET_OK !=
+ read_all (ds->progress_read,
+ filename = GNUNET_malloc (filename_len),
+ filename_len)) ||
+ (filename[filename_len-1] != '\0') ||
+ (GNUNET_OK !=
+ read_all (ds->progress_read,
+ (char*) &is_directory,
+ sizeof (is_directory))) )
+ {
+ /* IPC error, complain, signal client and stop reading
+ from the pipe */
+ GNUNET_break (0);
+ ds->progress_callback (ds->progress_callback_cls, ds,
+ NULL, GNUNET_SYSERR,
+ GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
+ GNUNET_free_non_null (filename);
+ return;
}
+ /* schedule task to keep reading (done here in case client calls
+ abort or something similar) */
+ ds->progress_read_task
+ = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
+ ds->progress_read,
+ &read_progress_task, ds);
+
+ /* read successfully, notify client about progress */
+ ds->progress_callback (ds->progress_callback_cls,
+ ds,
+ filename,
+ is_directory,
+ reason);
+ GNUNET_free (filename);
}
@@ -1033,275 +626,89 @@ read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
* @param GNUNET_YES to not to run libextractor on files (only build a tree)
* @param ex if not NULL, must be a list of extra plugins for extractor
* @param cb the callback to call when there are scanning progress messages
- * @param cls closure for 'cb'
+ * @param cb_cls closure for 'cb'
* @return directory scanner object to be used for controlling the scanner
*/
struct GNUNET_FS_DirScanner *
GNUNET_FS_directory_scan_start (const char *filename,
- int disable_extractor, const char *ex,
- GNUNET_FS_DirScannerProgressCallback cb, void *cls)
+ int disable_extractor, const char *ex,
+ GNUNET_FS_DirScannerProgressCallback cb,
+ void *cb_cls)
{
struct stat sbuf;
- struct AddDirContext *adc;
char *filename_expanded;
struct GNUNET_FS_DirScanner *ds;
struct GNUNET_DISK_PipeHandle *progress_pipe;
+ struct GNUNET_DISK_PipeHandle *stop_pipe;
int ok;
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Starting to scan directory `%s'\n",
- filename);
if (0 != STAT (filename, &sbuf))
return NULL;
-
- /* scan_directory() is guaranteed to be given expanded filenames,
- * so expand we will!
- */
filename_expanded = GNUNET_STRINGS_filename_expand (filename);
- if (filename_expanded == NULL)
+ if (NULL == filename_expanded)
return NULL;
-
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Starting to scan directory `%s'\n",
+ filename_expanded);
progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
- if (progress_pipe == NULL)
+ if (NULL == progress_pipe)
{
GNUNET_free (filename_expanded);
return NULL;
}
-
- adc = GNUNET_malloc (sizeof (struct AddDirContext));
-
- ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner));
-
- ds->adc = adc;
-
- ds->stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
- if (ds->stop_pipe == NULL)
+ stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
+ if (NULL == stop_pipe)
{
- GNUNET_free (adc);
- GNUNET_free (ds);
- GNUNET_free (filename_expanded);
GNUNET_DISK_pipe_close (progress_pipe);
+ GNUNET_free (filename_expanded);
return NULL;
}
+
+ ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner));
+ ds->progress_callback = cb;
+ ds->progress_callback_cls = cb_cls;
+ ds->stop_pipe = stop_pipe;
ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe,
- GNUNET_DISK_PIPE_END_WRITE);
- adc->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe,
- GNUNET_DISK_PIPE_END_READ);
-
- adc->plugins = NULL;
- if (!disable_extractor)
+ GNUNET_DISK_PIPE_END_WRITE);
+ ds->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe,
+ GNUNET_DISK_PIPE_END_READ);
+ ds->progress_pipe = progress_pipe;
+ ds->progress_write = GNUNET_DISK_pipe_handle (progress_pipe,
+ GNUNET_DISK_PIPE_END_WRITE);
+ ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe,
+ GNUNET_DISK_PIPE_END_READ);
+ ds->filename_expanded = filename_expanded;
+ if (! disable_extractor)
{
- adc->plugins = EXTRACTOR_plugin_add_defaults (
- EXTRACTOR_OPTION_DEFAULT_POLICY);
- if (ex && strlen (ex) > 0)
- adc->plugins = EXTRACTOR_plugin_add_config (adc->plugins, ex,
- EXTRACTOR_OPTION_DEFAULT_POLICY);
+ ds->plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
+ if ( (NULL != ex) && strlen (ex) > 0)
+ ds->plugins = EXTRACTOR_plugin_add_config (ds->plugins, ex,
+ EXTRACTOR_OPTION_DEFAULT_POLICY);
}
-
- adc->filename_expanded = filename_expanded;
- adc->progress_write = GNUNET_DISK_pipe_handle (progress_pipe,
- GNUNET_DISK_PIPE_END_WRITE);
-
-
- ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe,
- GNUNET_DISK_PIPE_END_READ);
-
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Creating thread to scan directory `%s'\n",
- filename);
-
#if WINDOWS
ds->thread = CreateThread (NULL, 0,
- (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, (LPVOID) adc,
- 0, NULL);
- ok = ds->thread != NULL;
+ (LPTHREAD_START_ROUTINE) &run_directory_scan_thread,
+ (LPVOID) ds, 0, NULL);
+ ok = (ds->thread != NULL);
#else
- ok = !pthread_create (&ds->thread, NULL, &run_directory_scan_thread,
- (void *) adc);
+ ok = (0 == pthread_create (&ds->thread, NULL,
+ &run_directory_scan_thread, ds));
#endif
if (!ok)
{
- GNUNET_free (adc);
+ EXTRACTOR_plugin_remove_all (ds->plugins);
GNUNET_free (filename_expanded);
+ GNUNET_DISK_pipe_close (stop_pipe);
GNUNET_DISK_pipe_close (progress_pipe);
GNUNET_free (ds);
return NULL;
}
-
- ds->progress_callback = cb;
- ds->cls = cls;
- ds->adc = adc;
- ds->progress_pipe = progress_pipe;
-
- ds->progress_read_task = GNUNET_SCHEDULER_add_read_file (
- GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task,
- ds);
-
+ ds->progress_read_task
+ = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
+ ds->progress_read,
+ &read_progress_task, ds);
return ds;
}
-/**
- * Task that post-processes the share item tree.
- * This processing has to be done in the main thread, because
- * it requires access to libgcrypt's hashing functions, and
- * libgcrypt is not thread-safe without some special magic.
- *
- * @param cls top of the stack
- * @param tc task context
- */
-static void
-trim_share_tree_task (void *cls,
- const struct GNUNET_SCHEDULER_TaskContext *tc)
-{
- struct ProcessMetadataStackItem *stack = cls;
- struct ProcessMetadataStackItem *next = stack;
- /* FIXME: figure out what to do when tc says we're shutting down */
-
- /* item == NULL means that we've just finished going over the children of
- * current directory.
- */
- if (stack->item == NULL)
- {
- if (stack->parent->item != NULL)
- {
- /* end of a directory */
- struct GNUNET_FS_Uri *ksk;
-
- /* use keyword and metadata counters to create lists of keywords to move
- * and metadata to copy.
- */
- process_keywords_and_metadata (stack, &stack->parent->exclude_ksk, &stack->parent->item->meta);
-
- /* create keywords from metadata (copies all text-metadata as keywords,
- * AND parses the directory name we've just added, producing even more
- * keywords.
- * then merge these keywords with the ones moved from children.
- */
- ksk = GNUNET_FS_uri_ksk_create_from_meta_data (stack->parent->item->meta);
- stack->parent->item->ksk_uri = GNUNET_FS_uri_ksk_merge (ksk, stack->parent->exclude_ksk);
- GNUNET_FS_uri_destroy (ksk);
-
- /* remove moved keywords from children (complete the move) */
- remove_keywords (stack->parent, stack->parent->item);
- GNUNET_FS_uri_destroy (stack->parent->exclude_ksk);
-
- /* go up the stack */
- next = stack->parent;
- GNUNET_free (stack);
- next->end_directory = GNUNET_YES;
- }
- else
- {
- /* we've just finished processing the toplevel directory */
- struct GNUNET_FS_ProcessMetadataContext *ctx = stack->ctx;
- next = NULL;
- GNUNET_SCHEDULER_add_continuation (ctx->cb, ctx->cls,
- GNUNET_SCHEDULER_REASON_PREREQ_DONE);
- GNUNET_free (stack->parent);
- GNUNET_free (stack);
- GNUNET_free (ctx);
- }
- }
- else if (stack->item->is_directory
- && !stack->end_directory
- && stack->item->children_head != NULL)
- {
- /* recurse into subdirectory */
- next = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
- next->ctx = stack->ctx;
- next->item = stack->item->children_head;
- next->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
- next->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
- next->dir_entry_count = 0;
- next->parent = stack;
- }
- else
- {
- /* process a child entry (a file or a directory) and move to the next one*/
- if (stack->item->is_directory)
- stack->end_directory = GNUNET_NO;
- if (stack->ctx->toplevel->is_directory)
- {
- stack->dir_entry_count++;
- GNUNET_CONTAINER_meta_data_iterate (stack->item->meta, &add_to_meta_counter, stack->metacounter);
-
- if (stack->item->is_directory)
- {
- char *user = getenv ("USER");
- if ((user == NULL) || (0 != strncasecmp (user, stack->item->short_filename, strlen(user))))
- {
- /* only use filename if it doesn't match $USER */
- GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>",
- EXTRACTOR_METATYPE_FILENAME,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain", stack->item->short_filename,
- strlen (stack->item->short_filename) + 1);
- GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>",
- EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain", stack->item->short_filename,
- strlen (stack->item->short_filename) + 1);
- }
- }
- }
- stack->item->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (stack->item->meta);
- if (stack->ctx->toplevel->is_directory)
- {
- GNUNET_FS_uri_ksk_get_keywords (stack->item->ksk_uri, &add_to_keyword_counter, stack->keywordcounter);
- }
- stack->item = stack->item->next;
- }
- /* Call this task again later, if there are more entries to process */
- if (next)
- GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, next,
- GNUNET_SCHEDULER_REASON_PREREQ_DONE);
-}
-
-/**
- * Process a share item tree, moving frequent keywords up and
- * copying frequent metadata up.
- *
- * @param toplevel toplevel directory in the tree, returned by the scanner
- * @param cb called after processing is done
- * @param cls closure for 'cb'
- */
-struct GNUNET_FS_ProcessMetadataContext *
-GNUNET_FS_trim_share_tree (struct GNUNET_FS_ShareTreeItem *toplevel,
- GNUNET_SCHEDULER_Task cb, void *cls)
-{
- struct GNUNET_FS_ProcessMetadataContext *ret;
-
- if (toplevel == NULL)
- {
- struct GNUNET_SCHEDULER_TaskContext tc;
- tc.reason = GNUNET_SCHEDULER_REASON_PREREQ_DONE;
- cb (cls, &tc);
- return NULL;
- }
-
- ret = GNUNET_malloc (sizeof (struct GNUNET_FS_ProcessMetadataContext));
- ret->toplevel = toplevel;
- ret->stack = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
- ret->stack->ctx = ret;
- ret->stack->item = toplevel;
- if (ret->stack->ctx->toplevel->is_directory)
- {
- ret->stack->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
- ret->stack->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
- }
-
- ret->stack->dir_entry_count = 0;
- ret->stack->end_directory = GNUNET_NO;
-
- /* dummy stack entry that tells us we're at the top of the stack */
- ret->stack->parent = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
- ret->stack->parent->ctx = ret;
-
- ret->cb = cb;
- ret->cls = cls;
-
- GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, ret->stack,
- GNUNET_SCHEDULER_REASON_PREREQ_DONE);
- return ret;
-}
+/* end of fs_dirmetascan.c */
diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c
index 396415bc6a..6c9642b9bd 100644
--- a/src/fs/fs_sharetree.c
+++ b/src/fs/fs_sharetree.c
@@ -421,8 +421,10 @@ GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
toplevel->parent->children_tail,
toplevel);
- GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
- GNUNET_FS_uri_destroy (toplevel->ksk_uri);
+ if (NULL != toplevel->meta)
+ GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
+ if (NULL != toplevel->ksk_uri)
+ GNUNET_FS_uri_destroy (toplevel->ksk_uri);
GNUNET_free_non_null (toplevel->filename);
GNUNET_free_non_null (toplevel->short_filename);
GNUNET_free (toplevel);
diff --git a/src/fs/gnunet-publish.c b/src/fs/gnunet-publish.c
index 98f39b821b..33cba499eb 100644
--- a/src/fs/gnunet-publish.c
+++ b/src/fs/gnunet-publish.c
@@ -68,12 +68,8 @@ static GNUNET_SCHEDULER_TaskIdentifier kill_task;
static struct GNUNET_FS_DirScanner *ds;
-static struct GNUNET_FS_ShareTreeItem * directory_scan_intermediary_result;
-
static struct GNUNET_FS_ShareTreeItem * directory_scan_result;
-static struct GNUNET_FS_ProcessMetadataContext *pmc;
-
static struct GNUNET_FS_Namespace *namespace;
/**
@@ -378,21 +374,17 @@ get_file_information (struct GNUNET_FS_ShareTreeItem *item)
item->ksk_uri, item->meta, !do_insert,
&bo);
}
- GNUNET_CONTAINER_meta_data_destroy (item->meta);
- GNUNET_FS_uri_destroy (item->ksk_uri);
- GNUNET_free (item->short_filename);
- GNUNET_free (item->filename);
- GNUNET_free (item);
return fi;
}
+
static void
-directory_trim_complete (void *cls,
- const struct GNUNET_SCHEDULER_TaskContext *tc)
+directory_trim_complete ()
{
struct GNUNET_FS_FileInformation *fi;
- directory_scan_result = directory_scan_intermediary_result;
+
fi = get_file_information (directory_scan_result);
+ GNUNET_FS_share_tree_free (directory_scan_result);
directory_scan_result = NULL;
if (fi == NULL)
{
@@ -425,7 +417,8 @@ directory_trim_complete (void *cls,
}
}
-static int
+
+static void
directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds,
const char *filename,
int is_directory,
@@ -433,64 +426,47 @@ directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds,
{
switch (reason)
{
- case GNUNET_FS_DIRSCANNER_NEW_FILE:
- if (filename != NULL)
- {
- if (is_directory)
- FPRINTF (stdout, _("Scanning directory `%s'.\n"), filename);
- else
- FPRINTF (stdout, _("Scanning file `%s'.\n"), filename);
- }
+ case GNUNET_FS_DIRSCANNER_FILE_START:
+ if (is_directory)
+ FPRINTF (stdout, _("Scanning directory `%s'.\n"), filename);
+ else
+ FPRINTF (stdout, _("Scanning file `%s'.\n"), filename);
break;
- case GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST:
- if (filename != NULL)
- {
- FPRINTF (stdout,
- _("Failed to scan `%s', because it does not exist.\n"),
- filename);
- }
+ case GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED:
+ if (is_directory)
+ FPRINTF (stdout, _("Done scanning directory `%s'.\n"), filename);
break;
- case GNUNET_FS_DIRSCANNER_ASKED_TO_STOP:
- if (filename != NULL)
- {
- FPRINTF (stdout,
- _("Scanner was about to scan `%s', but is now stopping.\n"),
- filename);
- }
- else
- FPRINTF (stdout, "%s", _("Scanner is stopping.\n"));
+ case GNUNET_FS_DIRSCANNER_ALL_COUNTED:
+ FPRINTF (stdout, "%s", _("Preprocessing complete.\n"));
+ break;
+ case GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED:
+ FPRINTF (stdout, _("Extracting meta data from file `%s' complete.\n"), filename);
break;
- case GNUNET_FS_DIRSCANNER_SHUTDOWN:
- FPRINTF (stdout, "%s", _("Client is shutting down.\n"));
+ case GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST:
+ FPRINTF (stdout,
+ _("There was trouble processing file `%s', skipping it.\n"),
+ filename);
break;
case GNUNET_FS_DIRSCANNER_FINISHED:
FPRINTF (stdout, "%s", _("Scanner has finished.\n"));
+ directory_scan_result = GNUNET_FS_directory_scan_get_result (ds);
+ ds = NULL;
+ GNUNET_FS_share_tree_trim (directory_scan_result);
+ directory_trim_complete ();
break;
- case GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR:
- FPRINTF (stdout, "%s",
- _("There was a failure communicating with the scanner.\n"));
+ case GNUNET_FS_DIRSCANNER_INTERNAL_ERROR:
+ FPRINTF (stdout, "%s", _("Internal error scanning directory.\n"));
+ GNUNET_FS_directory_scan_abort (ds);
+ ds = NULL;
+ if (namespace != NULL)
+ GNUNET_FS_namespace_delete (namespace, GNUNET_NO);
+ GNUNET_FS_stop (ctx);
+ ret = 1;
break;
default:
- FPRINTF (stdout, _("Got unknown scanner update with filename `%s'.\n"),
- filename);
+ GNUNET_assert (0);
break;
}
- if ((filename == NULL && GNUNET_FS_DIRSCANNER_FINISHED)
- || reason == GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR
- || reason == GNUNET_FS_DIRSCANNER_SHUTDOWN)
- {
- /* Any of this causes us to try to clean up the scanner */
- directory_scan_intermediary_result = GNUNET_FS_directory_scan_cleanup (ds);
- pmc = GNUNET_FS_trim_share_tree (directory_scan_intermediary_result,
- &directory_trim_complete, NULL);
-
- ds = NULL;
- /* FIXME: change the tree processor to be able to free untrimmed trees
- * right here instead of waiting for trimming to complete, if we need to
- * cancel everything.
- */
- }
- return 0;
}