aboutsummaryrefslogtreecommitdiff
path: root/src/fs/gnunet-helper-fs-publish.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fs/gnunet-helper-fs-publish.c')
-rw-r--r--src/fs/gnunet-helper-fs-publish.c457
1 files changed, 457 insertions, 0 deletions
diff --git a/src/fs/gnunet-helper-fs-publish.c b/src/fs/gnunet-helper-fs-publish.c
new file mode 100644
index 0000000..4f70464
--- /dev/null
+++ b/src/fs/gnunet-helper-fs-publish.c
@@ -0,0 +1,457 @@
+/*
+ This file is part of GNUnet.
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @file src/fs/gnunet-helper-fs-publish.c
+ * @brief Tool to help extract meta data asynchronously
+ * @author Christian Grothoff
+ *
+ * This program will scan a directory for files with meta data
+ * and report the results to stdout.
+ */
+#include "platform.h"
+#include "gnunet_fs_service.h"
+
+
+/**
+ * A node of the directory tree built by 'preprocess_file'.  Each node
+ * describes one file or directory; the entries of a directory are kept
+ * in a doubly-linked list anchored at 'children_head' / 'children_tail'
+ * of the directory's node.
+ */
+struct ScanTreeNode
+{
+
+ /**
+ * Next sibling in the parent's doubly-linked list of children.
+ */
+ struct ScanTreeNode *next;
+
+ /**
+ * Previous sibling in the parent's doubly-linked list of children.
+ */
+ struct ScanTreeNode *prev;
+
+ /**
+ * Parent of this node, NULL for top-level entries.
+ */
+ struct ScanTreeNode *parent;
+
+ /**
+ * Head of the doubly-linked list of children of this node;
+ * NULL for files and empty directories.
+ */
+ struct ScanTreeNode *children_head;
+
+ /**
+ * Tail of the doubly-linked list of children of this node;
+ * NULL for files and empty directories.
+ */
+ struct ScanTreeNode *children_tail;
+
+ /**
+ * Name of the file/directory; a heap-allocated copy that is
+ * released together with the node in 'free_tree'.
+ */
+ char *filename;
+
+ /**
+ * Size of the file (if it is a file), in bytes; taken from the
+ * 'st_size' field of the stat result.
+ */
+ uint64_t file_size;
+
+ /**
+ * GNUNET_YES if this is a directory, GNUNET_NO otherwise.
+ */
+ int is_directory;
+
+};
+
+
+/**
+ * List of libextractor plugins to use for extracting.  Set up once in
+ * 'main'; stays NULL when extraction was disabled on the command line,
+ * in which case 'extract_files' does not call libextractor at all.
+ */
+static struct EXTRACTOR_PluginList *plugins;
+
+
+/**
+ * Callback handed to libextractor: stores every item it reports in
+ * the meta data container passed as closure.
+ *
+ * @param cls closure, the 'struct GNUNET_CONTAINER_MetaData' to fill
+ * @param plugin_name name of the plugin that produced this value;
+ *        special values can be used (i.e. '<zlib>' for zlib being
+ *        used in the main libextractor library and yielding
+ *        meta data).
+ * @param type libextractor-type describing the meta data
+ * @param format basic format information about data
+ * @param data_mime_type mime-type of data (not of the original file);
+ *        can be NULL (if mime-type is not known)
+ * @param data actual meta-data found
+ * @param data_len number of bytes in data
+ * @return always 0 to continue extracting
+ */
+static int
+add_to_md (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type,
+           enum EXTRACTOR_MetaFormat format, const char *data_mime_type,
+           const char *data, size_t data_len)
+{
+  /* the result of the insertion is deliberately discarded: extraction
+     continues regardless */
+  (void) GNUNET_CONTAINER_meta_data_insert ((struct GNUNET_CONTAINER_MetaData *) cls,
+                                            plugin_name,
+                                            type,
+                                            format,
+                                            data_mime_type,
+                                            data,
+                                            data_len);
+  return 0;
+}
+
+
+/**
+ * Recursively release a (sub)tree: all descendants first, then the
+ * node itself.  A node that has a parent is unlinked from the
+ * parent's list of children before it is freed.
+ *
+ * @param tree tree to free
+ */
+static void
+free_tree (struct ScanTreeNode *tree)
+{
+  struct ScanTreeNode *up = tree->parent;
+
+  /* every recursive call unlinks the child it frees from our list,
+     so the head keeps advancing until the list is empty */
+  while (NULL != tree->children_head)
+    free_tree (tree->children_head);
+  if (NULL != up)
+    GNUNET_CONTAINER_DLL_remove (up->children_head,
+                                 up->children_tail,
+                                 tree);
+  GNUNET_free (tree->filename);
+  GNUNET_free (tree);
+}
+
+
+/**
+ * Write 'size' bytes from 'buf' to stdout (file descriptor 1),
+ * looping over partial writes until everything was written.
+ *
+ * A request for zero bytes succeeds immediately without touching
+ * the descriptor.  Writes interrupted by a signal (EINTR) are
+ * retried; any other failure is logged and aborts the operation.
+ *
+ * @param buf buffer with data to write
+ * @param size number of bytes to write
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
+ */
+static int
+write_all (const void *buf,
+           size_t size)
+{
+  const char *cbuf = buf;
+  size_t total = 0;
+
+  while (total < size)
+  {
+    ssize_t wr = write (1,
+                        &cbuf[total],
+                        size - total);
+
+    if (wr > 0)
+    {
+      total += (size_t) wr;
+      continue;
+    }
+    if ( (-1 == wr) && (EINTR == errno) )
+      continue; /* interrupted before any data was written: retry */
+    /* errno is only meaningful if write() actually reported an error */
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                "Failed to write to stdout: %s\n",
+                (0 == wr) ? "no progress" : strerror (errno));
+    return GNUNET_SYSERR;
+  }
+  return GNUNET_OK;
+}
+
+
+/**
+ * Write message to the master process: a 'struct GNUNET_MessageHeader'
+ * followed by 'data_length' bytes of payload.
+ *
+ * The header stores the total message size in 16 bits.  A payload
+ * that does not fit is rejected (nothing is written), since sending a
+ * header whose size field was silently truncated would desynchronize
+ * the stream read by the parent.
+ *
+ * @param message_type message type to use
+ * @param data data to append, NULL for none
+ * @param data_length number of bytes in data
+ * @return GNUNET_OK on success,
+ *         GNUNET_SYSERR to stop scanning (the pipe was broken somehow,
+ *         or the message is too large to be framed)
+ */
+static int
+write_message (uint16_t message_type,
+               const char *data,
+               size_t data_length)
+{
+  struct GNUNET_MessageHeader hdr;
+
+  if (data_length > UINT16_MAX - sizeof (hdr))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                "Message of type %u with %llu bytes of payload exceeds the maximum message size\n",
+                (unsigned int) message_type,
+                (unsigned long long) data_length);
+    return GNUNET_SYSERR;
+  }
+  hdr.type = htons (message_type);
+  hdr.size = htons ((uint16_t) (sizeof (hdr) + data_length));
+  if (GNUNET_OK !=
+      write_all (&hdr,
+                 sizeof (hdr)))
+    return GNUNET_SYSERR;
+  /* header-only messages have no payload to write */
+  if ( (0 != data_length) &&
+       (GNUNET_OK !=
+        write_all (data,
+                   data_length)) )
+    return GNUNET_SYSERR;
+  return GNUNET_OK;
+}
+
+
+/**
+ * Function called to (recursively) add all of the files in the
+ * directory to the tree. Called by the directory scanner to initiate
+ * the scan. Does NOT yet add any metadata.
+ *
+ * Forward declaration: the definition recurses into directories via
+ * 'scan_callback', which in turn calls this function.
+ *
+ * @param filename file or directory to scan
+ * @param dst where to store the resulting share tree item; callers
+ *        must not assume that a node was stored merely because
+ *        GNUNET_OK was returned, as files that cannot be stat'ed are
+ *        reported to the master process and skipped
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
+ */
+static int
+preprocess_file (const char *filename,
+ struct ScanTreeNode **dst);
+
+
+/**
+ * Closure for the 'scan_callback'; created on the stack by
+ * 'preprocess_file' for each directory that is scanned.
+ */
+struct RecursionContext
+{
+ /**
+ * Parent to add the files to (the node of the directory
+ * that is currently being scanned).
+ */
+ struct ScanTreeNode *parent;
+
+ /**
+ * Flag to set to GNUNET_YES on serious errors; initialized to
+ * GNUNET_NO before the scan and checked once the scan returns.
+ */
+ int stop;
+};
+
+
+/**
+ * Function called by the directory iterator to (recursively) add all
+ * of the files in the directory to the tree.  Does NOT yet add any
+ * metadata.
+ *
+ * Entries for which 'preprocess_file' succeeds without producing a
+ * node (files that were skipped) are simply not added to the parent.
+ *
+ * @param cls the 'struct RecursionContext'
+ * @param filename file or directory to scan
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error (in which
+ *         case the 'stop' flag of the context is set as well)
+ */
+static int
+scan_callback (void *cls,
+               const char *filename)
+{
+  struct RecursionContext *rc = cls;
+  /* must start out as NULL: 'preprocess_file' may report success
+     without storing a node for files it had to skip */
+  struct ScanTreeNode *chld = NULL;
+
+  if (GNUNET_OK !=
+      preprocess_file (filename,
+                       &chld))
+  {
+    rc->stop = GNUNET_YES;
+    return GNUNET_SYSERR;
+  }
+  if (NULL == chld)
+    return GNUNET_OK; /* entry was skipped, nothing to link in */
+  chld->parent = rc->parent;
+  GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
+                               rc->parent->children_tail,
+                               chld);
+  return GNUNET_OK;
+}
+
+
+/**
+ * Function called to (recursively) add all of the files in the
+ * directory to the tree. Called by the directory scanner to initiate
+ * the scan. Does NOT yet add any metadata.
+ *
+ * For every entry a progress message is sent to the master process;
+ * after a directory has been scanned completely, a directory progress
+ * message with the name ".." is sent.  Entries that cannot be stat'ed
+ * are reported with a "skip" message and otherwise ignored.
+ *
+ * @param filename file or directory to scan
+ * @param dst where to store the resulting share tree item; set to
+ *        NULL if the entry was skipped or an error occurred, so it
+ *        is always well-defined when this function returns
+ * @return GNUNET_OK on success (including skipped entries),
+ *         GNUNET_SYSERR on error
+ */
+static int
+preprocess_file (const char *filename,
+                 struct ScanTreeNode **dst)
+{
+  struct ScanTreeNode *item;
+  struct stat sbuf;
+  int is_dir;
+
+  /* never leave the out-parameter undefined, whatever path we take */
+  *dst = NULL;
+  if (0 != STAT (filename, &sbuf))
+  {
+    /* If the file doesn't exist (or is not stat-able for any other reason)
+       skip it (but report it), but do continue. */
+    if (GNUNET_OK !=
+        write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
+                       filename, strlen (filename) + 1))
+      return GNUNET_SYSERR;
+    /* recoverable: success, but without a node */
+    return GNUNET_OK;
+  }
+  is_dir = S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO;
+
+  /* Report the progress */
+  if (GNUNET_OK !=
+      write_message ((GNUNET_YES == is_dir)
+                     ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
+                     : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
+                     filename, strlen (filename) + 1))
+    return GNUNET_SYSERR;
+  item = GNUNET_malloc (sizeof (struct ScanTreeNode));
+  item->filename = GNUNET_strdup (filename);
+  item->is_directory = is_dir;
+  item->file_size = (uint64_t) sbuf.st_size;
+  if (GNUNET_YES == is_dir)
+  {
+    struct RecursionContext rc;
+
+    rc.parent = item;
+    rc.stop = GNUNET_NO;
+    /* the return value is not inspected; fatal problems in the
+       callback are signalled through 'rc.stop' */
+    GNUNET_DISK_directory_scan (filename,
+                                &scan_callback,
+                                &rc);
+    /* ".." tells the master that we are done with this directory */
+    if ( (rc.stop == GNUNET_YES) ||
+         (GNUNET_OK !=
+          write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
+                         "..", 3)) )
+    {
+      free_tree (item);
+      return GNUNET_SYSERR;
+    }
+  }
+  *dst = item;
+  return GNUNET_OK;
+}
+
+
+/**
+ * Extract metadata from files.
+ *
+ * Directories are only descended into.  For every file one meta data
+ * message is sent to the master process; its payload is the
+ * 0-terminated file name, followed by the serialized meta data (if
+ * there is any).  The serialized meta data is limited to what still
+ * fits into a single message after the file name.
+ *
+ * @param item entry we are processing
+ * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors
+ */
+static int
+extract_files (struct ScanTreeNode *item)
+{
+  /* the message header stores the total message size in 16 bits */
+  const size_t max_payload = UINT16_MAX - sizeof (struct GNUNET_MessageHeader);
+  struct GNUNET_CONTAINER_MetaData *meta;
+  ssize_t size;
+  size_t slen;
+  size_t max_meta;
+
+  if (item->is_directory == GNUNET_YES)
+  {
+    /* for directories, we simply only descent, no extraction, no
+       progress reporting */
+    struct ScanTreeNode *pos;
+
+    for (pos = item->children_head; NULL != pos; pos = pos->next)
+      if (GNUNET_OK !=
+          extract_files (pos))
+        return GNUNET_SYSERR;
+    return GNUNET_OK;
+  }
+
+  /* this is the expensive operation, *afterwards* we'll check for aborts */
+  meta = GNUNET_CONTAINER_meta_data_create ();
+  if (NULL != plugins)
+    EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta);
+  slen = strlen (item->filename) + 1;
+  size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
+  if (size < 0)
+  {
+    /* no meta data */
+    GNUNET_CONTAINER_meta_data_destroy (meta);
+    if (GNUNET_OK !=
+        write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
+                       item->filename, slen))
+      return GNUNET_SYSERR;
+    return GNUNET_OK;
+  }
+  /* We can't transfer more than fits into one message; with
+     SERIALIZE_PART the serializer is asked to stay within the limit. */
+  max_meta = (slen < max_payload) ? (max_payload - slen) : 0;
+  if ((size_t) size > max_meta)
+    size = (ssize_t) max_meta;
+  {
+    char buf[slen + size];
+    char *dst = &buf[slen];
+
+    memcpy (buf, item->filename, slen);
+    /* 'size' bytes are available behind the file name, so that is the
+       limit to hand to the serializer (not 'size - slen') */
+    size = GNUNET_CONTAINER_meta_data_serialize (meta,
+                                                 &dst, size,
+                                                 GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
+    GNUNET_CONTAINER_meta_data_destroy (meta);
+    if (size < 0)
+      size = 0; /* serialization failed: report the file without meta data */
+    if (GNUNET_OK !=
+        write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
+                       buf,
+                       slen + size))
+      return GNUNET_SYSERR;
+  }
+  return GNUNET_OK;
+}
+
+
+/**
+ * Main function of the helper process to extract meta data.
+ *
+ * Scans the given file or directory, reports progress and extracted
+ * meta data as messages on stdout and finally signals completion (or
+ * an error) to the parent.
+ *
+ * @param argc should be 2 or 3
+ * @param argv [0] our binary name
+ *             [1] name of the file or directory to process
+ *             [2] "-" to disable extraction, NULL for defaults,
+ *                 otherwise custom plugins to load from LE
+ * @return 0 on success, 1 on bad usage, 2 if scanning failed,
+ *         3 if the parent could not be told that counting is done,
+ *         4 if extraction failed
+ */
+int main(int argc,
+         char **argv)
+{
+  const char *filename_expanded;
+  const char *ex;
+  /* stays NULL if the top-level entry had to be skipped */
+  struct ScanTreeNode *root = NULL;
+  int ret = 0;
+
+#if WINDOWS
+  /* We're using stdout to communicate binary data back to the parent; use
+   * binary mode.
+   */
+  _setmode (1, _O_BINARY);
+#endif
+
+  /* parse command line */
+  if ( (argc != 3) && (argc != 2) )
+  {
+    FPRINTF (stderr,
+             "%s",
+             "gnunet-helper-fs-publish needs exactly one or two arguments\n");
+    return 1;
+  }
+  filename_expanded = argv[1];
+  /* argv[argc] is NULL, so this is NULL if only the path was given */
+  ex = argv[2];
+  if ( (ex == NULL) ||
+       (0 != strcmp (ex, "-")) )
+  {
+    plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
+    if (NULL != ex)
+      plugins = EXTRACTOR_plugin_add_config (plugins, ex,
+                                             EXTRACTOR_OPTION_DEFAULT_POLICY);
+  }
+
+  /* scan tree to find out how much work there is to be done */
+  if (GNUNET_OK != preprocess_file (filename_expanded,
+                                    &root))
+  {
+    (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
+    /* nothing was handed out on failure, do not touch 'root' */
+    root = NULL;
+    ret = 2;
+    goto cleanup;
+  }
+  /* signal that we're done counting files, so that a percentage of
+     progress can now be calculated */
+  if (GNUNET_OK !=
+      write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE, NULL, 0))
+  {
+    ret = 3;
+    goto cleanup;
+  }
+  if ( (NULL != root) &&
+       (GNUNET_OK !=
+        extract_files (root)) )
+  {
+    (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
+    ret = 4;
+    goto cleanup;
+  }
+  if (NULL != root)
+  {
+    free_tree (root);
+    root = NULL;
+  }
+  /* enable "clean" shutdown by telling parent that we are done */
+  (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED, NULL, 0);
+
+cleanup:
+  if (NULL != root)
+    free_tree (root);
+  if (NULL != plugins)
+  {
+    EXTRACTOR_plugin_remove_all (plugins);
+    plugins = NULL;
+  }
+  return ret;
+}
+
+/* end of gnunet-helper-fs-publish.c */
+