1 files changed, 1114 insertions, 0 deletions
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
new file mode 100644
index 00000000000..6252b173a46
--- /dev/null
+++ b/fs/nilfs2/inode.c
@@ -0,0 +1,1114 @@
+/*
+ * inode.c - NILFS inode operations.
+ *
+ * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ *
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/gfp.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/aio.h>
+#include "nilfs.h"
+#include "btnode.h"
+#include "segment.h"
+#include "page.h"
+#include "mdt.h"
+#include "cpfile.h"
+#include "ifile.h"
+
+/**
+ * struct nilfs_iget_args - lookup key handed to nilfs_iget_test()/_set()
+ * @ino: inode number
+ * @cno: checkpoint number (compared only when matching GC inodes)
+ * @root: NILFS root object (mounted checkpoint); NULL for GC lookups
+ * @for_gc: non-zero when the lookup targets a GC inode
+ */
+struct nilfs_iget_args {
+	u64 ino;
+	__u64 cno;
+	struct nilfs_root *root;
+	int for_gc;
+};
+
+void nilfs_inode_add_blocks(struct inode *inode, int n)
+{
+	struct nilfs_root *owner = NILFS_I(inode)->i_root;
+
+	inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
+	if (owner != NULL)	/* not every inode is tied to a root */
+		atomic64_add(n, &owner->blocks_count);
+}
+
+void nilfs_inode_sub_blocks(struct inode *inode, int n)
+{
+	struct nilfs_root *owner = NILFS_I(inode)->i_root;
+
+	inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
+	if (owner != NULL)	/* not every inode is tied to a root */
+		atomic64_sub(n, &owner->blocks_count);
+}
+
+/**
+ * nilfs_get_block() - get a file block on the filesystem (callback function)
+ * @inode: inode struct of the target file
+ * @blkoff: file block number
+ * @bh_result: buffer head to be mapped on
+ * @create: non-zero to allocate the block when it is not allocated yet
+ *
+ * This function does not issue the read request itself; VFS does that.
+ *
+ * Return: 0 if mapped, for a hole, or on -EEXIST; else the helper's error.
+ */
+int nilfs_get_block(struct inode *inode, sector_t blkoff,
+		    struct buffer_head *bh_result, int create)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+	__u64 blknum = 0;
+	int err = 0, ret;
+	unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
+
+	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
+	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+	if (ret >= 0) {	/* found */
+		map_bh(bh_result, inode->i_sb, blknum);
+		if (ret > 0)
+			bh_result->b_size = (ret << inode->i_blkbits);
+		goto out;
+	}
+	/* data block was not found */
+	if (ret == -ENOENT && create) {
+		struct nilfs_transaction_info ti;
+
+		bh_result->b_blocknr = 0;
+		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
+		if (unlikely(err))
+			goto out;
+		err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff,
+					(unsigned long)bh_result);
+		if (unlikely(err != 0)) {
+			if (err == -EEXIST) {
+				/*
+				 * The get_block() function could be called
+				 * from multiple callers for an inode.
+				 * However, the page having this block must
+				 * be locked in this case.
+				 */
+				printk(KERN_WARNING
+				       "nilfs_get_block: a race condition "
+				       "while inserting a data block. "
+				       "(inode number=%lu, file block "
+				       "offset=%llu)\n",
+				       inode->i_ino,
+				       (unsigned long long)blkoff);
+				err = 0;
+			}
+			nilfs_transaction_abort(inode->i_sb);
+			goto out;
+		}
+		nilfs_mark_inode_dirty(inode);
+		nilfs_transaction_commit(inode->i_sb); /* never fails */
+		/* TODO: the result of nilfs_mark_inode_dirty() is ignored */
+		set_buffer_new(bh_result);
+		set_buffer_delay(bh_result);
+		map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
+						      to proper value */
+	} else if (ret == -ENOENT) {
+		/* not found is not error (e.g. hole); must return without
+		   the mapped state flag. */
+		;
+	} else {
+		err = ret;
+	}
+
+ out:
+	return err;
+}
+
+/**
+ * nilfs_readpage() - readpage() method of nilfs_aops
+ * @file: file struct of the file to be read (not used by this function)
+ * @page: the page to be read; its blocks are mapped via nilfs_get_block()
+ * Return: result of mpage_readpage()
+ */
+static int nilfs_readpage(struct file *file, struct page *page)
+{
+	return mpage_readpage(page, nilfs_get_block);
+}
+
+/**
+ * nilfs_readpages() - readpages() method of nilfs_aops
+ * @file: file struct of the file to be read (not used by this function)
+ * @mapping: address_space struct used for reading multiple pages
+ * @pages: the pages to be read
+ * @nr_pages: number of pages to be read
+ * Return: result of mpage_readpages()
+ */
+static int nilfs_readpages(struct file *file, struct address_space *mapping,
+			   struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
+}
+
+static int nilfs_writepages(struct address_space *mapping,
+			    struct writeback_control *wbc)
+{
+	struct inode *host = mapping->host;
+	struct super_block *sb = host->i_sb;
+
+	if (sb->s_flags & MS_RDONLY) {
+		/* Read-only mount: clear dirty pages rather than write them. */
+		nilfs_clear_dirty_pages(mapping, false);
+		return -EROFS;
+	}
+	/* Only WB_SYNC_ALL writeback constructs a segment from here. */
+	if (wbc->sync_mode != WB_SYNC_ALL)
+		return 0;
+	return nilfs_construct_dsync_segment(sb, host, wbc->range_start,
+					     wbc->range_end);
+}
+
+static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *host = page->mapping->host;
+	struct super_block *sb = host->i_sb;
+	int err = 0;
+
+	if (sb->s_flags & MS_RDONLY) {
+		/*
+		 * The filesystem was remounted read-only (error or
+		 * metadata corruption) while background writeback still
+		 * holds dirty pages.  Discard this page instead.
+		 */
+		nilfs_clear_dirty_page(page, false);
+		unlock_page(page);
+		return -EROFS;
+	}
+
+	/* Re-mark the page dirty and unlock it before any segment work. */
+	redirty_page_for_writepage(wbc, page);
+	unlock_page(page);
+
+	/* Sync writeback constructs a segment; reclaim only asks for a flush. */
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		err = nilfs_construct_segment(sb);
+	else if (wbc->for_reclaim)
+		nilfs_flush_segment(sb, host->i_ino);
+
+	return err;
+}
+
+static int nilfs_set_page_dirty(struct page *page)
+{
+	int ret = __set_page_dirty_nobuffers(page);
+
+	if (page_has_buffers(page)) {
+		struct inode *host = page->mapping->host;
+		struct buffer_head *first, *cur;
+		unsigned newly_dirty = 0;
+
+		/*
+		 * Callers hold the page lock.  Buffers are only dirtied
+		 * through fs/buffer.c routines whose mark_buffer_dirty
+		 * call sites run under that same lock, so nobody else
+		 * dirties these buffers while the ring below is being
+		 * walked.
+		 */
+		cur = first = page_buffers(page);
+		do {
+			/* Skip buffers already dirty, and unmapped holes. */
+			if (!buffer_dirty(cur) && buffer_mapped(cur)) {
+				set_buffer_dirty(cur);
+				newly_dirty++;
+			}
+			cur = cur->b_this_page;
+		} while (cur != first);
+
+		if (newly_dirty)
+			nilfs_set_file_dirty(host, newly_dirty);
+	}
+	return ret;
+}
+
+void nilfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *host = mapping->host;
+
+	if (to <= host->i_size)	/* nothing beyond i_size to undo */
+		return;
+	truncate_pagecache(host, host->i_size);
+	nilfs_truncate(host);
+}
+
+static int nilfs_write_begin(struct file *file, struct address_space *mapping,
+			     loff_t pos, unsigned len, unsigned flags,
+			     struct page **pagep, void **fsdata)
+{
+	struct super_block *sb = mapping->host->i_sb;
+	int ret;
+
+	/* On success the transaction is left open; it is not ended here. */
+	ret = nilfs_transaction_begin(sb, NULL, 1);
+	if (unlikely(ret))
+		return ret;
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				nilfs_get_block);
+	if (!ret)
+		return 0;
+	/* Undo whatever reached beyond i_size, then drop the transaction. */
+	nilfs_write_failed(mapping, pos + len);
+	nilfs_transaction_abort(sb);
+	return ret;
+}
+
+static int nilfs_write_end(struct file *file, struct address_space *mapping,
+			   loff_t pos, unsigned len, unsigned copied,
+			   struct page *page, void *fsdata)
+{
+	struct inode *host = mapping->host;
+	unsigned off = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned nr_clean;
+	int err;
+
+	/* Count clean buffers in the range before generic_write_end() runs. */
+	nr_clean = nilfs_page_count_clean_buffers(page, off, off + copied);
+	copied = generic_write_end(file, mapping, pos, len, copied, page,
+				   fsdata);
+	nilfs_set_file_dirty(host, nr_clean);
+	err = nilfs_transaction_commit(host->i_sb);
+	return err ? err : copied;
+}
+
+static ssize_t
+nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+		loff_t offset)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = file->f_mapping->host;
+	size_t count = iov_iter_count(iter);
+	ssize_t size;
+
+	if (rw == WRITE)
+		return 0;	/* no direct I/O is issued for a plain WRITE */
+
+	/* Needs synchronization with the cleaner */
+	size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				  nilfs_get_block);
+
+	/*
+	 * A failed extending write may have instantiated blocks outside
+	 * i_size; trim them.  NOTE(review): rw == WRITE returned above.
+	 */
+	if (unlikely((rw & WRITE) && size < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + count;
+
+		if (end > isize)
+			nilfs_write_failed(mapping, end);
+	}
+
+	return size;
+}
+
+const struct address_space_operations nilfs_aops = {
+	.writepage		= nilfs_writepage,
+	.readpage		= nilfs_readpage,
+	.writepages		= nilfs_writepages,
+	.set_page_dirty		= nilfs_set_page_dirty,
+	.readpages		= nilfs_readpages,
+	.write_begin		= nilfs_write_begin,
+	.write_end		= nilfs_write_end,
+	/* .releasepage		= nilfs_releasepage, (hook left disabled) */
+	.invalidatepage		= block_invalidatepage,
+	.direct_IO		= nilfs_direct_IO,
+	.is_partially_uptodate  = block_is_partially_uptodate,
+};
+
+struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct inode *inode;
+	struct nilfs_inode_info *ii;
+	struct nilfs_root *root;
+	int err = -ENOMEM;
+	ino_t ino;
+
+	inode = new_inode(sb);
+	if (unlikely(!inode))
+		goto failed;
+	/* Keep __GFP_FS out of this mapping's allocation mask. */
+	mapping_set_gfp_mask(inode->i_mapping,
+			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+
+	root = NILFS_I(dir)->i_root;
+	ii = NILFS_I(inode);
+	ii->i_state = 1 << NILFS_I_NEW;
+	ii->i_root = root;
+
+	err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh);
+	if (unlikely(err))
+		goto failed_ifile_create_inode;
+	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
+
+	atomic64_inc(&root->inodes_count);
+	inode_init_owner(inode, dir, mode);
+	inode->i_ino = ino;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
+		err = nilfs_bmap_read(ii->i_bmap, NULL);
+		if (err < 0)
+			goto failed_bmap;
+
+		set_bit(NILFS_I_BMAP, &ii->i_state);
+		/* No lock is needed; iget() ensures it. */
+	}
+
+	ii->i_flags = nilfs_mask_flags(
+		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
+
+	/* ii->i_file_acl = 0; */
+	/* ii->i_dir_acl = 0; */
+	ii->i_dir_start_lookup = 0;
+	nilfs_set_inode_flags(inode);
+	spin_lock(&nilfs->ns_next_gen_lock);
+	inode->i_generation = nilfs->ns_next_generation++;
+	spin_unlock(&nilfs->ns_next_gen_lock);
+	insert_inode_hash(inode);
+
+	err = nilfs_init_acl(inode, dir);
+	if (unlikely(err))
+		goto failed_acl; /* never occur. When supporting
+				    nilfs_init_acl(), proper cancellation of
+				    above jobs should be considered */
+
+	return inode;
+
+ failed_acl:
+ failed_bmap:
+	clear_nlink(inode);
+	iput(inode);  /* raw_inode will be deleted through
+			 generic_delete_inode() */
+	goto failed;
+
+ failed_ifile_create_inode:
+	make_bad_inode(inode);
+	iput(inode);  /* if i_nlink == 1, generic_forget_inode() will be
+			 called */
+ failed:
+	return ERR_PTR(err);
+}
+
+void nilfs_set_inode_flags(struct inode *inode)
+{
+	unsigned int nflags = NILFS_I(inode)->i_flags;
+	unsigned int vflags;
+
+	/* Translate the NILFS FS_*_FL bits into the matching VFS S_* bits. */
+	vflags = ((nflags & FS_SYNC_FL) ? S_SYNC : 0) |
+		 ((nflags & FS_APPEND_FL) ? S_APPEND : 0) |
+		 ((nflags & FS_IMMUTABLE_FL) ? S_IMMUTABLE : 0) |
+		 ((nflags & FS_NOATIME_FL) ? S_NOATIME : 0) |
+		 ((nflags & FS_DIRSYNC_FL) ? S_DIRSYNC : 0);
+
+	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
+			    S_DIRSYNC);
+	inode->i_flags |= vflags;
+
+	mapping_set_gfp_mask(inode->i_mapping,
+			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+}
+
+int nilfs_read_inode_common(struct inode *inode, struct nilfs_inode *raw_inode)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	int err;
+
+	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
+	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
+	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
+	inode->i_size = le64_to_cpu(raw_inode->i_size);
+	/* atime is seeded from the mtime fields; no raw atime is read here */
+	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
+	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
+	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
+	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
+	if (inode->i_nlink == 0 && inode->i_mode == 0)
+		return -EINVAL; /* this inode is deleted */
+
+	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
+	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
+#if 0
+	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
+		0 : le32_to_cpu(raw_inode->i_dir_acl);
+#endif
+	ii->i_dir_start_lookup = 0;
+	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	      S_ISLNK(inode->i_mode)))
+		return 0;	/* the bmap is read only for these three types */
+	err = nilfs_bmap_read(ii->i_bmap, raw_inode);
+	if (err < 0)
+		return err;
+	/* No lock is needed; iget() ensures it. */
+	set_bit(NILFS_I_BMAP, &ii->i_state);
+	return 0;
+}
+
+static int __nilfs_read_inode(struct super_block *sb,
+			      struct nilfs_root *root, unsigned long ino,
+			      struct inode *inode)
+{
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct buffer_head *bh;
+	struct nilfs_inode *raw_inode;
+	int err;
+
+	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
+	if (unlikely(err))
+		goto bad_inode;
+
+	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);
+
+	err = nilfs_read_inode_common(inode, raw_inode);
+	if (err)
+		goto failed_unmap;
+	/* Install the operation tables matching the file type. */
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &nilfs_file_inode_operations;
+		inode->i_fop = &nilfs_file_operations;
+		inode->i_mapping->a_ops = &nilfs_aops;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &nilfs_dir_inode_operations;
+		inode->i_fop = &nilfs_dir_operations;
+		inode->i_mapping->a_ops = &nilfs_aops;
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &nilfs_symlink_inode_operations;
+		inode->i_mapping->a_ops = &nilfs_aops;
+	} else {
+		inode->i_op = &nilfs_special_inode_operations;
+		init_special_inode(
+			inode, inode->i_mode,
+			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
+	}
+	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+	brelse(bh);
+	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+	nilfs_set_inode_flags(inode);
+	return 0;
+
+ failed_unmap:
+	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+	brelse(bh);
+
+ bad_inode:
+	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+	return err;
+}
+
+static int nilfs_iget_test(struct inode *inode, void *opaque)
+{
+	const struct nilfs_iget_args *key = opaque;
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+
+	/* Inode number and root must both match. */
+	if (key->ino != inode->i_ino || key->root != ii->i_root)
+		return 0;
+	/* A GC inode only matches a GC lookup for the same checkpoint. */
+	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
+		return key->for_gc && key->cno == ii->i_cno;
+
+	return !key->for_gc;
+}
+
+static int nilfs_iget_set(struct inode *inode, void *opaque)
+{
+	const struct nilfs_iget_args *key = opaque;
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+
+	inode->i_ino = key->ino;
+	ii->i_root = key->for_gc ? NULL : key->root;
+	if (key->for_gc) {
+		ii->i_state = 1 << NILFS_I_GCINODE;
+		ii->i_cno = key->cno;
+	} else if (key->root && key->ino == NILFS_ROOT_INO) {
+		/* Only the root directory inode pins its nilfs_root. */
+		nilfs_get_root(key->root);
+	}
+	return 0;
+}
+
+struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
+			    unsigned long ino)
+{
+	struct nilfs_iget_args key = {	/* key for a non-GC inode */
+		.ino = ino, .cno = 0, .root = root, .for_gc = 0
+	};
+
+	return ilookup5(sb, ino, nilfs_iget_test, &key);
+}
+
+struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
+				unsigned long ino)
+{
+	struct nilfs_iget_args key = {	/* key for a non-GC inode */
+		.ino = ino, .cno = 0, .root = root, .for_gc = 0
+	};
+
+	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &key);
+}
+
+struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
+			 unsigned long ino)
+{
+	struct inode *inode = nilfs_iget_locked(sb, root, ino);
+	int err;
+
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;	/* already set up */
+
+	/* New inode: fill it in from the ifile before unlocking it. */
+	err = __nilfs_read_inode(sb, root, ino, inode);
+	if (!err) {
+		unlock_new_inode(inode);
+		return inode;
+	}
+	iget_failed(inode);
+	return ERR_PTR(err);
+}
+
+struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
+				__u64 cno)
+{
+	struct nilfs_iget_args key = {
+		.ino = ino, .cno = cno, .root = NULL, .for_gc = 1
+	};
+	struct inode *inode;
+	int err;
+
+	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &key);
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+	/* New GC inode: initialize it before it gets unlocked. */
+	err = nilfs_init_gcinode(inode);
+	if (!err) {
+		unlock_new_inode(inode);
+		return inode;
+	}
+	iget_failed(inode);
+	return ERR_PTR(err);
+}
+
+void nilfs_write_inode_common(struct inode *inode,
+			      struct nilfs_inode *raw_inode, int has_bmap)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+
+	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
+	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
+	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
+	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
+	raw_inode->i_size = cpu_to_le64(inode->i_size);
+	raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
+	raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
+	/* Note: only ctime and mtime are written above; atime is not. */
+	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
+	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+
+	if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
+		struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+		/* zero-fill unused portion in the case of super root block */
+		raw_inode->i_xattr = 0;
+		raw_inode->i_pad = 0;
+		memset((void *)raw_inode + sizeof(*raw_inode), 0,
+		       nilfs->ns_inode_size - sizeof(*raw_inode));
+	}
+
+	if (has_bmap)
+		nilfs_bmap_write(ii->i_bmap, raw_inode);
+	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		raw_inode->i_device_code =
+			cpu_to_le64(huge_encode_dev(inode->i_rdev));
+	/* When extending inode, nilfs->ns_inode_size should be checked
+	   for substitutions of appended fields */
+}
+
+void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct inode *ifile = ii->i_root->ifile;
+	struct nilfs_inode *raw;
+
+	raw = nilfs_ifile_map_inode(ifile, inode->i_ino, ibh);
+	/* A brand-new inode starts from an all-zero on-disk entry. */
+	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
+		memset(raw, 0, NILFS_MDT(ifile)->mi_entry_size);
+	set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
+
+	/*
+	 * XXX: has_bmap = 0 is a workaround to avoid deadlock of bmap.
+	 * This delays update of i_bmap to just before writing.
+	 */
+	nilfs_write_inode_common(inode, raw, 0);
+	nilfs_ifile_unmap_inode(ifile, inode->i_ino, ibh);
+}
+
+#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */
+
+static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
+				unsigned long from)
+{
+	unsigned long b;
+	int ret;
+
+	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
+		return;
+repeat:
+	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
+	if (ret == -ENOENT)
+		return;
+	else if (ret < 0)
+		goto failed;
+
+	if (b < from)
+		return;
+	/* Trim from the tail, NILFS_MAX_TRUNCATE_BLOCKS blocks at a time. */
+	b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
+	ret = nilfs_bmap_truncate(ii->i_bmap, b);
+	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
+	if (!ret || (ret == -ENOMEM &&
+		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
+		goto repeat;
+	/* -ENOMEM got one retry above; anything else is only reported. */
+failed:
+	nilfs_warning(ii->vfs_inode.i_sb, __func__,
+		      "failed to truncate bmap (ino=%lu, err=%d)",
+		      ii->vfs_inode.i_ino, ret);
+}
+
+void nilfs_truncate(struct inode *inode)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct super_block *sb = inode->i_sb;
+	struct nilfs_transaction_info ti;
+	unsigned int blocksize = sb->s_blocksize;
+	unsigned long first_dead;
+
+	/* Nothing to do without a bmap or for append-only/immutable files. */
+	if (!test_bit(NILFS_I_BMAP, &ii->i_state) ||
+	    IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+
+	/* i_size rounded up to a block count: first block to be dropped. */
+	first_dead = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
+	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
+
+	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);
+
+	nilfs_truncate_bmap(ii, first_dead);
+
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	if (IS_SYNC(inode))
+		nilfs_set_transaction_flag(NILFS_TI_SYNC);
+
+	nilfs_mark_inode_dirty(inode);
+	nilfs_set_file_dirty(inode, 0);
+	nilfs_transaction_commit(sb);
+	/* May construct a logical segment and may fail in sync mode.
+	   But truncate has no return value. */
+}
+
+static void nilfs_clear_inode(struct inode *inode)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct nilfs_mdt_info *mdt = NILFS_MDT(inode);
+
+	/*
+	 * Release what the NILFS part of the inode still holds.
+	 */
+	BUG_ON(!list_empty(&ii->i_dirty));
+	brelse(ii->i_bh);
+	ii->i_bh = NULL;
+
+	if (mdt && mdt->mi_palloc_cache)
+		nilfs_palloc_destroy_cache(inode);
+
+	if (test_bit(NILFS_I_BMAP, &ii->i_state))
+		nilfs_bmap_clear(ii->i_bmap);
+
+	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
+	/* The root directory inode drops its reference on the root. */
+	if (inode->i_ino == NILFS_ROOT_INO && ii->i_root)
+		nilfs_put_root(ii->i_root);
+}
+
+void nilfs_evict_inode(struct inode *inode)
+{
+	struct nilfs_transaction_info ti;
+	struct super_block *sb = inode->i_sb;
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	int ret;
+
+	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
+		truncate_inode_pages_final(&inode->i_data);
+		clear_inode(inode);
+		nilfs_clear_inode(inode);
+		return;
+	}
+	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
+	/* Unlinked inode: drop its blocks and ifile entry in a transaction. */
+	truncate_inode_pages_final(&inode->i_data);
+
+	/* TODO: some of the following operations may fail.  */
+	nilfs_truncate_bmap(ii, 0);
+	nilfs_mark_inode_dirty(inode);
+	clear_inode(inode);
+
+	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
+	if (!ret)
+		atomic64_dec(&ii->i_root->inodes_count);
+
+	nilfs_clear_inode(inode);
+
+	if (IS_SYNC(inode))
+		nilfs_set_transaction_flag(NILFS_TI_SYNC);
+	nilfs_transaction_commit(sb);
+	/* May construct a logical segment and may fail in sync mode.
+	   But delete_inode has no return value. */
+}
+
+int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct nilfs_transaction_info ti;
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	int err;
+
+	err = inode_change_ok(inode, iattr);
+	if (err)
+		return err;
+
+	err = nilfs_transaction_begin(sb, &ti, 0);
+	if (unlikely(err))
+		return err;
+
+	/* Resize first, and only when the size really changes. */
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		inode_dio_wait(inode);
+		truncate_setsize(inode, iattr->ia_size);
+		nilfs_truncate(inode);
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+
+	if (iattr->ia_valid & ATTR_MODE) {
+		err = nilfs_acl_chmod(inode);
+		if (unlikely(err)) {
+			/* ACL update failed: give up the transaction. */
+			nilfs_transaction_abort(sb);
+			return err;
+		}
+	}
+
+	return nilfs_transaction_commit(sb);
+}
+
+int nilfs_permission(struct inode *inode, int mask)
+{
+	const struct nilfs_root *root = NILFS_I(inode)->i_root;
+	int snapshot = root && root->cno != NILFS_CPTREE_CURRENT_CNO;
+
+	if ((mask & MAY_WRITE) && snapshot)
+		return -EROFS; /* snapshot is not writable */
+	return generic_permission(inode, mask);
+}
+
+int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
+{
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	int err;
+
+	spin_lock(&nilfs->ns_inode_lock);
+	if (ii->i_bh == NULL) {
+		spin_unlock(&nilfs->ns_inode_lock);
+		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
+						  inode->i_ino, pbh);
+		if (unlikely(err))
+			return err;
+		spin_lock(&nilfs->ns_inode_lock);
+		if (ii->i_bh == NULL)
+			ii->i_bh = *pbh;	/* cache the block in the inode */
+		else {
+			brelse(*pbh);	/* i_bh was set meanwhile; use it */
+			*pbh = ii->i_bh;
+		}
+	} else
+		*pbh = ii->i_bh;
+
+	get_bh(*pbh);	/* extra reference returned to the caller */
+	spin_unlock(&nilfs->ns_inode_lock);
+	return 0;
+}
+
+int nilfs_inode_dirty(struct inode *inode)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+	int dirty;
+
+	if (list_empty(&ii->i_dirty))	/* checked without the lock */
+		return 0;
+	spin_lock(&nilfs->ns_inode_lock);
+	dirty = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
+		test_bit(NILFS_I_BUSY, &ii->i_state);
+	spin_unlock(&nilfs->ns_inode_lock);
+	return dirty;
+}
+
+int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
+	/* Already flagged dirty: the inode needs no further queueing here. */
+	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
+		return 0;
+
+	spin_lock(&nilfs->ns_inode_lock);
+	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
+	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
+		/* Because this routine may race with nilfs_dispose_list(),
+		   we have to check NILFS_I_QUEUED here, too. */
+		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
+			/* This will happen when somebody is freeing
+			   this inode. */
+			nilfs_warning(inode->i_sb, __func__,
+				      "cannot get inode (ino=%lu)\n",
+				      inode->i_ino);
+			spin_unlock(&nilfs->ns_inode_lock);
+			return -EINVAL; /* NILFS_I_DIRTY may remain for
+					   freeing inode */
+		}
+		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
+		set_bit(NILFS_I_QUEUED, &ii->i_state);
+	}
+	spin_unlock(&nilfs->ns_inode_lock);
+	return 0;
+}
+
+int nilfs_mark_inode_dirty(struct inode *inode)
+{
+	struct buffer_head *ibh;
+	int err = nilfs_load_inode_block(inode, &ibh);
+
+	if (unlikely(err)) {
+		nilfs_warning(inode->i_sb, __func__,
+			      "failed to reget inode block.\n");
+		return err;
+	}
+	/* Copy the in-core inode into its ifile block and dirty both. */
+	nilfs_update_inode(inode, ibh);
+	mark_buffer_dirty(ibh);
+	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
+	brelse(ibh);
+	return 0;
+}
+
+/**
+ * nilfs_dirty_inode - reflect changes on given inode to an inode block.
+ * @inode: inode of the file to be registered.
+ * @flags: dirty flags from the caller; not used by this function.
+ *
+ * Loads the inode block containing @inode and copies the in-core inode
+ * into its entry there, inside a transaction of its own.  Inodes that
+ * have MDT info (NILFS_MDT() != NULL) are only marked dirty through
+ * nilfs_mdt_mark_dirty(), and a bad inode is ignored with a warning.
+ */
+void nilfs_dirty_inode(struct inode *inode, int flags)
+{
+	struct nilfs_transaction_info ti;
+	struct super_block *sb = inode->i_sb;
+
+	if (is_bad_inode(inode)) {
+		nilfs_warning(sb, __func__,
+			      "tried to mark bad_inode dirty. ignored.\n");
+		dump_stack();
+		return;
+	}
+	if (NILFS_MDT(inode)) {
+		nilfs_mdt_mark_dirty(inode);
+		return;
+	}
+	nilfs_transaction_begin(sb, &ti, 0);
+	nilfs_mark_inode_dirty(inode);
+	nilfs_transaction_commit(sb); /* never fails */
+}
+
+/*
+ * nilfs_fiemap - report the extents covering @len bytes from @start of
+ * @inode through @fieinfo.  Returns 0 on success or the failing error code.
+ */
+int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		 __u64 start, __u64 len)
+{
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+	__u64 logical = 0, phys = 0, size = 0;
+	__u32 flags = 0;
+	loff_t isize;
+	sector_t blkoff, end_blkoff;
+	sector_t delalloc_blkoff;
+	unsigned long delalloc_blklen;
+	unsigned int blkbits = inode->i_blkbits;
+	int ret, n;
+
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
+	isize = i_size_read(inode);
+	blkoff = start >> blkbits;
+	end_blkoff = (start + len - 1) >> blkbits;
+	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
+							&delalloc_blkoff);
+
+	do {
+		__u64 blkphy;
+		unsigned int maxblocks;
+
+		if (delalloc_blklen && blkoff == delalloc_blkoff) {
+			if (size) {
+				/* End of the current extent */
+				ret = fiemap_fill_next_extent(
+					fieinfo, logical, phys, size, flags);
+				if (ret)
+					break;
+			}
+			if (blkoff > end_blkoff)
+				break;
+
+			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
+			logical = blkoff << blkbits;
+			phys = 0;
+			size = delalloc_blklen << blkbits;
+
+			blkoff = delalloc_blkoff + delalloc_blklen;
+			delalloc_blklen = nilfs_find_uncommitted_extent(
+				inode, blkoff, &delalloc_blkoff);
+			continue;
+		}
+
+		/*
+		 * Limit the number of blocks that we look up so as
+		 * not to get into the next delayed allocation extent.
+		 */
+		maxblocks = INT_MAX;
+		if (delalloc_blklen)
+			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
+					  maxblocks);
+		blkphy = 0;
+
+		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+		n = nilfs_bmap_lookup_contig(
+			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
+		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+		if (n < 0) {
+			int past_eof;
+
+			if (unlikely(n != -ENOENT)) {
+				ret = n; /* propagate the lookup error */
+				break;
+			}
+
+			/* HOLE */
+			blkoff++;
+			past_eof = ((blkoff << blkbits) >= isize);
+			if (size) {
+				/* End of the current extent */
+				if (past_eof)
+					flags |= FIEMAP_EXTENT_LAST;
+				ret = fiemap_fill_next_extent(
+					fieinfo, logical, phys, size, flags);
+				if (ret)
+					break;
+				size = 0;
+			}
+			if (blkoff > end_blkoff || past_eof)
+				break;
+		} else {
+			if (size) {
+				if (phys && blkphy << blkbits == phys + size) {
+					/* The current extent goes on */
+					size += n << blkbits;
+				} else {
+					/* Terminate the current extent */
+					ret = fiemap_fill_next_extent(
+						fieinfo, logical, phys, size,
+						flags);
+					if (ret || blkoff > end_blkoff)
+						break;
+
+					/* Start another extent */
+					flags = FIEMAP_EXTENT_MERGED;
+					logical = blkoff << blkbits;
+					phys = blkphy << blkbits;
+					size = n << blkbits;
+				}
+			} else {
+				/* Start a new extent */
+				flags = FIEMAP_EXTENT_MERGED;
+				logical = blkoff << blkbits;
+				phys = blkphy << blkbits;
+				size = n << blkbits;
+			}
+			blkoff += n;
+		}
+		cond_resched();
+	} while (true);
+
+	/* If ret is 1 then we just hit the end of the extent array */
+	if (ret == 1)
+		ret = 0;
+
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
diff --git a/net/802/Kconfig b/net/802/Kconfig
index be33d27c8e6..80d4bf78905 100644
--- a/net/802/Kconfig
+++ b/net/802/Kconfig
@@ -5,3 +5,6 @@ config STP
 config GARP
 	tristate
 	select STP
+
+config MRP
+	tristate
diff --git a/net/802/Makefile b/net/802/Makefile
index 7893d679910..37e654d6615 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -4,7 +4,6 @@
 
 # Check the p8022 selections against net/core/Makefile.
 obj-$(CONFIG_LLC)	+= p8022.o psnap.o
-obj-$(CONFIG_TR)	+= p8022.o psnap.o tr.o
 obj-$(CONFIG_NET_FC)	+=                 fc.o
 obj-$(CONFIG_FDDI)	+=                 fddi.o
 obj-$(CONFIG_HIPPI)	+=                 hippi.o
@@ -12,3 +11,4 @@ obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
 obj-$(CONFIG_STP)	+= stp.o
 obj-$(CONFIG_GARP)	+= garp.o
+obj-$(CONFIG_MRP)	+= mrp.o
diff --git a/net/802/fc.c b/net/802/fc.c
index 1e49f2d4ea9..05eea6b98bb 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -11,7 +11,6 @@
  */
 
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -27,6 +26,7 @@
 #include <linux/net.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
+#include <linux/export.h>
 #include <net/arp.h>
 
 /*
@@ -35,7 +35,7 @@
 
 static int fc_header(struct sk_buff *skb, struct net_device *dev,
 		     unsigned short type,
-		     const void *daddr, const void *saddr, unsigned len)
+		     const void *daddr, const void *saddr, unsigned int len)
 {
 	struct fch_hdr *fch;
 	int hdr_len;
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 94b3ad08f39..9cda40661e0 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -27,7 +27,6 @@
  */
 
 #include <linux/module.h>
-#include <asm/system.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -52,7 +51,7 @@
 
 static int fddi_header(struct sk_buff *skb, struct net_device *dev,
 		       unsigned short type,
-		       const void *daddr, const void *saddr, unsigned len)
+		       const void *daddr, const void *saddr, unsigned int len)
 {
 	int hl = FDDI_K_SNAP_HLEN;
 	struct fddihdr *fddi;
diff --git a/net/802/garp.c b/net/802/garp.c
index 16102951d36..b38ee6dcba4 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -15,6 +15,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/llc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <net/llc.h>
 #include <net/llc_pdu.h>
 #include <net/garp.h>
@@ -156,9 +157,9 @@ static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app,
 	while (parent) {
 		attr = rb_entry(parent, struct garp_attr, node);
 		d = garp_attr_cmp(attr, data, len, type);
-		if (d < 0)
+		if (d > 0)
 			parent = parent->rb_left;
-		else if (d > 0)
+		else if (d < 0)
 			parent = parent->rb_right;
 		else
 			return attr;
@@ -166,7 +167,8 @@ static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app,
 	return NULL;
 }
 
-static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new)
+static struct garp_attr *garp_attr_create(struct garp_applicant *app,
+					  const void *data, u8 len, u8 type)
 {
 	struct rb_node *parent = NULL, **p = &app->gid.rb_node;
 	struct garp_attr *attr;
@@ -175,21 +177,16 @@ static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new)
 	while (*p) {
 		parent = *p;
 		attr = rb_entry(parent, struct garp_attr, node);
-		d = garp_attr_cmp(attr, new->data, new->dlen, new->type);
-		if (d < 0)
+		d = garp_attr_cmp(attr, data, len, type);
+		if (d > 0)
 			p = &parent->rb_left;
-		else if (d > 0)
+		else if (d < 0)
 			p = &parent->rb_right;
+		else {
+			/* The attribute already exists; re-use it. */
+			return attr;
+		}
 	}
-	rb_link_node(&new->node, parent, p);
-	rb_insert_color(&new->node, &app->gid);
-}
-
-static struct garp_attr *garp_attr_create(struct garp_applicant *app,
-					  const void *data, u8 len, u8 type)
-{
-	struct garp_attr *attr;
-
 	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
 	if (!attr)
 		return attr;
@@ -197,7 +194,9 @@ static struct garp_attr *garp_attr_create(struct garp_applicant *app,
 	attr->type  = type;
 	attr->dlen  = len;
 	memcpy(attr->data, data, len);
-	garp_attr_insert(app, attr);
+
+	rb_link_node(&attr->node, parent, p);
+	rb_insert_color(&attr->node, &app->gid);
 	return attr;
 }
 
@@ -398,7 +397,7 @@ static void garp_join_timer_arm(struct garp_applicant *app)
 {
 	unsigned long delay;
 
-	delay = (u64)msecs_to_jiffies(garp_join_time) * net_random() >> 32;
+	delay = (u64)msecs_to_jiffies(garp_join_time) * prandom_u32() >> 32;
 	mod_timer(&app->join_timer, jiffies + delay);
 }
 
@@ -553,7 +552,7 @@ static void garp_release_port(struct net_device *dev)
 		if (rtnl_dereference(port->applicants[i]))
 			return;
 	}
-	rcu_assign_pointer(dev->garp_port, NULL);
+	RCU_INIT_POINTER(dev->garp_port, NULL);
 	kfree_rcu(port, rcu);
 }
 
@@ -605,13 +604,17 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 
 	ASSERT_RTNL();
 
-	rcu_assign_pointer(port->applicants[appl->type], NULL);
+	RCU_INIT_POINTER(port->applicants[appl->type], NULL);
 
 	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
 	 * all pending messages before the applicant is gone. */
 	del_timer_sync(&app->join_timer);
+
+	spin_lock_bh(&app->lock);
 	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
 	garp_pdu_queue(app);
+	spin_unlock_bh(&app->lock);
+
 	garp_queue_xmit(app);
 
 	dev_mc_del(dev, appl->proto.group_address);
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 91aca8780fd..5ff2a718ddc 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -35,7 +35,6 @@
 #include <net/arp.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 
 /*
  * Create the HIPPI MAC header for an arbitrary protocol layer
@@ -46,7 +45,7 @@
 
 static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 			unsigned short type,
-			const void *daddr, const void *saddr, unsigned len)
+			const void *daddr, const void *saddr, unsigned int len)
 {
 	struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
 	struct hippi_cb *hcb = (struct hippi_cb *) skb->cb;
@@ -173,14 +172,14 @@ EXPORT_SYMBOL(hippi_mac_addr);
 int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
 {
 	/* Never send broadcast/multicast ARP messages */
-	p->mcast_probes = 0;
+	NEIGH_VAR_INIT(p, MCAST_PROBES, 0);
 
 	/* In IPv6 unicast probes are valid even on NBMA,
 	* because they are encapsulated in normal IPv6 protocol.
 	* Should be a generic flag.
 	*/
 	if (p->tbl->family != AF_INET6)
-		p->ucast_probes = 0;
+		NEIGH_VAR_INIT(p, UCAST_PROBES, 0);
 	return 0;
 }
 EXPORT_SYMBOL(hippi_neigh_setup_dev);
diff --git a/net/802/mrp.c b/net/802/mrp.c
new file mode 100644
index 00000000000..72db2785ef2
--- /dev/null
+++ b/net/802/mrp.c
@@ -0,0 +1,926 @@
+/*
+ *	IEEE 802.1Q Multiple Registration Protocol (MRP)
+ *
+ *	Copyright (c) 2012 Massachusetts Institute of Technology
+ *
+ *	Adapted from code in net/802/garp.c
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/mrp.h>
+#include <asm/unaligned.h>
+
+static unsigned int mrp_join_time __read_mostly = 200;
+module_param(mrp_join_time, uint, 0644);
+MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
+MODULE_LICENSE("GPL");
+
+static const u8
+mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VO,
+	},
+	[MRP_APPLICANT_VP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VP,
+	},
+	[MRP_APPLICANT_VN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VN,
+	},
+	[MRP_APPLICANT_AN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AN,
+	},
+	[MRP_APPLICANT_AA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_QA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_LA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_LA,
+	},
+	[MRP_APPLICANT_AO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AO,
+	},
+	[MRP_APPLICANT_QO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_QO,
+	},
+	[MRP_APPLICANT_AP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+	[MRP_APPLICANT_QP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+};
+
+static const u8
+mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL,
+	[MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV,
+	[MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL,
+};
+
+static void mrp_attrvalue_inc(void *value, u8 len)
+{
+	u8 *bytes = value;
+
+	/* Big-endian increment: bump the last byte and carry towards the
+	 * first byte for as long as a byte wraps around to zero. */
+	while (len--)
+		if (++bytes[len])
+			break;
+}
+
+static int mrp_attr_cmp(const struct mrp_attr *attr,
+			 const void *value, u8 len, u8 type)
+{
+	int diff = attr->type - type;		/* 1st key: type */
+
+	if (!diff)
+		diff = attr->len - len;		/* 2nd key: length */
+	return diff ? diff : memcmp(attr->value, value, len); /* 3rd: bytes */
+}
+
+static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *cur;
+	struct mrp_attr *candidate;
+	int cmp;
+
+	/* Binary-search descent keyed by mrp_attr_cmp(): a node comparing
+	 * greater than the key sends us left, a smaller one sends us right.
+	 */
+	for (cur = app->mad.rb_node; cur;
+	     cur = cmp > 0 ? cur->rb_left : cur->rb_right) {
+		candidate = rb_entry(cur, struct mrp_attr, node);
+		cmp = mrp_attr_cmp(candidate, value, len, type);
+		if (!cmp)
+			return candidate;
+	}
+	return NULL;
+}
+
+static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node **link = &app->mad.rb_node;
+	struct rb_node *parent = NULL;
+	struct mrp_attr *attr;
+	int cmp;
+
+	/* Descend to the empty link where a node with this key belongs. */
+	while (*link) {
+		parent = *link;
+		attr = rb_entry(parent, struct mrp_attr, node);
+		cmp = mrp_attr_cmp(attr, value, len, type);
+		if (!cmp)	/* already present: re-use the existing node */
+			return attr;
+		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	/* Not found. The value bytes are stored directly behind the struct,
+	 * and a new attribute starts out in the VO state.
+	 */
+	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
+	if (!attr)
+		return NULL;
+	memcpy(attr->value, value, len);
+	attr->len   = len;
+	attr->type  = type;
+	attr->state = MRP_APPLICANT_VO;
+
+	rb_link_node(&attr->node, parent, link);
+	rb_insert_color(&attr->node, &app->mad);
+	return attr;
+}
+
+static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
+{
+	rb_erase(&attr->node, &app->mad);	/* unlink from app->mad */
+	kfree(attr);
+}
+
+static int mrp_pdu_init(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+	struct mrp_pdu_hdr *ph;
+
+	skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev),
+			GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+	/* Tie the buffer to the device and reserve link-layer headroom. */
+	skb->dev = app->dev;
+	skb->protocol = app->app->pkttype.type;
+	skb_reserve(skb, LL_RESERVED_SPACE(app->dev));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	/* The PDU header goes in first and carries the protocol version. */
+	ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph));
+	ph->version = app->app->version;
+	/* From now on this is the applicant's PDU under construction. */
+	app->pdu = skb;
+	return 0;
+}
+
+static int mrp_pdu_append_end_mark(struct mrp_applicant *app)
+{
+	__be16 *endmark;
+
+	if (skb_tailroom(app->pdu) < sizeof(*endmark))
+		return -1;	/* PDU is full; nothing was appended */
+	endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark));
+	put_unaligned(MRP_END_MARK, endmark);
+	return 0;
+}
+
+static void mrp_pdu_queue(struct mrp_applicant *app)
+{
+	struct sk_buff *pdu = app->pdu;
+
+	if (!pdu)
+		return;
+	/* Close an open Message first, then the PDU itself (best effort). */
+	if (mrp_cb(pdu)->mh)
+		mrp_pdu_append_end_mark(app);
+	mrp_pdu_append_end_mark(app);
+	/* The frame is addressed to the application's group address. */
+	dev_hard_header(pdu, app->dev, ntohs(app->app->pkttype.type),
+			app->app->group_address, app->dev->dev_addr, pdu->len);
+	app->pdu = NULL;
+	skb_queue_tail(&app->queue, pdu);
+}
+
+static void mrp_queue_xmit(struct mrp_applicant *app)
+{
+	struct sk_buff *pdu = skb_dequeue(&app->queue);
+
+	for (; pdu; pdu = skb_dequeue(&app->queue))	/* drain the queue */
+		dev_queue_xmit(pdu);
+}
+
+static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app,
+				  u8 attrtype, u8 attrlen)
+{
+	struct mrp_skb_cb *cb = mrp_cb(app->pdu);
+	struct mrp_msg_hdr *hdr;
+
+	if (cb->mh) {	/* a Message is open: terminate it first */
+		if (mrp_pdu_append_end_mark(app) < 0)
+			return -1;
+		cb->vah = NULL;
+		cb->mh = NULL;
+	}
+	if (skb_tailroom(app->pdu) < sizeof(*hdr))
+		return -1;
+	hdr = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*hdr));
+	hdr->attrlen = attrlen;
+	hdr->attrtype = attrtype;
+	cb->mh = hdr;
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app,
+				      const void *firstattrvalue, u8 attrlen)
+{
+	const size_t need = sizeof(struct mrp_vecattr_hdr) + attrlen;
+	struct mrp_vecattr_hdr *hdr;
+
+	if (skb_tailroom(app->pdu) < need)
+		return -1;
+	hdr = (struct mrp_vecattr_hdr *)__skb_put(app->pdu, need);
+	put_unaligned(0, &hdr->lenflags);	/* no events recorded yet */
+	memcpy(hdr->firstattrvalue, firstattrvalue, attrlen);
+	memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen);
+	mrp_cb(app->pdu)->vah = hdr;
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app,
+					const struct mrp_attr *attr,
+					enum mrp_vecattr_event vaevent)
+{
+	u16 len, pos;
+	u8 *vaevents;
+	int err;
+again:
+	if (!app->pdu) {
+		err = mrp_pdu_init(app);
+		if (err < 0)
+			return err;	/* could not allocate a new PDU */
+	}
+
+	/* If there is no Message header in the PDU, or the Message header is
+	 * for a different attribute type, add an EndMark (if necessary) and a
+	 * new Message header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->mh ||
+	    mrp_cb(app->pdu)->mh->attrtype != attr->type ||
+	    mrp_cb(app->pdu)->mh->attrlen != attr->len) {
+		if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0)
+			goto queue;
+	}
+
+	/* If there is no VectorAttribute header for this Message in the PDU,
+	 * or this attribute's value does not sequentially follow the previous
+	 * attribute's value, add a new VectorAttribute header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->vah ||
+	    memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) {
+		if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0)
+			goto queue;
+	}
+
+	len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags));
+	pos = len % 3;	/* slot of the new event within its byte */
+
+	/* Events are packed into Vectors in the PDU, three to a byte. Add a
+	 * byte to the end of the Vector if necessary.
+	 */
+	if (!pos) {
+		if (skb_tailroom(app->pdu) < sizeof(u8))
+			goto queue;
+		vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8));
+	} else {
+		vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8));
+	}
+
+	switch (pos) {
+	case 0:
+		*vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX *
+				       __MRP_VECATTR_EVENT_MAX);
+		break;
+	case 1:
+		*vaevents += vaevent * __MRP_VECATTR_EVENT_MAX;
+		break;
+	case 2:
+		*vaevents += vaevent;
+		break;
+	default:
+		WARN_ON(1);	/* not reachable: pos is len % 3 */
+	}
+
+	/* Increment the length of the VectorAttribute in the PDU, as well as
+	 * the value of the next attribute that would continue its Vector.
+	 */
+	put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags);
+	mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len);
+
+	return 0;
+	/* Out of tailroom: queue this PDU and retry with a fresh one. */
+queue:
+	mrp_pdu_queue(app);
+	goto again;
+}
+
+static void mrp_attr_event(struct mrp_applicant *app,
+			   struct mrp_attr *attr, enum mrp_event event)
+{
+	enum mrp_applicant_state next;
+	int vaevent = -1;	/* vector event to transmit, if any */
+
+	next = mrp_applicant_state_table[attr->state][event];
+	if (next == MRP_APPLICANT_INVALID) {
+		WARN_ON(1);
+		return;
+	}
+	if (event != MRP_EVENT_TX)
+		goto commit;
+
+	switch (mrp_tx_action_table[attr->state]) {
+	case MRP_TX_ACTION_S_NEW:
+		vaevent = MRP_VECATTR_EVENT_NEW;
+		break;
+	case MRP_TX_ACTION_S_JOIN_IN:
+		vaevent = MRP_VECATTR_EVENT_JOIN_IN;
+		break;
+	case MRP_TX_ACTION_S_LV:
+		vaevent = MRP_VECATTR_EVENT_LV;
+		break;
+	case MRP_TX_ACTION_NONE:
+	case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL:
+	case MRP_TX_ACTION_S_IN_OPTIONAL:
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	/* When appending the attribute fails, don't update its state
+	 * in order to retry at the next TX event.
+	 */
+	if (vaevent >= 0 &&
+	    mrp_pdu_append_vecattr_event(app, attr, vaevent) < 0)
+		return;
+	/* As a pure applicant, a transmitted leave means the attribute is
+	 * unregistered, so it is destroyed instead of changing state.
+	 */
+	if (vaevent == MRP_VECATTR_EVENT_LV) {
+		mrp_attr_destroy(app, attr);
+		return;
+	}
+commit:
+	attr->state = next;
+}
+
+int mrp_request_join(const struct net_device *dev,
+		     const struct mrp_application *appl,
+		     const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+	int err = -ENOMEM;
+
+	/* The value must fit in sk_buff's cb next to struct mrp_skb_cb. */
+	if (sizeof(struct mrp_skb_cb) + len > FIELD_SIZEOF(struct sk_buff, cb))
+		return err;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_create(app, value, len, type);
+	if (attr) {
+		mrp_attr_event(app, attr, MRP_EVENT_JOIN);
+		err = 0;
+	}
+	spin_unlock_bh(&app->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mrp_request_join);
+
+void mrp_request_leave(const struct net_device *dev,
+		       const struct mrp_application *appl,
+		       const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	/* mrp_request_join() rejects values this large, so no such
+	 * attribute can exist and there is nothing to leave.
+	 */
+	if (sizeof(struct mrp_skb_cb) + len > FIELD_SIZEOF(struct sk_buff, cb))
+		return;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_lookup(app, value, len, type);
+	/* A leave for an attribute that was never joined is ignored. */
+	if (attr)
+		mrp_attr_event(app, attr, MRP_EVENT_LV);
+	spin_unlock_bh(&app->lock);
+}
+EXPORT_SYMBOL_GPL(mrp_request_leave);
+
+static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event)
+{
+	struct rb_node *cur = rb_first(&app->mad);
+	struct rb_node *following;
+
+	/* Grab the successor first: a TX event may free the current node. */
+	for (; cur; cur = following) {
+		following = rb_next(cur);
+		mrp_attr_event(app, rb_entry(cur, struct mrp_attr, node),
+			       event);
+	}
+}
+
+static void mrp_join_timer_arm(struct mrp_applicant *app)
+{
+	u64 scaled = (u64)msecs_to_jiffies(mrp_join_time) * prandom_u32();
+
+	/* (x * rand32) >> 32 yields a random delay in [0, x) jiffies. */
+	mod_timer(&app->join_timer, jiffies + (unsigned long)(scaled >> 32));
+}
+
+static void mrp_join_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_TX);	/* TX event for every attribute */
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+	/* Transmit without holding the lock, then schedule the next run. */
+	mrp_queue_xmit(app);
+	mrp_join_timer_arm(app);
+}
+
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+	mod_timer(&app->periodic_timer,	/* mrp_periodic_time ms from now */
+		  jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_PERIODIC);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+	/* Nothing is transmitted here; the timer only re-arms itself. */
+	mrp_periodic_timer_arm(app);
+}
+
+static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
+{
+	__be16 word;
+
+	/* -1: truncated PDU or EndMark (consumed); 0: more data follows. */
+	if (skb_copy_bits(skb, *offset, &word, sizeof(word)) < 0)
+		return -1;
+	if (word != MRP_END_MARK)
+		return 0;
+	*offset += sizeof(word);
+	return -1;
+}
+
+static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app,
+					struct sk_buff *skb,
+					enum mrp_vecattr_event vaevent)
+{
+	const struct mrp_skb_cb *cb = mrp_cb(skb);
+	struct mrp_attr *attr;
+	enum mrp_event event;
+
+	/* Translate the wire encoding; unknown events are ignored. */
+	switch (vaevent) {
+	case MRP_VECATTR_EVENT_NEW:
+		event = MRP_EVENT_R_NEW;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_IN:
+		event = MRP_EVENT_R_JOIN_IN;
+		break;
+	case MRP_VECATTR_EVENT_IN:
+		event = MRP_EVENT_R_IN;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_MT:
+		event = MRP_EVENT_R_JOIN_MT;
+		break;
+	case MRP_VECATTR_EVENT_MT:
+		event = MRP_EVENT_R_MT;
+		break;
+	case MRP_VECATTR_EVENT_LV:
+		event = MRP_EVENT_R_LV;
+		break;
+	default:
+		return;
+	}
+
+	/* Only attributes present in app->mad receive the event. */
+	attr = mrp_attr_lookup(app, cb->attrvalue, cb->mh->attrlen,
+			       cb->mh->attrtype);
+	if (attr)
+		mrp_attr_event(app, attr, event);
+}
+
+static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
+				 struct sk_buff *skb, int *offset)
+{
+	struct mrp_vecattr_hdr _vah;
+	u16 valen;
+	u8 vaevents, vaevent;
+
+	mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah),
+					      &_vah);
+	if (!mrp_cb(skb)->vah)
+		return -1;	/* header lies beyond the end of the data */
+	*offset += sizeof(_vah);
+
+	if (get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+	    MRP_VECATTR_HDR_FLAG_LA)
+		mrp_mad_event(app, MRP_EVENT_R_LA);	/* hits every attribute */
+	valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+			    MRP_VECATTR_HDR_LEN_MASK);
+
+	/* The VectorAttribute structure in a PDU carries event information
+	 * about one or more attributes having consecutive values. Only the
+	 * value for the first attribute is contained in the structure. So
+	 * we make a copy of that value, and then increment it each time we
+	 * advance to the next event in its Vector.
+	 */
+	if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -1;	/* the copy would not fit in sk_buff's cb */
+	if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue,
+			  mrp_cb(skb)->mh->attrlen) < 0)
+		return -1;
+	*offset += mrp_cb(skb)->mh->attrlen;
+
+	/* In a VectorAttribute, the Vector contains events which are packed
+	 * three to a byte. We process one byte of the Vector at a time.
+	 */
+	while (valen > 0) {
+		if (skb_copy_bits(skb, *offset, &vaevents,
+				  sizeof(vaevents)) < 0)
+			return -1;
+		*offset += sizeof(vaevents);
+
+		/* Extract and process the first event. */
+		vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX *
+				      __MRP_VECATTR_EVENT_MAX);
+		if (vaevent >= __MRP_VECATTR_EVENT_MAX) {
+			/* The byte is malformed; stop processing. */
+			return -1;
+		}
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the second event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= (__MRP_VECATTR_EVENT_MAX *
+			     __MRP_VECATTR_EVENT_MAX);
+		vaevent = vaevents / __MRP_VECATTR_EVENT_MAX;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the third event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= __MRP_VECATTR_EVENT_MAX;
+		vaevent = vaevents;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+	}
+	return 0;
+}
+
+static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb,
+			     int *offset)
+{
+	struct mrp_msg_hdr _mh;
+	struct mrp_msg_hdr *mh;
+
+	mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh);
+	mrp_cb(skb)->mh = mh;
+	if (!mh)
+		return -1;
+	*offset += sizeof(_mh);
+
+	/* Attribute types run from 1 to maxattr; empty values are invalid. */
+	if (!mh->attrtype || !mh->attrlen || mh->attrtype > app->app->maxattr)
+		return -1;
+	/* VectorAttributes follow until an EndMark or the end of the data. */
+	while (skb->len > *offset &&
+	       mrp_pdu_parse_end_mark(skb, offset) >= 0) {
+		if (mrp_pdu_parse_vecattr(app, skb, offset) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static int mrp_rcv(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct mrp_application *appl = container_of(pt, struct mrp_application,
+						    pkttype);
+	struct mrp_port *port;
+	struct mrp_applicant *app;
+	struct mrp_pdu_hdr _ph;
+	const struct mrp_pdu_hdr *ph;
+	int offset = skb_network_offset(skb);
+
+	/* If the interface is in promiscuous mode, drop the packet if
+	 * it was unicast to another host.
+	 */
+	if (unlikely(skb->pkt_type == PACKET_OTHERHOST))
+		goto out;
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto out;
+	port = rcu_dereference(dev->mrp_port);
+	if (unlikely(!port))
+		goto out;	/* no MRP port on this device */
+	app = rcu_dereference(port->applicants[appl->type]);
+	if (unlikely(!app))
+		goto out;	/* this application is not active on the port */
+	/* The PDU header comes first and must carry our protocol version. */
+	ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph);
+	if (!ph)
+		goto out;
+	offset += sizeof(_ph);
+
+	if (ph->version != app->app->version)
+		goto out;
+	/* Parse Messages until an EndMark, an error or the end of the data. */
+	spin_lock(&app->lock);
+	while (skb->len > offset) {
+		if (mrp_pdu_parse_end_mark(skb, &offset) < 0)
+			break;
+		if (mrp_pdu_parse_msg(app, skb, &offset) < 0)
+			break;
+	}
+	spin_unlock(&app->lock);
+out:
+	kfree_skb(skb);	/* the skb is released on every path */
+	return 0;
+}
+
+static int mrp_init_port(struct net_device *dev)
+{
+	struct mrp_port *port;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);	/* zero-filled */
+	if (!port)
+		return -ENOMEM;
+	rcu_assign_pointer(dev->mrp_port, port);
+	return 0;
+}
+
+static void mrp_release_port(struct net_device *dev)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	unsigned int type;
+
+	/* The port must stay while any application slot is still in use. */
+	for (type = 0; type <= MRP_APPLICATION_MAX; type++)
+		if (rtnl_dereference(port->applicants[type]))
+			return;
+	RCU_INIT_POINTER(dev->mrp_port, NULL);
+	kfree_rcu(port, rcu);
+}
+
+int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_applicant *app;
+	int err;
+
+	ASSERT_RTNL();
+	/* The first applicant on a device also creates the device's port. */
+	if (!rtnl_dereference(dev->mrp_port)) {
+		err = mrp_init_port(dev);
+		if (err < 0)
+			goto err1;
+	}
+
+	err = -ENOMEM;
+	app = kzalloc(sizeof(*app), GFP_KERNEL);
+	if (!app)
+		goto err2;
+
+	err = dev_mc_add(dev, appl->group_address);
+	if (err < 0)
+		goto err3;
+	/* Set up the applicant, publish it, then start both timers. */
+	app->dev = dev;
+	app->app = appl;
+	app->mad = RB_ROOT;
+	spin_lock_init(&app->lock);
+	skb_queue_head_init(&app->queue);
+	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
+	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
+	mrp_join_timer_arm(app);
+	setup_timer(&app->periodic_timer, mrp_periodic_timer,
+		    (unsigned long)app);
+	mrp_periodic_timer_arm(app);
+	return 0;
+
+err3:
+	kfree(app);
+err2:
+	mrp_release_port(dev);	/* frees the port unless a slot is in use */
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(mrp_init_applicant);
+
+void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct rb_node *node;
+
+	ASSERT_RTNL();
+	RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+
+	/* Stop the timers, then flush pending messages with a final TX. */
+	del_timer_sync(&app->join_timer);
+	del_timer_sync(&app->periodic_timer);
+
+	spin_lock_bh(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	/* TX only frees attributes that sent a Leave; free the rest too. */
+	while ((node = rb_first(&app->mad)))
+		mrp_attr_destroy(app, rb_entry(node, struct mrp_attr, node));
+	mrp_pdu_queue(app);
+	spin_unlock_bh(&app->lock);
+	mrp_queue_xmit(app);
+
+	dev_mc_del(dev, appl->group_address);
+	kfree_rcu(app, rcu);
+	mrp_release_port(dev);
+}
+EXPORT_SYMBOL_GPL(mrp_uninit_applicant);
+
+int mrp_register_application(struct mrp_application *appl)
+{
+	appl->pkttype.func = mrp_rcv;	/* mrp_rcv() handles its packets */
+	dev_add_pack(&appl->pkttype);
+	return 0;	/* always succeeds */
+}
+EXPORT_SYMBOL_GPL(mrp_register_application);
+
+void mrp_unregister_application(struct mrp_application *appl)
+{
+	dev_remove_pack(&appl->pkttype);	/* undoes dev_add_pack() */
+}
+EXPORT_SYMBOL_GPL(mrp_unregister_application);
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 7f353c4f437..0bda8de7df5 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -1,6 +1,5 @@
 /*
- *	NET3:	Support for 802.2 demultiplexing off Ethernet (Token ring
- *		is kept separate see p8022tr.c)
+ *	NET3:	Support for 802.2 demultiplexing off Ethernet
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
  *		as published by the Free Software Foundation; either version
diff --git a/net/802/stp.c b/net/802/stp.c
index 978c30b1b36..2c40ba0ec11 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -12,6 +12,7 @@
 #include <linux/etherdevice.h>
 #include <linux/llc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <net/llc.h>
 #include <net/llc_pdu.h>
 #include <net/stp.h>
@@ -45,7 +46,7 @@ static int stp_pdu_rcv(struct sk_buff *skb, struct net_device *dev,
 		proto = rcu_dereference(garp_protos[eh->h_dest[5] -
 						    GARP_ADDR_MIN]);
 		if (proto &&
-		    compare_ether_addr(eh->h_dest, proto->group_address))
+		    !ether_addr_equal(eh->h_dest, proto->group_address))
 			goto err;
 	} else
 		proto = rcu_dereference(stp_proto);
@@ -88,9 +89,9 @@ void stp_proto_unregister(const struct stp_proto *proto)
 {
 	mutex_lock(&stp_proto_mutex);
 	if (is_zero_ether_addr(proto->group_address))
-		rcu_assign_pointer(stp_proto, NULL);
+		RCU_INIT_POINTER(stp_proto, NULL);
 	else
-		rcu_assign_pointer(garp_protos[proto->group_address[5] -
+		RCU_INIT_POINTER(garp_protos[proto->group_address[5] -
 					       GARP_ADDR_MIN], NULL);
 	synchronize_rcu();
 
diff --git a/net/802/tr.c b/net/802/tr.c
deleted file mode 100644
index 5e20cf8a074..00000000000
--- a/net/802/tr.c
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
- * NET3:	Token ring device handling subroutines
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- * Fixes:       3 Feb 97 Paul Norton <pnorton@cts.com> Minor routing fixes.
- *              Added rif table to /proc/net/tr_rif and rif timeout to
- *              /proc/sys/net/token-ring/rif_timeout.
- *              22 Jun 98 Paul Norton <p.norton@computer.org> Rearranged
- *              tr_header and tr_type_trans to handle passing IPX SNAP and
- *              802.2 through the correct layers. Eliminated tr_reformat.
- *
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/trdevice.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/net.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/slab.h>
-#include <net/arp.h>
-#include <net/net_namespace.h>
-
-static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
-static void rif_check_expire(unsigned long dummy);
-
-#define TR_SR_DEBUG 0
-
-/*
- *	Each RIF entry we learn is kept this way
- */
-
-struct rif_cache {
-	unsigned char addr[TR_ALEN];
-	int iface;
-	__be16 rcf;
-	__be16 rseg[8];
-	struct rif_cache *next;
-	unsigned long last_used;
-	unsigned char local_ring;
-};
-
-#define RIF_TABLE_SIZE 32
-
-/*
- *	We hash the RIF cache 32 ways. We do after all have to look it
- *	up a lot.
- */
-
-static struct rif_cache *rif_table[RIF_TABLE_SIZE];
-
-static DEFINE_SPINLOCK(rif_lock);
-
-
-/*
- *	Garbage disposal timer.
- */
-
-static struct timer_list rif_timer;
-
-static int sysctl_tr_rif_timeout = 60*10*HZ;
-
-static inline unsigned long rif_hash(const unsigned char *addr)
-{
-	unsigned long x;
-
-	x = addr[0];
-	x = (x << 2) ^ addr[1];
-	x = (x << 2) ^ addr[2];
-	x = (x << 2) ^ addr[3];
-	x = (x << 2) ^ addr[4];
-	x = (x << 2) ^ addr[5];
-
-	x ^= x >> 8;
-
-	return x & (RIF_TABLE_SIZE - 1);
-}
-
-/*
- *	Put the headers on a token ring packet. Token ring source routing
- *	makes this a little more exciting than on ethernet.
- */
-
-static int tr_header(struct sk_buff *skb, struct net_device *dev,
-		     unsigned short type,
-		     const void *daddr, const void *saddr, unsigned len)
-{
-	struct trh_hdr *trh;
-	int hdr_len;
-
-	/*
-	 * Add the 802.2 SNAP header if IP as the IPv4/IPv6 code calls
-	 * dev->hard_header directly.
-	 */
-	if (type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
-	{
-		struct trllc *trllc;
-
-		hdr_len = sizeof(struct trh_hdr) + sizeof(struct trllc);
-		trh = (struct trh_hdr *)skb_push(skb, hdr_len);
-		trllc = (struct trllc *)(trh+1);
-		trllc->dsap = trllc->ssap = EXTENDED_SAP;
-		trllc->llc = UI_CMD;
-		trllc->protid[0] = trllc->protid[1] = trllc->protid[2] = 0x00;
-		trllc->ethertype = htons(type);
-	}
-	else
-	{
-		hdr_len = sizeof(struct trh_hdr);
-		trh = (struct trh_hdr *)skb_push(skb, hdr_len);
-	}
-
-	trh->ac=AC;
-	trh->fc=LLC_FRAME;
-
-	if(saddr)
-		memcpy(trh->saddr,saddr,dev->addr_len);
-	else
-		memcpy(trh->saddr,dev->dev_addr,dev->addr_len);
-
-	/*
-	 *	Build the destination and then source route the frame
-	 */
-
-	if(daddr)
-	{
-		memcpy(trh->daddr,daddr,dev->addr_len);
-		tr_source_route(skb, trh, dev);
-		return hdr_len;
-	}
-
-	return -hdr_len;
-}
-
-/*
- *	A neighbour discovery of some species (eg arp) has completed. We
- *	can now send the packet.
- */
-
-static int tr_rebuild_header(struct sk_buff *skb)
-{
-	struct trh_hdr *trh=(struct trh_hdr *)skb->data;
-	struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr));
-	struct net_device *dev = skb->dev;
-
-	/*
-	 *	FIXME: We don't yet support IPv6 over token rings
-	 */
-
-	if(trllc->ethertype != htons(ETH_P_IP)) {
-		printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype));
-		return 0;
-	}
-
-#ifdef CONFIG_INET
-	if(arp_find(trh->daddr, skb)) {
-			return 1;
-	}
-	else
-#endif
-	{
-		tr_source_route(skb,trh,dev);
-		return 0;
-	}
-}
-
-/*
- *	Some of this is a bit hackish. We intercept RIF information
- *	used for source routing. We also grab IP directly and don't feed
- *	it via SNAP.
- */
-
-__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-
-	struct trh_hdr *trh;
-	struct trllc *trllc;
-	unsigned riflen=0;
-
-	skb->dev = dev;
-	skb_reset_mac_header(skb);
-	trh = tr_hdr(skb);
-
-	if(trh->saddr[0] & TR_RII)
-		riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
-
-	trllc = (struct trllc *)(skb->data+sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
-
-	skb_pull(skb,sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
-
-	if(*trh->daddr & 0x80)
-	{
-		if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN))
-			skb->pkt_type=PACKET_BROADCAST;
-		else
-			skb->pkt_type=PACKET_MULTICAST;
-	}
-	else if ( (trh->daddr[0] & 0x01) && (trh->daddr[1] & 0x00) && (trh->daddr[2] & 0x5E))
-	{
-		skb->pkt_type=PACKET_MULTICAST;
-	}
-	else if(dev->flags & IFF_PROMISC)
-	{
-		if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN))
-			skb->pkt_type=PACKET_OTHERHOST;
-	}
-
-	if ((skb->pkt_type != PACKET_BROADCAST) &&
-	    (skb->pkt_type != PACKET_MULTICAST))
-		tr_add_rif_info(trh,dev) ;
-
-	/*
-	 * Strip the SNAP header from ARP packets since we don't
-	 * pass them through to the 802.2/SNAP layers.
-	 */
-
-	if (trllc->dsap == EXTENDED_SAP &&
-	    (trllc->ethertype == htons(ETH_P_IP) ||
-	     trllc->ethertype == htons(ETH_P_IPV6) ||
-	     trllc->ethertype == htons(ETH_P_ARP)))
-	{
-		skb_pull(skb, sizeof(struct trllc));
-		return trllc->ethertype;
-	}
-
-	return htons(ETH_P_TR_802_2);
-}
-
-/*
- *	We try to do source routing...
- */
-
-void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,
-		     struct net_device *dev)
-{
-	int slack;
-	unsigned int hash;
-	struct rif_cache *entry;
-	unsigned char *olddata;
-	unsigned long flags;
-	static const unsigned char mcast_func_addr[]
-		= {0xC0,0x00,0x00,0x04,0x00,0x00};
-
-	spin_lock_irqsave(&rif_lock, flags);
-
-	/*
-	 *	Broadcasts are single route as stated in RFC 1042
-	 */
-	if( (!memcmp(&(trh->daddr[0]),&(dev->broadcast[0]),TR_ALEN)) ||
-	    (!memcmp(&(trh->daddr[0]),&(mcast_func_addr[0]), TR_ALEN))  )
-	{
-		trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
-			       | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
-		trh->saddr[0]|=TR_RII;
-	}
-	else
-	{
-		hash = rif_hash(trh->daddr);
-		/*
-		 *	Walk the hash table and look for an entry
-		 */
-		for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->daddr[0]),TR_ALEN);entry=entry->next);
-
-		/*
-		 *	If we found an entry we can route the frame.
-		 */
-		if(entry)
-		{
-#if TR_SR_DEBUG
-printk("source routing for %pM\n", trh->daddr);
-#endif
-			if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8)
-			{
-				trh->rcf=entry->rcf;
-				memcpy(&trh->rseg[0],&entry->rseg[0],8*sizeof(unsigned short));
-				trh->rcf^=htons(TR_RCF_DIR_BIT);
-				trh->rcf&=htons(0x1fff);	/* Issam Chehab <ichehab@madge1.demon.co.uk> */
-
-				trh->saddr[0]|=TR_RII;
-#if TR_SR_DEBUG
-				printk("entry found with rcf %04x\n", entry->rcf);
-			}
-			else
-			{
-				printk("entry found but without rcf length, local=%02x\n", entry->local_ring);
-#endif
-			}
-			entry->last_used=jiffies;
-		}
-		else
-		{
-			/*
-			 *	Without the information we simply have to shout
-			 *	on the wire. The replies should rapidly clean this
-			 *	situation up.
-			 */
-			trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
-				       | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
-			trh->saddr[0]|=TR_RII;
-#if TR_SR_DEBUG
-			printk("no entry in rif table found - broadcasting frame\n");
-#endif
-		}
-	}
-
-	/* Compress the RIF here so we don't have to do it in the driver(s) */
-	if (!(trh->saddr[0] & 0x80))
-		slack = 18;
-	else
-		slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
-	olddata = skb->data;
-	spin_unlock_irqrestore(&rif_lock, flags);
-
-	skb_pull(skb, slack);
-	memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
-}
-
-/*
- *	We have learned some new RIF information for our source
- *	routing.
- */
-
-static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
-{
-	unsigned int hash, rii_p = 0;
-	unsigned long flags;
-	struct rif_cache *entry;
-	unsigned char saddr0;
-
-	spin_lock_irqsave(&rif_lock, flags);
-	saddr0 = trh->saddr[0];
-
-	/*
-	 *	Firstly see if the entry exists
-	 */
-
-	if(trh->saddr[0] & TR_RII)
-	{
-		trh->saddr[0]&=0x7f;
-		if (((ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8) > 2)
-		{
-			rii_p = 1;
-		}
-	}
-
-	hash = rif_hash(trh->saddr);
-	for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);entry=entry->next);
-
-	if(entry==NULL)
-	{
-#if TR_SR_DEBUG
-		printk("adding rif_entry: addr:%pM rcf:%04X\n",
-		       trh->saddr, ntohs(trh->rcf));
-#endif
-		/*
-		 *	Allocate our new entry. A failure to allocate loses
-		 *	use the information. This is harmless.
-		 *
-		 *	FIXME: We ought to keep some kind of cache size
-		 *	limiting and adjust the timers to suit.
-		 */
-		entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC);
-
-		if(!entry)
-		{
-			printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
-			spin_unlock_irqrestore(&rif_lock, flags);
-			return;
-		}
-
-		memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);
-		entry->iface = dev->ifindex;
-		entry->next=rif_table[hash];
-		entry->last_used=jiffies;
-		rif_table[hash]=entry;
-
-		if (rii_p)
-		{
-			entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
-			memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
-			entry->local_ring = 0;
-		}
-		else
-		{
-			entry->local_ring = 1;
-		}
-	}
-	else	/* Y. Tahara added */
-	{
-		/*
-		 *	Update existing entries
-		 */
-		if (!entry->local_ring)
-		    if (entry->rcf != (trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK)) &&
-			 !(trh->rcf & htons(TR_RCF_BROADCAST_MASK)))
-		    {
-#if TR_SR_DEBUG
-printk("updating rif_entry: addr:%pM rcf:%04X\n",
-		trh->saddr, ntohs(trh->rcf));
-#endif
-			    entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
-			    memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
-		    }
-		entry->last_used=jiffies;
-	}
-	trh->saddr[0]=saddr0; /* put the routing indicator back for tcpdump */
-	spin_unlock_irqrestore(&rif_lock, flags);
-}
-
-/*
- *	Scan the cache with a timer and see what we need to throw out.
- */
-
-static void rif_check_expire(unsigned long dummy)
-{
-	int i;
-	unsigned long flags, next_interval = jiffies + sysctl_tr_rif_timeout/2;
-
-	spin_lock_irqsave(&rif_lock, flags);
-
-	for(i =0; i < RIF_TABLE_SIZE; i++) {
-		struct rif_cache *entry, **pentry;
-
-		pentry = rif_table+i;
-		while((entry=*pentry) != NULL) {
-			unsigned long expires
-				= entry->last_used + sysctl_tr_rif_timeout;
-
-			if (time_before_eq(expires, jiffies)) {
-				*pentry = entry->next;
-				kfree(entry);
-			} else {
-				pentry = &entry->next;
-
-				if (time_before(expires, next_interval))
-					next_interval = expires;
-			}
-		}
-	}
-
-	spin_unlock_irqrestore(&rif_lock, flags);
-
-	mod_timer(&rif_timer, next_interval);
-
-}
-
-/*
- *	Generate the /proc/net information for the token ring RIF
- *	routing.
- */
-
-#ifdef CONFIG_PROC_FS
-
-static struct rif_cache *rif_get_idx(loff_t pos)
-{
-	int i;
-	struct rif_cache *entry;
-	loff_t off = 0;
-
-	for(i = 0; i < RIF_TABLE_SIZE; i++)
-		for(entry = rif_table[i]; entry; entry = entry->next) {
-			if (off == pos)
-				return entry;
-			++off;
-		}
-
-	return NULL;
-}
-
-static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(&rif_lock)
-{
-	spin_lock_irq(&rif_lock);
-
-	return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	int i;
-	struct rif_cache *ent = v;
-
-	++*pos;
-
-	if (v == SEQ_START_TOKEN) {
-		i = -1;
-		goto scan;
-	}
-
-	if (ent->next)
-		return ent->next;
-
-	i = rif_hash(ent->addr);
- scan:
-	while (++i < RIF_TABLE_SIZE) {
-		if ((ent = rif_table[i]) != NULL)
-			return ent;
-	}
-	return NULL;
-}
-
-static void rif_seq_stop(struct seq_file *seq, void *v)
-	__releases(&rif_lock)
-{
-	spin_unlock_irq(&rif_lock);
-}
-
-static int rif_seq_show(struct seq_file *seq, void *v)
-{
-	int j, rcf_len, segment, brdgnmb;
-	struct rif_cache *entry = v;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-		     "if     TR address       TTL   rcf   routing segments\n");
-	else {
-		struct net_device *dev = dev_get_by_index(&init_net, entry->iface);
-		long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
-				- (long) jiffies;
-
-		seq_printf(seq, "%s %pM %7li ",
-			   dev?dev->name:"?",
-			   entry->addr,
-			   ttl/HZ);
-
-			if (entry->local_ring)
-				seq_puts(seq, "local\n");
-			else {
-
-				seq_printf(seq, "%04X", ntohs(entry->rcf));
-				rcf_len = ((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)-2;
-				if (rcf_len)
-					rcf_len >>= 1;
-				for(j = 1; j < rcf_len; j++) {
-					if(j==1) {
-						segment=ntohs(entry->rseg[j-1])>>4;
-						seq_printf(seq,"  %03X",segment);
-					}
-
-					segment=ntohs(entry->rseg[j])>>4;
-					brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
-					seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
-				}
-				seq_putc(seq, '\n');
-			}
-
-		if (dev)
-			dev_put(dev);
-		}
-	return 0;
-}
-
-
-static const struct seq_operations rif_seq_ops = {
-	.start = rif_seq_start,
-	.next  = rif_seq_next,
-	.stop  = rif_seq_stop,
-	.show  = rif_seq_show,
-};
-
-static int rif_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rif_seq_ops);
-}
-
-static const struct file_operations rif_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = rif_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-#endif
-
-static const struct header_ops tr_header_ops = {
-	.create = tr_header,
-	.rebuild= tr_rebuild_header,
-};
-
-static void tr_setup(struct net_device *dev)
-{
-	/*
-	 *	Configure and register
-	 */
-
-	dev->header_ops	= &tr_header_ops;
-
-	dev->type		= ARPHRD_IEEE802_TR;
-	dev->hard_header_len	= TR_HLEN;
-	dev->mtu		= 2000;
-	dev->addr_len		= TR_ALEN;
-	dev->tx_queue_len	= 100;	/* Long queues on tr */
-
-	memset(dev->broadcast,0xFF, TR_ALEN);
-
-	/* New-style flags. */
-	dev->flags		= IFF_BROADCAST | IFF_MULTICAST ;
-}
-
-/**
- * alloc_trdev - Register token ring device
- * @sizeof_priv: Size of additional driver-private structure to be allocated
- *	for this token ring device
- *
- * Fill in the fields of the device structure with token ring-generic values.
- *
- * Constructs a new net device, complete with a private data area of
- * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
- * this private data area.
- */
-struct net_device *alloc_trdev(int sizeof_priv)
-{
-	return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
-}
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table tr_table[] = {
-	{
-		.procname	= "rif_timeout",
-		.data		= &sysctl_tr_rif_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{ },
-};
-
-static __initdata struct ctl_path tr_path[] = {
-	{ .procname = "net", },
-	{ .procname = "token-ring", },
-	{ }
-};
-#endif
-
-/*
- *	Called during bootup.  We don't actually have to initialise
- *	too much for this.
- */
-
-static int __init rif_init(void)
-{
-	rif_timer.expires  = jiffies + sysctl_tr_rif_timeout;
-	setup_timer(&rif_timer, rif_check_expire, 0);
-	add_timer(&rif_timer);
-#ifdef CONFIG_SYSCTL
-	register_sysctl_paths(tr_path, tr_table);
-#endif
-	proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
-	return 0;
-}
-
-module_init(rif_init);
-
-EXPORT_SYMBOL(tr_type_trans);
-EXPORT_SYMBOL(alloc_trdev);
-
-MODULE_LICENSE("GPL");
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index fa073a54963..42320180967 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,14 +3,14 @@
 #
 
 config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
+	tristate "802.1Q/802.1ad VLAN Support"
 	---help---
 	  Select this and you will be able to create 802.1Q VLAN interfaces
-	  on your ethernet interfaces.  802.1Q VLAN supports almost
-	  everything a regular ethernet interface does, including
-	  firewalling, bridging, and of course IP traffic.  You will need
-	  the 'vconfig' tool from the VLAN project in order to effectively
-	  use VLANs.  See the VLAN web page for more information:
+	  on your Ethernet interfaces. 802.1Q VLAN supports almost
+	  everything a regular Ethernet interface does, including
+	  firewalling, bridging, and of course IP traffic. You will need
+	  the 'ip' utility in order to effectively use VLANs.
+	  See the VLAN web page for more information:
 	  <http://www.candelatech.com/~greear/vlan.html>
 
 	  To compile this code as a module, choose M here: the module
@@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP
 	  automatic propagation of registered VLANs to switches.
 
 	  If unsure, say N.
+
+config VLAN_8021Q_MVRP
+	bool "MVRP (Multiple VLAN Registration Protocol) support"
+	depends on VLAN_8021Q
+	select MRP
+	help
+	  Select this to enable MVRP end-system support. MVRP is used for
+	  automatic propagation of registered VLANs to switches; it
+	  supersedes GVRP and is not backwards-compatible.
+
+	  If unsure, say N.
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 9f4f174ead1..7bc8db08d7e 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q)		+= 8021q.o
 
 8021q-y					:= vlan.o vlan_dev.o vlan_netlink.o
 8021q-$(CONFIG_VLAN_8021Q_GVRP)		+= vlan_gvrp.o
+8021q-$(CONFIG_VLAN_8021Q_MVRP)		+= vlan_mvrp.o
 8021q-$(CONFIG_PROC_FS)			+= vlanproc.o
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 917ecb93ea2..44ebd5c2cd4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -18,6 +18,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
@@ -49,35 +51,18 @@ const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
-static void vlan_group_free(struct vlan_group *grp)
-{
-	int i;
-
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
-	kfree(grp);
-}
-
-static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
-{
-	struct vlan_group *grp;
-
-	grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
-	if (!grp)
-		return NULL;
-
-	grp->real_dev = real_dev;
-	return grp;
-}
-
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+				   __be16 vlan_proto, u16 vlan_id)
 {
 	struct net_device **array;
+	unsigned int pidx, vidx;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	pidx  = vlan_proto_idx(vlan_proto);
+	vidx  = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
+	array = vg->vlan_devices_arrays[pidx][vidx];
 	if (array != NULL)
 		return 0;
 
@@ -86,80 +71,68 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[pidx][vidx] = array;
 	return 0;
 }
 
-static void vlan_rcu_free(struct rcu_head *rcu)
-{
-	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
-}
-
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
+	struct vlan_info *vlan_info;
 	struct vlan_group *grp;
 	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
 
-	grp = rtnl_dereference(real_dev->vlgrp);
-	BUG_ON(!grp);
+	vlan_info = rtnl_dereference(real_dev->vlan_info);
+	BUG_ON(!vlan_info);
 
-	/* Take it out of our own structures, but be sure to interlock with
-	 * HW accelerating devices or SW vlan input packet processing if
-	 * VLAN is not 0 (leave it there for 802.1p).
-	 */
-	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
-		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
+	grp = &vlan_info->grp;
 
-	grp->nr_vlans--;
+	grp->nr_vlan_devs--;
 
+	if (vlan->flags & VLAN_FLAG_MVRP)
+		vlan_mvrp_request_leave(dev);
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
+
+	netdev_upper_dev_unlink(real_dev, dev);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
 	 */
 	unregister_netdevice_queue(dev, head);
 
-	/* If the group is now empty, kill off the group. */
-	if (grp->nr_vlans == 0) {
+	if (grp->nr_vlan_devs == 0) {
+		vlan_mvrp_uninit_applicant(real_dev);
 		vlan_gvrp_uninit_applicant(real_dev);
-
-		rcu_assign_pointer(real_dev->vlgrp, NULL);
-		if (ops->ndo_vlan_rx_register)
-			ops->ndo_vlan_rx_register(real_dev, NULL);
-
-		/* Free the group, after all cpu's are done. */
-		call_rcu(&grp->rcu, vlan_rcu_free);
 	}
 
+	/* Take it out of our own structures, but be sure to interlock with
+	 * HW accelerating devices or SW vlan input packet processing if
+	 * VLAN is not 0 (leave it there for 802.1p).
+	 */
+	if (vlan_id)
+		vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
-		pr_info("8021q: VLANs not supported on %s\n", name);
-		return -EOPNOTSUPP;
-	}
-
-	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	    (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
-		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
+		pr_info("VLANs not supported on %s\n", name);
 		return -EOPNOTSUPP;
 	}
 
-	if (vlan_find_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -167,32 +140,45 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 
 int register_vlan_dev(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
-	struct vlan_group *grp, *ngrp = NULL;
+	struct vlan_info *vlan_info;
+	struct vlan_group *grp;
 	int err;
 
-	grp = rtnl_dereference(real_dev->vlgrp);
-	if (!grp) {
-		ngrp = grp = vlan_group_alloc(real_dev);
-		if (!grp)
-			return -ENOBUFS;
+	err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id);
+	if (err)
+		return err;
+
+	vlan_info = rtnl_dereference(real_dev->vlan_info);
+	/* vlan_info should be there now. vlan_vid_add took care of it */
+	BUG_ON(!vlan_info);
+
+	grp = &vlan_info->grp;
+	if (grp->nr_vlan_devs == 0) {
 		err = vlan_gvrp_init_applicant(real_dev);
 		if (err < 0)
-			goto out_free_group;
+			goto out_vid_del;
+		err = vlan_mvrp_init_applicant(real_dev);
+		if (err < 0)
+			goto out_uninit_gvrp;
 	}
 
-	err = vlan_group_prealloc_vid(grp, vlan_id);
+	err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id);
 	if (err < 0)
-		goto out_uninit_applicant;
+		goto out_uninit_mvrp;
 
+	vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
 	err = register_netdevice(dev);
 	if (err < 0)
-		goto out_uninit_applicant;
+		goto out_uninit_mvrp;
+
+	err = netdev_upper_dev_link(real_dev, dev);
+	if (err)
+		goto out_unregister_netdev;
 
-	/* Account for reference in struct vlan_dev_info */
+	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
 
 	netif_stacked_transfer_operstate(real_dev, dev);
@@ -201,27 +187,21 @@ int register_vlan_dev(struct net_device *dev)
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	vlan_group_set_device(grp, vlan_id, dev);
-	grp->nr_vlans++;
-
-	if (ngrp) {
-		if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX))
-			ops->ndo_vlan_rx_register(real_dev, ngrp);
-		rcu_assign_pointer(real_dev->vlgrp, ngrp);
-	}
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
-		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev);
+	grp->nr_vlan_devs++;
 
 	return 0;
 
-out_uninit_applicant:
-	if (ngrp)
+out_unregister_netdev:
+	unregister_netdevice(dev);
+out_uninit_mvrp:
+	if (grp->nr_vlan_devs == 0)
+		vlan_mvrp_uninit_applicant(real_dev);
+out_uninit_gvrp:
+	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
-out_free_group:
-	if (ngrp) {
-		/* Free the group, after all cpu's are done. */
-		call_rcu(&ngrp->rcu, vlan_rcu_free);
-	}
+out_vid_del:
+	vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 	return err;
 }
 
@@ -231,6 +211,7 @@ out_free_group:
 static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 {
 	struct net_device *new_dev;
+	struct vlan_dev_priv *vlan;
 	struct net *net = dev_net(real_dev);
 	struct vlan_net *vn = net_generic(net, vlan_net_id);
 	char name[IFNAMSIZ];
@@ -239,7 +220,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, vlan_id);
+	err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
 	if (err < 0)
 		return err;
 
@@ -269,7 +250,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
-	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup);
 
 	if (new_dev == NULL)
 		return -ENOBUFS;
@@ -279,11 +260,14 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 * hope the underlying device can handle it.
 	 */
 	new_dev->mtu = real_dev->mtu;
+	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
-	vlan_dev_info(new_dev)->vlan_id = vlan_id;
-	vlan_dev_info(new_dev)->real_dev = real_dev;
-	vlan_dev_info(new_dev)->dent = NULL;
-	vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+	vlan = vlan_dev_priv(new_dev);
+	vlan->vlan_proto = htons(ETH_P_8021Q);
+	vlan->vlan_id = vlan_id;
+	vlan->real_dev = real_dev;
+	vlan->dent = NULL;
+	vlan->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
@@ -300,38 +284,40 @@ out_free_newdev:
 static void vlan_sync_address(struct net_device *dev,
 			      struct net_device *vlandev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 	/* May be called without an actual change */
-	if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
+	if (ether_addr_equal(vlan->real_dev_addr, dev->dev_addr))
 		return;
 
 	/* vlan address was different from the old address and is equal to
 	 * the new address */
-	if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
-	    !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
+	if (!ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) &&
+	    ether_addr_equal(vlandev->dev_addr, dev->dev_addr))
 		dev_uc_del(dev, vlandev->dev_addr);
 
 	/* vlan address was equal to the old address and is different from
 	 * the new address */
-	if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
-	    compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
+	if (ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) &&
+	    !ether_addr_equal(vlandev->dev_addr, dev->dev_addr))
 		dev_uc_add(dev, vlandev->dev_addr);
 
-	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
+	ether_addr_copy(vlan->real_dev_addr, dev->dev_addr);
 }
 
 static void vlan_transfer_features(struct net_device *dev,
 				   struct net_device *vlandev)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
+
 	vlandev->gso_max_size = dev->gso_max_size;
 
-	if (dev->features & NETIF_F_HW_VLAN_TX)
+	if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
 		vlandev->hard_header_len = dev->hard_header_len;
 	else
 		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
 
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
 
@@ -344,13 +330,12 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event)
 	case NETDEV_CHANGENAME:
 		vlan_proc_rem_dev(dev);
 		if (vlan_proc_add_dev(dev) < 0)
-			pr_warning("8021q: failed to change proc name for %s\n",
-					dev->name);
+			pr_warn("failed to change proc name for %s\n",
+				dev->name);
 		break;
 	case NETDEV_REGISTER:
 		if (vlan_proc_add_dev(dev) < 0)
-			pr_warning("8021q: failed to add proc entry for %s\n",
-					dev->name);
+			pr_warn("failed to add proc entry for %s\n", dev->name);
 		break;
 	case NETDEV_UNREGISTER:
 		vlan_proc_rem_dev(dev);
@@ -361,27 +346,29 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event)
 static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			     void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct vlan_group *grp;
+	struct vlan_info *vlan_info;
 	int i, flgs;
 	struct net_device *vlandev;
-	struct vlan_dev_info *vlan;
+	struct vlan_dev_priv *vlan;
+	bool last = false;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
 
 	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	    dev->netdev_ops->ndo_vlan_rx_add_vid) {
-		pr_info("8021q: adding VLAN 0 to HW filter on device %s\n",
+	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
+		pr_info("adding VLAN 0 to HW filter on device %s\n",
 			dev->name);
-		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
 	}
 
-	grp = rtnl_dereference(dev->vlgrp);
-	if (!grp)
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info)
 		goto out;
+	grp = &vlan_info->grp;
 
 	/* It is OK that we do not hold the group lock right now,
 	 * as we run under the RTNL lock.
@@ -390,22 +377,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			netif_stacked_transfer_operstate(dev, vlandev);
-		}
 		break;
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -415,11 +393,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			if (vlandev->mtu <= dev->mtu)
 				continue;
 
@@ -429,28 +403,21 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			vlan_transfer_features(dev, vlandev);
-		}
-
 		break;
 
 	case NETDEV_DOWN:
-		/* Put all VLANs for this dev in the down state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
+		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
+		/* Put all VLANs for this dev in the down state too.  */
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
 
-			vlan = vlan_dev_info(vlandev);
+			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs & ~IFF_UP);
 			netif_stacked_transfer_operstate(dev, vlandev);
@@ -459,16 +426,12 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (flgs & IFF_UP)
 				continue;
 
-			vlan = vlan_dev_info(vlandev);
+			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs | IFF_UP);
 			netif_stacked_transfer_operstate(dev, vlandev);
@@ -480,35 +443,31 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
-			/* unregistration of last vlan destroys group, abort
+		vlan_group_for_each_dev(grp, i, vlandev) {
+			/* removal of last vid destroys vlan_info, abort
 			 * afterwards */
-			if (grp->nr_vlans == 1)
-				i = VLAN_N_VID;
+			if (vlan_info->nr_vids == 1)
+				last = true;
 
 			unregister_vlan_dev(vlandev, &list);
+			if (last)
+				break;
 		}
 		unregister_netdevice_many(&list);
 		break;
 
 	case NETDEV_PRE_TYPE_CHANGE:
 		/* Forbid underlaying device to change its type. */
-		return NOTIFY_BAD;
+		if (vlan_uses_dev(dev))
+			return NOTIFY_BAD;
+		break;
 
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
+	case NETDEV_RESEND_IGMP:
 		/* Propagate to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			call_netdevice_notifiers(event, vlandev);
-		}
 		break;
 	}
 
@@ -561,7 +520,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 	switch (args.cmd) {
 	case SET_VLAN_INGRESS_PRIORITY_CMD:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		vlan_dev_set_ingress_priority(dev,
 					      args.u.skb_priority,
@@ -571,7 +530,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	case SET_VLAN_EGRESS_PRIORITY_CMD:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		err = vlan_dev_set_egress_priority(dev,
 						   args.u.skb_priority,
@@ -580,7 +539,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	case SET_VLAN_FLAG_CMD:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		err = vlan_dev_change_flags(dev,
 					    args.vlan_qos ? args.u.flag : 0,
@@ -589,7 +548,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	case SET_VLAN_NAME_TYPE_CMD:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		if ((args.u.name_type >= 0) &&
 		    (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
@@ -605,14 +564,14 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	case ADD_VLAN_CMD:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		err = register_vlan_device(dev, args.u.VID);
 		break;
 
 	case DEL_VLAN_CMD:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		unregister_vlan_dev(dev, NULL);
 		err = 0;
@@ -685,13 +644,19 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err3;
 
-	err = vlan_netlink_init();
+	err = vlan_mvrp_init();
 	if (err < 0)
 		goto err4;
 
+	err = vlan_netlink_init();
+	if (err < 0)
+		goto err5;
+
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
+err5:
+	vlan_mvrp_uninit();
 err4:
 	vlan_gvrp_uninit();
 err3:
@@ -712,6 +677,7 @@ static void __exit vlan_cleanup_module(void)
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
+	vlan_mvrp_uninit();
 	vlan_gvrp_uninit();
 }
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 9da07e30d1a..9d010a09ab9 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,77 +3,99 @@
 
 #include <linux/if_vlan.h>
 #include <linux/u64_stats_sync.h>
+#include <linux/list.h>
 
-
-/**
- *	struct vlan_priority_tci_mapping - vlan egress priority mappings
- *	@priority: skb priority
- *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
- *	@next: pointer to next struct
+/* if this changes, algorithm will have to be reworked because this
+ * depends on completely exhausting the VLAN identifier space.  Thus
+ * it gives constant time look-up, but in many cases it wastes memory.
  */
-struct vlan_priority_tci_mapping {
-	u32					priority;
-	u16					vlan_qos;
-	struct vlan_priority_tci_mapping	*next;
+#define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
+#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+
+enum vlan_protos {
+	VLAN_PROTO_8021Q	= 0,	/* index for htons(ETH_P_8021Q) */
+	VLAN_PROTO_8021AD,		/* index for htons(ETH_P_8021AD) */
+	VLAN_PROTO_NUM,			/* count; sizes per-proto arrays */
 };
 
+struct vlan_group {	/* vlan devices, indexed by proto index and vid */
+	unsigned int		nr_vlan_devs;	/* tested by vlan_uses_dev() */
+	struct hlist_node	hlist;	/* linked list */
+	struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM]
+					       [VLAN_GROUP_ARRAY_SPLIT_PARTS];
+};
 
-/**
- *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
- *	@rx_packets: number of received packets
- *	@rx_bytes: number of received bytes
- *	@rx_multicast: number of received multicast packets
- *	@tx_packets: number of transmitted packets
- *	@tx_bytes: number of transmitted bytes
- *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of rx errors
- *	@tx_dropped: number of tx drops
- */
-struct vlan_pcpu_stats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			rx_multicast;
-	u64			tx_packets;
-	u64			tx_bytes;
-	struct u64_stats_sync	syncp;
-	u32			rx_errors;
-	u32			tx_dropped;
+struct vlan_info {
+	struct net_device	*real_dev; /* The ethernet(like) device
+					    * the vlan is attached to.
+					    */
+	struct vlan_group	grp;
+	struct list_head	vid_list;
+	unsigned int		nr_vids;
+	struct rcu_head		rcu;
 };
 
-/**
- *	struct vlan_dev_info - VLAN private device data
- *	@nr_ingress_mappings: number of ingress priority mappings
- *	@ingress_priority_map: ingress priority mappings
- *	@nr_egress_mappings: number of egress priority mappings
- *	@egress_priority_map: hash of egress priority mappings
- *	@vlan_id: VLAN identifier
- *	@flags: device flags
- *	@real_dev: underlying netdevice
- *	@real_dev_addr: address of underlying netdevice
- *	@dent: proc dir entry
- *	@vlan_pcpu_stats: ptr to percpu rx stats
- */
-struct vlan_dev_info {
-	unsigned int				nr_ingress_mappings;
-	u32					ingress_priority_map[8];
-	unsigned int				nr_egress_mappings;
-	struct vlan_priority_tci_mapping	*egress_priority_map[16];
+/* Map a network-byte-order VLAN ethertype to its enum vlan_protos index. */
+static inline unsigned int vlan_proto_idx(__be16 proto)
+{
+	if (proto == htons(ETH_P_8021Q))
+		return VLAN_PROTO_8021Q;
+	if (proto == htons(ETH_P_8021AD))
+		return VLAN_PROTO_8021AD;
+
+	/* Any other ethertype hits BUG(). */
+	BUG();
+	return 0;
+}
 
-	u16					vlan_id;
-	u16					flags;
+static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
+							 unsigned int pidx,
+							 u16 vlan_id)
+{
+	struct net_device **array;
 
-	struct net_device			*real_dev;
-	unsigned char				real_dev_addr[ETH_ALEN];
+	array = vg->vlan_devices_arrays[pidx]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
+}
 
-	struct proc_dir_entry			*dent;
-	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
-};
+/* Resolve @vlan_proto to its array index, then look up @vlan_id in @vg. */
+static inline struct net_device *
+vlan_group_get_device(struct vlan_group *vg, __be16 vlan_proto, u16 vlan_id)
+{
+	return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id);
+}
 
-static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
+static inline void vlan_group_set_device(struct vlan_group *vg,
+					 __be16 vlan_proto, u16 vlan_id,
+					 struct net_device *dev)
 {
-	return netdev_priv(dev);
+	struct net_device **array;
+	if (!vg)
+		return;
+	array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
+}
+
+/* Must be invoked with rcu_read_lock or with RTNL. */
+static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
+					       __be16 vlan_proto, u16 vlan_id)
+{
+	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
+
+	if (vlan_info)
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
+
+	return NULL;
 }
 
+#define vlan_group_for_each_dev(grp, i, dev) \
+	for ((i) = 0; (i) < VLAN_PROTO_NUM * VLAN_N_VID; (i)++) \
+		if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
+							    (i) % VLAN_N_VID)))
+
 /* found in vlan_dev.c */
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
@@ -82,7 +104,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
@@ -90,18 +113,18 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 static inline u32 vlan_get_ingress_priority(struct net_device *dev,
 					    u16 vlan_tci)
 {
-	struct vlan_dev_info *vip = vlan_dev_info(dev);
+	struct vlan_dev_priv *vip = vlan_dev_priv(dev);
 
 	return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7];
 }
 
 #ifdef CONFIG_VLAN_8021Q_GVRP
-extern int vlan_gvrp_request_join(const struct net_device *dev);
-extern void vlan_gvrp_request_leave(const struct net_device *dev);
-extern int vlan_gvrp_init_applicant(struct net_device *dev);
-extern void vlan_gvrp_uninit_applicant(struct net_device *dev);
-extern int vlan_gvrp_init(void);
-extern void vlan_gvrp_uninit(void);
+int vlan_gvrp_request_join(const struct net_device *dev);
+void vlan_gvrp_request_leave(const struct net_device *dev);
+int vlan_gvrp_init_applicant(struct net_device *dev);
+void vlan_gvrp_uninit_applicant(struct net_device *dev);
+int vlan_gvrp_init(void);
+void vlan_gvrp_uninit(void);
 #else
 static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; }
 static inline void vlan_gvrp_request_leave(const struct net_device *dev) {}
@@ -111,10 +134,26 @@ static inline int vlan_gvrp_init(void) { return 0; }
 static inline void vlan_gvrp_uninit(void) {}
 #endif
 
+#ifdef CONFIG_VLAN_8021Q_MVRP
+int vlan_mvrp_request_join(const struct net_device *dev);
+void vlan_mvrp_request_leave(const struct net_device *dev);
+int vlan_mvrp_init_applicant(struct net_device *dev);
+void vlan_mvrp_uninit_applicant(struct net_device *dev);
+int vlan_mvrp_init(void);
+void vlan_mvrp_uninit(void);
+#else
+static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; }
+static inline void vlan_mvrp_request_leave(const struct net_device *dev) {}
+static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; }
+static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {}
+static inline int vlan_mvrp_init(void) { return 0; }
+static inline void vlan_mvrp_uninit(void) {}
+#endif
+
 extern const char vlan_fullname[];
 extern const char vlan_version[];
-extern int vlan_netlink_init(void);
-extern void vlan_netlink_fini(void);
+int vlan_netlink_init(void);
+void vlan_netlink_fini(void);
 
 extern struct rtnl_link_ops vlan_link_ops;
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index fcc684678af..75d42776399 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -2,37 +2,35 @@
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/netpoll.h>
+#include <linux/export.h>
 #include "vlan.h"
 
 bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
-	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
+	__be16 vlan_proto = skb->vlan_proto;
+	u16 vlan_id = vlan_tx_tag_get_id(skb);
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
-	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
-	if (!vlan_dev) {
-		if (vlan_id)
-			skb->pkt_type = PACKET_OTHERHOST;
+	vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id);
+	if (!vlan_dev)
 		return false;
-	}
 
 	skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(!skb))
 		return false;
 
 	skb->dev = vlan_dev;
-	if (skb->pkt_type == PACKET_OTHERHOST) {
+	if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
 		/* Our lower layer thinks this is not local, let's make sure.
 		 * This allows the VLAN to have a different MAC than the
 		 * underlying device, and still route correctly. */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					vlan_dev->dev_addr))
+		if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, vlan_dev->dev_addr))
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
@@ -41,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		 * original position later
 		 */
 		skb_push(skb, offset);
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
+					      skb->vlan_tci);
 		if (!skb)
 			return false;
 		skb_pull(skb, offset + VLAN_HLEN);
@@ -51,7 +50,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
@@ -63,86 +62,68 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	return true;
 }
 
-struct net_device *vlan_dev_real_dev(const struct net_device *dev)
+/* Must be invoked with rcu_read_lock. */
+struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev,
+					__be16 vlan_proto, u16 vlan_id)
 {
-	return vlan_dev_info(dev)->real_dev;
-}
-EXPORT_SYMBOL(vlan_dev_real_dev);
+	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
 
-u16 vlan_dev_vlan_id(const struct net_device *dev)
-{
-	return vlan_dev_info(dev)->vlan_id;
+	if (vlan_info) {
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
+	} else {
+		/*
+		 * Lower devices of master uppers (bonding, team) do not have
+		 * grp assigned to themselves. Grp is assigned to upper device
+		 * instead.
+		 */
+		struct net_device *upper_dev;
+
+		upper_dev = netdev_master_upper_dev_get_rcu(dev);
+		if (upper_dev)
+			return __vlan_find_dev_deep_rcu(upper_dev,
+						    vlan_proto, vlan_id);
+	}
+
+	return NULL;
 }
-EXPORT_SYMBOL(vlan_dev_vlan_id);
+EXPORT_SYMBOL(__vlan_find_dev_deep_rcu);
 
-/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
-int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-		      u16 vlan_tci, int polling)
+struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	return polling ? netif_receive_skb(skb) : netif_rx(skb);
+	struct net_device *ret = vlan_dev_priv(dev)->real_dev;
+
+	while (is_vlan_dev(ret))
+		ret = vlan_dev_priv(ret)->real_dev;
+
+	return ret;
 }
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
+EXPORT_SYMBOL(vlan_dev_real_dev);
 
-gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
-			      unsigned int vlan_tci, struct sk_buff *skb)
+u16 vlan_dev_vlan_id(const struct net_device *dev)
 {
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	return napi_gro_receive(napi, skb);
+	return vlan_dev_priv(dev)->vlan_id;
 }
-EXPORT_SYMBOL(vlan_gro_receive);
+EXPORT_SYMBOL(vlan_dev_vlan_id);
 
-gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-			    unsigned int vlan_tci)
+__be16 vlan_dev_vlan_proto(const struct net_device *dev)
 {
-	__vlan_hwaccel_put_tag(napi->skb, vlan_tci);
-	return napi_gro_frags(napi);
+	return vlan_dev_priv(dev)->vlan_proto;
 }
-EXPORT_SYMBOL(vlan_gro_frags);
+EXPORT_SYMBOL(vlan_dev_vlan_proto);
 
 static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 {
-	if (skb_cow(skb, skb_headroom(skb)) < 0)
+	if (skb_cow(skb, skb_headroom(skb)) < 0) {
+		kfree_skb(skb);
 		return NULL;
+	}
+
 	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
-	skb_reset_mac_len(skb);
 	return skb;
 }
 
-static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
-{
-	__be16 proto;
-	unsigned char *rawp;
-
-	/*
-	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
-	 * three protocols care about.
-	 */
-
-	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= 1536) {
-		skb->protocol = proto;
-		return;
-	}
-
-	rawp = skb->data;
-	if (*(unsigned short *) rawp == 0xFFFF)
-		/*
-		 * This is a magic hack to spot IPX packets. Older Novell
-		 * breaks the protocol design and runs IPX over 802.3 without
-		 * an 802.2 LLC layer. We look for FFFF which isn't a used
-		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
-		 * but does for the rest.
-		 */
-		skb->protocol = htons(ETH_P_802_3);
-	else
-		/*
-		 * Real 802.2 LLC
-		 */
-		skb->protocol = htons(ETH_P_802_2);
-}
-
 struct sk_buff *vlan_untag(struct sk_buff *skb)
 {
 	struct vlan_hdr *vhdr;
@@ -162,7 +143,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 
 	vhdr = (struct vlan_hdr *) skb->data;
 	vlan_tci = ntohs(vhdr->h_vlan_TCI);
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
 
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
@@ -171,9 +152,264 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	if (unlikely(!skb))
 		goto err_free;
 
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_reset_mac_len(skb);
+
 	return skb;
 
 err_free:
 	kfree_skb(skb);
 	return NULL;
 }
+EXPORT_SYMBOL(vlan_untag);
+
+
+/*
+ * vlan info and vid list
+ */
+
+static void vlan_group_free(struct vlan_group *grp)
+{
+	int proto, part;	/* protocol index, slice within that protocol */
+
+	for (proto = 0; proto < VLAN_PROTO_NUM; proto++)
+		for (part = 0; part < VLAN_GROUP_ARRAY_SPLIT_PARTS; part++)
+			kfree(grp->vlan_devices_arrays[proto][part]);
+}
+
+static void vlan_info_free(struct vlan_info *vlan_info)
+{
+	vlan_group_free(&vlan_info->grp);	/* embedded group's arrays */
+	kfree(vlan_info);			/* then the container itself */
+}
+
+static void vlan_info_rcu_free(struct rcu_head *rcu)	/* RCU callback */
+{
+	vlan_info_free(container_of(rcu, struct vlan_info, rcu));
+}
+
+/* Allocate a zeroed vlan_info bound to @dev with an empty vid list. */
+static struct vlan_info *vlan_info_alloc(struct net_device *dev)
+{
+	struct vlan_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
+
+	if (info) {
+		info->real_dev = dev;
+		INIT_LIST_HEAD(&info->vid_list);
+	}
+
+	return info;	/* NULL when the allocation failed */
+}
+
+struct vlan_vid_info {
+	struct list_head list;	/* entry on vlan_info->vid_list */
+	__be16 proto;		/* VLAN ethertype, network byte order */
+	u16 vid;		/* VLAN id */
+	int refcount;		/* vlan_vid_add()/vlan_vid_del() users */
+};
+
+/* True when @dev has the HW filter feature matching @vid_info's proto. */
+static bool vlan_hw_filter_capable(const struct net_device *dev,
+				     const struct vlan_vid_info *vid_info)
+{
+	if (vid_info->proto == htons(ETH_P_8021Q))
+		return !!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
+	if (vid_info->proto == htons(ETH_P_8021AD))
+		return !!(dev->features & NETIF_F_HW_VLAN_STAG_FILTER);
+
+	return false;
+}
+
+/* Find the (proto, vid) entry on @vlan_info's vid list; NULL if absent. */
+static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
+					       __be16 proto, u16 vid)
+{
+	struct vlan_vid_info *pos;
+
+	list_for_each_entry(pos, &vlan_info->vid_list, list)
+		if (pos->vid == vid && pos->proto == proto)
+			return pos;
+	return NULL;
+}
+
+/* Allocate a zeroed vid entry for (proto, vid); refcount starts at 0. */
+static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
+{
+	struct vlan_vid_info *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (entry) {
+		entry->proto = proto;
+		entry->vid = vid;
+	}
+
+	return entry;	/* NULL on allocation failure */
+}
+
+/*
+ * Create the (proto, vid) entry on @vlan_info, programming the real device's
+ * HW filter first when it has one.  On success *@pvid_info is the new entry.
+ */
+static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
+			  struct vlan_vid_info **pvid_info)
+{
+	struct net_device *dev = vlan_info->real_dev;
+	struct vlan_vid_info *entry = vlan_vid_info_alloc(proto, vid);
+	int ret = 0;
+
+	if (!entry)
+		return -ENOMEM;
+	if (vlan_hw_filter_capable(dev, entry))
+		ret = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+	if (ret) {
+		kfree(entry);
+		return ret;
+	}
+	list_add(&entry->list, &vlan_info->vid_list);
+	vlan_info->nr_vids++;
+	*pvid_info = entry;
+	return 0;
+}
+
+/*
+ * Take a reference on (proto, vid) for @dev, creating the device's vlan_info
+ * and the vid entry on first use.  Caller holds RTNL.  Returns 0 or -errno.
+ */
+int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct vlan_info *info, *fresh = NULL;
+	struct vlan_vid_info *entry;
+	int ret;
+
+	ASSERT_RTNL();
+
+	info = rtnl_dereference(dev->vlan_info);
+	if (!info) {
+		info = fresh = vlan_info_alloc(dev);
+		if (!info)
+			return -ENOMEM;
+	}
+	entry = vlan_vid_info_get(info, proto, vid);
+	if (!entry) {
+		ret = __vlan_vid_add(info, proto, vid, &entry);
+		if (ret) {
+			kfree(fresh);	/* only a vlan_info made here, if any */
+			return ret;
+		}
+	}
+	entry->refcount++;
+
+	/* Publish a newly created vlan_info only once it is populated. */
+	if (fresh)
+		rcu_assign_pointer(dev->vlan_info, fresh);
+
+	return 0;
+}
+EXPORT_SYMBOL(vlan_vid_add);
+
+/*
+ * Drop @vid_info from @vlan_info: remove the vid from the real device's HW
+ * filter when it has one, then unlink and free the entry.
+ */
+static void __vlan_vid_del(struct vlan_info *vlan_info,
+			   struct vlan_vid_info *vid_info)
+{
+	struct net_device *dev = vlan_info->real_dev;
+	__be16 proto = vid_info->proto;
+	u16 vid = vid_info->vid;
+
+	/* Log failures with the ethertype in host order; drop the entry anyway. */
+	if (vlan_hw_filter_capable(dev, vid_info) &&
+	    dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid))
+		pr_warn("failed to kill vid %04x/%d for device %s\n",
+			ntohs(proto), vid, dev->name);
+	list_del(&vid_info->list);
+	kfree(vid_info);
+	vlan_info->nr_vids--;
+}
+
+/* Drop one reference on (proto, vid); tear down what becomes unused. */
+void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct vlan_info *info;
+	struct vlan_vid_info *entry;
+
+	ASSERT_RTNL();
+
+	info = rtnl_dereference(dev->vlan_info);
+	if (!info)
+		return;
+
+	entry = vlan_vid_info_get(info, proto, vid);
+	if (!entry || --entry->refcount != 0)
+		return;
+
+	__vlan_vid_del(info, entry);
+	if (info->nr_vids != 0)
+		return;
+	/* Last vid gone: unpublish now, free after an RCU grace period. */
+	RCU_INIT_POINTER(dev->vlan_info, NULL);
+	call_rcu(&info->rcu, vlan_info_rcu_free);
+}
+EXPORT_SYMBOL(vlan_vid_del);
+
+/* Mirror every vid registered on @by_dev onto @dev; on failure the vids
+ * added so far are removed again and the error is returned.
+ */
+int vlan_vids_add_by_dev(struct net_device *dev,
+			 const struct net_device *by_dev)
+{
+	struct vlan_info *src;
+	struct vlan_vid_info *pos;
+	int ret = 0;
+
+	ASSERT_RTNL();
+
+	src = rtnl_dereference(by_dev->vlan_info);
+	if (!src)
+		return 0;
+
+	list_for_each_entry(pos, &src->vid_list, list) {
+		ret = vlan_vid_add(dev, pos->proto, pos->vid);
+		if (ret)
+			break;
+	}
+	if (!ret)
+		return 0;
+
+	/* Walk back from the failing entry, undoing the earlier adds. */
+	list_for_each_entry_continue_reverse(pos, &src->vid_list, list)
+		vlan_vid_del(dev, pos->proto, pos->vid);
+	return ret;
+}
+EXPORT_SYMBOL(vlan_vids_add_by_dev);
+
+/* Call vlan_vid_del() on @dev for each vid registered on @by_dev. */
+void vlan_vids_del_by_dev(struct net_device *dev,
+			  const struct net_device *by_dev)
+{
+	struct vlan_info *src;
+	struct vlan_vid_info *pos;
+
+	ASSERT_RTNL();
+
+	src = rtnl_dereference(by_dev->vlan_info);
+	if (!src)
+		return;
+	list_for_each_entry(pos, &src->vid_list, list)
+		vlan_vid_del(dev, pos->proto, pos->vid);
+}
+EXPORT_SYMBOL(vlan_vids_del_by_dev);
+
+/* True when @dev has a vlan_info whose group has a non-zero nr_vlan_devs. */
+bool vlan_uses_dev(const struct net_device *dev)
+{
+	struct vlan_info *info;
+
+	ASSERT_RTNL();
+
+	info = rtnl_dereference(dev->vlan_info);
+
+	return info && info->grp.nr_vlan_devs != 0;
+}
+EXPORT_SYMBOL(vlan_uses_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 6e82148edfc..dd11f612e03 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -20,6 +20,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
@@ -31,6 +33,7 @@
 #include "vlan.h"
 #include "vlanproc.h"
 #include <linux/if_vlan.h>
+#include <linux/netpoll.h>
 
 /*
  *	Rebuild the Ethernet MAC header. This is called after an ARP
@@ -55,33 +58,16 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 		return arp_find(veth->h_dest, skb);
 #endif
 	default:
-		pr_debug("%s: unable to resolve type %X addresses.\n",
+		pr_debug("%s: unable to resolve type %X addresses\n",
 			 dev->name, ntohs(veth->h_vlan_encapsulated_proto));
 
-		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
+		ether_addr_copy(veth->h_source, dev->dev_addr);
 		break;
 	}
 
 	return 0;
 }
 
-static inline u16
-vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
-{
-	struct vlan_priority_tci_mapping *mp;
-
-	mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
-	while (mp) {
-		if (mp->priority == skb->priority) {
-			return mp->vlan_qos; /* This should already be shifted
-					      * to mask correctly with the
-					      * VLAN's TCI */
-		}
-		mp = mp->next;
-	}
-	return 0;
-}
-
 /*
  *	Create the VLAN header for an arbitrary protocol layer
  *
@@ -96,16 +82,17 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				const void *daddr, const void *saddr,
 				unsigned int len)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_hdr *vhdr;
 	unsigned int vhdrlen = 0;
 	u16 vlan_tci = 0;
 	int rc;
 
-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci = vlan->vlan_id;
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
@@ -117,8 +104,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
+		skb->protocol = vlan->vlan_proto;
+		type = ntohs(vlan->vlan_proto);
 		vhdrlen = VLAN_HLEN;
 	}
 
@@ -127,16 +114,28 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		saddr = dev->dev_addr;
 
 	/* Now make the underlying real hard header */
-	dev = vlan_dev_info(dev)->real_dev;
+	dev = vlan->real_dev;
 	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
 	if (rc > 0)
 		rc += vhdrlen;
 	return rc;
 }
 
+static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (vlan->netpoll)	/* nothing is sent when no netpoll is attached */
+		netpoll_send_skb(vlan->netpoll, skb);
+#else	/* !CONFIG_NET_POLL_CONTROLLER */
+	BUG();
+#endif	/* CONFIG_NET_POLL_CONTROLLER */
+	return NETDEV_TX_OK;	/* returned on every path that gets here */
+}
+
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
 	int ret;
@@ -146,28 +145,31 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	if (veth->h_vlan_proto != vlan->vlan_proto ||
+	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		vlan_tci = vlan->vlan_id;
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
+		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
-	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
+	skb->dev = vlan->real_dev;
 	len = skb->len;
+	if (unlikely(netpoll_tx_running(dev)))
+		return vlan_netpoll_send_skb(vlan, skb);
+
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct vlan_pcpu_stats *stats;
 
-		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
 		stats->tx_bytes += len;
 		u64_stats_update_end(&stats->syncp);
 	} else {
-		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+		this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
 	}
 
 	return ret;
@@ -178,7 +180,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 	/* TODO: gotta make sure the underlying layer can handle it,
 	 * maybe an IFF_VLAN_CAPABLE flag for devices?
 	 */
-	if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
+	if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
 		return -ERANGE;
 
 	dev->mtu = new_mtu;
@@ -189,7 +191,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
 		vlan->nr_ingress_mappings--;
@@ -202,7 +204,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
 	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
@@ -230,6 +232,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	np->next = mp;
 	np->priority = skb_prio;
 	np->vlan_qos = vlan_qos;
+	/* Before inserting this element into the hash table, make sure all of
+	 * its fields are committed to memory.
+	 * Pairs with the smp_rmb() in vlan_dev_get_egress_qos_mask().
+	 */
+	smp_wmb();
 	vlan->egress_priority_map[skb_prio & 0xF] = np;
 	if (vlan_qos)
 		vlan->nr_egress_mappings++;
@@ -239,11 +246,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
 int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	u32 old_flags = vlan->flags;
 
 	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		     VLAN_FLAG_LOOSE_BINDING))
+		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
@@ -254,17 +261,24 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 		else
 			vlan_gvrp_request_leave(dev);
 	}
+
+	if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) {
+		if (vlan->flags & VLAN_FLAG_MVRP)
+			vlan_mvrp_request_join(dev);
+		else
+			vlan_mvrp_request_leave(dev);
+	}
 	return 0;
 }
 
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
 {
-	strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
+	strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23);	/* at most 23 bytes; strncpy() adds no NUL on truncation */
 }
 
 static int vlan_dev_open(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	int err;
 
@@ -272,7 +286,7 @@ static int vlan_dev_open(struct net_device *dev)
 	    !(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 		return -ENETDOWN;
 
-	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
+	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr)) {
 		err = dev_uc_add(real_dev, dev->dev_addr);
 		if (err < 0)
 			goto out;
@@ -289,11 +303,14 @@ static int vlan_dev_open(struct net_device *dev)
 			goto clear_allmulti;
 	}
 
-	memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN);
+	ether_addr_copy(vlan->real_dev_addr, real_dev->dev_addr);
 
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_join(dev);
 
+	if (vlan->flags & VLAN_FLAG_MVRP)
+		vlan_mvrp_request_join(dev);
+
 	if (netif_carrier_ok(real_dev))
 		netif_carrier_on(dev);
 	return 0;
@@ -302,7 +319,7 @@ clear_allmulti:
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(real_dev, -1);
 del_unicast:
-	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
+	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
 		dev_uc_del(real_dev, dev->dev_addr);
 out:
 	netif_carrier_off(dev);
@@ -311,7 +328,7 @@ out:
 
 static int vlan_dev_stop(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 
 	dev_mc_unsync(real_dev, dev);
@@ -321,7 +338,7 @@ static int vlan_dev_stop(struct net_device *dev)
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(real_dev, -1);
 
-	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
+	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
 		dev_uc_del(real_dev, dev->dev_addr);
 
 	netif_carrier_off(dev);
@@ -330,7 +347,7 @@ static int vlan_dev_stop(struct net_device *dev)
 
 static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	struct sockaddr *addr = p;
 	int err;
 
@@ -340,23 +357,23 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 	if (!(dev->flags & IFF_UP))
 		goto out;
 
-	if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
+	if (!ether_addr_equal(addr->sa_data, real_dev->dev_addr)) {
 		err = dev_uc_add(real_dev, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
-	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
+	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
 		dev_uc_del(real_dev, dev->dev_addr);
 
 out:
-	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	ether_addr_copy(dev->dev_addr, addr->sa_data);
 	return 0;
 }
 
 static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct ifreq ifrr;
 	int err = -EOPNOTSUPP;
@@ -381,7 +398,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int err = 0;
 
@@ -391,11 +408,11 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 	return err;
 }
 
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
 				   struct scatterlist *sgl, unsigned int sgc)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = 0;
 
@@ -407,7 +424,7 @@ static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
 
 static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int len = 0;
 
@@ -419,7 +436,7 @@ static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
 
 static int vlan_dev_fcoe_enable(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -430,7 +447,7 @@ static int vlan_dev_fcoe_enable(struct net_device *dev)
 
 static int vlan_dev_fcoe_disable(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -441,7 +458,7 @@ static int vlan_dev_fcoe_disable(struct net_device *dev)
 
 static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -453,7 +470,7 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 				    struct scatterlist *sgl, unsigned int sgc)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = 0;
 
@@ -466,18 +483,20 @@ static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 
-	if (change & IFF_ALLMULTI)
-		dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
-	if (change & IFF_PROMISC)
-		dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
+	if (dev->flags & IFF_UP) {	/* forward to real_dev only while this device is up */
+		if (change & IFF_ALLMULTI)	/* flag toggled: +1 if now set, -1 if cleared */
+			dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
+		if (change & IFF_PROMISC)	/* same +1/-1 convention for promiscuity */
+			dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
+	}
 }
 
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
-	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-	dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);	/* presumably syncs the multicast list onto real_dev - confirm */
+	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);	/* presumably the same for the unicast list - confirm */
 }
 
 /*
@@ -505,18 +524,46 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
 	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
 }
 
+static int vlan_dev_get_lock_subclass(struct net_device *dev)
+{
+	return vlan_dev_priv(dev)->nest_level;	/* subclass is the nest_level stored in the VLAN private data */
+}
+
 static const struct header_ops vlan_header_ops = {
 	.create	 = vlan_dev_hard_header,
 	.rebuild = vlan_dev_rebuild_header,
 	.parse	 = eth_header_parse,
 };
 
+/* Build the header through the real device, defaulting saddr to this device's address. */
+static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev,
+				     unsigned short type,
+				     const void *daddr, const void *saddr,
+				     unsigned int len)
+{
+	struct net_device *lower = vlan_dev_priv(dev)->real_dev;
+
+	/* No explicit source given: use dev's own address rather than leaving it NULL. */
+	if (!saddr)
+		saddr = dev->dev_addr;
+	return dev_hard_header(skb, lower, type, daddr, saddr, len);
+}
+
+static const struct header_ops vlan_passthru_header_ops = {	/* installed by vlan_dev_init() when vlan_hw_offload_capable() holds */
+	.create	 = vlan_passthru_hard_header,
+	.rebuild = dev_rebuild_header,
+	.parse	 = eth_header_parse,
+};
+
+static struct device_type vlan_type = {	/* attached via SET_NETDEV_DEVTYPE() in vlan_dev_init() */
+	.name	= "vlan",
+};
+
 static const struct net_device_ops vlan_netdev_ops;
 
 static int vlan_dev_init(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
-	int subclass = 0;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 
 	netif_carrier_off(dev);
 
@@ -535,22 +582,26 @@ static int vlan_dev_init(struct net_device *dev)
 
 	dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
 	dev->gso_max_size = real_dev->gso_max_size;
+	if (dev->features & NETIF_F_VLAN_FEATURES)
+		netdev_warn(real_dev, "VLAN features are set incorrectly.  Q-in-Q configurations may not work correctly.\n");
+
 
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
 
 	if (is_zero_ether_addr(dev->dev_addr))
-		memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
+		eth_hw_addr_inherit(dev, real_dev);
 	if (is_zero_ether_addr(dev->broadcast))
 		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
 
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
-		dev->header_ops      = real_dev->header_ops;
+	if (vlan_hw_offload_capable(real_dev->features,
+				    vlan_dev_priv(dev)->vlan_proto)) {
+		dev->header_ops      = &vlan_passthru_header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
 		dev->header_ops      = &vlan_header_ops;
@@ -559,13 +610,12 @@ static int vlan_dev_init(struct net_device *dev)
 
 	dev->netdev_ops = &vlan_netdev_ops;
 
-	if (is_vlan_dev(real_dev))
-		subclass = 1;
+	SET_NETDEV_DEVTYPE(dev, &vlan_type);
 
-	vlan_dev_set_lockdep_class(dev, subclass);
+	vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
 
-	vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
-	if (!vlan_dev_info(dev)->vlan_pcpu_stats)
+	vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
+	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -574,11 +624,9 @@ static int vlan_dev_init(struct net_device *dev)
 static void vlan_dev_uninit(struct net_device *dev)
 {
 	struct vlan_priority_tci_mapping *pm;
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	int i;
 
-	free_percpu(vlan->vlan_pcpu_stats);
-	vlan->vlan_pcpu_stats = NULL;
 	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
 		while ((pm = vlan->egress_priority_map[i]) != NULL) {
 			vlan->egress_priority_map[i] = pm->next;
@@ -587,19 +635,17 @@ static void vlan_dev_uninit(struct net_device *dev)
 	}
 }
 
-static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
-	u32 old_features = features;
-
-	features &= real_dev->features;
-	features &= real_dev->vlan_features;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+	netdev_features_t old_features = features;
 
-	if (old_features & NETIF_F_SOFT_FEATURES)
-		features |= old_features & NETIF_F_SOFT_FEATURES;
+	features = netdev_intersect_features(features, real_dev->vlan_features);
+	features |= NETIF_F_RXCSUM;
+	features = netdev_intersect_features(features, real_dev->features);
 
-	if (dev_ethtool_get_rx_csum(real_dev))
-		features |= NETIF_F_RXCSUM;
+	features |= old_features & NETIF_F_SOFT_FEATURES;
 	features |= NETIF_F_LLTX;
 
 	return features;
@@ -608,55 +654,98 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
 static int vlan_ethtool_get_settings(struct net_device *dev,
 				     struct ethtool_cmd *cmd)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	return dev_ethtool_get_settings(vlan->real_dev, cmd);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+	return __ethtool_get_settings(vlan->real_dev, cmd);	/* settings are queried from the real device */
 }
 
 static void vlan_ethtool_get_drvinfo(struct net_device *dev,
 				     struct ethtool_drvinfo *info)
 {
-	strcpy(info->driver, vlan_fullname);
-	strcpy(info->version, vlan_version);
-	strcpy(info->fw_version, "N/A");
+	strlcpy(info->driver, vlan_fullname, sizeof(info->driver));	/* each copy is bounded by its destination field */
+	strlcpy(info->version, vlan_version, sizeof(info->version));
+	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));	/* fixed placeholder string */
 }
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
+	struct vlan_pcpu_stats *p;
+	u32 rx_errors = 0, tx_dropped = 0;	/* summed locally, stored into stats after the loop */
+	int i;
 
-	if (vlan_dev_info(dev)->vlan_pcpu_stats) {
-		struct vlan_pcpu_stats *p;
-		u32 rx_errors = 0, tx_dropped = 0;
-		int i;
-
-		for_each_possible_cpu(i) {
-			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
-			unsigned int start;
-
-			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
-			do {
-				start = u64_stats_fetch_begin_bh(&p->syncp);
-				rxpackets	= p->rx_packets;
-				rxbytes		= p->rx_bytes;
-				rxmulticast	= p->rx_multicast;
-				txpackets	= p->tx_packets;
-				txbytes		= p->tx_bytes;
-			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-
-			stats->rx_packets	+= rxpackets;
-			stats->rx_bytes		+= rxbytes;
-			stats->multicast	+= rxmulticast;
-			stats->tx_packets	+= txpackets;
-			stats->tx_bytes		+= txbytes;
-			/* rx_errors & tx_dropped are u32 */
-			rx_errors	+= p->rx_errors;
-			tx_dropped	+= p->tx_dropped;
-		}
-		stats->rx_errors  = rx_errors;
-		stats->tx_dropped = tx_dropped;
+	for_each_possible_cpu(i) {	/* accumulate every CPU's counters into *stats */
+		u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
+		unsigned int start;
+
+		p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
+		do {	/* re-read the 64-bit counters until fetch_retry reports no retry is needed */
+			start = u64_stats_fetch_begin_irq(&p->syncp);
+			rxpackets	= p->rx_packets;
+			rxbytes		= p->rx_bytes;
+			rxmulticast	= p->rx_multicast;
+			txpackets	= p->tx_packets;
+			txbytes		= p->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&p->syncp, start));
+
+		stats->rx_packets	+= rxpackets;
+		stats->rx_bytes		+= rxbytes;
+		stats->multicast	+= rxmulticast;
+		stats->tx_packets	+= txpackets;
+		stats->tx_bytes		+= txbytes;
+		/* rx_errors & tx_dropped are u32, read outside the fetch/retry loop */
+		rx_errors	+= p->rx_errors;
+		tx_dropped	+= p->tx_dropped;
 	}
+	stats->rx_errors  = rx_errors;	/* assigned, not added: overwrites any previous value */
+	stats->tx_dropped = tx_dropped;
+
 	return stats;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vlan_dev_poll_controller(struct net_device *dev)
+{
+	/* Intentionally empty: this callback performs no work for VLAN devices. */
+}
+
+/* Allocate a netpoll instance, set it up on the real device and store it in the VLAN private data. */
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+{
+	struct vlan_dev_priv *priv = vlan_dev_priv(dev);
+	struct netpoll *np;
+	int rc;
+
+	np = kzalloc(sizeof(*np), GFP_KERNEL);
+	if (!np)
+		return -ENOMEM;
+
+	/* The instance is set up against the real device, not the VLAN device. */
+	rc = __netpoll_setup(np, priv->real_dev);
+	if (rc) {
+		/* Nothing was stored yet, so only the allocation needs releasing. */
+		kfree(np);
+		return rc;
+	}
+
+	/* Store the pointer only once setup has succeeded. */
+	priv->netpoll = np;
+	return 0;
+}
+
+/* Detach the netpoll instance from the VLAN private data, if any, and free it asynchronously. */
+static void vlan_dev_netpoll_cleanup(struct net_device *dev)
+{
+	struct vlan_dev_priv *priv = vlan_dev_priv(dev);
+	struct netpoll *np = priv->netpoll;
+
+	if (np) {
+		/* Clear the stored pointer before handing the instance off. */
+		priv->netpoll = NULL;
+		__netpoll_free_async(np);
+	}
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
 static const struct ethtool_ops vlan_ethtool_ops = {
 	.get_settings	        = vlan_ethtool_get_settings,
 	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
@@ -673,12 +762,11 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= vlan_dev_set_mac_address,
 	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
 	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
 	.ndo_do_ioctl		= vlan_dev_ioctl,
 	.ndo_neigh_setup	= vlan_dev_neigh_setup,
 	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
 	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
 	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
@@ -686,19 +774,34 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
 	.ndo_fcoe_ddp_target	= vlan_dev_fcoe_ddp_target,
 #endif
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= vlan_dev_poll_controller,
+	.ndo_netpoll_setup	= vlan_dev_netpoll_setup,
+	.ndo_netpoll_cleanup	= vlan_dev_netpoll_cleanup,
+#endif
 	.ndo_fix_features	= vlan_dev_fix_features,
+	.ndo_get_lock_subclass  = vlan_dev_get_lock_subclass,
 };
 
+/* Release the per-CPU stats and then the net_device; set as dev->destructor in vlan_setup(). */
+static void vlan_dev_free(struct net_device *dev)
+{
+	free_percpu(vlan_dev_priv(dev)->vlan_pcpu_stats);
+	vlan_dev_priv(dev)->vlan_pcpu_stats = NULL;
+	/* The net_device is freed only after its per-CPU stats are released. */
+	free_netdev(dev);
+}
+
 void vlan_setup(struct net_device *dev)
 {
 	ether_setup(dev);
 
 	dev->priv_flags		|= IFF_802_1Q_VLAN;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	dev->priv_flags		&= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->tx_queue_len	= 0;
 
 	dev->netdev_ops		= &vlan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->destructor		= vlan_dev_free;
 	dev->ethtool_ops	= &vlan_ethtool_ops;
 
 	memset(dev->broadcast, 0, ETH_ALEN);
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 061ceceeef1..66a80320b03 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -29,18 +29,22 @@ static struct garp_application vlan_gvrp_app __read_mostly = {
 
 int vlan_gvrp_request_join(const struct net_device *dev)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))	/* no GARP join for any other VLAN protocol */
+		return 0;	/* skipping the join is reported as success */
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
 				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
 
 void vlan_gvrp_request_leave(const struct net_device *dev)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))	/* mirrors the join path: nothing to leave otherwise */
+		return;
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
 			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
new file mode 100644
index 00000000000..e0fe091801b
--- /dev/null
+++ b/net/8021q/vlan_mvrp.c
@@ -0,0 +1,76 @@
+/*
+ *	IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP)
+ *
+ *	Copyright (c) 2012 Massachusetts Institute of Technology
+ *
+ *	Adapted from code in net/8021q/vlan_gvrp.c
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/mrp.h>
+#include "vlan.h"
+
+#define MRP_MVRP_ADDRESS	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 }
+
+enum mvrp_attributes {
+	MVRP_ATTR_INVALID,
+	MVRP_ATTR_VID,	/* attribute type passed to mrp_request_join()/mrp_request_leave() */
+	__MVRP_ATTR_MAX	/* sentinel: one past the last attribute */
+};
+#define MVRP_ATTR_MAX	(__MVRP_ATTR_MAX - 1)	/* highest attribute value, i.e. MVRP_ATTR_VID */
+
+static struct mrp_application vlan_mrp_app __read_mostly = {	/* the single MVRP application instance used by this file */
+	.type		= MRP_APPLICATION_MVRP,
+	.maxattr	= MVRP_ATTR_MAX,
+	.pkttype.type	= htons(ETH_P_MVRP),	/* ethertype stored in network byte order */
+	.group_address	= MRP_MVRP_ADDRESS,	/* 01:80:c2:00:00:21, see the #define above */
+	.version	= 0,
+};
+
+/* Issue an MRP join for this VLAN's ID on the real device; returns 0 without joining unless the VLAN is 802.1Q. */
+int vlan_mvrp_request_join(const struct net_device *dev)
+{
+	const struct vlan_dev_priv *priv = vlan_dev_priv(dev);
+	__be16 vid = htons(priv->vlan_id);
+
+	return priv->vlan_proto != htons(ETH_P_8021Q) ? 0 :
+		mrp_request_join(priv->real_dev, &vlan_mrp_app,
+				 &vid, sizeof(vid), MVRP_ATTR_VID);
+}
+
+/* Issue an MRP leave for this VLAN's ID on the real device; does nothing unless the VLAN is 802.1Q. */
+void vlan_mvrp_request_leave(const struct net_device *dev)
+{
+	const struct vlan_dev_priv *priv = vlan_dev_priv(dev);
+	__be16 vid = htons(priv->vlan_id);
+
+	if (priv->vlan_proto == htons(ETH_P_8021Q))
+		mrp_request_leave(priv->real_dev, &vlan_mrp_app,
+				  &vid, sizeof(vid), MVRP_ATTR_VID);
+}
+
+int vlan_mvrp_init_applicant(struct net_device *dev)
+{
+	return mrp_init_applicant(dev, &vlan_mrp_app);	/* forwards dev with the MVRP application; result returned unchanged */
+}
+
+void vlan_mvrp_uninit_applicant(struct net_device *dev)
+{
+	mrp_uninit_applicant(dev, &vlan_mrp_app);	/* counterpart of vlan_mvrp_init_applicant() for the same dev */
+}
+
+int __init vlan_mvrp_init(void)
+{
+	return mrp_register_application(&vlan_mrp_app);	/* registration result is returned to the caller */
+}
+
+void vlan_mvrp_uninit(void)
+{
+	mrp_unregister_application(&vlan_mrp_app);	/* counterpart of vlan_mvrp_init() */
+}
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index be9a5c19a77..8ac8a5cc214 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
+#include <linux/module.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <net/rtnetlink.h>
@@ -22,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+	[IFLA_VLAN_PROTOCOL]	= { .type = NLA_U16 },
 };
 
 static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
@@ -52,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	if (!data)
 		return -EINVAL;
 
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
+		case htons(ETH_P_8021Q):
+		case htons(ETH_P_8021AD):
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+	}
+
 	if (data[IFLA_VLAN_ID]) {
 		id = nla_get_u16(data[IFLA_VLAN_ID]);
 		if (id >= VLAN_VID_MASK)
@@ -61,7 +73,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
 		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		      VLAN_FLAG_LOOSE_BINDING))
+		      VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
 			return -EINVAL;
 	}
 
@@ -104,8 +116,9 @@ static int vlan_changelink(struct net_device *dev,
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
+	__be16 proto;
 	int err;
 
 	if (!data[IFLA_VLAN_ID])
@@ -117,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (!real_dev)
 		return -ENODEV;
 
-	vlan->vlan_id  = nla_get_u16(data[IFLA_VLAN_ID]);
-	vlan->real_dev = real_dev;
-	vlan->flags    = VLAN_FLAG_REORDER_HDR;
+	if (data[IFLA_VLAN_PROTOCOL])
+		proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
+	else
+		proto = htons(ETH_P_8021Q);
 
-	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	vlan->vlan_proto = proto;
+	vlan->vlan_id	 = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->real_dev	 = real_dev;
+	vlan->flags	 = VLAN_FLAG_REORDER_HDR;
+
+	err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
 	if (err < 0)
 		return err;
 
@@ -148,28 +167,32 @@ static inline size_t vlan_qos_map_size(unsigned int n)
 
 static size_t vlan_get_size(const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
-	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
+	return nla_total_size(2) +	/* IFLA_VLAN_PROTOCOL: 2-byte payload, emitted with nla_put_be16() */
+	       nla_total_size(2) +	/* IFLA_VLAN_ID: 2-byte payload, emitted with nla_put_u16() */
+	       nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS: struct payload via nla_total_size() */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
 }
 
 static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_priority_tci_mapping *pm;
 	struct ifla_vlan_flags f;
 	struct ifla_vlan_qos_mapping m;
 	struct nlattr *nest;
 	unsigned int i;
 
-	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
+	if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) ||
+	    nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id))
+		goto nla_put_failure;
 	if (vlan->flags) {
 		f.flags = vlan->flags;
 		f.mask  = ~0;
-		NLA_PUT(skb, IFLA_VLAN_FLAGS, sizeof(f), &f);
+		if (nla_put(skb, IFLA_VLAN_FLAGS, sizeof(f), &f))
+			goto nla_put_failure;
 	}
 	if (vlan->nr_ingress_mappings) {
 		nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS);
@@ -182,8 +205,9 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
 			m.from = i;
 			m.to   = vlan->ingress_priority_map[i];
-			NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
-				sizeof(m), &m);
+			if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
+				    sizeof(m), &m))
+				goto nla_put_failure;
 		}
 		nla_nest_end(skb, nest);
 	}
@@ -201,8 +225,9 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
 				m.from = pm->priority;
 				m.to   = (pm->vlan_qos >> 13) & 0x7;
-				NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
-					sizeof(m), &m);
+				if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
+					    sizeof(m), &m))
+					goto nla_put_failure;
 			}
 		}
 		nla_nest_end(skb, nest);
@@ -217,7 +242,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.kind		= "vlan",
 	.maxtype	= IFLA_VLAN_MAX,
 	.policy		= vlan_policy,
-	.priv_size	= sizeof(struct vlan_dev_info),
+	.priv_size	= sizeof(struct vlan_dev_priv),
 	.setup		= vlan_setup,
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d940c49d168..1d0e89213a2 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -17,6 +17,8 @@
  * Jan 20, 1998        Ben Greear     Initial Version
  *****************************************************************************/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -91,7 +93,7 @@ static const struct file_operations vlan_fops = {
 
 static int vlandev_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, vlandev_seq_show, PDE(inode)->data);
+	return single_open(file, vlandev_seq_show, PDE_DATA(inode));	/* entry data is the vlandev given to proc_create_data() */
 }
 
 static const struct file_operations vlandev_fops = {
@@ -103,7 +105,7 @@ static const struct file_operations vlandev_fops = {
 };
 
 /*
- * Proc filesystem derectory entries.
+ * Proc filesystem directory entries.
  */
 
 /* Strings */
@@ -129,7 +131,7 @@ void vlan_proc_cleanup(struct net *net)
 		remove_proc_entry(name_conf, vn->proc_vlan_dir);
 
 	if (vn->proc_vlan_dir)
-		proc_net_remove(net, name_root);
+		remove_proc_entry(name_root, net->proc_net);
 
 	/* Dynamically added entries should be cleaned up as their vlan_device
 	 * is removed, so we should not have to take care of it here...
@@ -155,7 +157,7 @@ int __net_init vlan_proc_init(struct net *net)
 	return 0;
 
 err:
-	pr_err("%s: can't create entry in proc filesystem!\n", __func__);
+	pr_err("can't create entry in proc filesystem!\n");
 	vlan_proc_cleanup(net);
 	return -ENOBUFS;
 }
@@ -166,13 +168,13 @@ err:
 
 int vlan_proc_add_dev(struct net_device *vlandev)
 {
-	struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
 
-	dev_info->dent =
+	vlan->dent =	/* kept so vlan_proc_rem_dev() can remove the entry later */
 		proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
 				 vn->proc_vlan_dir, &vlandev_fops, vlandev);
-	if (!dev_info->dent)
+	if (!vlan->dent)	/* entry creation failed */
 		return -ENOBUFS;
 	return 0;
 }
@@ -182,14 +184,9 @@ int vlan_proc_add_dev(struct net_device *vlandev)
  */
 int vlan_proc_rem_dev(struct net_device *vlandev)
 {
-	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
-
 	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
-	if (vlan_dev_info(vlandev)->dent) {
-		remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
-				  vn->proc_vlan_dir);
-		vlan_dev_info(vlandev)->dent = NULL;
-	}
+	proc_remove(vlan_dev_priv(vlandev)->dent);	/* NOTE(review): old NULL guard dropped; assumes proc_remove() accepts NULL - confirm */
+	vlan_dev_priv(vlandev)->dent = NULL;	/* now cleared unconditionally */
 	return 0;
 }
 
@@ -229,7 +226,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	++*pos;
 
-	dev = (struct net_device *)v;
+	dev = v;
 	if (v == SEQ_START_TOKEN)
 		dev = net_device_entry(&net->dev_base_head);
 
@@ -266,10 +263,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 			   nmtype ? nmtype :  "UNKNOWN");
 	} else {
 		const struct net_device *vlandev = v;
-		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+		const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,
-			   dev_info->vlan_id,    dev_info->real_dev->name);
+			   vlan->vlan_id,    vlan->real_dev->name);
 	}
 	return 0;
 }
@@ -277,7 +274,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
-	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	static const char fmt64[] = "%30s %12llu\n";
@@ -289,8 +286,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	stats = dev_get_stats(vlandev, &temp);
 	seq_printf(seq,
 		   "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
-		   vlandev->name, dev_info->vlan_id,
-		   (int)(dev_info->flags & 1), vlandev->priv_flags);
+		   vlandev->name, vlan->vlan_id,
+		   (int)(vlan->flags & 1), vlandev->priv_flags);
 
 	seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
 	seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
@@ -298,23 +295,23 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_puts(seq, "\n");
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
-	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+	seq_printf(seq, "Device: %s", vlan->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
 			"0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
-		   dev_info->ingress_priority_map[0],
-		   dev_info->ingress_priority_map[1],
-		   dev_info->ingress_priority_map[2],
-		   dev_info->ingress_priority_map[3],
-		   dev_info->ingress_priority_map[4],
-		   dev_info->ingress_priority_map[5],
-		   dev_info->ingress_priority_map[6],
-		   dev_info->ingress_priority_map[7]);
+		   vlan->ingress_priority_map[0],
+		   vlan->ingress_priority_map[1],
+		   vlan->ingress_priority_map[2],
+		   vlan->ingress_priority_map[3],
+		   vlan->ingress_priority_map[4],
+		   vlan->ingress_priority_map[5],
+		   vlan->ingress_priority_map[6],
+		   vlan->ingress_priority_map[7]);
 
 	seq_printf(seq, " EGRESS priority mappings: ");
 	for (i = 0; i < 16; i++) {
 		const struct vlan_priority_tci_mapping *mp
-			= dev_info->egress_priority_map[i];
+			= vlan->egress_priority_map[i];
 		while (mp) {
 			seq_printf(seq, "%u:%hu ",
 				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index d9ea09b11cf..a75174a3372 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -23,7 +23,7 @@ config NET_9P_VIRTIO
 	  guest partitions and a host partition.
 
 config NET_9P_RDMA
-	depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
+	depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
 	tristate "9P RDMA Transport (Experimental)"
 	help
 	  This builds support for an RDMA transport.
diff --git a/net/9p/client.c b/net/9p/client.c
index 9e3b0e640da..0004cbaac4a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -23,6 +23,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -38,6 +40,9 @@
 #include <net/9p/transport.h>
 #include "protocol.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/9p.h>
+
 /*
   * Client Option Parsing (code inspired by NFS code)
   *  - a little lazy - parse all client options
@@ -71,24 +76,37 @@ inline int p9_is_proto_dotu(struct p9_client *clnt)
 }
 EXPORT_SYMBOL(p9_is_proto_dotu);
 
+/*
+ * Some error codes are taken directly from the server replies,
+ * make sure they are valid.
+ */
+static int safe_errno(int err)
+{
+	if ((err > 0) || (err < -MAX_ERRNO)) {
+		p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err);
+		return -EPROTO;
+	}
+	return err;
+}
+
+
 /* Interpret mount option for protocol version */
-static int get_protocol_version(const substring_t *name)
+static int get_protocol_version(char *s)
 {
 	int version = -EINVAL;
 
-	if (!strncmp("9p2000", name->from, name->to-name->from)) {
+	if (!strcmp(s, "9p2000")) {
 		version = p9_proto_legacy;
-		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n");
-	} else if (!strncmp("9p2000.u", name->from, name->to-name->from)) {
+		p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n");
+	} else if (!strcmp(s, "9p2000.u")) {
 		version = p9_proto_2000u;
-		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n");
-	} else if (!strncmp("9p2000.L", name->from, name->to-name->from)) {
+		p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n");
+	} else if (!strcmp(s, "9p2000.L")) {
 		version = p9_proto_2000L;
-		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
-	} else {
-		P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ",
-							name->from);
-	}
+		p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
+	} else
+		pr_info("Unknown protocol version %s\n", s);
+
 	return version;
 }
 
@@ -106,9 +124,10 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
+	char *s;
 	int ret = 0;
 
-	clnt->proto_version = p9_proto_2000u;
+	clnt->proto_version = p9_proto_2000L;
 	clnt->msize = 8192;
 
 	if (!opts)
@@ -116,47 +135,63 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 
 	tmp_options = kstrdup(opts, GFP_KERNEL);
 	if (!tmp_options) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"failed to allocate copy of option string\n");
+		p9_debug(P9_DEBUG_ERROR,
+			 "failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
 	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
-		int token;
+		int token, r;
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token < Opt_trans) {
-			int r = match_int(&args[0], &option);
+		switch (token) {
+		case Opt_msize:
+			r = match_int(&args[0], &option);
 			if (r < 0) {
-				P9_DPRINTK(P9_DEBUG_ERROR,
-					"integer field, but no integer?\n");
+				p9_debug(P9_DEBUG_ERROR,
+					 "integer field, but no integer?\n");
 				ret = r;
 				continue;
 			}
-		}
-		switch (token) {
-		case Opt_msize:
 			clnt->msize = option;
 			break;
 		case Opt_trans:
-			clnt->trans_mod = v9fs_get_trans_by_name(&args[0]);
-			if(clnt->trans_mod == NULL) {
-				P9_DPRINTK(P9_DEBUG_ERROR,
-				   "Could not find request transport: %s\n",
-				   (char *) &args[0]);
+			s = match_strdup(&args[0]);
+			if (!s) {
+				ret = -ENOMEM;
+				p9_debug(P9_DEBUG_ERROR,
+					 "problem allocating copy of trans arg\n");
+				goto free_and_return;
+			 }
+			clnt->trans_mod = v9fs_get_trans_by_name(s);
+			if (clnt->trans_mod == NULL) {
+				pr_info("Could not find request transport: %s\n",
+					s);
 				ret = -EINVAL;
+				kfree(s);
 				goto free_and_return;
 			}
+			kfree(s);
 			break;
 		case Opt_legacy:
 			clnt->proto_version = p9_proto_legacy;
 			break;
 		case Opt_version:
-			ret = get_protocol_version(&args[0]);
-			if (ret == -EINVAL)
+			s = match_strdup(&args[0]);
+			if (!s) {
+				ret = -ENOMEM;
+				p9_debug(P9_DEBUG_ERROR,
+					 "problem allocating copy of version arg\n");
+				goto free_and_return;
+			}
+			ret = get_protocol_version(s);
+			if (ret == -EINVAL) {
+				kfree(s);
 				goto free_and_return;
+			}
+			kfree(s);
 			clnt->proto_version = ret;
 			break;
 		default:
@@ -169,6 +204,17 @@ free_and_return:
 	return ret;
 }
 
+static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+{
+	struct p9_fcall *fc;
+	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
+	if (!fc)
+		return NULL;
+	fc->capacity = alloc_msize;
+	fc->sdata = (char *) fc + sizeof(struct p9_fcall);
+	return fc;
+}
+
 /**
  * p9_tag_alloc - lookup/allocate a request by tag
  * @c: client session to lookup tag within
@@ -184,11 +230,13 @@ free_and_return:
  *
  */
 
-static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
+static struct p9_req_t *
+p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 {
 	unsigned long flags;
 	int row, col;
 	struct p9_req_t *req;
+	int alloc_msize = min(c->msize, max_size);
 
 	/* This looks up the original request by tag so we know which
 	 * buffer to read the data into */
@@ -203,7 +251,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 					sizeof(struct p9_req_t), GFP_ATOMIC);
 
 			if (!c->reqs[row]) {
-				printk(KERN_ERR "Couldn't grow tag array\n");
+				pr_err("Couldn't grow tag array\n");
 				spin_unlock_irqrestore(&c->lock, flags);
 				return ERR_PTR(-ENOMEM);
 			}
@@ -219,50 +267,36 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 	col = tag % P9_ROW_MAXTAG;
 
 	req = &c->reqs[row][col];
-	if (!req->tc) {
+	if (!req->wq) {
 		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
-		if (!req->wq) {
-			printk(KERN_ERR "Couldn't grow tag array\n");
-			return ERR_PTR(-ENOMEM);
-		}
+		if (!req->wq)
+			goto grow_failed;
 		init_waitqueue_head(req->wq);
-		if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
-				P9_TRANS_PREF_PAYLOAD_SEP) {
-			int alloc_msize = min(c->msize, 4096);
-			req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
-					  GFP_NOFS);
-			req->tc->capacity = alloc_msize;
-			req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
-					  GFP_NOFS);
-			req->rc->capacity = alloc_msize;
-		} else {
-			req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-					  GFP_NOFS);
-			req->tc->capacity = c->msize;
-			req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-					  GFP_NOFS);
-			req->rc->capacity = c->msize;
-		}
-		if ((!req->tc) || (!req->rc)) {
-			printk(KERN_ERR "Couldn't grow tag array\n");
-			kfree(req->tc);
-			kfree(req->rc);
-			kfree(req->wq);
-			req->tc = req->rc = NULL;
-			req->wq = NULL;
-			return ERR_PTR(-ENOMEM);
-		}
-		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
 	}
 
+	if (!req->tc)
+		req->tc = p9_fcall_alloc(alloc_msize);
+	if (!req->rc)
+		req->rc = p9_fcall_alloc(alloc_msize);
+	if (!req->tc || !req->rc)
+		goto grow_failed;
+
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
-	return &c->reqs[row][col];
+	return req;
+
+grow_failed:
+	pr_err("Couldn't grow tag array\n");
+	kfree(req->tc);
+	kfree(req->rc);
+	kfree(req->wq);
+	req->tc = req->rc = NULL;
+	req->wq = NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
@@ -280,7 +314,8 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
 	 * buffer to read the data into */
 	tag++;
 
-	BUG_ON(tag >= c->max_tag);
+	if(tag >= c->max_tag) 
+		return NULL;
 
 	row = tag / P9_ROW_MAXTAG;
 	col = tag % P9_ROW_MAXTAG;
@@ -331,9 +366,9 @@ static void p9_tag_cleanup(struct p9_client *c)
 	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
 		for (col = 0; col < P9_ROW_MAXTAG; col++) {
 			if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
-				P9_DPRINTK(P9_DEBUG_MUX,
-				  "Attempting to cleanup non-free tag %d,%d\n",
-				  row, col);
+				p9_debug(P9_DEBUG_MUX,
+					 "Attempting to cleanup non-free tag %d,%d\n",
+					 row, col);
 				/* TODO: delay execution of cleanup */
 				return;
 			}
@@ -367,7 +402,7 @@ static void p9_tag_cleanup(struct p9_client *c)
 static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
 {
 	int tag = r->tc->tag;
-	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+	p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
 
 	r->status = REQ_STATUS_IDLE;
 	if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
@@ -380,11 +415,19 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
  * req: request received
  *
  */
-void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
 {
-	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+	p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+
+	/*
+	 * This barrier is needed to make sure any change made to req before
+	 * the other thread wakes up will indeed be seen by the waiting side.
+	 */
+	smp_wmb();
+	req->status = status;
+
 	wake_up(req->wq);
-	P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -419,8 +462,8 @@ p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
 	pdu->id = r_type;
 	pdu->tag = r_tag;
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size,
-							pdu->id, pdu->tag);
+	p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n",
+		 pdu->size, pdu->id, pdu->tag);
 
 	if (type)
 		*type = r_type;
@@ -455,37 +498,114 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 	int ecode;
 
 	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+	/*
+	 * dump the response from server
+	 * This should be after parse_header, which populates pdu_fcall.
+	 */
+	trace_9p_protocol_dump(c, req->rc);
 	if (err) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
+		p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
 		return err;
 	}
-
 	if (type != P9_RERROR && type != P9_RLERROR)
 		return 0;
 
 	if (!p9_is_proto_dotl(c)) {
 		char *ename;
+		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+				  &ename, &ecode);
+		if (err)
+			goto out_err;
+
+		if (p9_is_proto_dotu(c))
+			err = -ecode;
+
+		if (!err || !IS_ERR_VALUE(err)) {
+			err = p9_errstr2errno(ename, strlen(ename));
+
+			p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
+				 -ecode, ename);
+		}
+		kfree(ename);
+	} else {
+		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+		err = -ecode;
 
-		if (req->tc->pbuf_size) {
-			/* Handle user buffers */
-			size_t len = req->rc->size - req->rc->offset;
-			if (req->tc->pubuf) {
-				/* User Buffer */
-				err = copy_from_user(
-					&req->rc->sdata[req->rc->offset],
-					req->tc->pubuf, len);
+		p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+	}
+
+	return err;
+
+out_err:
+	p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
+
+	return err;
+}
+
+/**
+ * p9_check_zc_errors - check 9p packet for error return and process it
+ * @c: current client instance
+ * @req: request to parse and check for error conditions
+ * @in_hdrlen: Size of response protocol buffer.
+ *
+ * returns error code if one is discovered, otherwise returns 0
+ *
+ * this will have to be more complicated if we have multiple
+ * error packet types
+ */
+
+static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
+			      char *uidata, int in_hdrlen, int kern_buf)
+{
+	int err;
+	int ecode;
+	int8_t type;
+	char *ename = NULL;
+
+	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+	/*
+	 * dump the response from server
+	 * This should be after parse_header, which populates pdu_fcall.
+	 */
+	trace_9p_protocol_dump(c, req->rc);
+	if (err) {
+		p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
+		return err;
+	}
+
+	if (type != P9_RERROR && type != P9_RLERROR)
+		return 0;
+
+	if (!p9_is_proto_dotl(c)) {
+		/* Error is reported in string format */
+		int len;
+		/* 7 = header size for RERROR; */
+		int inline_len = in_hdrlen - 7;
+
+		len =  req->rc->size - req->rc->offset;
+		if (len > (P9_ZC_HDR_SZ - 7)) {
+			err = -EFAULT;
+			goto out_err;
+		}
+
+		ename = &req->rc->sdata[req->rc->offset];
+		if (len > inline_len) {
+			/* We have error in external buffer */
+			if (kern_buf) {
+				memcpy(ename + inline_len, uidata,
+				       len - inline_len);
+			} else {
+				err = copy_from_user(ename + inline_len,
+						     uidata, len - inline_len);
 				if (err) {
 					err = -EFAULT;
 					goto out_err;
 				}
-			} else {
-				/* Kernel Buffer */
-				memmove(&req->rc->sdata[req->rc->offset],
-						req->tc->pkbuf, len);
 			}
 		}
+		ename = NULL;
 		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
-				&ename, &ecode);
+				  &ename, &ecode);
 		if (err)
 			goto out_err;
 
@@ -495,24 +615,20 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 		if (!err || !IS_ERR_VALUE(err)) {
 			err = p9_errstr2errno(ename, strlen(ename));
 
-			P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
-					ename);
-
-			kfree(ename);
+			p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
+				 -ecode, ename);
 		}
+		kfree(ename);
 	} else {
 		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
 		err = -ecode;
 
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+		p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
 	}
-
-
 	return err;
 
 out_err:
-	P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
-
+	p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
 	return err;
 }
 
@@ -541,43 +657,32 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 	if (err)
 		return err;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);
+	p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);
 
 	req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-
-	/* if we haven't received a response for oldreq,
-	   remove it from the list. */
-	spin_lock(&c->lock);
-	if (oldreq->status == REQ_STATUS_FLSH)
-		list_del(&oldreq->req_list);
-	spin_unlock(&c->lock);
+	/*
+	 * If we haven't received a response for oldreq (it is still marked
+	 * as sent), hand it to the transport's optional cancelled() hook.
+	 */
+	if (oldreq->status == REQ_STATUS_SENT)
+		if (c->trans_mod->cancelled)
+			c->trans_mod->cancelled(c, oldreq);
 
 	p9_free_req(c, req);
 	return 0;
 }
 
-/**
- * p9_client_rpc - issue a request and wait for a response
- * @c: client session
- * @type: type of request
- * @fmt: protocol format string (see protocol.c)
- *
- * Returns request structure (which client must free using p9_free_req)
- */
-
-static struct p9_req_t *
-p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
+					      int8_t type, int req_size,
+					      const char *fmt, va_list ap)
 {
-	va_list ap;
 	int tag, err;
 	struct p9_req_t *req;
-	unsigned long flags;
-	int sigpending;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
+	p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
 
 	/* we allow for any status other than disconnected */
 	if (c->status == Disconnected)
@@ -587,12 +692,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
 		return ERR_PTR(-EIO);
 
-	if (signal_pending(current)) {
-		sigpending = 1;
-		clear_thread_flag(TIF_SIGPENDING);
-	} else
-		sigpending = 0;
-
 	tag = P9_NOTAG;
 	if (type != P9_TVERSION) {
 		tag = p9_idpool_get(c->tagpool);
@@ -600,18 +699,51 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 			return ERR_PTR(-ENOMEM);
 	}
 
-	req = p9_tag_alloc(c, tag);
+	req = p9_tag_alloc(c, tag, req_size);
 	if (IS_ERR(req))
 		return req;
 
 	/* marshall the data */
 	p9pdu_prepare(req->tc, tag, type);
-	va_start(ap, fmt);
 	err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
-	va_end(ap);
 	if (err)
 		goto reterr;
-	p9pdu_finalize(req->tc);
+	p9pdu_finalize(c, req->tc);
+	trace_9p_client_req(c, type, tag);
+	return req;
+reterr:
+	p9_free_req(c, req);
+	return ERR_PTR(err);
+}
+
+/**
+ * p9_client_rpc - issue a request and wait for a response
+ * @c: client session
+ * @type: type of request
+ * @fmt: protocol format string (see protocol.c)
+ *
+ * Returns request structure (which client must free using p9_free_req)
+ */
+
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+{
+	va_list ap;
+	int sigpending, err;
+	unsigned long flags;
+	struct p9_req_t *req;
+
+	va_start(ap, fmt);
+	req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
+	va_end(ap);
+	if (IS_ERR(req))
+		return req;
+
+	if (signal_pending(current)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+	} else
+		sigpending = 0;
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
@@ -619,20 +751,30 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 			c->status = Disconnected;
 		goto reterr;
 	}
-
-	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
+again:
+	/* Wait for the response */
 	err = wait_event_interruptible(*req->wq,
-						req->status >= REQ_STATUS_RCVD);
-	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
-						req->wq, tag, err);
+				       req->status >= REQ_STATUS_RCVD);
+
+	/*
+	 * Make sure our req is coherent with regard to updates in other
+	 * threads - echoes to wmb() in the callback
+	 */
+	smp_rmb();
+
+	if ((err == -ERESTARTSYS) && (c->status == Connected)
+				  && (type == P9_TFLUSH)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+		goto again;
+	}
 
 	if (req->status == REQ_STATUS_ERROR) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+		p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
 		err = req->t_err;
 	}
-
 	if ((err == -ERESTARTSYS) && (c->status == Connected)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
+		p9_debug(P9_DEBUG_MUX, "flushing\n");
 		sigpending = 1;
 		clear_thread_flag(TIF_SIGPENDING);
 
@@ -643,27 +785,104 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 		if (req->status == REQ_STATUS_RCVD)
 			err = 0;
 	}
-
 	if (sigpending) {
 		spin_lock_irqsave(&current->sighand->siglock, flags);
 		recalc_sigpending();
 		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
-
 	if (err < 0)
 		goto reterr;
 
 	err = p9_check_errors(c, req);
-	if (!err) {
-		P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
+	trace_9p_client_res(c, type, req->rc->tag, err);
+	if (!err)
 		return req;
+reterr:
+	p9_free_req(c, req);
+	return ERR_PTR(safe_errno(err));
+}
+
+/**
+ * p9_client_zc_rpc - issue a request and wait for a response
+ * @c: client session
+ * @type: type of request
+ * @uidata: user buffer that should be used for zero copy read
+ * @uodata: user buffer that should be used for zero copy write
+ * @inlen: read buffer size
+ * @olen: write buffer size
+ * @in_hdrlen: read header size, i.e. the size of response protocol data
+ * @fmt: protocol format string (see protocol.c)
+ *
+ * Returns request structure (which client must free using p9_free_req)
+ */
+static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+					 char *uidata, char *uodata,
+					 int inlen, int olen, int in_hdrlen,
+					 int kern_buf, const char *fmt, ...)
+{
+	va_list ap;
+	int sigpending, err;
+	unsigned long flags;
+	struct p9_req_t *req;
+
+	va_start(ap, fmt);
+	/*
+	 * We allocate an inline protocol data buffer of only 4k bytes.
+	 * The actual content is passed in zero-copy fashion.
+	 */
+	req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
+	va_end(ap);
+	if (IS_ERR(req))
+		return req;
+
+	if (signal_pending(current)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+	} else
+		sigpending = 0;
+
+	/* If we are called with KERNEL_DS force kern_buf */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		kern_buf = 1;
+
+	err = c->trans_mod->zc_request(c, req, uidata, uodata,
+				       inlen, olen, in_hdrlen, kern_buf);
+	if (err < 0) {
+		if (err == -EIO)
+			c->status = Disconnected;
+		goto reterr;
+	}
+	if (req->status == REQ_STATUS_ERROR) {
+		p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+		err = req->t_err;
+	}
+	if ((err == -ERESTARTSYS) && (c->status == Connected)) {
+		p9_debug(P9_DEBUG_MUX, "flushing\n");
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+
+		if (c->trans_mod->cancel(c, req))
+			p9_client_flush(c, req);
+
+		/* if we received the response anyway, don't signal error */
+		if (req->status == REQ_STATUS_RCVD)
+			err = 0;
 	}
+	if (sigpending) {
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+	if (err < 0)
+		goto reterr;
 
+	err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf);
+	trace_9p_client_res(c, type, req->rc->tag, err);
+	if (!err)
+		return req;
 reterr:
-	P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type,
-									err);
 	p9_free_req(c, req);
-	return ERR_PTR(err);
+	return ERR_PTR(safe_errno(err));
 }
 
 static struct p9_fid *p9_fid_create(struct p9_client *clnt)
@@ -672,7 +891,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 	struct p9_fid *fid;
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
+	p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
 	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
 	if (!fid)
 		return ERR_PTR(-ENOMEM);
@@ -705,7 +924,7 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	struct p9_client *clnt;
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid);
 	clnt = fid->clnt;
 	p9_idpool_put(fid->fid, clnt->fidpool);
 	spin_lock_irqsave(&clnt->lock, flags);
@@ -722,8 +941,8 @@ static int p9_client_version(struct p9_client *c)
 	char *version;
 	int msize;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n",
-						c->msize, c->proto_version);
+	p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n",
+		 c->msize, c->proto_version);
 
 	switch (c->proto_version) {
 	case p9_proto_2000L:
@@ -748,12 +967,12 @@ static int p9_client_version(struct p9_client *c)
 
 	err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
 	if (err) {
-		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
-		p9pdu_dump(1, req->rc);
+		p9_debug(P9_DEBUG_9P, "version error %d\n", err);
+		trace_9p_protocol_dump(c, req->rc);
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
+	p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
 	if (!strncmp(version, "9P2000.L", 8))
 		c->proto_version = p9_proto_2000L;
 	else if (!strncmp(version, "9P2000.u", 8))
@@ -779,6 +998,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
 	int err;
 	struct p9_client *clnt;
+	char *client_id;
 
 	err = 0;
 	clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -787,6 +1007,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
 	clnt->trans_mod = NULL;
 	clnt->trans = NULL;
+
+	client_id = utsname()->nodename;
+	memcpy(clnt->name, client_id, strlen(client_id) + 1);
+
 	spin_lock_init(&clnt->lock);
 	INIT_LIST_HEAD(&clnt->fidlist);
 
@@ -803,8 +1027,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
 	if (clnt->trans_mod == NULL) {
 		err = -EPROTONOSUPPORT;
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"No transport defined or default transport\n");
+		p9_debug(P9_DEBUG_ERROR,
+			 "No transport defined or default transport\n");
 		goto destroy_tagpool;
 	}
 
@@ -814,15 +1038,15 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto put_trans;
 	}
 
-	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
-		clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);
+	p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
+		 clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);
 
 	err = clnt->trans_mod->create(clnt, dev_name, options);
 	if (err)
 		goto destroy_fidpool;
 
-	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize)
-		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ;
+	if (clnt->msize > clnt->trans_mod->maxsize)
+		clnt->msize = clnt->trans_mod->maxsize;
 
 	err = p9_client_version(clnt);
 	if (err)
@@ -848,7 +1072,7 @@ void p9_client_destroy(struct p9_client *clnt)
 {
 	struct p9_fid *fid, *fidptr;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt);
+	p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt);
 
 	if (clnt->trans_mod)
 		clnt->trans_mod->close(clnt);
@@ -856,7 +1080,7 @@ void p9_client_destroy(struct p9_client *clnt)
 	v9fs_put_trans(clnt->trans_mod);
 
 	list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) {
-		printk(KERN_INFO "Found fid %d not clunked\n", fid->fid);
+		pr_info("Found fid %d not clunked\n", fid->fid);
 		p9_fid_destroy(fid);
 	}
 
@@ -871,30 +1095,29 @@ EXPORT_SYMBOL(p9_client_destroy);
 
 void p9_client_disconnect(struct p9_client *clnt)
 {
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt);
 	clnt->status = Disconnected;
 }
 EXPORT_SYMBOL(p9_client_disconnect);
 
 void p9_client_begin_disconnect(struct p9_client *clnt)
 {
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt);
 	clnt->status = BeginDisconnect;
 }
 EXPORT_SYMBOL(p9_client_begin_disconnect);
 
 struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
-	char *uname, u32 n_uname, char *aname)
+	char *uname, kuid_t n_uname, char *aname)
 {
-	int err;
+	int err = 0;
 	struct p9_req_t *req;
 	struct p9_fid *fid;
 	struct p9_qid qid;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
-					afid ? afid->fid : -1, uname, aname);
-	err = 0;
 
+	p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
+		 afid ? afid->fid : -1, uname, aname);
 	fid = p9_fid_create(clnt);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
@@ -902,7 +1125,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 		goto error;
 	}
 
-	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid,
+	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid,
 			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -911,15 +1134,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
-					qid.type,
-					(unsigned long long)qid.path,
-					qid.version);
+	p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
+		 qid.type, (unsigned long long)qid.path, qid.version);
 
 	memmove(&fid->qid, &qid, sizeof(struct p9_qid));
 
@@ -959,8 +1180,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
 		fid = oldfid;
 
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
-		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
+	p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
+		 oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
 
 	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
 								nwname, wnames);
@@ -971,13 +1192,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
 		goto clunk_fid;
 	}
 	p9_free_req(clnt, req);
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
+	p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
 
 	if (nwqids != nwname) {
 		err = -ENOENT;
@@ -985,7 +1206,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
 	}
 
 	for (count = 0; count < nwqids; count++)
-		P9_DPRINTK(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",
+		p9_debug(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",
 			count, wqids[count].type,
 			(unsigned long long)wqids[count].path,
 			wqids[count].version);
@@ -1020,7 +1241,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
 	int iounit;
 
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
+	p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
 		p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
 	err = 0;
 
@@ -1038,11 +1259,11 @@ int p9_client_open(struct p9_fid *fid, int mode)
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
+	p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
 		p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN",  qid.type,
 		(unsigned long long)qid.path, qid.version, iounit);
 
@@ -1057,22 +1278,23 @@ error:
 EXPORT_SYMBOL(p9_client_open);
 
 int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
-		gid_t gid, struct p9_qid *qid)
+		kgid_t gid, struct p9_qid *qid)
 {
 	int err = 0;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P,
 			">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
-			ofid->fid, name, flags, mode, gid);
+			ofid->fid, name, flags, mode,
+			from_kgid(&init_user_ns, gid));
 	clnt = ofid->clnt;
 
 	if (ofid->mode != -1)
 		return -EINVAL;
 
-	req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags,
+	req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags,
 			mode, gid);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -1081,11 +1303,11 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
+	p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
 			qid->type,
 			(unsigned long long)qid->path,
 			qid->version, iounit);
@@ -1109,7 +1331,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 	struct p9_qid qid;
 	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
+	p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
 						fid->fid, name, perm, mode);
 	err = 0;
 	clnt = fid->clnt;
@@ -1126,11 +1348,11 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
+	p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
 				qid.type,
 				(unsigned long long)qid.path,
 				qid.version, iounit);
@@ -1145,18 +1367,18 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fcreate);
 
-int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
+int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid,
 		struct p9_qid *qid)
 {
 	int err = 0;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s  symtgt %s\n",
+	p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s  symtgt %s\n",
 			dfid->fid, name, symtgt);
 	clnt = dfid->clnt;
 
-	req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt,
+	req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt,
 			gid);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -1165,11 +1387,11 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
+	p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
 			qid->type, (unsigned long long)qid->path, qid->version);
 
 free_and_error:
@@ -1184,7 +1406,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
+	p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
 			dfid->fid, oldfid->fid, newname);
 	clnt = dfid->clnt;
 	req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid,
@@ -1192,7 +1414,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n");
+	p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
 	p9_free_req(clnt, req);
 	return 0;
 }
@@ -1204,7 +1426,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
+	p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
 			fid->fid, datasync);
 	err = 0;
 	clnt = fid->clnt;
@@ -1215,7 +1437,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
 
 	p9_free_req(clnt, req);
 
@@ -1229,14 +1451,18 @@ int p9_client_clunk(struct p9_fid *fid)
 	int err;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
+	int retries = 0;
 
 	if (!fid) {
-		P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n");
+		pr_warn("%s (%d): Trying to clunk with NULL fid\n",
+			__func__, task_pid_nr(current));
 		dump_stack();
 		return 0;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
+again:
+	p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid,
+								retries);
 	err = 0;
 	clnt = fid->clnt;
 
@@ -1246,12 +1472,20 @@ int p9_client_clunk(struct p9_fid *fid)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
 
 	p9_free_req(clnt, req);
-	p9_fid_destroy(fid);
-
 error:
+	/*
+	 * Fid is not valid even after a failed clunk
+	 * If interrupted, retry once then give up and
+	 * leak fid until umount.
+	 */
+	if (err == -ERESTARTSYS) {
+		if (retries++ == 0)
+			goto again;
+	} else
+		p9_fid_destroy(fid);
 	return err;
 }
 EXPORT_SYMBOL(p9_client_clunk);
@@ -1262,7 +1496,7 @@ int p9_client_remove(struct p9_fid *fid)
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
 	err = 0;
 	clnt = fid->clnt;
 
@@ -1272,26 +1506,54 @@ int p9_client_remove(struct p9_fid *fid)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
 	p9_free_req(clnt, req);
 error:
-	p9_fid_destroy(fid);
+	if (err == -ERESTARTSYS)
+		p9_client_clunk(fid);
+	else
+		p9_fid_destroy(fid);
 	return err;
 }
 EXPORT_SYMBOL(p9_client_remove);
 
+/*
+ * Issue TUNLINKAT for @name relative to the fid @dfid, passing @flags.
+ * Returns 0, or the error value carried by a failed request pointer.
+ */
+int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
+{
+	struct p9_client *clnt = dfid->clnt;
+	struct p9_req_t *req;
+
+	p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n",
+		 dfid->fid, name, flags);
+
+	req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
+
+	p9_free_req(clnt, req);
+	return 0;
+}
+EXPORT_SYMBOL(p9_client_unlinkat);
+
 int
 p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 								u32 count)
 {
-	int err, rsize;
-	struct p9_client *clnt;
-	struct p9_req_t *req;
 	char *dataptr;
+	int kernel_buf = 0;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+	int err, rsize, non_zc = 0;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid,
-					(long long unsigned) offset, count);
+
+	p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
+		   fid->fid, (unsigned long long) offset, count);
 	err = 0;
 	clnt = fid->clnt;
 
@@ -1303,13 +1565,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 		rsize = count;
 
 	/* Don't bother zerocopy for small IO (< 1024) */
-	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
-			P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
-		req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
-				rsize, data, udata);
+	if (clnt->trans_mod->zc_request && rsize > 1024) {
+		char *indata;
+		if (data) {
+			kernel_buf = 1;
+			indata = data;
+		} else
+			indata = (__force char *)udata;
+		/*
+		 * response header len is 11
+		 * PDU Header(7) + IO Size (4)
+		 */
+		req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0,
+				       11, kernel_buf, "dqd", fid->fid,
+				       offset, rsize);
 	} else {
+		non_zc = 1;
 		req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
-				rsize);
+				    rsize);
 	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -1318,13 +1591,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
+	p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-	if (!req->tc->pbuf_size) {
+	if (non_zc) {
 		if (data) {
 			memmove(data, dataptr, count);
 		} else {
@@ -1350,11 +1623,12 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 							u64 offset, u32 count)
 {
 	int err, rsize;
+	int kernel_buf = 0;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
-				fid->fid, (long long unsigned) offset, count);
+	p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
+				fid->fid, (unsigned long long) offset, count);
 	err = 0;
 	clnt = fid->clnt;
 
@@ -1365,19 +1639,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 	if (count < rsize)
 		rsize = count;
 
-	/* Don't bother zerocopy form small IO (< 1024) */
-	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
-				P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
-				rsize, data, udata);
+	/* Don't bother zerocopy for small IO (< 1024) */
+	if (clnt->trans_mod->zc_request && rsize > 1024) {
+		char *odata;
+		if (data) {
+			kernel_buf = 1;
+			odata = data;
+		} else
+			odata = (__force char *)udata;
+		req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize,
+				       P9_ZC_HDR_SZ, kernel_buf, "dqd",
+				       fid->fid, offset, rsize);
 	} else {
-
 		if (data)
 			req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
-					offset, rsize, data);
+					    offset, rsize, data);
 		else
 			req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
-					offset, rsize, udata);
+					    offset, rsize, udata);
 	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -1386,11 +1665,11 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
+	p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
 	p9_free_req(clnt, req);
 	return count;
@@ -1410,7 +1689,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 	struct p9_req_t *req;
 	u16 ignored;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
 
 	if (!ret)
 		return ERR_PTR(-ENOMEM);
@@ -1426,12 +1705,12 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P,
 		"<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
 		"<<<    mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
 		"<<<    name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
@@ -1440,7 +1719,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 		(unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
 		ret->atime, ret->mtime, (unsigned long long)ret->length,
 		ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
-		ret->n_uid, ret->n_gid, ret->n_muid);
+		from_kuid(&init_user_ns, ret->n_uid),
+		from_kgid(&init_user_ns, ret->n_gid),
+		from_kuid(&init_user_ns, ret->n_muid));
 
 	p9_free_req(clnt, req);
 	return ret;
@@ -1460,7 +1741,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
 								GFP_KERNEL);
 	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
+	p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
 							fid->fid, request_mask);
 
 	if (!ret)
@@ -1477,12 +1758,12 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P,
 		"<<< RGETATTR st_result_mask=%lld\n"
 		"<<< qid=%x.%llx.%x\n"
 		"<<< st_mode=%8.8x st_nlink=%llu\n"
@@ -1494,8 +1775,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
 		"<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
 		"<<< st_gen=%lld st_data_version=%lld",
 		ret->st_result_mask, ret->qid.type, ret->qid.path,
-		ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid,
-		ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize,
+		ret->qid.version, ret->st_mode, ret->st_nlink,
+		from_kuid(&init_user_ns, ret->st_uid),
+		from_kgid(&init_user_ns, ret->st_gid),
+		ret->st_rdev, ret->st_size, ret->st_blksize,
 		ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,
 		ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,
 		ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
@@ -1548,8 +1831,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 	err = 0;
 	clnt = fid->clnt;
 	wst->size = p9_client_statsize(wst, clnt->proto_version);
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P,
 		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
 		"     mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
 		"     name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
@@ -1558,7 +1841,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 		(unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
 		wst->atime, wst->mtime, (unsigned long long)wst->length,
 		wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
-		wst->n_uid, wst->n_gid, wst->n_muid);
+		from_kuid(&init_user_ns, wst->n_uid),
+		from_kgid(&init_user_ns, wst->n_gid),
+		from_kuid(&init_user_ns, wst->n_muid));
 
 	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst);
 	if (IS_ERR(req)) {
@@ -1566,7 +1851,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
 
 	p9_free_req(clnt, req);
 error:
@@ -1582,12 +1867,14 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
 
 	err = 0;
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P,
 		"    valid=%x mode=%x uid=%d gid=%d size=%lld\n"
 		"    atime_sec=%lld atime_nsec=%lld\n"
 		"    mtime_sec=%lld mtime_nsec=%lld\n",
-		p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid,
+		p9attr->valid, p9attr->mode,
+		from_kuid(&init_user_ns, p9attr->uid),
+		from_kgid(&init_user_ns, p9attr->gid),
 		p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,
 		p9attr->mtime_sec, p9attr->mtime_nsec);
 
@@ -1597,7 +1884,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
 		err = PTR_ERR(req);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
 	p9_free_req(clnt, req);
 error:
 	return err;
@@ -1613,7 +1900,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
 	err = 0;
 	clnt = fid->clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
 
 	req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid);
 	if (IS_ERR(req)) {
@@ -1625,12 +1912,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
 		&sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
 		&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
+	p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
 		"blocks %llu bfree %llu bavail %llu files %llu ffree %llu "
 		"fsid %llu namelen %ld\n",
 		fid->fid, (long unsigned int)sb->type, (long int)sb->bsize,
@@ -1643,7 +1930,8 @@ error:
 }
 EXPORT_SYMBOL(p9_client_statfs);
 
-int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
+int p9_client_rename(struct p9_fid *fid,
+		     struct p9_fid *newdirfid, const char *name)
 {
 	int err;
 	struct p9_req_t *req;
@@ -1652,7 +1940,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
 	err = 0;
 	clnt = fid->clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
+	p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
 			fid->fid, newdirfid->fid, name);
 
 	req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
@@ -1662,7 +1950,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
 
 	p9_free_req(clnt, req);
 error:
@@ -1670,6 +1958,36 @@ error:
 }
 EXPORT_SYMBOL(p9_client_rename);
 
+/*
+ * Issue TRENAMEAT for @old_name under @olddirfid and @new_name under
+ * @newdirfid.  Returns 0, or the error carried by a failed request pointer.
+ */
+int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
+		       struct p9_fid *newdirfid, const char *new_name)
+{
+	int err = 0;
+	struct p9_req_t *req;
+	struct p9_client *clnt = olddirfid->clnt;
+
+	p9_debug(P9_DEBUG_9P,
+		 ">>> TRENAMEAT olddirfid %d old name %s newdirfid %d new name %s\n",
+		 olddirfid->fid, old_name, newdirfid->fid, new_name);
+
+	req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid,
+			    old_name, newdirfid->fid, new_name);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
+		 newdirfid->fid, new_name);
+
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_renameat);
+
 /*
  * An xattrwalk without @attr_name gives the fid for the lisxattr namespace
  */
@@ -1689,7 +2007,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
 		attr_fid = NULL;
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P,
 		">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
 		file_fid->fid, attr_fid->fid, attr_name);
 
@@ -1701,12 +2019,12 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
 	}
 	err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		p9_free_req(clnt, req);
 		goto clunk_fid;
 	}
 	p9_free_req(clnt, req);
-	P9_DPRINTK(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n",
+	p9_debug(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n",
 		attr_fid->fid, *attr_size);
 	return attr_fid;
 clunk_fid:
@@ -1727,7 +2045,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
 	struct p9_req_t *req;
 	struct p9_client *clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P,
+	p9_debug(P9_DEBUG_9P,
 		">>> TXATTRCREATE fid %d name  %s size %lld flag %d\n",
 		fid->fid, name, (long long)attr_size, flags);
 	err = 0;
@@ -1738,7 +2056,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
 		err = PTR_ERR(req);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
 	p9_free_req(clnt, req);
 error:
 	return err;
@@ -1747,13 +2065,13 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
 
 int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 {
-	int err, rsize;
+	int err, rsize, non_zc = 0;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 	char *dataptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
-				fid->fid, (long long unsigned) offset, count);
+	p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
+				fid->fid, (unsigned long long) offset, count);
 
 	err = 0;
 	clnt = fid->clnt;
@@ -1765,13 +2083,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 	if (count < rsize)
 		rsize = count;
 
-	if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
-			P9_TRANS_PREF_PAYLOAD_SEP) {
-		req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
-				offset, rsize, data);
+	/* Don't bother zerocopy for small IO (< 1024) */
+	if (clnt->trans_mod->zc_request && rsize > 1024) {
+		/*
+		 * response header len is 11
+		 * PDU Header(7) + IO Size (4)
+		 */
+		req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0,
+				       11, 1, "dqd", fid->fid, offset, rsize);
 	} else {
+		non_zc = 1;
 		req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
-				offset, rsize);
+				    offset, rsize);
 	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -1780,13 +2103,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
+	p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
 
-	if (!req->tc->pbuf_size && data)
+	if (non_zc)
 		memmove(data, dataptr, count);
 
 	p9_free_req(clnt, req);
@@ -1800,7 +2123,7 @@ error:
 EXPORT_SYMBOL(p9_client_readdir);
 
 int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
-			dev_t rdev, gid_t gid, struct p9_qid *qid)
+			dev_t rdev, kgid_t gid, struct p9_qid *qid)
 {
 	int err;
 	struct p9_client *clnt;
@@ -1808,19 +2131,19 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
 
 	err = 0;
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
+	p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
 		"minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
-	req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode,
+	req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode,
 		MAJOR(rdev), MINOR(rdev), gid);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
+	p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
 				(unsigned long long)qid->path, qid->version);
 
 error:
@@ -1831,7 +2154,7 @@ error:
 EXPORT_SYMBOL(p9_client_mknod_dotl);
 
 int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
-				gid_t gid, struct p9_qid *qid)
+				kgid_t gid, struct p9_qid *qid)
 {
 	int err;
 	struct p9_client *clnt;
@@ -1839,19 +2162,19 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
 
 	err = 0;
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
-		 fid->fid, name, mode, gid);
-	req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode,
+	p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
+		 fid->fid, name, mode, from_kgid(&init_user_ns, gid));
+	req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode,
 		gid);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
+	p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
 				(unsigned long long)qid->path, qid->version);
 
 error:
@@ -1869,7 +2192,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
 
 	err = 0;
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
+	p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
 			"start %lld length %lld proc_id %d client_id %s\n",
 			fid->fid, flock->type, flock->flags, flock->start,
 			flock->length, flock->proc_id, flock->client_id);
@@ -1883,10 +2206,10 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
+	p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
 error:
 	p9_free_req(clnt, req);
 	return err;
@@ -1902,7 +2225,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
 
 	err = 0;
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
+	p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
 		"length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
 		glock->start, glock->length, glock->proc_id, glock->client_id);
 
@@ -1916,10 +2239,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
 			&glock->start, &glock->length, &glock->proc_id,
 			&glock->client_id);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
+	p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
 		"proc_id %d client_id %s\n", glock->type, glock->start,
 		glock->length, glock->proc_id, glock->client_id);
 error:
@@ -1936,7 +2259,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
 
 	err = 0;
 	clnt = fid->clnt;
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
+	p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
 
 	req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);
 	if (IS_ERR(req))
@@ -1944,10 +2267,10 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
 
 	err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
 	if (err) {
-		p9pdu_dump(1, req->rc);
+		trace_9p_protocol_dump(clnt, req->rc);
 		goto error;
 	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
+	p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
 error:
 	p9_free_req(clnt, req);
 	return err;
diff --git a/net/9p/error.c b/net/9p/error.c
index 52518512a93..126fd0dceea 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -27,6 +27,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/list.h>
 #include <linux/jhash.h>
@@ -219,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init);
 int p9_errstr2errno(char *errstr, int len)
 {
 	int errno;
-	struct hlist_node *p;
 	struct errormap *c;
 	int bucket;
 
 	errno = 0;
-	p = NULL;
 	c = NULL;
 	bucket = jhash(errstr, len, 0) % ERRHASHSZ;
-	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
+	hlist_for_each_entry(c, &hash_errmap[bucket], list) {
 		if (c->namelen == len && !memcmp(c->name, errstr, len)) {
 			errno = c->val;
 			break;
@@ -237,8 +237,8 @@ int p9_errstr2errno(char *errstr, int len)
 	if (errno == 0) {
 		/* TODO: if error isn't found, add it dynamically */
 		errstr[len] = 0;
-		printk(KERN_ERR "%s: server reported unknown error %s\n",
-			__func__, errstr);
+		pr_err("%s: server reported unknown error %s\n",
+		       __func__, errstr);
 		errno = ESERVERFAULT;
 	}
 
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 72c39827505..6ab36aea772 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -24,7 +24,11 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
 #include <linux/moduleparam.h>
 #include <net/9p/9p.h>
 #include <linux/fs.h>
@@ -39,6 +43,29 @@ unsigned int p9_debug_level = 0;	/* feature-rific global debug level  */
 EXPORT_SYMBOL(p9_debug_level);
 module_param_named(debug, p9_debug_level, uint, 0);
 MODULE_PARM_DESC(debug, "9P debugging level");
+
+/* Log a 9P debug message when every bit of @level is set in p9_debug_level. */
+void _p9_debug(enum p9_debug_flags level, const char *func,
+		const char *fmt, ...)
+{
+	va_list ap;
+	struct va_format vaf = { .fmt = fmt, .va = &ap };
+	pid_t pid;
+
+	if ((p9_debug_level & level) != level)
+		return;
+
+	va_start(ap, fmt);
+	pid = task_pid_nr(current);
+
+	/* P9_DEBUG_9P gets a zero-padded pid prefix; others are tagged with @func. */
+	if (level == P9_DEBUG_9P)
+		pr_notice("(%8.8d) %pV", pid, &vaf);
+	else
+		pr_notice("-- %s (%d): %pV", func, pid, &vaf);
+	va_end(ap);
+}
+EXPORT_SYMBOL(_p9_debug);
 #endif
 
 /*
@@ -80,14 +107,14 @@ EXPORT_SYMBOL(v9fs_unregister_trans);
  * @name: string identifying transport
  *
  */
-struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name)
+struct p9_trans_module *v9fs_get_trans_by_name(char *s)
 {
 	struct p9_trans_module *t, *found = NULL;
 
 	spin_lock(&v9fs_trans_lock);
 
 	list_for_each_entry(t, &v9fs_trans_list, list)
-		if (strncmp(t->name, name->from, name->to-name->from) == 0 &&
+		if (strcmp(t->name, s) == 0 &&
 		    try_module_get(t->owner)) {
 			found = t;
 			break;
@@ -147,7 +174,7 @@ static int __init init_p9(void)
 	int ret = 0;
 
 	p9_error_init();
-	printk(KERN_INFO "Installing 9P2000 support\n");
+	pr_info("Installing 9P2000 support\n");
 	p9_trans_fd_init();
 
 	return ret;
@@ -160,7 +187,7 @@ static int __init init_p9(void)
 
 static void __exit exit_p9(void)
 {
-	printk(KERN_INFO "Unloading 9P2000 support\n");
+	pr_info("Unloading 9P2000 support\n");
 
 	p9_trans_fd_exit();
 }
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index a873277cb99..ab9127ec5b7 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -37,46 +37,11 @@
 #include <net/9p/client.h>
 #include "protocol.h"
 
+#include <trace/events/9p.h>
+
 static int
 p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 
-#ifdef CONFIG_NET_9P_DEBUG
-void
-p9pdu_dump(int way, struct p9_fcall *pdu)
-{
-	int i, n;
-	u8 *data = pdu->sdata;
-	int datalen = pdu->size;
-	char buf[255];
-	int buflen = 255;
-
-	i = n = 0;
-	if (datalen > (buflen-16))
-		datalen = buflen-16;
-	while (i < datalen) {
-		n += scnprintf(buf + n, buflen - n, "%02x ", data[i]);
-		if (i%4 == 3)
-			n += scnprintf(buf + n, buflen - n, " ");
-		if (i%32 == 31)
-			n += scnprintf(buf + n, buflen - n, "\n");
-
-		i++;
-	}
-	n += scnprintf(buf + n, buflen - n, "\n");
-
-	if (way)
-		P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf);
-	else
-		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
-}
-#else
-void
-p9pdu_dump(int way, struct p9_fcall *pdu)
-{
-}
-#endif
-EXPORT_SYMBOL(p9pdu_dump);
-
 void p9stat_free(struct p9_wstat *stbuf)
 {
 	kfree(stbuf->name);
@@ -87,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf)
 }
 EXPORT_SYMBOL(p9stat_free);
 
-static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
 {
 	size_t len = min(pdu->size - pdu->offset, size);
 	memcpy(data, &pdu->sdata[pdu->offset], len);
@@ -114,32 +79,14 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 	return size - len;
 }
 
-static size_t
-pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
-		size_t size)
-{
-	BUG_ON(pdu->size > P9_IOHDRSZ);
-	pdu->pubuf = (char __user *)udata;
-	pdu->pkbuf = (char *)kdata;
-	pdu->pbuf_size = size;
-	return 0;
-}
-
-static size_t
-pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
-{
-	BUG_ON(pdu->size > P9_READDIRHDRSZ);
-	pdu->pkbuf = (char *)kdata;
-	pdu->pbuf_size = size;
-	return 0;
-}
-
 /*
 	b - int8_t
 	w - int16_t
 	d - int32_t
 	q - int64_t
 	s - string
+	u - numeric uid
+	g - numeric gid
 	S - stat
 	Q - qid
 	D - data blob (int32_t size followed by void *, results are not freed)
@@ -218,6 +165,26 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					(*sptr)[len] = 0;
 			}
 			break;
+		case 'u': {
+				kuid_t *uid = va_arg(ap, kuid_t *);
+				__le32 le_val;
+				if (pdu_read(pdu, &le_val, sizeof(le_val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*uid = make_kuid(&init_user_ns,
+						 le32_to_cpu(le_val));
+			} break;
+		case 'g': {
+				kgid_t *gid = va_arg(ap, kgid_t *);
+				__le32 le_val;
+				if (pdu_read(pdu, &le_val, sizeof(le_val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*gid = make_kgid(&init_user_ns,
+						 le32_to_cpu(le_val));
+			} break;
 		case 'Q':{
 				struct p9_qid *qid =
 				    va_arg(ap, struct p9_qid *);
@@ -232,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				    va_arg(ap, struct p9_wstat *);
 
 				memset(stbuf, 0, sizeof(struct p9_wstat));
-				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
-									-1;
+				stbuf->n_uid = stbuf->n_muid = INVALID_UID;
+				stbuf->n_gid = INVALID_GID;
+
 				errcode =
 				    p9pdu_readf(pdu, proto_version,
-						"wwdQdddqssss?sddd",
+						"wwdQdddqssss?sugu",
 						&stbuf->size, &stbuf->type,
 						&stbuf->dev, &stbuf->qid,
 						&stbuf->mode, &stbuf->atime,
@@ -349,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				memset(stbuf, 0, sizeof(struct p9_stat_dotl));
 				errcode =
 				    p9pdu_readf(pdu, proto_version,
-					"qQdddqqqqqqqqqqqqqqq",
+					"qQdugqqqqqqqqqqqqqqq",
 					&stbuf->st_result_mask,
 					&stbuf->qid,
 					&stbuf->st_mode,
@@ -423,7 +391,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				const char *sptr = va_arg(ap, const char *);
 				uint16_t len = 0;
 				if (sptr)
-					len = min_t(uint16_t, strlen(sptr),
+					len = min_t(size_t, strlen(sptr),
 								USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
@@ -432,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					errcode = -EFAULT;
 			}
 			break;
+		case 'u': {
+				kuid_t uid = va_arg(ap, kuid_t);
+				__le32 val = cpu_to_le32(
+						from_kuid(&init_user_ns, uid));
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			} break;
+		case 'g': {
+				kgid_t gid = va_arg(ap, kgid_t);
+				__le32 val = cpu_to_le32(
+						from_kgid(&init_user_ns, gid));
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			} break;
 		case 'Q':{
 				const struct p9_qid *qid =
 				    va_arg(ap, const struct p9_qid *);
@@ -445,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				    va_arg(ap, const struct p9_wstat *);
 				errcode =
 				    p9pdu_writef(pdu, proto_version,
-						 "wwdQdddqssss?sddd",
+						 "wwdQdddqssss?sugu",
 						 stbuf->size, stbuf->type,
 						 stbuf->dev, &stbuf->qid,
 						 stbuf->mode, stbuf->atime,
@@ -465,26 +447,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					errcode = -EFAULT;
 			}
 			break;
-		case 'E':{
-				 int32_t cnt = va_arg(ap, int32_t);
-				 const char *k = va_arg(ap, const void *);
-				 const char __user *u = va_arg(ap,
-							const void __user *);
-				 errcode = p9pdu_writef(pdu, proto_version, "d",
-						 cnt);
-				 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
-					errcode = -EFAULT;
-			 }
-			 break;
-		case 'F':{
-				 int32_t cnt = va_arg(ap, int32_t);
-				 const char *k = va_arg(ap, const void *);
-				 errcode = p9pdu_writef(pdu, proto_version, "d",
-						 cnt);
-				 if (!errcode && pdu_write_readdir(pdu, k, cnt))
-					errcode = -EFAULT;
-			 }
-			 break;
 		case 'U':{
 				int32_t count = va_arg(ap, int32_t);
 				const char __user *udata =
@@ -543,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 							struct p9_iattr_dotl *);
 
 				errcode = p9pdu_writef(pdu, proto_version,
-							"ddddqqqqq",
+							"ddugqqqqq",
 							p9attr->valid,
 							p9attr->mode,
 							p9attr->uid,
@@ -597,7 +559,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
 	return ret;
 }
 
-int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
+int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st)
 {
 	struct p9_fcall fake_pdu;
 	int ret;
@@ -607,10 +569,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
 	fake_pdu.sdata = buf;
 	fake_pdu.offset = 0;
 
-	ret = p9pdu_readf(&fake_pdu, proto_version, "S", st);
+	ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st);
 	if (ret) {
-		P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
-		p9pdu_dump(1, &fake_pdu);
+		p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
+		trace_9p_protocol_dump(clnt, &fake_pdu);
 	}
 
 	return ret;
@@ -623,7 +585,7 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
 	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
 }
 
-int p9pdu_finalize(struct p9_fcall *pdu)
+int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu)
 {
 	int size = pdu->size;
 	int err;
@@ -632,13 +594,9 @@ int p9pdu_finalize(struct p9_fcall *pdu)
 	err = p9pdu_writef(pdu, 0, "d", size);
 	pdu->size = size;
 
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
-		p9pdu_dump(0, pdu);
-#endif
-
-	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
-							pdu->id, pdu->tag);
+	trace_9p_protocol_dump(clnt, pdu);
+	p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n",
+		 pdu->size, pdu->id, pdu->tag);
 
 	return err;
 }
@@ -647,14 +605,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
 {
 	pdu->offset = 0;
 	pdu->size = 0;
-	pdu->private = NULL;
-	pdu->pubuf = NULL;
-	pdu->pkbuf = NULL;
-	pdu->pbuf_size = 0;
 }
 
-int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
-						int proto_version)
+int p9dirent_read(struct p9_client *clnt, char *buf, int len,
+		  struct p9_dirent *dirent)
 {
 	struct p9_fcall fake_pdu;
 	int ret;
@@ -665,11 +619,11 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
 	fake_pdu.sdata = buf;
 	fake_pdu.offset = 0;
 
-	ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid,
-			&dirent->d_off, &dirent->d_type, &nameptr);
+	ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid,
+			  &dirent->d_off, &dirent->d_type, &nameptr);
 	if (ret) {
-		P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
-		p9pdu_dump(1, &fake_pdu);
+		p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
+		trace_9p_protocol_dump(clnt, &fake_pdu);
 		goto out;
 	}
 
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 2431c0f38d5..2cc525fa49f 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 								va_list ap);
 int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
-int p9pdu_finalize(struct p9_fcall *pdu);
-void p9pdu_dump(int, struct p9_fcall *);
+int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu);
 void p9pdu_reset(struct p9_fcall *pdu);
+size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size);
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 9a70ebdec56..2ee3879161b 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -21,30 +21,25 @@
 
 /**
  *  p9_release_req_pages - Release pages after the transaction.
- *  @*private: PDU's private page of struct trans_rpage_info
  */
-void
-p9_release_req_pages(struct trans_rpage_info *rpinfo)
+void p9_release_pages(struct page **pages, int nr_pages)
 {
-	int i = 0;
+	int i;
 
-	while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
-		put_page(rpinfo->rp_data[i]);
-		i++;
-	}
+	for (i = 0; i < nr_pages; i++)
+		if (pages[i])
+			put_page(pages[i]);
 }
-EXPORT_SYMBOL(p9_release_req_pages);
+EXPORT_SYMBOL(p9_release_pages);
 
 /**
  * p9_nr_pages - Return number of pages needed to accommodate the payload.
  */
-int
-p9_nr_pages(struct p9_req_t *req)
+int p9_nr_pages(char *data, int len)
 {
 	unsigned long start_page, end_page;
-	start_page =  (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
-	end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
-			PAGE_SIZE - 1) >> PAGE_SHIFT;
+	start_page =  (unsigned long)data >> PAGE_SHIFT;
+	end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	return end_page - start_page;
 }
 EXPORT_SYMBOL(p9_nr_pages);
@@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages);
  * @nr_pages: number of pages to accommodate the payload
  * @rw: Indicates if the pages are for read or write.
  */
-int
-p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
-		int nr_pages, u8 rw)
-{
-	uint32_t first_page_bytes = 0;
-	int32_t pdata_mapped_pages;
-	struct trans_rpage_info  *rpinfo;
-
-	*pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
 
-	if (*pdata_off)
-		first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
-				       req->tc->pbuf_size);
+int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write)
+{
+	int nr_mapped_pages;
 
-	rpinfo = req->tc->private;
-	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
-			nr_pages, rw, &rpinfo->rp_data[0]);
-	if (pdata_mapped_pages <= 0)
-		return pdata_mapped_pages;
+	nr_mapped_pages = get_user_pages_fast((unsigned long)data,
+					      *nr_pages, write, pages);
+	if (nr_mapped_pages <= 0)
+		return nr_mapped_pages;
 
-	rpinfo->rp_nr_pages = pdata_mapped_pages;
-	if (*pdata_off) {
-		*pdata_len = first_page_bytes;
-		*pdata_len += min((req->tc->pbuf_size - *pdata_len),
-				((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
-	} else {
-		*pdata_len = min(req->tc->pbuf_size,
-				(size_t)pdata_mapped_pages << PAGE_SHIFT);
-	}
+	*nr_pages = nr_mapped_pages;
 	return 0;
 }
 EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index 76309223bb0..173bb550a9e 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -12,21 +12,6 @@
  *
  */
 
-/* TRUE if it is user context */
-#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
-
-/**
- * struct trans_rpage_info - To store mapped page information in PDU.
- * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
- * @rp_nr_pages: Number of mapped pages
- * @rp_data: Array of page pointers
- */
-struct trans_rpage_info {
-	u8 rp_alloc;
-	int rp_nr_pages;
-	struct page *rp_data[0];
-};
-
-void p9_release_req_pages(struct trans_rpage_info *);
-int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
-int p9_nr_pages(struct p9_req_t *);
+void p9_release_pages(struct page **, int);
+int p9_payload_gup(char *, int *, struct page **, int);
+int p9_nr_pages(char *, int);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index fdfdb5747f6..80d08f6664c 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -25,6 +25,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
@@ -61,20 +63,7 @@ struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
-};
-
-/**
- * struct p9_trans_fd - transport state
- * @rd: reference to file to read from
- * @wr: reference of file to write to
- * @conn: connection state reference
- *
- */
-
-struct p9_trans_fd {
-	struct file *rd;
-	struct file *wr;
-	struct p9_conn *conn;
+	int privport;
 };
 
 /*
@@ -85,12 +74,15 @@ struct p9_trans_fd {
 enum {
 	/* Options that take integer arguments */
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
+	/* Options that take no arguments */
+	Opt_privport,
 };
 
 static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
+	{Opt_privport, "privport"},
 	{Opt_err, NULL},
 };
 
@@ -153,12 +145,29 @@ struct p9_conn {
 	unsigned long wsched;
 };
 
+/**
+ * struct p9_trans_fd - transport state
+ * @rd: reference to file to read from
+ * @wr: reference to file to write to
+ * @conn: connection state, embedded in this structure (not a pointer)
+ *
+ */
+
+struct p9_trans_fd {
+	struct file *rd;
+	struct file *wr;
+	struct p9_conn conn;
+};
+
 static void p9_poll_workfn(struct work_struct *work);
 
 static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
+static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
+static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
+
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
 	unsigned long flags;
@@ -191,7 +200,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	unsigned long flags;
 	LIST_HEAD(cancel_list);
 
-	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
+	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
 
 	spin_lock_irqsave(&m->client->lock, flags);
 
@@ -203,23 +212,19 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	m->err = err;
 
 	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-		req->status = REQ_STATUS_ERROR;
-		if (!req->t_err)
-			req->t_err = err;
 		list_move(&req->req_list, &cancel_list);
 	}
 	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
-		req->status = REQ_STATUS_ERROR;
-		if (!req->t_err)
-			req->t_err = err;
 		list_move(&req->req_list, &cancel_list);
 	}
 	spin_unlock_irqrestore(&m->client->lock, flags);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
 		list_del(&req->req_list);
-		p9_client_cb(m->client, req);
+		if (!req->t_err)
+			req->t_err = err;
+		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
 	}
 }
 
@@ -235,10 +240,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 	if (!ts)
 		return -EREMOTEIO;
 
-	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+	if (!ts->rd->f_op->poll)
 		return -EIO;
 
-	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+	if (!ts->wr->f_op->poll)
 		return -EIO;
 
 	ret = ts->rd->f_op->poll(ts->rd, pt);
@@ -275,7 +280,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
 		return -EREMOTEIO;
 
 	if (!(ts->rd->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
+		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");
 
 	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
@@ -293,13 +298,14 @@ static void p9_read_work(struct work_struct *work)
 {
 	int n, err;
 	struct p9_conn *m;
+	int status = REQ_STATUS_ERROR;
 
 	m = container_of(work, struct p9_conn, rq);
 
 	if (m->err < 0)
 		return;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+	p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
 
 	if (!m->rbuf) {
 		m->rbuf = m->tmp_buf;
@@ -308,14 +314,13 @@ static void p9_read_work(struct work_struct *work)
 	}
 
 	clear_bit(Rpending, &m->wsched);
-	P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m,
-					m->rpos, m->rsize, m->rsize-m->rpos);
+	p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
+		 m, m->rpos, m->rsize, m->rsize-m->rpos);
 	err = p9_fd_read(m->client, m->rbuf + m->rpos,
 						m->rsize - m->rpos);
-	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
+	p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
 	if (err == -EAGAIN) {
-		clear_bit(Rworksched, &m->wsched);
-		return;
+		goto end_clear;
 	}
 
 	if (err <= 0)
@@ -325,25 +330,24 @@ static void p9_read_work(struct work_struct *work)
 
 	if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
 		u16 tag;
-		P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n");
+		p9_debug(P9_DEBUG_TRANS, "got new header\n");
 
 		n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
 		if (n >= m->client->msize) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				"requested packet size too big: %d\n", n);
+			p9_debug(P9_DEBUG_ERROR,
+				 "requested packet size too big: %d\n", n);
 			err = -EIO;
 			goto error;
 		}
 
 		tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
-		P9_DPRINTK(P9_DEBUG_TRANS,
-			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+		p9_debug(P9_DEBUG_TRANS,
+			 "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
 
 		m->req = p9_tag_lookup(m->client, tag);
-		if (!m->req || (m->req->status != REQ_STATUS_SENT &&
-					m->req->status != REQ_STATUS_FLSH)) {
-			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
-								 tag);
+		if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
+			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
+				 tag);
 			err = -EIO;
 			goto error;
 		}
@@ -364,32 +368,33 @@ static void p9_read_work(struct work_struct *work)
 
 	/* not an else because some packets (like clunk) have no payload */
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
-		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
+		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
 		spin_lock(&m->client->lock);
 		if (m->req->status != REQ_STATUS_ERROR)
-			m->req->status = REQ_STATUS_RCVD;
+			status = REQ_STATUS_RCVD;
 		list_del(&m->req->req_list);
 		spin_unlock(&m->client->lock);
-		p9_client_cb(m->client, m->req);
+		p9_client_cb(m->client, m->req, status);
 		m->rbuf = NULL;
 		m->rpos = 0;
 		m->rsize = 0;
 		m->req = NULL;
 	}
 
+end_clear:
+	clear_bit(Rworksched, &m->wsched);
+
 	if (!list_empty(&m->req_list)) {
 		if (test_and_clear_bit(Rpending, &m->wsched))
 			n = POLLIN;
 		else
 			n = p9_fd_poll(m->client, NULL);
 
-		if (n & POLLIN) {
-			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
+		if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
+			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
 			schedule_work(&m->rq);
-		} else
-			clear_bit(Rworksched, &m->wsched);
-	} else
-		clear_bit(Rworksched, &m->wsched);
+		}
+	}
 
 	return;
 error:
@@ -418,7 +423,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 		return -EREMOTEIO;
 
 	if (!(ts->wr->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
+		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");
 
 	oldfs = get_fs();
 	set_fs(get_ds());
@@ -451,16 +456,17 @@ static void p9_write_work(struct work_struct *work)
 	}
 
 	if (!m->wsize) {
+		spin_lock(&m->client->lock);
 		if (list_empty(&m->unsent_req_list)) {
 			clear_bit(Wworksched, &m->wsched);
+			spin_unlock(&m->client->lock);
 			return;
 		}
 
-		spin_lock(&m->client->lock);
 		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
 		req->status = REQ_STATUS_SENT;
-		P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req);
+		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
 		list_move_tail(&req->req_list, &m->req_list);
 
 		m->wbuf = req->tc->sdata;
@@ -469,15 +475,14 @@ static void p9_write_work(struct work_struct *work)
 		spin_unlock(&m->client->lock);
 	}
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos,
-								m->wsize);
+	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
+		 m, m->wpos, m->wsize);
 	clear_bit(Wpending, &m->wsched);
 	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
-	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
-	if (err == -EAGAIN) {
-		clear_bit(Wworksched, &m->wsched);
-		return;
-	}
+	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
+	if (err == -EAGAIN)
+		goto end_clear;
+
 
 	if (err < 0)
 		goto error;
@@ -490,19 +495,21 @@ static void p9_write_work(struct work_struct *work)
 	if (m->wpos == m->wsize)
 		m->wpos = m->wsize = 0;
 
-	if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
+end_clear:
+	clear_bit(Wworksched, &m->wsched);
+
+	if (m->wsize || !list_empty(&m->unsent_req_list)) {
 		if (test_and_clear_bit(Wpending, &m->wsched))
 			n = POLLOUT;
 		else
 			n = p9_fd_poll(m->client, NULL);
 
-		if (n & POLLOUT) {
-			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
+		if ((n & POLLOUT) &&
+		   !test_and_set_bit(Wworksched, &m->wsched)) {
+			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			schedule_work(&m->wq);
-		} else
-			clear_bit(Wworksched, &m->wsched);
-	} else
-		clear_bit(Wworksched, &m->wsched);
+		}
+	}
 
 	return;
 
@@ -511,7 +518,7 @@ error:
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key)
 {
 	struct p9_poll_wait *pwait =
 		container_of(wait, struct p9_poll_wait, wait);
@@ -551,7 +558,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 	}
 
 	if (!pwait) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
+		p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");
 		return;
 	}
 
@@ -562,22 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 }
 
 /**
- * p9_conn_create - allocate and initialize the per-session mux data
+ * p9_conn_create - initialize the per-session mux data
  * @client: client instance
  *
  * Note: Creates the polling task if this is the first session.
  */
 
-static struct p9_conn *p9_conn_create(struct p9_client *client)
+static void p9_conn_create(struct p9_client *client)
 {
 	int n;
-	struct p9_conn *m;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = &ts->conn;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client,
-								client->msize);
-	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
-	if (!m)
-		return ERR_PTR(-ENOMEM);
+	p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);
 
 	INIT_LIST_HEAD(&m->mux_list);
 	m->client = client;
@@ -591,16 +595,14 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 
 	n = p9_fd_poll(client, &m->pt);
 	if (n & POLLIN) {
-		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
+		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		set_bit(Rpending, &m->wsched);
 	}
 
 	if (n & POLLOUT) {
-		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
+		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
 		set_bit(Wpending, &m->wsched);
 	}
-
-	return m;
 }
 
 /**
@@ -618,7 +620,7 @@ static void p9_poll_mux(struct p9_conn *m)
 
 	n = p9_fd_poll(m->client, NULL);
 	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-		P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
+		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
 		if (n >= 0)
 			n = -ECONNRESET;
 		p9_conn_cancel(m, n);
@@ -626,19 +628,19 @@ static void p9_poll_mux(struct p9_conn *m)
 
 	if (n & POLLIN) {
 		set_bit(Rpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
+		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		if (!test_and_set_bit(Rworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
+			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
 			schedule_work(&m->rq);
 		}
 	}
 
 	if (n & POLLOUT) {
 		set_bit(Wpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
+		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
 		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
 		    !test_and_set_bit(Wworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
+			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			schedule_work(&m->wq);
 		}
 	}
@@ -659,10 +661,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int n;
 	struct p9_trans_fd *ts = client->trans;
-	struct p9_conn *m = ts->conn;
+	struct p9_conn *m = &ts->conn;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m,
-						current, req->tc, req->tc->id);
+	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
+		 m, current, req->tc, req->tc->id);
 	if (m->err < 0)
 		return m->err;
 
@@ -686,7 +688,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 {
 	int ret = 1;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
 
 	spin_lock(&client->lock);
 
@@ -694,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
 		ret = 0;
-	} else if (req->status == REQ_STATUS_SENT)
-		req->status = REQ_STATUS_FLSH;
-
+	}
 	spin_unlock(&client->lock);
 
 	return ret;
 }
 
+static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+
+	/* No response has been received for this request, so unlink it.
+	 * NOTE(review): confirm req is always still on a list here.
+	 */
+	spin_lock(&client->lock);
+	list_del(&req->req_list);
+	spin_unlock(&client->lock);
+
+	return 0;
+}
+
 /**
  * parse_opts - parse mount options into p9_fd_opts structure
  * @params: options string passed from mount
@@ -726,8 +740,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 
 	tmp_options = kstrdup(params, GFP_KERNEL);
 	if (!tmp_options) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"failed to allocate copy of option string\n");
+		p9_debug(P9_DEBUG_ERROR,
+			 "failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
 	options = tmp_options;
@@ -738,11 +752,11 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token != Opt_err) {
+		if ((token != Opt_err) && (token != Opt_privport)) {
 			r = match_int(&args[0], &option);
 			if (r < 0) {
-				P9_DPRINTK(P9_DEBUG_ERROR,
-				"integer field, but no integer?\n");
+				p9_debug(P9_DEBUG_ERROR,
+					 "integer field, but no integer?\n");
 				continue;
 			}
 		}
@@ -756,6 +770,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		case Opt_wfdno:
 			opts->wfd = option;
 			break;
+		case Opt_privport:
+			opts->privport = 1;
+			break;
 		default:
 			continue;
 		}
@@ -767,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 
 static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
 {
-	struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd),
+	struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
 					   GFP_KERNEL);
 	if (!ts)
 		return -ENOMEM;
@@ -792,53 +809,43 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
 static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 {
 	struct p9_trans_fd *p;
-	int ret, fd;
+	struct file *file;
 
-	p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
+	p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
 	csocket->sk->sk_allocation = GFP_NOIO;
-	fd = sock_map_fd(csocket, 0);
-	if (fd < 0) {
-		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n");
+	file = sock_alloc_file(csocket, 0, NULL);
+	if (IS_ERR(file)) {
+		pr_err("%s (%d): failed to map fd\n",
+		       __func__, task_pid_nr(current));
 		sock_release(csocket);
 		kfree(p);
-		return fd;
+		return PTR_ERR(file);
 	}
 
-	get_file(csocket->file);
-	get_file(csocket->file);
-	p->wr = p->rd = csocket->file;
+	get_file(file);
+	p->wr = p->rd = file;
 	client->trans = p;
 	client->status = Connected;
 
-	sys_close(fd);	/* still racy */
-
 	p->rd->f_flags |= O_NONBLOCK;
 
-	p->conn = p9_conn_create(client);
-	if (IS_ERR(p->conn)) {
-		ret = PTR_ERR(p->conn);
-		p->conn = NULL;
-		kfree(p);
-		sockfd_put(csocket);
-		sockfd_put(csocket);
-		return ret;
-	}
+	p9_conn_create(client);
 	return 0;
 }
 
 /**
- * p9_mux_destroy - cancels all pending requests and frees mux resources
+ * p9_conn_destroy - cancels all pending requests of mux
  * @m: mux to destroy
  *
  */
 
 static void p9_conn_destroy(struct p9_conn *m)
 {
-	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m,
-		m->mux_list.prev, m->mux_list.next);
+	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n",
+		 m, m->mux_list.prev, m->mux_list.next);
 
 	p9_mux_poll_stop(m);
 	cancel_work_sync(&m->rq);
@@ -847,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m)
 	p9_conn_cancel(m, -ECONNRESET);
 
 	m->client = NULL;
-	kfree(m);
 }
 
 /**
@@ -869,7 +875,7 @@ static void p9_fd_close(struct p9_client *client)
 
 	client->status = Disconnected;
 
-	p9_conn_destroy(ts->conn);
+	p9_conn_destroy(&ts->conn);
 
 	if (ts->rd)
 		fput(ts->rd);
@@ -896,6 +902,24 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
+static int p9_bind_privport(struct socket *sock)
+{
+	struct sockaddr_in cl;
+	int port, err = -EINVAL;	/* result if the loop never runs */
+
+	memset(&cl, 0, sizeof(cl));
+	cl.sin_family = AF_INET;
+	cl.sin_addr.s_addr = INADDR_ANY;
+	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
+		cl.sin_port = htons((ushort)port);
+		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
+		if (err != -EADDRINUSE)	/* keep trying only while ports are busy */
+			break;
+	}
+	return err;	/* kernel_bind() result for the last port tried */
+}
+
+
 static int
 p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
@@ -919,17 +943,27 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 	err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_INET,
 			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
 	if (err) {
-		P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n");
+		pr_err("%s (%d): problem creating socket\n",
+		       __func__, task_pid_nr(current));
 		return err;
 	}
 
+	if (opts.privport) {
+		err = p9_bind_privport(csocket);
+		if (err < 0) {
+			pr_err("%s (%d): problem binding to privport\n",
+			       __func__, task_pid_nr(current));
+			sock_release(csocket);
+			return err;
+		}
+	}
+
 	err = csocket->ops->connect(csocket,
 				    (struct sockaddr *)&sin_server,
 				    sizeof(struct sockaddr_in), 0);
 	if (err < 0) {
-		P9_EPRINTK(KERN_ERR,
-			"p9_trans_tcp: problem connecting socket to %s\n",
-			addr);
+		pr_err("%s (%d): problem connecting socket to %s\n",
+		       __func__, task_pid_nr(current), addr);
 		sock_release(csocket);
 		return err;
 	}
@@ -947,8 +981,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 	csocket = NULL;
 
 	if (strlen(addr) >= UNIX_PATH_MAX) {
-		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
-			addr);
+		pr_err("%s (%d): address too long: %s\n",
+		       __func__, task_pid_nr(current), addr);
 		return -ENAMETOOLONG;
 	}
 
@@ -957,15 +991,16 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 	err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_UNIX,
 			    SOCK_STREAM, 0, &csocket, 1);
 	if (err < 0) {
-		P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n");
+		pr_err("%s (%d): problem creating socket\n",
+		       __func__, task_pid_nr(current));
+
 		return err;
 	}
 	err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
 			sizeof(struct sockaddr_un) - 1, 0);
 	if (err < 0) {
-		P9_EPRINTK(KERN_ERR,
-			"p9_trans_unix: problem connecting socket: %s: %d\n",
-			addr, err);
+		pr_err("%s (%d): problem connecting socket: %s: %d\n",
+		       __func__, task_pid_nr(current), addr, err);
 		sock_release(csocket);
 		return err;
 	}
@@ -983,7 +1018,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 	parse_opts(args, &opts);
 
 	if (opts.rfd == ~0 || opts.wfd == ~0) {
-		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+		pr_err("Insufficient options for proto=fd\n");
 		return -ENOPROTOOPT;
 	}
 
@@ -992,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 		return err;
 
 	p = (struct p9_trans_fd *) client->trans;
-	p->conn = p9_conn_create(client);
-	if (IS_ERR(p->conn)) {
-		err = PTR_ERR(p->conn);
-		p->conn = NULL;
-		fput(p->rd);
-		fput(p->wr);
-		return err;
-	}
+	p9_conn_create(client);
 
 	return 0;
 }
@@ -1007,11 +1035,12 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 static struct p9_trans_module p9_tcp_trans = {
 	.name = "tcp",
 	.maxsize = MAX_SOCK_BUF,
-	.def = 1,
+	.def = 0,
 	.create = p9_fd_create_tcp,
 	.close = p9_fd_close,
 	.request = p9_fd_request,
 	.cancel = p9_fd_cancel,
+	.cancelled = p9_fd_cancelled,
 	.owner = THIS_MODULE,
 };
 
@@ -1023,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = {
 	.close = p9_fd_close,
 	.request = p9_fd_request,
 	.cancel = p9_fd_cancel,
+	.cancelled = p9_fd_cancelled,
 	.owner = THIS_MODULE,
 };
 
@@ -1034,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = {
 	.close = p9_fd_close,
 	.request = p9_fd_request,
 	.cancel = p9_fd_cancel,
+	.cancelled = p9_fd_cancelled,
 	.owner = THIS_MODULE,
 };
 
@@ -1050,7 +1081,7 @@ static void p9_poll_workfn(struct work_struct *work)
 {
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
+	p9_debug(P9_DEBUG_TRANS, "start %p\n", current);
 
 	spin_lock_irqsave(&p9_poll_lock, flags);
 	while (!list_empty(&p9_poll_pending_list)) {
@@ -1066,7 +1097,7 @@ static void p9_poll_workfn(struct work_struct *work)
 	}
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
+	p9_debug(P9_DEBUG_TRANS, "finish\n");
 }
 
 int p9_trans_fd_init(void)
@@ -1080,7 +1111,7 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
-	flush_work_sync(&p9_poll_work);
+	flush_work(&p9_poll_work);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 159c50f1c6b..14ad43b5cf8 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -26,6 +26,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
@@ -55,9 +57,7 @@
 #define P9_RDMA_IRD		0
 #define P9_RDMA_ORD		0
 #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
-#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
-						 * safely advertise a maxsize
-						 * of 64k */
+#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
@@ -73,7 +73,9 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ *		See rdma_request()
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -98,7 +100,8 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -178,8 +181,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 
 	tmp_options = kstrdup(params, GFP_KERNEL);
 	if (!tmp_options) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-			   "failed to allocate copy of option string\n");
+		p9_debug(P9_DEBUG_ERROR,
+			 "failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
 	options = tmp_options;
@@ -190,10 +193,12 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
+		if (token == Opt_err)
+			continue;
 		r = match_int(&args[0], &option);
 		if (r < 0) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "integer field, but no integer?\n");
+			p9_debug(P9_DEBUG_ERROR,
+				 "integer field, but no integer?\n");
 			continue;
 		}
 		switch (token) {
@@ -294,15 +299,20 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this request.
+	 */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
-	req->status = REQ_STATUS_RCVD;
-	p9_client_cb(client, req);
+	p9_client_cb(client, req, REQ_STATUS_RCVD);
 
 	return;
 
  err_out:
-	P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n",
-		   req, err, status);
+	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status);
 	rdma->state = P9_RDMA_FLUSHING;
 	client->status = Disconnected;
 }
@@ -318,8 +328,8 @@ handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
 
 static void qp_event_handler(struct ib_event *event, void *context)
 {
-	P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event,
-								context);
+	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
+		 event->event, context);
 }
 
 static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
@@ -335,8 +345,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 		switch (c->wc_op) {
 		case IB_WC_RECV:
-			atomic_dec(&rdma->rq_count);
 			handle_recv(client, rdma, c, wc.status, wc.byte_len);
+			up(&rdma->rq_sem);
 			break;
 
 		case IB_WC_SEND:
@@ -345,8 +355,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 			break;
 
 		default:
-			printk(KERN_ERR "9prdma: unexpected completion type, "
-			       "c->wc_op=%d, wc.opcode=%d, status=%d\n",
+			pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
 			       c->wc_op, wc.opcode, wc.status);
 			break;
 		}
@@ -356,7 +365,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 static void cq_event_handler(struct ib_event *e, void *v)
 {
-	P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
+	p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
 }
 
 static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
@@ -407,7 +416,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
 	return ib_post_recv(rdma->qp, &wr, &bad_wr);
 
  error:
-	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
+	p9_debug(P9_DEBUG_ERROR, "EIO\n");
 	return -EIO;
 }
 
@@ -421,32 +430,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here,
+	 * see if we are this `next request' and need to absorb an excess rc.
+	 * If yes, then drop and free our own, and do not post_recv().
+	 **/
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+			/* Got one ! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
+		}
+	}
+
 	/* Allocate an fcall for the reply */
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
-		goto err_close;
-	}
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one.  Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_NOFS);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
-		}
+		goto recv_error;
 	}
 	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		err = -ENOMEM;
-		goto err_free2;
-	}
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
@@ -455,29 +465,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err)
-			goto err_free1;
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
 
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto recv_error;
+	}
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free1;
+		goto send_error;
 	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -491,22 +507,38 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	/* Mark request as `sent' *before* we actually send it,
+	 * because doing it after could erase the REQ_STATUS_RCVD
+	 * status in case of a very fast reply.
+	 */
+	req->status = REQ_STATUS_SENT;
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
+	/* Success */
+	return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
+	req->status = REQ_STATUS_ERROR;
 	kfree(c);
-	kfree(rpl_context->rc);
-	kfree(rpl_context);
-	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
- err_free1:
-	kfree(rpl_context->rc);
- err_free2:
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+	/* Ach.
+	 *  We did post_recv(), but not send. We have one post_recv() in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
+	return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+ recv_error:
 	kfree(rpl_context);
- err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
@@ -551,17 +583,30 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
 
-/* its not clear to me we can do anything after send has been posted */
 static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
 {
+	/* Nothing to do here.
+	 * We will take care of it (if we have to) in rdma_cancelled()
+	 */
 	return 1;
 }
 
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+	atomic_inc(&rdma->excess_rc);
+	return 0;
+}
+
 /**
  * trans_create_rdma - Transport method for creating atransport instance
  * @client: client instance
@@ -695,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = {
 	.close = rdma_close,
 	.request = rdma_request,
 	.cancel = rdma_cancel,
+	.cancelled = rdma_cancelled,
 };
 
 /**
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 244e7074218..6940d8fe897 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -26,6 +26,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
@@ -37,6 +39,7 @@
 #include <linux/inet.h>
 #include <linux/idr.h>
 #include <linux/file.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
@@ -84,7 +87,7 @@ struct virtio_chan {
 	/* This is global limit. Since we don't have a global structure,
 	 * will be placing it in each channel.
 	 */
-	int p9_max_pages;
+	unsigned long p9_max_pages;
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 
@@ -145,37 +148,23 @@ static void req_done(struct virtqueue *vq)
 	struct p9_req_t *req;
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
+	p9_debug(P9_DEBUG_TRANS, ": request done\n");
 
 	while (1) {
 		spin_lock_irqsave(&chan->lock, flags);
 		rc = virtqueue_get_buf(chan->vq, &len);
-
 		if (rc == NULL) {
 			spin_unlock_irqrestore(&chan->lock, flags);
 			break;
 		}
-
 		chan->ring_bufs_avail = 1;
 		spin_unlock_irqrestore(&chan->lock, flags);
 		/* Wakeup if anyone waiting for VirtIO ring space. */
 		wake_up(chan->vc_wq);
-		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
-		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
+		p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc);
+		p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
-		if (req->tc->private) {
-			struct trans_rpage_info *rp = req->tc->private;
-			int p = rp->rp_nr_pages;
-			/*Release pages */
-			p9_release_req_pages(rp);
-			atomic_sub(p, &vp_pinned);
-			wake_up(&vp_wq);
-			if (rp->rp_alloc)
-				kfree(rp);
-			req->tc->private = NULL;
-		}
-		req->status = REQ_STATUS_RCVD;
-		p9_client_cb(chan->client, req);
+		p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
 	}
 }
 
@@ -193,9 +182,8 @@ static void req_done(struct virtqueue *vq)
  *
  */
 
-static int
-pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
-								int count)
+static int pack_sg_list(struct scatterlist *sg, int start,
+			int limit, char *data, int count)
 {
 	int s;
 	int index = start;
@@ -204,12 +192,15 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
 		s = rest_of_page(data);
 		if (s > count)
 			s = count;
+		BUG_ON(index > limit);
+		/* Make sure we don't terminate early. */
+		sg_unmark_end(&sg[index]);
 		sg_set_buf(&sg[index++], data, s);
 		count -= s;
 		data += s;
-		BUG_ON(index > limit);
 	}
-
+	if (index-start)
+		sg_mark_end(&sg[index - 1]);
 	return index-start;
 }
 
@@ -224,31 +215,41 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
  * this takes a list of pages.
  * @sg: scatter/gather list to pack into
  * @start: which segment of the sg_list to start at
- * @pdata_off: Offset into the first page
- * @**pdata: a list of pages to add into sg.
+ * @pdata: a list of pages to add into sg.
+ * @nr_pages: number of pages to pack into the scatter/gather list
+ * @data: data to pack into scatter/gather list
  * @count: amount of data to pack into the scatter/gather list
  */
 static int
-pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
-		struct page **pdata, int count)
+pack_sg_list_p(struct scatterlist *sg, int start, int limit,
+	       struct page **pdata, int nr_pages, char *data, int count)
 {
-	int s;
-	int i = 0;
+	int i = 0, s;
+	int data_off;
 	int index = start;
 
-	if (pdata_off) {
-		s = min((int)(PAGE_SIZE - pdata_off), count);
-		sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
+	BUG_ON(nr_pages > (limit - start));
+	/*
+	 * if the first page doesn't start at
+	 * page boundary find the offset
+	 */
+	data_off = offset_in_page(data);
+	while (nr_pages) {
+		s = rest_of_page(data);
+		if (s > count)
+			s = count;
+		/* Make sure we don't terminate early. */
+		sg_unmark_end(&sg[index]);
+		sg_set_page(&sg[index++], pdata[i++], s, data_off);
+		data_off = 0;
+		data += s;
 		count -= s;
+		nr_pages--;
 	}
 
-	while (count) {
-		BUG_ON(index > limit);
-		s = min((int)PAGE_SIZE, count);
-		sg_set_page(&sg[index++], pdata[i++], s, 0);
-		count -= s;
-	}
-	return index-start;
+	if (index-start)
+		sg_mark_end(&sg[index - 1]);
+	return index - start;
 }
 
 /**
@@ -261,143 +262,237 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
 static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int in, out, inp, outp;
-	struct virtio_chan *chan = client->trans;
-	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
+	int err;
+	int in, out, out_sgs, in_sgs;
 	unsigned long flags;
-	size_t pdata_off = 0;
-	struct trans_rpage_info *rpinfo = NULL;
-	int err, pdata_len = 0;
+	struct virtio_chan *chan = client->trans;
+	struct scatterlist *sgs[2];
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
+	p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
 	req->status = REQ_STATUS_SENT;
+req_retry:
+	spin_lock_irqsave(&chan->lock, flags);
 
-	if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
-		int nr_pages = p9_nr_pages(req);
-		int rpinfo_size = sizeof(struct trans_rpage_info) +
-			sizeof(struct page *) * nr_pages;
-
-		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
-			err = wait_event_interruptible(vp_wq,
-				atomic_read(&vp_pinned) < chan->p9_max_pages);
+	out_sgs = in_sgs = 0;
+	/* Handle out VirtIO ring buffers */
+	out = pack_sg_list(chan->sg, 0,
+			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+	if (out)
+		sgs[out_sgs++] = chan->sg;
+
+	in = pack_sg_list(chan->sg, out,
+			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+	if (in)
+		sgs[out_sgs + in_sgs++] = chan->sg + out;
+
+	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
+				GFP_ATOMIC);
+	if (err < 0) {
+		if (err == -ENOSPC) {
+			chan->ring_bufs_avail = 0;
+			spin_unlock_irqrestore(&chan->lock, flags);
+			err = wait_event_interruptible(*chan->vc_wq,
+							chan->ring_bufs_avail);
 			if (err  == -ERESTARTSYS)
 				return err;
-			P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
-		}
 
-		if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
-			/* We can use sdata */
-			req->tc->private = req->tc->sdata + req->tc->size;
-			rpinfo = (struct trans_rpage_info *)req->tc->private;
-			rpinfo->rp_alloc = 0;
+			p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");
+			goto req_retry;
 		} else {
-			req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
-			if (!req->tc->private) {
-				P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
-					"private kmalloc returned NULL");
-				return -ENOMEM;
-			}
-			rpinfo = (struct trans_rpage_info *)req->tc->private;
-			rpinfo->rp_alloc = 1;
+			spin_unlock_irqrestore(&chan->lock, flags);
+			p9_debug(P9_DEBUG_TRANS,
+				 "virtio rpc add_sgs returned failure\n");
+			return -EIO;
 		}
+	}
+	virtqueue_kick(chan->vq);
+	spin_unlock_irqrestore(&chan->lock, flags);
 
-		err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
-				req->tc->id == P9_TREAD ? 1 : 0);
-		if (err < 0) {
-			if (rpinfo->rp_alloc)
-				kfree(rpinfo);
+	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
+	return 0;
+}
+
+static int p9_get_mapped_pages(struct virtio_chan *chan,
+			       struct page **pages, char *data,
+			       int nr_pages, int write, int kern_buf)
+{
+	int err;
+	if (!kern_buf) {
+		/*
+		 * We allow only p9_max_pages pinned. We wait for the
+		 * other zc requests to finish here
+		 */
+		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+			err = wait_event_interruptible(vp_wq,
+			      (atomic_read(&vp_pinned) < chan->p9_max_pages));
+			if (err == -ERESTARTSYS)
+				return err;
+		}
+		err = p9_payload_gup(data, &nr_pages, pages, write);
+		if (err < 0)
 			return err;
-		} else {
-			atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
+		atomic_add(nr_pages, &vp_pinned);
+	} else {
+		/* kernel buffer, no need to pin pages */
+		int s, index = 0;
+		int count = nr_pages;
+		while (nr_pages) {
+			s = rest_of_page(data);
+			if (is_vmalloc_addr(data))
+				pages[index++] = vmalloc_to_page(data);
+			else
+				pages[index++] = kmap_to_page(data);
+			data += s;
+			nr_pages--;
 		}
+		nr_pages = count;
 	}
+	return nr_pages;
+}
 
-req_retry_pinned:
-	spin_lock_irqsave(&chan->lock, flags);
+/**
+ * p9_virtio_zc_request - issue a zero copy request
+ * @client: client instance issuing the request
+ * @req: request to be issued
+ * @uidata: user buffer that should be used for zero copy read
+ * @uodata: user buffer that should be used for zero copy write
+ * @inlen: read buffer size
+ * @outlen: write buffer size
+ * @in_hdr_len: read header size; this is the size of the response protocol data
+ *
+ */
+static int
+p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
+		     char *uidata, char *uodata, int inlen,
+		     int outlen, int in_hdr_len, int kern_buf)
+{
+	int in, out, err, out_sgs, in_sgs;
+	unsigned long flags;
+	int in_nr_pages = 0, out_nr_pages = 0;
+	struct page **in_pages = NULL, **out_pages = NULL;
+	struct virtio_chan *chan = client->trans;
+	struct scatterlist *sgs[4];
 
-	/* Handle out VirtIO ring buffers */
-	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
-			req->tc->size);
-
-	if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
-		/* We have additional write payload buffer to take care */
-		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
-			outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
-					pdata_off, rpinfo->rp_data, pdata_len);
-		} else {
-			char *pbuf;
-			if (req->tc->pubuf)
-				pbuf = (__force char *) req->tc->pubuf;
-			else
-				pbuf = req->tc->pkbuf;
-			outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
-					req->tc->pbuf_size);
+	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
+
+	if (uodata) {
+		out_nr_pages = p9_nr_pages(uodata, outlen);
+		out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
+				    GFP_NOFS);
+		if (!out_pages) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
+						   out_nr_pages, 0, kern_buf);
+		if (out_nr_pages < 0) {
+			err = out_nr_pages;
+			kfree(out_pages);
+			out_pages = NULL;
+			goto err_out;
+		}
+	}
+	if (uidata) {
+		in_nr_pages = p9_nr_pages(uidata, inlen);
+		in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
+				   GFP_NOFS);
+		if (!in_pages) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
+						  in_nr_pages, 1, kern_buf);
+		if (in_nr_pages < 0) {
+			err = in_nr_pages;
+			kfree(in_pages);
+			in_pages = NULL;
+			goto err_out;
 		}
-		out += outp;
 	}
+	req->status = REQ_STATUS_SENT;
+req_retry_pinned:
+	spin_lock_irqsave(&chan->lock, flags);
 
-	/* Handle in VirtIO ring buffers */
-	if (req->tc->pbuf_size &&
-		((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
-		/*
-		 * Take care of additional Read payload.
-		 * 11 is the read/write header = PDU Header(7) + IO Size (4).
-		 * Arrange in such a way that server places header in the
-		 * alloced memory and payload onto the user buffer.
-		 */
-		inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
-		/*
-		 * Running executables in the filesystem may result in
-		 * a read request with kernel buffer as opposed to user buffer.
-		 */
-		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
-			in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
-					pdata_off, rpinfo->rp_data, pdata_len);
-		} else {
-			char *pbuf;
-			if (req->tc->pubuf)
-				pbuf = (__force char *) req->tc->pubuf;
-			else
-				pbuf = req->tc->pkbuf;
+	out_sgs = in_sgs = 0;
 
-			in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
-					pbuf, req->tc->pbuf_size);
-		}
-		in += inp;
-	} else {
-		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
-				client->msize);
+	/* out data */
+	out = pack_sg_list(chan->sg, 0,
+			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+
+	if (out)
+		sgs[out_sgs++] = chan->sg;
+
+	if (out_pages) {
+		sgs[out_sgs++] = chan->sg + out;
+		out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+				      out_pages, out_nr_pages, uodata, outlen);
+	}
+		
+	/*
+	 * Take care of in data
+	 * For example TREAD have 11.
+	 * 11 is the read/write header = PDU Header(7) + IO Size (4).
+	 * Arrange in such a way that server places header in the
+	 * alloced memory and payload onto the user buffer.
+	 */
+	in = pack_sg_list(chan->sg, out,
+			  VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+	if (in)
+		sgs[out_sgs + in_sgs++] = chan->sg + out;
+
+	if (in_pages) {
+		sgs[out_sgs + in_sgs++] = chan->sg + out + in;
+		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+				     in_pages, in_nr_pages, uidata, inlen);
 	}
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
+	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
 			spin_unlock_irqrestore(&chan->lock, flags);
 			err = wait_event_interruptible(*chan->vc_wq,
-							chan->ring_bufs_avail);
+						       chan->ring_bufs_avail);
 			if (err  == -ERESTARTSYS)
-				return err;
+				goto err_out;
 
-			P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
+			p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");
 			goto req_retry_pinned;
 		} else {
 			spin_unlock_irqrestore(&chan->lock, flags);
-			P9_DPRINTK(P9_DEBUG_TRANS,
-					"9p debug: "
-					"virtio rpc add_buf returned failure");
-			if (rpinfo && rpinfo->rp_alloc)
-				kfree(rpinfo);
-			return -EIO;
+			p9_debug(P9_DEBUG_TRANS,
+				 "virtio rpc add_sgs returned failure\n");
+			err = -EIO;
+			goto err_out;
 		}
 	}
-
 	virtqueue_kick(chan->vq);
 	spin_unlock_irqrestore(&chan->lock, flags);
-
-	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
-	return 0;
+	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
+	err = wait_event_interruptible(*req->wq,
+				       req->status >= REQ_STATUS_RCVD);
+	/*
+	 * Non kernel buffers are pinned, unpin them
+	 */
+err_out:
+	if (!kern_buf) {
+		if (in_pages) {
+			p9_release_pages(in_pages, in_nr_pages);
+			atomic_sub(in_nr_pages, &vp_pinned);
+		}
+		if (out_pages) {
+			p9_release_pages(out_pages, out_nr_pages);
+			atomic_sub(out_nr_pages, &vp_pinned);
+		}
+		/* wakeup anybody waiting for slots to pin pages */
+		wake_up(&vp_wq);
+	}
+	kfree(in_pages);
+	kfree(out_pages);
+	return err;
 }
 
 static ssize_t p9_mount_tag_show(struct device *dev,
@@ -431,7 +526,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 
 	chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
 	if (!chan) {
-		printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n");
+		pr_err("Failed to allocate virtio 9P channel\n");
 		err = -ENOMEM;
 		goto fail;
 	}
@@ -451,9 +546,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 
 	chan->inuse = false;
 	if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
-		vdev->config->get(vdev,
-				offsetof(struct virtio_9p_config, tag_len),
-				&tag_len, sizeof(tag_len));
+		virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len);
 	} else {
 		err = -EINVAL;
 		goto out_free_vq;
@@ -463,8 +556,9 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 		err = -ENOMEM;
 		goto out_free_vq;
 	}
-	vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag),
-			tag, tag_len);
+
+	virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag),
+			   tag, tag_len);
 	chan->tag = tag;
 	chan->tag_len = tag_len;
 	err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
@@ -484,6 +578,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
 	mutex_unlock(&virtio_9p_lock);
+
+	/* Let udev rules use the new mount_tag attribute. */
+	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
 	return 0;
 
 out_free_tag:
@@ -532,7 +630,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 	mutex_unlock(&virtio_9p_lock);
 
 	if (!found) {
-		printk(KERN_ERR "9p: no channels available\n");
+		pr_err("no channels available\n");
 		return ret;
 	}
 
@@ -553,13 +651,15 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 {
 	struct virtio_chan *chan = vdev->priv;
 
-	BUG_ON(chan->inuse);
+	if (chan->inuse)
+		p9_virtio_close(chan->client);
 	vdev->config->del_vqs(vdev);
 
 	mutex_lock(&virtio_9p_lock);
 	list_del(&chan->chan_list);
 	mutex_unlock(&virtio_9p_lock);
 	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
 	kfree(chan->tag);
 	kfree(chan->vc_wq);
 	kfree(chan);
@@ -591,10 +691,16 @@ static struct p9_trans_module p9_virtio_trans = {
 	.create = p9_virtio_create,
 	.close = p9_virtio_close,
 	.request = p9_virtio_request,
+	.zc_request = p9_virtio_zc_request,
 	.cancel = p9_virtio_cancel,
-	.maxsize = PAGE_SIZE*16,
-	.pref = P9_TRANS_PREF_PAYLOAD_SEP,
-	.def = 0,
+	/*
+	 * We leave one entry for input and one entry for response
+	 * headers. We also skip one more entry to accommodate addresses
+	 * that are not at a page boundary, which can result in an extra
+	 * page in zero copy.
+	 */
+	.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
+	.def = 1,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/9p/util.c b/net/9p/util.c
index 9c1c9348ac3..59f278e64f5 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -87,26 +87,21 @@ EXPORT_SYMBOL(p9_idpool_destroy);
 
 int p9_idpool_get(struct p9_idpool *p)
 {
-	int i = 0;
-	int error;
+	int i;
 	unsigned long flags;
 
-retry:
-	if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
-		return -1;
-
+	idr_preload(GFP_NOFS);
 	spin_lock_irqsave(&p->lock, flags);
 
 	/* no need to store exactly p, we just need something non-null */
-	error = idr_get_new(&p->pool, p, &i);
-	spin_unlock_irqrestore(&p->lock, flags);
+	i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT);
 
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error)
+	spin_unlock_irqrestore(&p->lock, flags);
+	idr_preload_end();
+	if (i < 0)
 		return -1;
 
-	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
+	p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
 	return i;
 }
 EXPORT_SYMBOL(p9_idpool_get);
@@ -124,7 +119,7 @@ void p9_idpool_put(int id, struct p9_idpool *p)
 {
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
+	p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
 
 	spin_lock_irqsave(&p->lock, flags);
 	idr_remove(&p->pool, id);
diff --git a/net/Kconfig b/net/Kconfig
index 878151c772c..d92afe4204d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -5,6 +5,7 @@
 menuconfig NET
 	bool "Networking support"
 	select NLATTR
+	select GENERIC_NET_UTILS
 	---help---
 	  Unless you really know what you are doing, you should say Y here.
 	  The reason is that some programs need kernel networking support even
@@ -52,6 +53,8 @@ source "net/iucv/Kconfig"
 
 config INET
 	bool "TCP/IP networking"
+	select CRYPTO
+	select CRYPTO_AES
 	---help---
 	  These are the protocols used on the Internet and on most local
 	  Ethernets. It is highly recommended to say Y here (this will enlarge
@@ -86,9 +89,12 @@ config NETWORK_SECMARK
 	  to nfmark, but designated for security purposes.
 	  If you are unsure how to answer this question, answer N.
 
+config NET_PTP_CLASSIFY
+	def_bool n
+
 config NETWORK_PHY_TIMESTAMPING
 	bool "Timestamping in PHY devices"
-	depends on EXPERIMENTAL
+	select NET_PTP_CLASSIFY
 	help
 	  This allows timestamping of network packets by PHYs with
 	  hardware timestamping capabilities. This option adds some
@@ -207,33 +213,58 @@ source "net/ipx/Kconfig"
 source "drivers/net/appletalk/Kconfig"
 source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
-source "net/econet/Kconfig"
-source "net/wanrouter/Kconfig"
 source "net/phonet/Kconfig"
 source "net/ieee802154/Kconfig"
+source "net/mac802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
+source "net/openvswitch/Kconfig"
+source "net/vmw_vsock/Kconfig"
+source "net/netlink/Kconfig"
+source "net/mpls/Kconfig"
+source "net/hsr/Kconfig"
 
 config RPS
 	boolean
-	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	depends on SMP && SYSFS
 	default y
 
 config RFS_ACCEL
 	boolean
-	depends on RPS && GENERIC_HARDIRQS
+	depends on RPS
 	select CPU_RMAP
 	default y
 
 config XPS
 	boolean
-	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	depends on SMP
 	default y
 
-config HAVE_BPF_JIT
-	bool
+config CGROUP_NET_PRIO
+	bool "Network priority cgroup"
+	depends on CGROUPS
+	---help---
+	  Cgroup subsystem for use in assigning processes to network priorities on
+	  a per-interface basis.
+
+config CGROUP_NET_CLASSID
+	boolean "Network classid cgroup"
+	depends on CGROUPS
+	---help---
+	  Cgroup subsystem for use as a general-purpose socket classid marker that
+	  is used by cls_cgroup and for netfilter matching.
+
+config NET_RX_BUSY_POLL
+	boolean
+	default y
+
+config BQL
+	boolean
+	depends on SYSFS
+	select DQL
+	default y
 
 config BPF_JIT
 	bool "enable BPF Just In Time compiler"
@@ -246,11 +277,23 @@ config BPF_JIT
 	  packet sniffing (libpcap/tcpdump). Note : Admin should enable
 	  this feature changing /proc/sys/net/core/bpf_jit_enable
 
+config NET_FLOW_LIMIT
+	boolean
+	depends on RPS
+	default y
+	---help---
+	  The network stack has to drop packets when a receive processing CPU's
+	  backlog reaches netdev_max_backlog. If a few out of many active flows
+	  generate the vast majority of load, drop their traffic earlier to
+	  maintain capacity for the other flows. This feature provides servers
+	  with many clients some protection against DoS by a single (spoofed)
+	  flow that greatly exceeds average workload.
+
 menu "Network testing"
 
 config NET_PKTGEN
 	tristate "Packet Generator (USE WITH CAUTION)"
-	depends on PROC_FS
+	depends on INET && PROC_FS
 	---help---
 	  This module will inject preconfigured packets, at a configurable
 	  rate, out of a given interface.  It is used for network interface
@@ -265,7 +308,7 @@ config NET_PKTGEN
 
 config NET_TCPPROBE
 	tristate "TCP connection probing"
-	depends on INET && EXPERIMENTAL && PROC_FS && KPROBES
+	depends on INET && PROC_FS && KPROBES
 	---help---
 	This module allows for capturing the changes to TCP connection
 	state in response to incoming packets. It is used for debugging
@@ -281,8 +324,8 @@ config NET_TCPPROBE
 	module will be called tcp_probe.
 
 config NET_DROP_MONITOR
-	boolean "Network packet drop alerting service"
-	depends on INET && EXPERIMENTAL && TRACEPOINTS
+	tristate "Network packet drop alerting service"
+	depends on INET && TRACEPOINTS
 	---help---
 	This feature provides an alerting service to userspace in the
 	event that packets are discarded in the network stack.  Alerts
@@ -322,6 +365,11 @@ source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
 source "net/caif/Kconfig"
 source "net/ceph/Kconfig"
+source "net/nfc/Kconfig"
 
 
 endif   # if NET
+
+# Used by archs to tell that they support BPF_JIT
+config HAVE_BPF_JIT
+	bool
diff --git a/net/Makefile b/net/Makefile
index a51d9465e62..cbbbe6d657c 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_BRIDGE)		+= bridge/
 obj-$(CONFIG_NET_DSA)		+= dsa/
 obj-$(CONFIG_IPX)		+= ipx/
 obj-$(CONFIG_ATALK)		+= appletalk/
-obj-$(CONFIG_WAN_ROUTER)	+= wanrouter/
 obj-$(CONFIG_X25)		+= x25/
 obj-$(CONFIG_LAPB)		+= lapb/
 obj-$(CONFIG_NETROM)		+= netrom/
@@ -40,7 +39,6 @@ obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_L2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
-obj-$(CONFIG_ECONET)		+= econet/
 obj-$(CONFIG_PHONET)		+= phonet/
 ifneq ($(CONFIG_VLAN_8021Q),)
 obj-y				+= 8021q/
@@ -59,7 +57,8 @@ obj-$(CONFIG_CAIF)		+= caif/
 ifneq ($(CONFIG_DCB),)
 obj-y				+= dcb/
 endif
-obj-$(CONFIG_IEEE802154)	+= ieee802154/
+obj-y				+= ieee802154/
+obj-$(CONFIG_MAC802154)		+= mac802154/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
@@ -68,3 +67,8 @@ obj-$(CONFIG_WIMAX)		+= wimax/
 obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
 obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
+obj-$(CONFIG_NFC)		+= nfc/
+obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
+obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
+obj-$(CONFIG_NET_MPLS_GSO)	+= mpls/
+obj-$(CONFIG_HSR)		+= hsr/
diff --git a/net/TUNABLE b/net/TUNABLE
deleted file mode 100644
index 9913211f07a..00000000000
--- a/net/TUNABLE
+++ /dev/null
@@ -1,50 +0,0 @@
-The following parameters should be tunable at compile time. Some of them
-exist as sysctls too.
-
-This is far from complete
-
-Item			Description
-----------------------------------------------------------------------------
-MAX_LINKS		Maximum number of netlink minor devices. (1-32)
-RIF_TABLE_SIZE		Token ring RIF cache size (tunable)
-AARP_HASH_SIZE		Size of Appletalk hash table (tunable)
-AX25_DEF_T1		AX.25 parameters. These are all tunable via
-AX25_DEF_T2		SIOCAX25SETPARMS
-AX25_DEF_T3		T1-T3,N2 have the meanings in the specification
-AX25_DEF_N2
-AX25_DEF_AXDEFMODE	8 = normal 128 is PE1CHL extended
-AX25_DEF_IPDEFMODE	'D' - datagram  'V' - virtual connection
-AX25_DEF_BACKOFF	'E'xponential 'L'inear
-AX25_DEF_NETROM		Allow netrom 1=Y
-AX25_DF_TEXT		Allow PID=Text 1=Y
-AX25_DEF_WINDOW		Window for normal mode
-AX25_DEF_EWINDOW	Window for PE1CHL mode
-AX25_DEF_DIGI		1 for inband 2 for cross band 3 for both
-AX25_DEF_CONMODE	Allow connected modes 1=Yes
-AX25_ROUTE_MAX		AX.25 route cache size - no currently tunable
-Unnamed (16)		Number of protocol hash slots (tunable)
-DEV_NUMBUFFS		Number of priority levels (not easily tunable)
-Unnamed (300)		Maximum packet backlog queue (tunable)
-MAX_IOVEC		Maximum number of iovecs in a message (tunable)
-MIN_WINDOW		Offered minimum window (tunable)
-MAX_WINDOW		Offered maximum window (tunable)
-MAX_HEADER		Largest physical header (tunable)
-MAX_ADDR_LEN		Largest physical address (tunable)
-SOCK_ARRAY_SIZE		IP socket array hash size (tunable)
-IP_MAX_MEMBERSHIPS	Largest number of groups per socket (BSD style) (tunable)
-16			Hard coded constant for amount of room allowed for
-			cache align and faster forwarding (tunable)
-IP_FRAG_TIME		Time we hold a fragment for. (tunable)
-PORT_MASQ_BEGIN		First port reserved for masquerade (tunable)
-PORT_MASQ_END		Last port used for masquerade	(tunable)
-MASQUERADE_EXPIRE_TCP_FIN	Time we keep a masquerade for after a FIN
-MASQUERADE_EXPIRE_UDP	Time we keep a UDP masquerade for (tunable)
-MAXVIFS			Maximum mrouted vifs (1-32)
-MFC_LINES		Lines in the multicast router cache (tunable)
-
-NetROM parameters are tunable via an ioctl passing a struct
-
-4000			Size a Unix domain socket malloc falls back to 
-			(tunable) should be 8K - a bit for 8K machines like
-			the ALPHA
-
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 50dce798132..d1c55d8dd0a 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -39,6 +39,8 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/export.h>
+#include <linux/etherdevice.h>
 
 int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME;
 int sysctl_aarp_tick_time = AARP_TICK_TIME;
@@ -66,7 +68,7 @@ struct aarp_entry {
 	unsigned long		expires_at;
 	struct atalk_addr	target_addr;
 	struct net_device	*dev;
-	char			hwaddr[6];
+	char			hwaddr[ETH_ALEN];
 	unsigned short		xmit_count;
 	struct aarp_entry	*next;
 };
@@ -133,7 +135,7 @@ static void __aarp_send_query(struct aarp_entry *a)
 	eah->pa_len	 = AARP_PA_ALEN;
 	eah->function	 = htons(AARP_REQUEST);
 
-	memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN);
+	ether_addr_copy(eah->hw_src, dev->dev_addr);
 
 	eah->pa_src_zero = 0;
 	eah->pa_src_net	 = sat->s_net;
@@ -180,7 +182,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
 	eah->pa_len	 = AARP_PA_ALEN;
 	eah->function	 = htons(AARP_REPLY);
 
-	memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN);
+	ether_addr_copy(eah->hw_src, dev->dev_addr);
 
 	eah->pa_src_zero = 0;
 	eah->pa_src_net	 = us->s_net;
@@ -189,7 +191,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
 	if (!sha)
 		memset(eah->hw_dst, '\0', ETH_ALEN);
 	else
-		memcpy(eah->hw_dst, sha, ETH_ALEN);
+		ether_addr_copy(eah->hw_dst, sha);
 
 	eah->pa_dst_zero = 0;
 	eah->pa_dst_net	 = them->s_net;
@@ -231,7 +233,7 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
 	eah->pa_len	 = AARP_PA_ALEN;
 	eah->function	 = htons(AARP_PROBE);
 
-	memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN);
+	ether_addr_copy(eah->hw_src, dev->dev_addr);
 
 	eah->pa_src_zero = 0;
 	eah->pa_src_net	 = us->s_net;
@@ -331,7 +333,7 @@ static void aarp_expire_timeout(unsigned long unused)
 static int aarp_device_event(struct notifier_block *this, unsigned long event,
 			     void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	int ct;
 
 	if (!net_eq(dev_net(dev), &init_net))
@@ -779,87 +781,87 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	switch (function) {
-		case AARP_REPLY:
-			if (!unresolved_count)	/* Speed up */
-				break;
-
-			/* Find the entry.  */
-			a = __aarp_find_entry(unresolved[hash], dev, &sa);
-			if (!a || dev != a->dev)
-				break;
+	case AARP_REPLY:
+		if (!unresolved_count)	/* Speed up */
+			break;
 
-			/* We can fill one in - this is good. */
-			memcpy(a->hwaddr, ea->hw_src, ETH_ALEN);
-			__aarp_resolved(&unresolved[hash], a, hash);
-			if (!unresolved_count)
-				mod_timer(&aarp_timer,
-					  jiffies + sysctl_aarp_expiry_time);
+		/* Find the entry.  */
+		a = __aarp_find_entry(unresolved[hash], dev, &sa);
+		if (!a || dev != a->dev)
 			break;
 
-		case AARP_REQUEST:
-		case AARP_PROBE:
+		/* We can fill one in - this is good. */
+		ether_addr_copy(a->hwaddr, ea->hw_src);
+		__aarp_resolved(&unresolved[hash], a, hash);
+		if (!unresolved_count)
+			mod_timer(&aarp_timer,
+				  jiffies + sysctl_aarp_expiry_time);
+		break;
+
+	case AARP_REQUEST:
+	case AARP_PROBE:
+
+		/*
+		 * If it is my address set ma to my address and reply.
+		 * We can treat probe and request the same.  Probe
+		 * simply means we shouldn't cache the querying host,
+		 * as in a probe they are proposing an address not
+		 * using one.
+		 *
+		 * Support for proxy-AARP added. We check if the
+		 * address is one of our proxies before we toss the
+		 * packet out.
+		 */
+
+		sa.s_node = ea->pa_dst_node;
+		sa.s_net  = ea->pa_dst_net;
+
+		/* See if we have a matching proxy. */
+		ma = __aarp_proxy_find(dev, &sa);
+		if (!ma)
+			ma = &ifa->address;
+		else { /* We need to make a copy of the entry. */
+			da.s_node = sa.s_node;
+			da.s_net = sa.s_net;
+			ma = &da;
+		}
 
+		if (function == AARP_PROBE) {
 			/*
-			 * If it is my address set ma to my address and reply.
-			 * We can treat probe and request the same.  Probe
-			 * simply means we shouldn't cache the querying host,
-			 * as in a probe they are proposing an address not
-			 * using one.
-			 *
-			 * Support for proxy-AARP added. We check if the
-			 * address is one of our proxies before we toss the
-			 * packet out.
+			 * A probe implies someone trying to get an
+			 * address. So as a precaution flush any
+			 * entries we have for this address.
 			 */
+			a = __aarp_find_entry(resolved[sa.s_node %
+						       (AARP_HASH_SIZE - 1)],
+					      skb->dev, &sa);
 
-			sa.s_node = ea->pa_dst_node;
-			sa.s_net  = ea->pa_dst_net;
-
-			/* See if we have a matching proxy. */
-			ma = __aarp_proxy_find(dev, &sa);
-			if (!ma)
-				ma = &ifa->address;
-			else { /* We need to make a copy of the entry. */
-				da.s_node = sa.s_node;
-				da.s_net = sa.s_net;
-				ma = &da;
-			}
-
-			if (function == AARP_PROBE) {
-				/*
-				 * A probe implies someone trying to get an
-				 * address. So as a precaution flush any
-				 * entries we have for this address.
-				 */
-				a = __aarp_find_entry(resolved[sa.s_node %
-							  (AARP_HASH_SIZE - 1)],
-						      skb->dev, &sa);
-
-				/*
-				 * Make it expire next tick - that avoids us
-				 * getting into a probe/flush/learn/probe/
-				 * flush/learn cycle during probing of a slow
-				 * to respond host addr.
-				 */
-				if (a) {
-					a->expires_at = jiffies - 1;
-					mod_timer(&aarp_timer, jiffies +
-							sysctl_aarp_tick_time);
-				}
+			/*
+			 * Make it expire next tick - that avoids us
+			 * getting into a probe/flush/learn/probe/
+			 * flush/learn cycle during probing of a slow
+			 * to respond host addr.
+			 */
+			if (a) {
+				a->expires_at = jiffies - 1;
+				mod_timer(&aarp_timer, jiffies +
+					  sysctl_aarp_tick_time);
 			}
+		}
 
-			if (sa.s_node != ma->s_node)
-				break;
+		if (sa.s_node != ma->s_node)
+			break;
 
-			if (sa.s_net && ma->s_net && sa.s_net != ma->s_net)
-				break;
+		if (sa.s_net && ma->s_net && sa.s_net != ma->s_net)
+			break;
 
-			sa.s_node = ea->pa_src_node;
-			sa.s_net = ea->pa_src_net;
+		sa.s_node = ea->pa_src_node;
+		sa.s_net = ea->pa_src_net;
 
-			/* aarp_my_address has found the address to use for us.
-			*/
-			aarp_send_reply(dev, ma, &sa, ea->hw_src);
-			break;
+		/* aarp_my_address has found the address to use for us.
+		 */
+		aarp_send_reply(dev, ma, &sa, ea->hw_src);
+		break;
 	}
 
 unlock:
@@ -924,7 +926,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
 	struct aarp_entry *entry;
 
  rescan:
-	while(ct < AARP_HASH_SIZE) {
+	while (ct < AARP_HASH_SIZE) {
 		for (entry = table[ct]; entry; entry = entry->next) {
 			if (!pos || ++off == *pos) {
 				iter->table = table;
@@ -993,7 +995,7 @@ static const char *dt2str(unsigned long ticks)
 {
 	static char buf[32];
 
-	sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100 ) / HZ);
+	sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100) / HZ);
 
 	return buf;
 }
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 6ef0e761e5d..af46bc49e1e 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -14,6 +14,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <linux/atalk.h>
+#include <linux/export.h>
 
 
 static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
@@ -177,12 +178,13 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v)
 	at = at_sk(s);
 
 	seq_printf(seq, "%02X   %04X:%02X:%02X  %04X:%02X:%02X  %08X:%08X "
-			"%02X %d\n",
+			"%02X %u\n",
 		   s->sk_type, ntohs(at->src_net), at->src_node, at->src_port,
 		   ntohs(at->dest_net), at->dest_node, at->dest_port,
 		   sk_wmem_alloc_get(s),
 		   sk_rmem_alloc_get(s),
-		   s->sk_state, SOCK_INODE(s->sk_socket)->i_uid);
+		   s->sk_state,
+		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)));
 out:
 	return 0;
 }
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 956a5302002..bfcf6be1d66 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -63,7 +63,7 @@
 #include <net/tcp_states.h>
 #include <net/route.h>
 #include <linux/atalk.h>
-#include "../core/kmap_skb.h"
+#include <linux/highmem.h>
 
 struct datalink_proto *ddp_dl, *aarp_dl;
 static const struct proto_ops atalk_dgram_ops;
@@ -93,10 +93,9 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
 					struct atalk_iface *atif)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	read_lock_bh(&atalk_sockets_lock);
-	sk_for_each(s, node, &atalk_sockets) {
+	sk_for_each(s, &atalk_sockets) {
 		struct atalk_sock *at = at_sk(s);
 
 		if (to->sat_port != at->src_port)
@@ -129,8 +128,8 @@ found:
 
 /**
  * atalk_find_or_insert_socket - Try to find a socket matching ADDR
- * @sk - socket to insert in the list if it is not there already
- * @sat - address to search for
+ * @sk: socket to insert in the list if it is not there already
+ * @sat: address to search for
  *
  * Try to find a socket matching ADDR in the socket list, if found then return
  * it. If not, insert SK into the socket list.
@@ -141,11 +140,10 @@ static struct sock *atalk_find_or_insert_socket(struct sock *sk,
 						struct sockaddr_at *sat)
 {
 	struct sock *s;
-	struct hlist_node *node;
 	struct atalk_sock *at;
 
 	write_lock_bh(&atalk_sockets_lock);
-	sk_for_each(s, node, &atalk_sockets) {
+	sk_for_each(s, &atalk_sockets) {
 		at = at_sk(s);
 
 		if (at->src_net == sat->sat_addr.s_net &&
@@ -295,7 +293,7 @@ static int atif_probe_device(struct atalk_iface *atif)
 
 /* Perform AARP probing for a proxy address */
 static int atif_proxy_probe_device(struct atalk_iface *atif,
-				   struct atalk_addr* proxy_addr)
+				   struct atalk_addr *proxy_addr)
 {
 	int netrange = ntohs(atif->nets.nr_lastnet) -
 			ntohs(atif->nets.nr_firstnet) + 1;
@@ -583,7 +581,7 @@ out:
 }
 
 /* Delete a route. Find it and discard it */
-static int atrtr_delete(struct atalk_addr * addr)
+static int atrtr_delete(struct atalk_addr *addr)
 {
 	struct atalk_route **r = &atalk_routes;
 	int retval = 0;
@@ -646,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev)
 static int ddp_device_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
@@ -684,192 +682,192 @@ static int atif_ioctl(int cmd, void __user *arg)
 	atif = atalk_find_dev(dev);
 
 	switch (cmd) {
-		case SIOCSIFADDR:
-			if (!capable(CAP_NET_ADMIN))
-				return -EPERM;
-			if (sa->sat_family != AF_APPLETALK)
-				return -EINVAL;
-			if (dev->type != ARPHRD_ETHER &&
-			    dev->type != ARPHRD_LOOPBACK &&
-			    dev->type != ARPHRD_LOCALTLK &&
-			    dev->type != ARPHRD_PPP)
-				return -EPROTONOSUPPORT;
-
-			nr = (struct atalk_netrange *)&sa->sat_zero[0];
-			add_route = 1;
-
-			/*
-			 * if this is a point-to-point iface, and we already
-			 * have an iface for this AppleTalk address, then we
-			 * should not add a route
-			 */
-			if ((dev->flags & IFF_POINTOPOINT) &&
-			    atalk_find_interface(sa->sat_addr.s_net,
-						 sa->sat_addr.s_node)) {
-				printk(KERN_DEBUG "AppleTalk: point-to-point "
-						  "interface added with "
-						  "existing address\n");
-				add_route = 0;
-			}
-
-			/*
-			 * Phase 1 is fine on LocalTalk but we don't do
-			 * EtherTalk phase 1. Anyone wanting to add it go ahead.
-			 */
-			if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
-				return -EPROTONOSUPPORT;
-			if (sa->sat_addr.s_node == ATADDR_BCAST ||
-			    sa->sat_addr.s_node == 254)
-				return -EINVAL;
-			if (atif) {
-				/* Already setting address */
-				if (atif->status & ATIF_PROBE)
-					return -EBUSY;
-
-				atif->address.s_net  = sa->sat_addr.s_net;
-				atif->address.s_node = sa->sat_addr.s_node;
-				atrtr_device_down(dev);	/* Flush old routes */
-			} else {
-				atif = atif_add_device(dev, &sa->sat_addr);
-				if (!atif)
-					return -ENOMEM;
-			}
-			atif->nets = *nr;
+	case SIOCSIFADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (sa->sat_family != AF_APPLETALK)
+			return -EINVAL;
+		if (dev->type != ARPHRD_ETHER &&
+		    dev->type != ARPHRD_LOOPBACK &&
+		    dev->type != ARPHRD_LOCALTLK &&
+		    dev->type != ARPHRD_PPP)
+			return -EPROTONOSUPPORT;
+
+		nr = (struct atalk_netrange *)&sa->sat_zero[0];
+		add_route = 1;
 
-			/*
-			 * Check if the chosen address is used. If so we
-			 * error and atalkd will try another.
-			 */
-
-			if (!(dev->flags & IFF_LOOPBACK) &&
-			    !(dev->flags & IFF_POINTOPOINT) &&
-			    atif_probe_device(atif) < 0) {
-				atif_drop_device(dev);
-				return -EADDRINUSE;
-			}
-
-			/* Hey it worked - add the direct routes */
-			sa = (struct sockaddr_at *)&rtdef.rt_gateway;
-			sa->sat_family = AF_APPLETALK;
-			sa->sat_addr.s_net  = atif->address.s_net;
-			sa->sat_addr.s_node = atif->address.s_node;
-			sa = (struct sockaddr_at *)&rtdef.rt_dst;
-			rtdef.rt_flags = RTF_UP;
-			sa->sat_family = AF_APPLETALK;
-			sa->sat_addr.s_node = ATADDR_ANYNODE;
-			if (dev->flags & IFF_LOOPBACK ||
-			    dev->flags & IFF_POINTOPOINT)
-				rtdef.rt_flags |= RTF_HOST;
-
-			/* Routerless initial state */
-			if (nr->nr_firstnet == htons(0) &&
-			    nr->nr_lastnet == htons(0xFFFE)) {
-				sa->sat_addr.s_net = atif->address.s_net;
-				atrtr_create(&rtdef, dev);
-				atrtr_set_default(dev);
-			} else {
-				limit = ntohs(nr->nr_lastnet);
-				if (limit - ntohs(nr->nr_firstnet) > 4096) {
-					printk(KERN_WARNING "Too many routes/"
-							    "iface.\n");
-					return -EINVAL;
-				}
-				if (add_route)
-					for (ct = ntohs(nr->nr_firstnet);
-					     ct <= limit; ct++) {
-						sa->sat_addr.s_net = htons(ct);
-						atrtr_create(&rtdef, dev);
-					}
-			}
-			dev_mc_add_global(dev, aarp_mcast);
-			return 0;
+		/*
+		 * if this is a point-to-point iface, and we already
+		 * have an iface for this AppleTalk address, then we
+		 * should not add a route
+		 */
+		if ((dev->flags & IFF_POINTOPOINT) &&
+		    atalk_find_interface(sa->sat_addr.s_net,
+					 sa->sat_addr.s_node)) {
+			printk(KERN_DEBUG "AppleTalk: point-to-point "
+			       "interface added with "
+			       "existing address\n");
+			add_route = 0;
+		}
 
-		case SIOCGIFADDR:
+		/*
+		 * Phase 1 is fine on LocalTalk but we don't do
+		 * EtherTalk phase 1. Anyone wanting to add it go ahead.
+		 */
+		if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
+			return -EPROTONOSUPPORT;
+		if (sa->sat_addr.s_node == ATADDR_BCAST ||
+		    sa->sat_addr.s_node == 254)
+			return -EINVAL;
+		if (atif) {
+			/* Already setting address */
+			if (atif->status & ATIF_PROBE)
+				return -EBUSY;
+
+			atif->address.s_net  = sa->sat_addr.s_net;
+			atif->address.s_node = sa->sat_addr.s_node;
+			atrtr_device_down(dev);	/* Flush old routes */
+		} else {
+			atif = atif_add_device(dev, &sa->sat_addr);
 			if (!atif)
-				return -EADDRNOTAVAIL;
+				return -ENOMEM;
+		}
+		atif->nets = *nr;
 
-			sa->sat_family = AF_APPLETALK;
-			sa->sat_addr = atif->address;
-			break;
+		/*
+		 * Check if the chosen address is used. If so we
+		 * error and atalkd will try another.
+		 */
 
-		case SIOCGIFBRDADDR:
-			if (!atif)
-				return -EADDRNOTAVAIL;
+		if (!(dev->flags & IFF_LOOPBACK) &&
+		    !(dev->flags & IFF_POINTOPOINT) &&
+		    atif_probe_device(atif) < 0) {
+			atif_drop_device(dev);
+			return -EADDRINUSE;
+		}
 
-			sa->sat_family = AF_APPLETALK;
+		/* Hey it worked - add the direct routes */
+		sa = (struct sockaddr_at *)&rtdef.rt_gateway;
+		sa->sat_family = AF_APPLETALK;
+		sa->sat_addr.s_net  = atif->address.s_net;
+		sa->sat_addr.s_node = atif->address.s_node;
+		sa = (struct sockaddr_at *)&rtdef.rt_dst;
+		rtdef.rt_flags = RTF_UP;
+		sa->sat_family = AF_APPLETALK;
+		sa->sat_addr.s_node = ATADDR_ANYNODE;
+		if (dev->flags & IFF_LOOPBACK ||
+		    dev->flags & IFF_POINTOPOINT)
+			rtdef.rt_flags |= RTF_HOST;
+
+		/* Routerless initial state */
+		if (nr->nr_firstnet == htons(0) &&
+		    nr->nr_lastnet == htons(0xFFFE)) {
 			sa->sat_addr.s_net = atif->address.s_net;
-			sa->sat_addr.s_node = ATADDR_BCAST;
-			break;
-
-		case SIOCATALKDIFADDR:
-		case SIOCDIFADDR:
-			if (!capable(CAP_NET_ADMIN))
-				return -EPERM;
-			if (sa->sat_family != AF_APPLETALK)
-				return -EINVAL;
-			atalk_dev_down(dev);
-			break;
-
-		case SIOCSARP:
-			if (!capable(CAP_NET_ADMIN))
-				return -EPERM;
-			if (sa->sat_family != AF_APPLETALK)
+			atrtr_create(&rtdef, dev);
+			atrtr_set_default(dev);
+		} else {
+			limit = ntohs(nr->nr_lastnet);
+			if (limit - ntohs(nr->nr_firstnet) > 4096) {
+				printk(KERN_WARNING "Too many routes/"
+				       "iface.\n");
 				return -EINVAL;
-			/*
-			 * for now, we only support proxy AARP on ELAP;
-			 * we should be able to do it for LocalTalk, too.
-			 */
-			if (dev->type != ARPHRD_ETHER)
-				return -EPROTONOSUPPORT;
-
-			/*
-			 * atif points to the current interface on this network;
-			 * we aren't concerned about its current status (at
-			 * least for now), but it has all the settings about
-			 * the network we're going to probe. Consequently, it
-			 * must exist.
-			 */
-			if (!atif)
-				return -EADDRNOTAVAIL;
+			}
+			if (add_route)
+				for (ct = ntohs(nr->nr_firstnet);
+				     ct <= limit; ct++) {
+					sa->sat_addr.s_net = htons(ct);
+					atrtr_create(&rtdef, dev);
+				}
+		}
+		dev_mc_add_global(dev, aarp_mcast);
+		return 0;
+
+	case SIOCGIFADDR:
+		if (!atif)
+			return -EADDRNOTAVAIL;
+
+		sa->sat_family = AF_APPLETALK;
+		sa->sat_addr = atif->address;
+		break;
+
+	case SIOCGIFBRDADDR:
+		if (!atif)
+			return -EADDRNOTAVAIL;
+
+		sa->sat_family = AF_APPLETALK;
+		sa->sat_addr.s_net = atif->address.s_net;
+		sa->sat_addr.s_node = ATADDR_BCAST;
+		break;
+
+	case SIOCATALKDIFADDR:
+	case SIOCDIFADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (sa->sat_family != AF_APPLETALK)
+			return -EINVAL;
+		atalk_dev_down(dev);
+		break;
 
-			nr = (struct atalk_netrange *)&(atif->nets);
-			/*
-			 * Phase 1 is fine on Localtalk but we don't do
-			 * Ethertalk phase 1. Anyone wanting to add it go ahead.
-			 */
-			if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
-				return -EPROTONOSUPPORT;
+	case SIOCSARP:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (sa->sat_family != AF_APPLETALK)
+			return -EINVAL;
+		/*
+		 * for now, we only support proxy AARP on ELAP;
+		 * we should be able to do it for LocalTalk, too.
+		 */
+		if (dev->type != ARPHRD_ETHER)
+			return -EPROTONOSUPPORT;
 
-			if (sa->sat_addr.s_node == ATADDR_BCAST ||
-			    sa->sat_addr.s_node == 254)
-				return -EINVAL;
+		/*
+		 * atif points to the current interface on this network;
+		 * we aren't concerned about its current status (at
+		 * least for now), but it has all the settings about
+		 * the network we're going to probe. Consequently, it
+		 * must exist.
+		 */
+		if (!atif)
+			return -EADDRNOTAVAIL;
 
-			/*
-			 * Check if the chosen address is used. If so we
-			 * error and ATCP will try another.
-			 */
-			if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0)
-				return -EADDRINUSE;
+		nr = (struct atalk_netrange *)&(atif->nets);
+		/*
+		 * Phase 1 is fine on Localtalk but we don't do
+		 * Ethertalk phase 1. Anyone wanting to add it go ahead.
+		 */
+		if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
+			return -EPROTONOSUPPORT;
 
-			/*
-			 * We now have an address on the local network, and
-			 * the AARP code will defend it for us until we take it
-			 * down. We don't set up any routes right now, because
-			 * ATCP will install them manually via SIOCADDRT.
-			 */
-			break;
+		if (sa->sat_addr.s_node == ATADDR_BCAST ||
+		    sa->sat_addr.s_node == 254)
+			return -EINVAL;
 
-		case SIOCDARP:
-			if (!capable(CAP_NET_ADMIN))
-				return -EPERM;
-			if (sa->sat_family != AF_APPLETALK)
-				return -EINVAL;
-			if (!atif)
-				return -EADDRNOTAVAIL;
+		/*
+		 * Check if the chosen address is used. If so we
+		 * error and ATCP will try another.
+		 */
+		if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0)
+			return -EADDRINUSE;
 
-			/* give to aarp module to remove proxy entry */
-			aarp_proxy_remove(atif->dev, &(sa->sat_addr));
-			return 0;
+		/*
+		 * We now have an address on the local network, and
+		 * the AARP code will defend it for us until we take it
+		 * down. We don't set up any routes right now, because
+		 * ATCP will install them manually via SIOCADDRT.
+		 */
+		break;
+
+	case SIOCDARP:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (sa->sat_family != AF_APPLETALK)
+			return -EINVAL;
+		if (!atif)
+			return -EADDRNOTAVAIL;
+
+		/* give to aarp module to remove proxy entry */
+		aarp_proxy_remove(atif->dev, &(sa->sat_addr));
+		return 0;
 	}
 
 	return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
@@ -884,25 +882,25 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg)
 		return -EFAULT;
 
 	switch (cmd) {
-		case SIOCDELRT:
-			if (rt.rt_dst.sa_family != AF_APPLETALK)
-				return -EINVAL;
-			return atrtr_delete(&((struct sockaddr_at *)
-						&rt.rt_dst)->sat_addr);
-
-		case SIOCADDRT: {
-			struct net_device *dev = NULL;
-			if (rt.rt_dev) {
-				char name[IFNAMSIZ];
-				if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
-					return -EFAULT;
-				name[IFNAMSIZ-1] = '\0';
-				dev = __dev_get_by_name(&init_net, name);
-				if (!dev)
-					return -ENODEV;
-			}
-			return atrtr_create(&rt, dev);
+	case SIOCDELRT:
+		if (rt.rt_dst.sa_family != AF_APPLETALK)
+			return -EINVAL;
+		return atrtr_delete(&((struct sockaddr_at *)
+				      &rt.rt_dst)->sat_addr);
+
+	case SIOCADDRT: {
+		struct net_device *dev = NULL;
+		if (rt.rt_dev) {
+			char name[IFNAMSIZ];
+			if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
+				return -EFAULT;
+			name[IFNAMSIZ-1] = '\0';
+			dev = __dev_get_by_name(&init_net, name);
+			if (!dev)
+				return -ENODEV;
 		}
+		return atrtr_create(&rt, dev);
+	}
 	}
 	return -EINVAL;
 }
@@ -938,11 +936,11 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 	int i, copy;
 
 	/* checksum stuff in header space */
-	if ( (copy = start - offset) > 0) {
+	if ((copy = start - offset) > 0) {
 		if (copy > len)
 			copy = len;
 		sum = atalk_sum_partial(skb->data + offset, copy, sum);
-		if ( (len -= copy) == 0)
+		if ((len -= copy) == 0)
 			return sum;
 
 		offset += copy;
@@ -951,20 +949,19 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 	/* checksum stuff in frags */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
-
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		WARN_ON(start > offset + len);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 			if (copy > len)
 				copy = len;
-			vaddr = kmap_skb_frag(frag);
+			vaddr = kmap_atomic(skb_frag_page(frag));
 			sum = atalk_sum_partial(vaddr + frag->page_offset +
 						  offset - start, copy, sum);
-			kunmap_skb_frag(vaddr);
+			kunmap_atomic(vaddr);
 
 			if (!(len -= copy))
 				return sum;
@@ -1067,8 +1064,8 @@ static int atalk_release(struct socket *sock)
 
 /**
  * atalk_pick_and_bind_port - Pick a source port when one is not given
- * @sk - socket to insert into the tables
- * @sat - address to search for
+ * @sk: socket to insert into the tables
+ * @sat: address to search for
  *
  * Pick a source port when one is not given. If we can find a suitable free
  * one, we insert the socket into the tables using it.
@@ -1085,9 +1082,8 @@ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat)
 	     sat->sat_port < ATPORT_LAST;
 	     sat->sat_port++) {
 		struct sock *s;
-		struct hlist_node *node;
 
-		sk_for_each(s, node, &atalk_sockets) {
+		sk_for_each(s, &atalk_sockets) {
 			struct atalk_sock *at = at_sk(s);
 
 			if (at->src_net == sat->sat_addr.s_net &&
@@ -1155,7 +1151,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			goto out;
 
 		at->src_net  = addr->sat_addr.s_net = ap->s_net;
-		at->src_node = addr->sat_addr.s_node= ap->s_node;
+		at->src_node = addr->sat_addr.s_node = ap->s_node;
 	} else {
 		err = -EADDRNOTAVAIL;
 		if (!atalk_find_interface(addr->sat_addr.s_net,
@@ -1209,9 +1205,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
 	if (addr->sat_addr.s_node == ATADDR_BCAST &&
 	    !sock_flag(sk, SOCK_BROADCAST)) {
 #if 1
-		printk(KERN_WARNING "%s is broken and did not set "
-				    "SO_BROADCAST. It will break when 2.2 is "
-				    "released.\n",
+		pr_warn("atalk_connect: %s is broken and did not set SO_BROADCAST.\n",
 			current->comm);
 #else
 		return -EACCES;
@@ -1259,7 +1253,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
 			goto out;
 
 	*uaddr_len = sizeof(struct sockaddr_at);
-	memset(&sat.sat_zero, 0, sizeof(sat.sat_zero));
+	memset(&sat, 0, sizeof(sat));
 
 	if (peer) {
 		err = -ENOTCONN;
@@ -1495,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 
 	/* Queue packet (standard) */
-	skb->sk = sock;
-
 	if (sock_queue_rcv_skb(sock, skb) < 0)
 		goto drop;
 
@@ -1572,7 +1564,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 {
 	struct sock *sk = sock->sk;
 	struct atalk_sock *at = at_sk(sk);
-	struct sockaddr_at *usat = (struct sockaddr_at *)msg->msg_name;
+	DECLARE_SOCKADDR(struct sockaddr_at *, usat, msg->msg_name);
 	int flags = msg->msg_flags;
 	int loopback = 0;
 	struct sockaddr_at local_satalk, gsat;
@@ -1650,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 	if (!skb)
 		goto out;
 
-	skb->sk = sk;
 	skb_reserve(skb, ddp_dl->header_length);
 	skb_reserve(skb, dev->hard_header_len);
 	skb->dev = dev;
@@ -1675,7 +1666,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 		goto out;
 	}
 
-	if (sk->sk_no_check == 1)
+	if (sk->sk_no_check_tx)
 		ddp->deh_sum = 0;
 	else
 		ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp));
@@ -1741,7 +1732,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 			 size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name;
 	struct ddpehdr *ddp;
 	int copied = 0;
 	int offset = 0;
@@ -1770,14 +1760,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 	}
 	err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
 
-	if (!err) {
-		if (sat) {
-			sat->sat_family      = AF_APPLETALK;
-			sat->sat_port        = ddp->deh_sport;
-			sat->sat_addr.s_node = ddp->deh_snode;
-			sat->sat_addr.s_net  = ddp->deh_snet;
-		}
-		msg->msg_namelen = sizeof(*sat);
+	if (!err && msg->msg_name) {
+		DECLARE_SOCKADDR(struct sockaddr_at *, sat, msg->msg_name);
+		sat->sat_family      = AF_APPLETALK;
+		sat->sat_port        = ddp->deh_sport;
+		sat->sat_addr.s_node = ddp->deh_snode;
+		sat->sat_addr.s_net  = ddp->deh_snet;
+		msg->msg_namelen     = sizeof(*sat);
 	}
 
 	skb_free_datagram(sk, skb);	/* Free the datagram. */
@@ -1798,53 +1787,53 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
-		/* Protocol layer */
-		case TIOCOUTQ: {
-			long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
-
-			if (amount < 0)
-				amount = 0;
-			rc = put_user(amount, (int __user *)argp);
-			break;
-		}
-		case TIOCINQ: {
-			/*
-			 * These two are safe on a single CPU system as only
-			 * user tasks fiddle here
-			 */
-			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
-			long amount = 0;
+	/* Protocol layer */
+	case TIOCOUTQ: {
+		long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+
+		if (amount < 0)
+			amount = 0;
+		rc = put_user(amount, (int __user *)argp);
+		break;
+	}
+	case TIOCINQ: {
+		/*
+		 * These two are safe on a single CPU system as only
+		 * user tasks fiddle here
+		 */
+		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+		long amount = 0;
 
-			if (skb)
-				amount = skb->len - sizeof(struct ddpehdr);
-			rc = put_user(amount, (int __user *)argp);
-			break;
-		}
-		case SIOCGSTAMP:
-			rc = sock_get_timestamp(sk, argp);
-			break;
-		case SIOCGSTAMPNS:
-			rc = sock_get_timestampns(sk, argp);
-			break;
-		/* Routing */
-		case SIOCADDRT:
-		case SIOCDELRT:
-			rc = -EPERM;
-			if (capable(CAP_NET_ADMIN))
-				rc = atrtr_ioctl(cmd, argp);
-			break;
-		/* Interface */
-		case SIOCGIFADDR:
-		case SIOCSIFADDR:
-		case SIOCGIFBRDADDR:
-		case SIOCATALKDIFADDR:
-		case SIOCDIFADDR:
-		case SIOCSARP:		/* proxy AARP */
-		case SIOCDARP:		/* proxy AARP */
-			rtnl_lock();
-			rc = atif_ioctl(cmd, argp);
-			rtnl_unlock();
-			break;
+		if (skb)
+			amount = skb->len - sizeof(struct ddpehdr);
+		rc = put_user(amount, (int __user *)argp);
+		break;
+	}
+	case SIOCGSTAMP:
+		rc = sock_get_timestamp(sk, argp);
+		break;
+	case SIOCGSTAMPNS:
+		rc = sock_get_timestampns(sk, argp);
+		break;
+	/* Routing */
+	case SIOCADDRT:
+	case SIOCDELRT:
+		rc = -EPERM;
+		if (capable(CAP_NET_ADMIN))
+			rc = atrtr_ioctl(cmd, argp);
+		break;
+	/* Interface */
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCATALKDIFADDR:
+	case SIOCDIFADDR:
+	case SIOCSARP:		/* proxy AARP */
+	case SIOCDARP:		/* proxy AARP */
+		rtnl_lock();
+		rc = atif_ioctl(cmd, argp);
+		rtnl_unlock();
+		break;
 	}
 
 	return rc;
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index 04e9c0da7aa..ebb864361f7 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -42,20 +42,14 @@ static struct ctl_table atalk_table[] = {
 	{ },
 };
 
-static struct ctl_path atalk_path[] = {
-	{ .procname = "net", },
-	{ .procname = "appletalk", },
-	{ }
-};
-
 static struct ctl_table_header *atalk_table_header;
 
 void atalk_register_sysctl(void)
 {
-	atalk_table_header = register_sysctl_paths(atalk_path, atalk_table);
+	atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table);
 }
 
 void atalk_unregister_sysctl(void)
 {
-	unregister_sysctl_table(atalk_table_header);
+	unregister_net_sysctl_table(atalk_table_header);
 }
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index fc63526d869..876fbe83e2e 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -9,7 +9,7 @@
 #include <linux/sonet.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 
 int atm_charge(struct atm_vcc *vcc, int truesize)
 {
@@ -26,7 +26,7 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc, int pdu_size,
 				 gfp_t gfp_flags)
 {
 	struct sock *sk = sk_atm(vcc);
-	int guess = atm_guess_pdu2truesize(pdu_size);
+	int guess = SKB_TRUESIZE(pdu_size);
 
 	atm_force_charge(vcc, guess);
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index f49da5814bc..350bf62b2ae 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -14,49 +14,45 @@ static ssize_t show_type(struct device *cdev,
 			 struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
-	return sprintf(buf, "%s\n", adev->type);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type);
 }
 
 static ssize_t show_address(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
-	int i;
-
-	for (i = 0; i < (ESI_LEN - 1); i++)
-		pos += sprintf(pos, "%02x:", adev->esi[i]);
-	pos += sprintf(pos, "%02x\n", adev->esi[i]);
 
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi);
 }
 
 static ssize_t show_atmaddress(struct device *cdev,
 			       struct device_attribute *attr, char *buf)
 {
 	unsigned long flags;
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 	struct atm_dev_addr *aaddr;
 	int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin;
-	int i, j;
+	int i, j, count = 0;
 
 	spin_lock_irqsave(&adev->lock, flags);
 	list_for_each_entry(aaddr, &adev->local, entry) {
 		for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) {
 			if (j == *fmt) {
-				pos += sprintf(pos, ".");
+				count += scnprintf(buf + count,
+						   PAGE_SIZE - count, ".");
 				++fmt;
 				j = 0;
 			}
-			pos += sprintf(pos, "%02x",
-				       aaddr->addr.sas_addr.prv[i]);
+			count += scnprintf(buf + count,
+					   PAGE_SIZE - count, "%02x",
+					   aaddr->addr.sas_addr.prv[i]);
 		}
-		pos += sprintf(pos, "\n");
+		count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
 	}
 	spin_unlock_irqrestore(&adev->lock, flags);
 
-	return pos - buf;
+	return count;
 }
 
 static ssize_t show_atmindex(struct device *cdev,
@@ -64,25 +60,21 @@ static ssize_t show_atmindex(struct device *cdev,
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
 
-	return sprintf(buf, "%d\n", adev->number);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number);
 }
 
 static ssize_t show_carrier(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 
-	pos += sprintf(pos, "%d\n",
-		       adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			 adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
 }
 
 static ssize_t show_link_rate(struct device *cdev,
 			      struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 	int link_rate;
 
@@ -100,9 +92,7 @@ static ssize_t show_link_rate(struct device *cdev,
 	default:
 		link_rate = adev->link_rate * 8 * 53;
 	}
-	pos += sprintf(pos, "%d\n", link_rate);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
 }
 
 static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 2252c2085da..403e71fa88f 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -53,6 +53,7 @@ static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
 static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
 static const unsigned char llc_oui_pid_pad[] =
 			{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
+static const unsigned char pad[] = { PAD_BRIDGED };
 static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
 static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
 
@@ -67,12 +68,15 @@ struct br2684_vcc {
 	/* keep old push, pop functions for chaining */
 	void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb);
 	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
+	void (*old_release_cb)(struct atm_vcc *vcc);
+	struct module *old_owner;
 	enum br2684_encaps encaps;
 	struct list_head brvccs;
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 	struct br2684_filter filter;
 #endif /* CONFIG_ATM_BR2684_IPFILTER */
-	unsigned copies_needed, copies_failed;
+	unsigned int copies_needed, copies_failed;
+	atomic_t qspace;
 };
 
 struct br2684_dev {
@@ -180,18 +184,15 @@ static struct notifier_block atm_dev_notifier = {
 static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
 {
 	struct br2684_vcc *brvcc = BR2684_VCC(vcc);
-	struct net_device *net_dev = skb->dev;
 
-	pr_debug("(vcc %p ; net_dev %p )\n", vcc, net_dev);
+	pr_debug("(vcc %p ; net_dev %p )\n", vcc, brvcc->device);
 	brvcc->old_pop(vcc, skb);
 
-	if (!net_dev)
-		return;
-
-	if (atm_may_send(vcc, 0))
-		netif_wake_queue(net_dev);
-
+	/* If the queue space just went up from zero, wake */
+	if (atomic_inc_return(&brvcc->qspace) == 1)
+		netif_wake_queue(brvcc->device);
 }
+
 /*
  * Send a packet out a particular vcc.  Not to useful right now, but paves
  * the way for multiple vcc's per itf.  Returns true if we can send,
@@ -202,7 +203,10 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 {
 	struct br2684_dev *brdev = BRPRIV(dev);
 	struct atm_vcc *atmvcc;
-	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
+	int minheadroom = (brvcc->encaps == e_llc) ?
+		((brdev->payload == p_bridged) ?
+			sizeof(llc_oui_pid_pad) : sizeof(llc_oui_ipv4)) :
+		((brdev->payload == p_bridged) ? BR2684_PAD_LEN : 0);
 
 	if (skb_headroom(skb) < minheadroom) {
 		struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
@@ -242,8 +246,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 		if (brdev->payload == p_bridged) {
 			skb_push(skb, 2);
 			memset(skb->data, 0, 2);
-		} else { /* p_routed */
-			skb_pull(skb, ETH_HLEN);
 		}
 	}
 	skb_debug(skb);
@@ -254,16 +256,30 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
-	atmvcc->send(atmvcc, skb);
 
-	if (!atm_may_send(atmvcc, 0)) {
+	if (atomic_dec_return(&brvcc->qspace) < 1) {
+		/* No more please! */
 		netif_stop_queue(brvcc->device);
-		/*check for race with br2684_pop*/
-		if (atm_may_send(atmvcc, 0))
-			netif_start_queue(brvcc->device);
+		/* We might have raced with br2684_pop() */
+		if (unlikely(atomic_read(&brvcc->qspace) > 0))
+			netif_wake_queue(brvcc->device);
 	}
 
-	return 1;
+	/* If this fails immediately, the skb will be freed and br2684_pop()
+	   will wake the queue if appropriate. Just return an error so that
+	   the stats are updated correctly */
+	return !atmvcc->send(atmvcc, skb);
+}
+
+static void br2684_release_cb(struct atm_vcc *atmvcc)
+{
+	struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
+
+	if (atomic_read(&brvcc->qspace) > 0)
+		netif_wake_queue(brvcc->device);
+
+	if (brvcc->old_release_cb)
+		brvcc->old_release_cb(atmvcc);
 }
 
 static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb,
@@ -277,6 +293,8 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb,
 {
 	struct br2684_dev *brdev = BRPRIV(dev);
 	struct br2684_vcc *brvcc;
+	struct atm_vcc *atmvcc;
+	netdev_tx_t ret = NETDEV_TX_OK;
 
 	pr_debug("skb_dst(skb)=%p\n", skb_dst(skb));
 	read_lock(&devs_lock);
@@ -287,9 +305,26 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb,
 		dev->stats.tx_carrier_errors++;
 		/* netif_stop_queue(dev); */
 		dev_kfree_skb(skb);
-		read_unlock(&devs_lock);
-		return NETDEV_TX_OK;
+		goto out_devs;
+	}
+	atmvcc = brvcc->atmvcc;
+
+	bh_lock_sock(sk_atm(atmvcc));
+
+	if (test_bit(ATM_VF_RELEASED, &atmvcc->flags) ||
+	    test_bit(ATM_VF_CLOSE, &atmvcc->flags) ||
+	    !test_bit(ATM_VF_READY, &atmvcc->flags)) {
+		dev->stats.tx_dropped++;
+		dev_kfree_skb(skb);
+		goto out;
+	}
+
+	if (sock_owned_by_user(sk_atm(atmvcc))) {
+		netif_stop_queue(brvcc->device);
+		ret = NETDEV_TX_BUSY;
+		goto out;
 	}
+
 	if (!br2684_xmit_vcc(skb, dev, brvcc)) {
 		/*
 		 * We should probably use netif_*_queue() here, but that
@@ -301,8 +336,11 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb,
 		dev->stats.tx_errors++;
 		dev->stats.tx_fifo_errors++;
 	}
+ out:
+	bh_unlock_sock(sk_atm(atmvcc));
+ out_devs:
 	read_unlock(&devs_lock);
-	return NETDEV_TX_OK;
+	return ret;
 }
 
 /*
@@ -375,9 +413,10 @@ static void br2684_close_vcc(struct br2684_vcc *brvcc)
 	list_del(&brvcc->brvccs);
 	write_unlock_irq(&devs_lock);
 	brvcc->atmvcc->user_back = NULL;	/* what about vcc->recvq ??? */
+	brvcc->atmvcc->release_cb = brvcc->old_release_cb;
 	brvcc->old_push(brvcc->atmvcc, NULL);	/* pass on the bad news */
+	module_put(brvcc->old_owner);
 	kfree(brvcc);
-	module_put(THIS_MODULE);
 }
 
 /* when AAL5 PDU comes in: */
@@ -452,7 +491,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			skb->pkt_type = PACKET_HOST;
 		} else { /* p_bridged */
 			/* first 2 chars should be 0 */
-			if (*((u16 *) (skb->data)) != 0)
+			if (memcmp(skb->data, pad, BR2684_PAD_LEN) != 0)
 				goto error;
 			skb_pull(skb, BR2684_PAD_LEN);
 			skb->protocol = eth_type_trans(skb, net_dev);
@@ -491,21 +530,24 @@ free_skb:
  */
 static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 {
-	struct sk_buff_head queue;
-	int err;
 	struct br2684_vcc *brvcc;
-	struct sk_buff *skb, *tmp;
-	struct sk_buff_head *rq;
 	struct br2684_dev *brdev;
 	struct net_device *net_dev;
 	struct atm_backend_br2684 be;
-	unsigned long flags;
+	int err;
 
 	if (copy_from_user(&be, arg, sizeof be))
 		return -EFAULT;
 	brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
 	if (!brvcc)
 		return -ENOMEM;
+	/*
+	 * Allow two packets in the ATM queue. One actually being sent, and one
+	 * for the ATM 'TX done' handler to send. It shouldn't take long to get
+	 * the next one from the netdev queue, when we need it. More than that
+	 * would be bufferbloat.
+	 */
+	atomic_set(&brvcc->qspace, 2);
 	write_lock_irq(&devs_lock);
 	net_dev = br2684_find_dev(&be.ifspec);
 	if (net_dev == NULL) {
@@ -548,25 +590,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 	brvcc->encaps = (enum br2684_encaps)be.encaps;
 	brvcc->old_push = atmvcc->push;
 	brvcc->old_pop = atmvcc->pop;
+	brvcc->old_release_cb = atmvcc->release_cb;
+	brvcc->old_owner = atmvcc->owner;
 	barrier();
 	atmvcc->push = br2684_push;
 	atmvcc->pop = br2684_pop;
-
-	__skb_queue_head_init(&queue);
-	rq = &sk_atm(atmvcc)->sk_receive_queue;
-
-	spin_lock_irqsave(&rq->lock, flags);
-	skb_queue_splice_init(rq, &queue);
-	spin_unlock_irqrestore(&rq->lock, flags);
-
-	skb_queue_walk_safe(&queue, skb, tmp) {
-		struct net_device *dev = skb->dev;
-
-		dev->stats.rx_bytes -= skb->len;
-		dev->stats.rx_packets--;
-
-		br2684_push(atmvcc, skb);
-	}
+	atmvcc->release_cb = br2684_release_cb;
+	atmvcc->owner = THIS_MODULE;
 
 	/* initialize netdev carrier state */
 	if (atmvcc->dev->signal == ATM_PHY_SIG_LOST)
@@ -575,6 +605,10 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 		netif_carrier_on(net_dev);
 
 	__module_get(THIS_MODULE);
+
+	/* re-process everything received between connection setup and
+	   backend setup */
+	vcc_process_recv_queue(atmvcc);
 	return 0;
 
 error:
@@ -601,6 +635,7 @@ static void br2684_setup(struct net_device *netdev)
 	struct br2684_dev *brdev = BRPRIV(netdev);
 
 	ether_setup(netdev);
+	netdev->hard_header_len += sizeof(llc_oui_pid_pad); /* worst case */
 	brdev->net_dev = netdev;
 
 	netdev->netdev_ops = &br2684_netdev_ops;
@@ -613,7 +648,7 @@ static void br2684_setup_routed(struct net_device *netdev)
 	struct br2684_dev *brdev = BRPRIV(netdev);
 
 	brdev->net_dev = netdev;
-	netdev->hard_header_len = 0;
+	netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */
 	netdev->netdev_ops = &br2684_netdev_ops_routed;
 	netdev->addr_len = 0;
 	netdev->mtu = 1500;
@@ -700,10 +735,13 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
 			return -ENOIOCTLCMD;
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		if (cmd == ATM_SETBACKEND)
+		if (cmd == ATM_SETBACKEND) {
+			if (sock->state != SS_CONNECTED)
+				return -EINVAL;
 			return br2684_regvcc(atmvcc, argp);
-		else
+		} else {
 			return br2684_create(argp);
+		}
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 	case BR2684_SETFILT:
 		if (atmvcc->push != br2684_push)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 1d4be60e139..ba291ce4bdf 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -33,11 +33,11 @@
 #include <linux/slab.h>
 #include <net/route.h> /* for struct rtable and routing */
 #include <net/icmp.h> /* icmp_send */
+#include <net/arp.h>
 #include <linux/param.h> /* for HZ */
 #include <linux/uaccess.h>
 #include <asm/byteorder.h> /* for htons etc. */
-#include <asm/system.h> /* save/restore_flags */
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 
 #include "common.h"
 #include "resources.h"
@@ -45,8 +45,8 @@
 
 static struct net_device *clip_devs;
 static struct atm_vcc *atmarpd;
-static struct neigh_table clip_tbl;
 static struct timer_list idle_timer;
+static const struct neigh_ops clip_neigh_ops;
 
 static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
 {
@@ -68,7 +68,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
 
 	sk = sk_atm(atmarpd);
 	skb_queue_tail(&sk->sk_receive_queue, skb);
-	sk->sk_data_ready(sk, skb->len);
+	sk->sk_data_ready(sk);
 	return 0;
 }
 
@@ -119,9 +119,11 @@ out:
 /* The neighbour entry n->lock is held. */
 static int neigh_check_cb(struct neighbour *n)
 {
-	struct atmarp_entry *entry = NEIGH2ENTRY(n);
+	struct atmarp_entry *entry = neighbour_priv(n);
 	struct clip_vcc *cv;
 
+	if (n->ops != &clip_neigh_ops)
+		return 0;
 	for (cv = entry->vccs; cv; cv = cv->next) {
 		unsigned long exp = cv->last_use + cv->idle_timeout;
 
@@ -153,10 +155,10 @@ static int neigh_check_cb(struct neighbour *n)
 
 static void idle_timer_check(unsigned long dummy)
 {
-	write_lock(&clip_tbl.lock);
-	__neigh_for_each_release(&clip_tbl, neigh_check_cb);
+	write_lock(&arp_tbl.lock);
+	__neigh_for_each_release(&arp_tbl, neigh_check_cb);
 	mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
-	write_unlock(&clip_tbl.lock);
+	write_unlock(&arp_tbl.lock);
 }
 
 static int clip_arp_rcv(struct sk_buff *skb)
@@ -189,6 +191,13 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
 	struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
 
 	pr_debug("\n");
+
+	if (!clip_devs) {
+		atm_return(vcc, skb->truesize);
+		kfree_skb(skb);
+		return;
+	}
+
 	if (!skb) {
 		pr_debug("removing VCC %p\n", clip_vcc);
 		if (clip_vcc->entry)
@@ -255,8 +264,10 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb)
 
 static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
 {
+	__be32 *ip = (__be32 *) neigh->primary_key;
+
 	pr_debug("(neigh %p, skb %p)\n", neigh, skb);
-	to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip);
+	to_atmarpd(act_need, PRIV(neigh->dev)->number, *ip);
 }
 
 static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb)
@@ -271,80 +282,30 @@ static const struct neigh_ops clip_neigh_ops = {
 	.family =		AF_INET,
 	.solicit =		clip_neigh_solicit,
 	.error_report =		clip_neigh_error,
-	.output =		dev_queue_xmit,
-	.connected_output =	dev_queue_xmit,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
+	.output =		neigh_direct_output,
+	.connected_output =	neigh_direct_output,
 };
 
 static int clip_constructor(struct neighbour *neigh)
 {
-	struct atmarp_entry *entry = NEIGH2ENTRY(neigh);
-	struct net_device *dev = neigh->dev;
-	struct in_device *in_dev;
-	struct neigh_parms *parms;
+	struct atmarp_entry *entry = neighbour_priv(neigh);
 
-	pr_debug("(neigh %p, entry %p)\n", neigh, entry);
-	neigh->type = inet_addr_type(&init_net, entry->ip);
-	if (neigh->type != RTN_UNICAST)
+	if (neigh->tbl->family != AF_INET)
 		return -EINVAL;
 
-	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(dev);
-	if (!in_dev) {
-		rcu_read_unlock();
+	if (neigh->type != RTN_UNICAST)
 		return -EINVAL;
-	}
-
-	parms = in_dev->arp_parms;
-	__neigh_parms_put(neigh->parms);
-	neigh->parms = neigh_parms_clone(parms);
-	rcu_read_unlock();
 
+	neigh->nud_state = NUD_NONE;
 	neigh->ops = &clip_neigh_ops;
-	neigh->output = neigh->nud_state & NUD_VALID ?
-	    neigh->ops->connected_output : neigh->ops->output;
+	neigh->output = neigh->ops->output;
 	entry->neigh = neigh;
 	entry->vccs = NULL;
 	entry->expires = jiffies - 1;
+
 	return 0;
 }
 
-static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd)
-{
-	return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd);
-}
-
-static struct neigh_table clip_tbl = {
-	.family 	= AF_INET,
-	.entry_size 	= sizeof(struct neighbour)+sizeof(struct atmarp_entry),
-	.key_len 	= 4,
-	.hash 		= clip_hash,
-	.constructor 	= clip_constructor,
-	.id 		= "clip_arp_cache",
-
-	/* parameters are copied from ARP ... */
-	.parms = {
-		.tbl 			= &clip_tbl,
-		.base_reachable_time 	= 30 * HZ,
-		.retrans_time 		= 1 * HZ,
-		.gc_staletime 		= 60 * HZ,
-		.reachable_time 	= 30 * HZ,
-		.delay_probe_time 	= 5 * HZ,
-		.queue_len 		= 3,
-		.ucast_probes 		= 3,
-		.mcast_probes 		= 3,
-		.anycast_delay 		= 1 * HZ,
-		.proxy_delay 		= (8 * HZ) / 10,
-		.proxy_qlen 		= 64,
-		.locktime 		= 1 * HZ,
-	},
-	.gc_interval 	= 30 * HZ,
-	.gc_thresh1 	= 128,
-	.gc_thresh2 	= 512,
-	.gc_thresh3 	= 1024,
-};
-
 /* @@@ copy bh locking from arp.c -- need to bh-enable atm code before */
 
 /*
@@ -364,38 +325,40 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 				   struct net_device *dev)
 {
 	struct clip_priv *clip_priv = PRIV(dev);
+	struct dst_entry *dst = skb_dst(skb);
 	struct atmarp_entry *entry;
+	struct neighbour *n;
 	struct atm_vcc *vcc;
+	struct rtable *rt;
+	__be32 *daddr;
 	int old;
 	unsigned long flags;
 
 	pr_debug("(skb %p)\n", skb);
-	if (!skb_dst(skb)) {
+	if (!dst) {
 		pr_err("skb_dst(skb) == NULL\n");
 		dev_kfree_skb(skb);
 		dev->stats.tx_dropped++;
 		return NETDEV_TX_OK;
 	}
-	if (!skb_dst(skb)->neighbour) {
-#if 0
-		skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1);
-		if (!skb_dst(skb)->neighbour) {
-			dev_kfree_skb(skb);	/* lost that one */
-			dev->stats.tx_dropped++;
-			return 0;
-		}
-#endif
+	rt = (struct rtable *) dst;
+	if (rt->rt_gateway)
+		daddr = &rt->rt_gateway;
+	else
+		daddr = &ip_hdr(skb)->daddr;
+	n = dst_neigh_lookup(dst, daddr);
+	if (!n) {
 		pr_err("NO NEIGHBOUR !\n");
 		dev_kfree_skb(skb);
 		dev->stats.tx_dropped++;
 		return NETDEV_TX_OK;
 	}
-	entry = NEIGH2ENTRY(skb_dst(skb)->neighbour);
+	entry = neighbour_priv(n);
 	if (!entry->vccs) {
 		if (time_after(jiffies, entry->expires)) {
 			/* should be resolved */
 			entry->expires = jiffies + ATMARP_RETRY_DELAY * HZ;
-			to_atmarpd(act_need, PRIV(dev)->number, entry->ip);
+			to_atmarpd(act_need, PRIV(dev)->number, *((__be32 *)n->primary_key));
 		}
 		if (entry->neigh->arp_queue.qlen < ATMARP_MAX_UNRES_PACKETS)
 			skb_queue_tail(&entry->neigh->arp_queue, skb);
@@ -403,11 +366,11 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 			dev_kfree_skb(skb);
 			dev->stats.tx_dropped++;
 		}
-		return NETDEV_TX_OK;
+		goto out_release_neigh;
 	}
 	pr_debug("neigh %p, vccs %p\n", entry, entry->vccs);
 	ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc;
-	pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc);
+	pr_debug("using neighbour %p, vcc %p\n", n, vcc);
 	if (entry->vccs->encap) {
 		void *here;
 
@@ -422,14 +385,14 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 	old = xchg(&entry->vccs->xoff, 1);	/* assume XOFF ... */
 	if (old) {
 		pr_warning("XOFF->XOFF transition\n");
-		return NETDEV_TX_OK;
+		goto out_release_neigh;
 	}
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
 	vcc->send(vcc, skb);
 	if (atm_may_send(vcc, 0)) {
 		entry->vccs->xoff = 0;
-		return NETDEV_TX_OK;
+		goto out_release_neigh;
 	}
 	spin_lock_irqsave(&clip_priv->xoff_lock, flags);
 	netif_stop_queue(dev);	/* XOFF -> throttle immediately */
@@ -441,15 +404,14 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 	   of the brief netif_stop_queue. If this isn't true or if it
 	   changes, use netif_wake_queue instead. */
 	spin_unlock_irqrestore(&clip_priv->xoff_lock, flags);
+out_release_neigh:
+	neigh_release(n);
 	return NETDEV_TX_OK;
 }
 
 static int clip_mkip(struct atm_vcc *vcc, int timeout)
 {
-	struct sk_buff_head *rq, queue;
 	struct clip_vcc *clip_vcc;
-	struct sk_buff *skb, *tmp;
-	unsigned long flags;
 
 	if (!vcc->push)
 		return -EBADFD;
@@ -470,29 +432,9 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
 	vcc->push = clip_push;
 	vcc->pop = clip_pop;
 
-	__skb_queue_head_init(&queue);
-	rq = &sk_atm(vcc)->sk_receive_queue;
-
-	spin_lock_irqsave(&rq->lock, flags);
-	skb_queue_splice_init(rq, &queue);
-	spin_unlock_irqrestore(&rq->lock, flags);
-
 	/* re-process everything received between connection setup and MKIP */
-	skb_queue_walk_safe(&queue, skb, tmp) {
-		if (!clip_devs) {
-			atm_return(vcc, skb->truesize);
-			kfree_skb(skb);
-		} else {
-			struct net_device *dev = skb->dev;
-			unsigned int len = skb->len;
-
-			skb_get(skb);
-			clip_push(vcc, skb);
-			dev->stats.rx_packets--;
-			dev->stats.rx_bytes -= len;
-			kfree_skb(skb);
-		}
-	}
+	vcc_process_recv_queue(vcc);
+
 	return 0;
 }
 
@@ -521,11 +463,11 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	rt = ip_route_output(&init_net, ip, 0, 1, 0);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
-	neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
+	neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1);
 	ip_rt_put(rt);
 	if (!neigh)
 		return -ENOMEM;
-	entry = NEIGH2ENTRY(neigh);
+	entry = neighbour_priv(neigh);
 	if (entry != clip_vcc->entry) {
 		if (!clip_vcc->entry)
 			pr_debug("add\n");
@@ -542,13 +484,15 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 }
 
 static const struct net_device_ops clip_netdev_ops = {
-	.ndo_start_xmit = clip_start_xmit,
+	.ndo_start_xmit		= clip_start_xmit,
+	.ndo_neigh_construct	= clip_constructor,
 };
 
 static void clip_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &clip_netdev_ops;
 	dev->type = ARPHRD_ATM;
+	dev->neigh_priv_len = sizeof(struct atmarp_entry);
 	dev->hard_header_len = RFC1483LLC_LEN;
 	dev->mtu = RFC1626_MTU;
 	dev->tx_queue_len = 100;	/* "normal" queue (packets) */
@@ -595,17 +539,15 @@ static int clip_create(int number)
 }
 
 static int clip_device_event(struct notifier_block *this, unsigned long event,
-			     void *arg)
+			     void *ptr)
 {
-	struct net_device *dev = arg;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
-	if (event == NETDEV_UNREGISTER) {
-		neigh_ifdown(&clip_tbl, dev);
+	if (event == NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
-	}
 
 	/* ignore non-CLIP devices */
 	if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops)
@@ -633,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event,
 			   void *ifa)
 {
 	struct in_device *in_dev;
+	struct netdev_notifier_info info;
 
 	in_dev = ((struct in_ifaddr *)ifa)->ifa_dev;
 	/*
@@ -641,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event,
 	 */
 	if (event != NETDEV_UP)
 		return NOTIFY_DONE;
-	return clip_device_event(this, NETDEV_CHANGE, in_dev->dev);
+	netdev_notifier_info_init(&info, in_dev->dev);
+	return clip_device_event(this, NETDEV_CHANGE, &info);
 }
 
 static struct notifier_block clip_dev_notifier = {
@@ -785,9 +729,10 @@ static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr)
 /* This means the neighbour entry has no attached VCC objects. */
 #define SEQ_NO_VCC_TOKEN	((void *) 2)
 
-static void atmarp_info(struct seq_file *seq, struct net_device *dev,
+static void atmarp_info(struct seq_file *seq, struct neighbour *n,
 			struct atmarp_entry *entry, struct clip_vcc *clip_vcc)
 {
+	struct net_device *dev = n->dev;
 	unsigned long exp;
 	char buf[17];
 	int svc, llc, off;
@@ -807,8 +752,7 @@ static void atmarp_info(struct seq_file *seq, struct net_device *dev,
 	seq_printf(seq, "%-6s%-4s%-4s%5ld ",
 		   dev->name, svc ? "SVC" : "PVC", llc ? "LLC" : "NULL", exp);
 
-	off = scnprintf(buf, sizeof(buf) - 1, "%pI4",
-			&entry->ip);
+	off = scnprintf(buf, sizeof(buf) - 1, "%pI4", n->primary_key);
 	while (off < 16)
 		buf[off++] = ' ';
 	buf[off] = '\0';
@@ -879,14 +823,17 @@ static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
 {
 	struct clip_seq_state *state = (struct clip_seq_state *)_state;
 
-	return clip_seq_vcc_walk(state, NEIGH2ENTRY(n), pos);
+	if (n->dev->type != ARPHRD_ATM)
+		return NULL;
+
+	return clip_seq_vcc_walk(state, neighbour_priv(n), pos);
 }
 
 static void *clip_seq_start(struct seq_file *seq, loff_t * pos)
 {
 	struct clip_seq_state *state = seq->private;
 	state->ns.neigh_sub_iter = clip_seq_sub_iter;
-	return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
+	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_NEIGH_ONLY);
 }
 
 static int clip_seq_show(struct seq_file *seq, void *v)
@@ -898,10 +845,10 @@ static int clip_seq_show(struct seq_file *seq, void *v)
 		seq_puts(seq, atm_arp_banner);
 	} else {
 		struct clip_seq_state *state = seq->private;
-		struct neighbour *n = v;
 		struct clip_vcc *vcc = state->vcc;
+		struct neighbour *n = v;
 
-		atmarp_info(seq, n->dev, NEIGH2ENTRY(n), vcc);
+		atmarp_info(seq, n, neighbour_priv(n), vcc);
 	}
 	return 0;
 }
@@ -932,9 +879,6 @@ static void atm_clip_exit_noproc(void);
 
 static int __init atm_clip_init(void)
 {
-	neigh_table_init_no_netlink(&clip_tbl);
-
-	clip_tbl_hook = &clip_tbl;
 	register_atm_ioctl(&clip_ioctl_ops);
 	register_netdevice_notifier(&clip_dev_notifier);
 	register_inetaddr_notifier(&clip_inet_notifier);
@@ -971,12 +915,6 @@ static void atm_clip_exit_noproc(void)
 	 */
 	del_timer_sync(&idle_timer);
 
-	/* Next, purge the table, so that the device
-	 * unregister loop below does not hang due to
-	 * device references remaining in the table.
-	 */
-	neigh_ifdown(&clip_tbl, NULL);
-
 	dev = clip_devs;
 	while (dev) {
 		next = PRIV(dev)->next;
@@ -984,11 +922,6 @@ static void atm_clip_exit_noproc(void)
 		free_netdev(dev);
 		dev = next;
 	}
-
-	/* Now it is safe to fully shutdown whole table. */
-	neigh_table_clear(&clip_tbl);
-
-	clip_tbl_hook = NULL;
 }
 
 static void __exit atm_clip_exit(void)
diff --git a/net/atm/common.c b/net/atm/common.c
index 22b963d06a1..7b491006eaf 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -23,7 +23,7 @@
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 
 #include "resources.h"		/* atm_find_dev */
 #include "common.h"		/* prototypes */
@@ -126,10 +126,19 @@ static void vcc_write_space(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static void vcc_release_cb(struct sock *sk)
+{
+	struct atm_vcc *vcc = atm_sk(sk);
+
+	if (vcc->release_cb)	/* skip when no per-VCC hook is set */
+		vcc->release_cb(vcc);
+}
+
 static struct proto vcc_proto = {
 	.name	  = "VCC",
 	.owner	  = THIS_MODULE,
 	.obj_size = sizeof(struct atm_vcc),
+	.release_cb = vcc_release_cb,
 };
 
 int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
@@ -156,7 +165,9 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
 	atomic_set(&sk->sk_rmem_alloc, 0);
 	vcc->push = NULL;
 	vcc->pop = NULL;
+	vcc->owner = NULL;
 	vcc->push_oam = NULL;
+	vcc->release_cb = NULL;
 	vcc->vpi = vcc->vci = 0; /* no VCI/VPI yet */
 	vcc->atm_options = vcc->aal_options = 0;
 	sk->sk_destruct = vcc_sock_destruct;
@@ -175,6 +186,7 @@ static void vcc_destroy_socket(struct sock *sk)
 			vcc->dev->ops->close(vcc);
 		if (vcc->push)
 			vcc->push(vcc, NULL); /* atmarpd has no push */
+		module_put(vcc->owner);
 
 		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 			atm_return(vcc, skb->truesize);
@@ -214,6 +226,26 @@ void vcc_release_async(struct atm_vcc *vcc, int reply)
 }
 EXPORT_SYMBOL(vcc_release_async);
 
+void vcc_process_recv_queue(struct atm_vcc *vcc)
+{
+	struct sk_buff_head queue, *rq;
+	struct sk_buff *skb, *tmp;
+	unsigned long flags;
+
+	__skb_queue_head_init(&queue);	/* local list, only touched here */
+	rq = &sk_atm(vcc)->sk_receive_queue;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	skb_queue_splice_init(rq, &queue);	/* move all of rq to queue */
+	spin_unlock_irqrestore(&rq->lock, flags);
+
+	skb_queue_walk_safe(&queue, skb, tmp) {	/* rq->lock not held here */
+		__skb_unlink(skb, &queue);
+		vcc->push(vcc, skb);	/* hand each skb to the vcc's push */
+	}
+}
+EXPORT_SYMBOL(vcc_process_recv_queue);
+
 void atm_dev_signal_change(struct atm_dev *dev, char signal)
 {
 	pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
@@ -238,11 +270,11 @@ void atm_dev_release_vccs(struct atm_dev *dev)
 	write_lock_irq(&vcc_sklist_lock);
 	for (i = 0; i < VCC_HTABLE_SIZE; i++) {
 		struct hlist_head *head = &vcc_hash[i];
-		struct hlist_node *node, *tmp;
+		struct hlist_node *tmp;
 		struct sock *s;
 		struct atm_vcc *vcc;
 
-		sk_for_each_safe(s, node, tmp, head) {
+		sk_for_each_safe(s, tmp, head) {
 			vcc = atm_sk(s);
 			if (vcc->dev == dev) {
 				vcc_release_async(vcc, -EPIPE);
@@ -285,11 +317,10 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
 static int check_ci(const struct atm_vcc *vcc, short vpi, int vci)
 {
 	struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)];
-	struct hlist_node *node;
 	struct sock *s;
 	struct atm_vcc *walk;
 
-	sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
 		walk = atm_sk(s);
 		if (walk->dev != vcc->dev)
 			continue;
@@ -502,8 +533,11 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 
 	if (sock->state != SS_CONNECTED)
 		return -ENOTCONN;
-	if (flags & ~MSG_DONTWAIT)		/* only handle MSG_DONTWAIT */
+
+	/* only handle MSG_DONTWAIT and MSG_PEEK */
+	if (flags & ~(MSG_DONTWAIT | MSG_PEEK))
 		return -EOPNOTSUPP;
+
 	vcc = ATM_SD(sock);
 	if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
 	    test_bit(ATM_VF_CLOSE, &vcc->flags) ||
@@ -524,8 +558,13 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	if (error)
 		return error;
 	sock_recv_ts_and_drops(msg, sk, skb);
-	pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize);
-	atm_return(vcc, skb->truesize);
+
+	if (!(flags & MSG_PEEK)) {
+		pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
+			 skb->truesize);
+		atm_return(vcc, skb->truesize);
+	}
+
 	skb_free_datagram(sk, skb);
 	return copied;
 }
@@ -784,6 +823,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 
 		if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
 			return -ENOTCONN;
+		memset(&pvc, 0, sizeof(pvc));
 		pvc.sap_family = AF_ATMPVC;
 		pvc.sap_addr.itf = vcc->dev->number;
 		pvc.sap_addr.vpi = vcc->vpi;
diff --git a/net/atm/common.h b/net/atm/common.h
index f48a76b6cdf..cc3c2dae4d7 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -24,6 +24,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
 		   char __user *optval, unsigned int optlen);
 int vcc_getsockopt(struct socket *sock, int level, int optname,
 		   char __user *optval, int __user *optlen);
+void vcc_process_recv_queue(struct atm_vcc *vcc);
 
 int atmpvc_init(void);
 void atmpvc_exit(void);
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 62dc8bfe6fe..bbd3b639992 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -97,9 +97,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 			error = sock_get_timestampns(sk, argp);
 		goto done;
 	case ATM_SETSC:
-		if (net_ratelimit())
-			pr_warning("ATM_SETSC is obsolete; used by %s:%d\n",
-				   current->comm, task_pid_nr(current));
+		net_warn_ratelimited("ATM_SETSC is obsolete; used by %s:%d\n",
+				     current->comm, task_pid_nr(current));
 		error = 0;
 		goto done;
 	case ATMSIGD_CTRL:
@@ -123,8 +122,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 		   work for 32-bit userspace. TBH I don't really want
 		   to think about it at all. dwmw2. */
 		if (compat) {
-			if (net_ratelimit())
-				pr_warning("32-bit task cannot be atmsigd\n");
+			net_warn_ratelimited("32-bit task cannot be atmsigd\n");
 			error = -EINVAL;
 			goto done;
 		}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ba48daa68c1..4c5b8ba0f84 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -26,11 +26,6 @@
 #include <linux/spinlock.h>
 #include <linux/seq_file.h>
 
-/* TokenRing if needed */
-#ifdef CONFIG_TR
-#include <linux/trdevice.h>
-#endif
-
 /* And atm device */
 #include <linux/atmdev.h>
 #include <linux/atmlec.h>
@@ -157,56 +152,12 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
 		atm_force_charge(priv->lecd, skb2->truesize);
 		sk = sk_atm(priv->lecd);
 		skb_queue_tail(&sk->sk_receive_queue, skb2);
-		sk->sk_data_ready(sk, skb2->len);
+		sk->sk_data_ready(sk);
 	}
 }
 #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
 
 /*
- * Modelled after tr_type_trans
- * All multicast and ARE or STE frames go to BUS.
- * Non source routed frames go by destination address.
- * Last hop source routed frames go by destination address.
- * Not last hop source routed frames go by _next_ route descriptor.
- * Returns pointer to destination MAC address or fills in rdesc
- * and returns NULL.
- */
-#ifdef CONFIG_TR
-static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
-{
-	struct trh_hdr *trh;
-	unsigned int riflen, num_rdsc;
-
-	trh = (struct trh_hdr *)packet;
-	if (trh->daddr[0] & (uint8_t) 0x80)
-		return bus_mac;	/* multicast */
-
-	if (trh->saddr[0] & TR_RII) {
-		riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
-		if ((ntohs(trh->rcf) >> 13) != 0)
-			return bus_mac;	/* ARE or STE */
-	} else
-		return trh->daddr;	/* not source routed */
-
-	if (riflen < 6)
-		return trh->daddr;	/* last hop, source routed */
-
-	/* riflen is 6 or more, packet has more than one route descriptor */
-	num_rdsc = (riflen / 2) - 1;
-	memset(rdesc, 0, ETH_ALEN);
-	/* offset 4 comes from LAN destination field in LE control frames */
-	if (trh->rcf & htons((uint16_t) TR_RCF_DIR_BIT))
-		memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(__be16));
-	else {
-		memcpy(&rdesc[4], &trh->rseg[1], sizeof(__be16));
-		rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0));
-	}
-
-	return NULL;
-}
-#endif /* CONFIG_TR */
-
-/*
  * Open/initialize the netdevice. This is called (in the current kernel)
  * sometime after booting when the 'ifconfig' program is run.
  *
@@ -257,9 +208,6 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
 	struct lec_arp_table *entry;
 	unsigned char *dst;
 	int min_frame_size;
-#ifdef CONFIG_TR
-	unsigned char rdesc[ETH_ALEN];	/* Token Ring route descriptor */
-#endif
 	int is_rdesc;
 
 	pr_debug("called\n");
@@ -283,31 +231,19 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
 	if (skb_headroom(skb) < 2) {
 		pr_debug("reallocating skb\n");
 		skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
-		kfree_skb(skb);
-		if (skb2 == NULL)
+		if (unlikely(!skb2)) {
+			kfree_skb(skb);
 			return NETDEV_TX_OK;
+		}
+		consume_skb(skb);
 		skb = skb2;
 	}
 	skb_push(skb, 2);
 
-	/* Put le header to place, works for TokenRing too */
+	/* Put the LE header in place */
 	lec_h = (struct lecdatahdr_8023 *)skb->data;
 	lec_h->le_header = htons(priv->lecid);
 
-#ifdef CONFIG_TR
-	/*
-	 * Ugly. Use this to realign Token Ring packets for
-	 * e.g. PCA-200E driver.
-	 */
-	if (priv->is_trdev) {
-		skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
-		kfree_skb(skb);
-		if (skb2 == NULL)
-			return NETDEV_TX_OK;
-		skb = skb2;
-	}
-#endif
-
 #if DUMP_PACKETS >= 2
 #define MAX_DUMP_SKB 99
 #elif DUMP_PACKETS >= 1
@@ -321,12 +257,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
 #endif /* DUMP_PACKETS >= 1 */
 
 	/* Minimum ethernet-frame size */
-#ifdef CONFIG_TR
-	if (priv->is_trdev)
-		min_frame_size = LEC_MINIMUM_8025_SIZE;
-	else
-#endif
-		min_frame_size = LEC_MINIMUM_8023_SIZE;
+	min_frame_size = LEC_MINIMUM_8023_SIZE;
 	if (skb->len < min_frame_size) {
 		if ((skb->len + skb_tailroom(skb)) < min_frame_size) {
 			skb2 = skb_copy_expand(skb, 0,
@@ -345,15 +276,6 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
 	/* Send to right vcc */
 	is_rdesc = 0;
 	dst = lec_h->h_dest;
-#ifdef CONFIG_TR
-	if (priv->is_trdev) {
-		dst = get_tr_dst(skb->data + 2, rdesc);
-		if (dst == NULL) {
-			dst = rdesc;
-			is_rdesc = 1;
-		}
-	}
-#endif
 	entry = NULL;
 	vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry);
 	pr_debug("%s:vcc:%p vcc_flags:%lx, entry:%p\n",
@@ -525,7 +447,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 			atm_force_charge(priv->lecd, skb2->truesize);
 			sk = sk_atm(priv->lecd);
 			skb_queue_tail(&sk->sk_receive_queue, skb2);
-			sk->sk_data_ready(sk, skb2->len);
+			sk->sk_data_ready(sk);
 		}
 	}
 #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
@@ -599,7 +521,7 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
 	if (data != NULL)
 		mesg->sizeoftlvs = data->len;
 	if (mac_addr)
-		memcpy(&mesg->content.normal.mac_addr, mac_addr, ETH_ALEN);
+		ether_addr_copy(mesg->content.normal.mac_addr, mac_addr);
 	else
 		mesg->content.normal.targetless_le_arp = 1;
 	if (atm_addr)
@@ -608,13 +530,13 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
 	atm_force_charge(priv->lecd, skb->truesize);
 	sk = sk_atm(priv->lecd);
 	skb_queue_tail(&sk->sk_receive_queue, skb);
-	sk->sk_data_ready(sk, skb->len);
+	sk->sk_data_ready(sk);
 
 	if (data != NULL) {
 		pr_debug("about to send %d bytes of data\n", data->len);
 		atm_force_charge(priv->lecd, data->truesize);
 		skb_queue_tail(&sk->sk_receive_queue, data);
-		sk->sk_data_ready(sk, skb->len);
+		sk->sk_data_ready(sk);
 	}
 
 	return 0;
@@ -643,7 +565,7 @@ static const struct net_device_ops lec_netdev_ops = {
 	.ndo_start_xmit		= lec_start_xmit,
 	.ndo_change_mtu		= lec_change_mtu,
 	.ndo_tx_timeout		= lec_tx_timeout,
-	.ndo_set_multicast_list	= lec_set_multicast_list,
+	.ndo_set_rx_mode	= lec_set_multicast_list,
 };
 
 static const unsigned char lec_ctrl_magic[] = {
@@ -694,7 +616,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 
 		pr_debug("%s: To daemon\n", dev->name);
 		skb_queue_tail(&sk->sk_receive_queue, skb);
-		sk->sk_data_ready(sk, skb->len);
+		sk->sk_data_ready(sk);
 	} else {		/* Data frame, queue to protocol handlers */
 		struct lec_arp_table *entry;
 		unsigned char *src, *dst;
@@ -710,12 +632,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 			dev_kfree_skb(skb);
 			return;
 		}
-#ifdef CONFIG_TR
-		if (priv->is_trdev)
-			dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest;
-		else
-#endif
-			dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest;
+		dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest;
 
 		/*
 		 * If this is a Data Direct VCC, and the VCC does not match
@@ -723,16 +640,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		 */
 		spin_lock_irqsave(&priv->lec_arp_lock, flags);
 		if (lec_is_data_direct(vcc)) {
-#ifdef CONFIG_TR
-			if (priv->is_trdev)
-				src =
-				    ((struct lecdatahdr_8025 *)skb->data)->
-				    h_source;
-			else
-#endif
-				src =
-				    ((struct lecdatahdr_8023 *)skb->data)->
-				    h_source;
+			src = ((struct lecdatahdr_8023 *)skb->data)->h_source;
 			entry = lec_arp_find(priv, src);
 			if (entry && entry->vcc != vcc) {
 				lec_arp_remove(priv, entry);
@@ -750,12 +658,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		if (!hlist_empty(&priv->lec_arp_empty_ones))
 			lec_arp_check_empties(priv, vcc, skb);
 		skb_pull(skb, 2);	/* skip lec_id */
-#ifdef CONFIG_TR
-		if (priv->is_trdev)
-			skb->protocol = tr_type_trans(skb, dev);
-		else
-#endif
-			skb->protocol = eth_type_trans(skb, dev);
+		skb->protocol = eth_type_trans(skb, dev);
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += skb->len;
 		memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
@@ -827,27 +730,13 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
 		i = 0;
 	else
 		i = arg;
-#ifdef CONFIG_TR
 	if (arg >= MAX_LEC_ITF)
 		return -EINVAL;
-#else				/* Reserve the top NUM_TR_DEVS for TR */
-	if (arg >= (MAX_LEC_ITF - NUM_TR_DEVS))
-		return -EINVAL;
-#endif
 	if (!dev_lec[i]) {
-		int is_trdev, size;
-
-		is_trdev = 0;
-		if (i >= (MAX_LEC_ITF - NUM_TR_DEVS))
-			is_trdev = 1;
+		int size;
 
 		size = sizeof(struct lec_priv);
-#ifdef CONFIG_TR
-		if (is_trdev)
-			dev_lec[i] = alloc_trdev(size);
-		else
-#endif
-			dev_lec[i] = alloc_etherdev(size);
+		dev_lec[i] = alloc_etherdev(size);
 		if (!dev_lec[i])
 			return -ENOMEM;
 		dev_lec[i]->netdev_ops = &lec_netdev_ops;
@@ -858,7 +747,6 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
 		}
 
 		priv = netdev_priv(dev_lec[i]);
-		priv->is_trdev = is_trdev;
 	} else {
 		priv = netdev_priv(dev_lec[i]);
 		if (priv->lecd)
@@ -954,7 +842,9 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
 		--*l;
 	}
 
-	hlist_for_each_entry_from(tmp, e, next) {
+	tmp = container_of(e, struct lec_arp_table, next);
+
+	hlist_for_each_entry_from(tmp, next) {
 		if (--*l < 0)
 			break;
 	}
@@ -1255,7 +1145,7 @@ static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst,
 	struct sk_buff *skb;
 	struct lec_priv *priv = netdev_priv(dev);
 
-	if (compare_ether_addr(lan_dst, dev->dev_addr))
+	if (!ether_addr_equal(lan_dst, dev->dev_addr))
 		return 0;	/* not our mac address */
 
 	kfree(priv->tlvs);	/* NULL if there was no previous association */
@@ -1335,7 +1225,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/param.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/inetdevice.h>
 #include <net/route.h>
 
@@ -1419,7 +1309,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry)
 static int
 lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove)
 {
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i, remove_vcc = 1;
 
@@ -1438,7 +1327,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove)
 		 * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT
 		 */
 		for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-			hlist_for_each_entry(entry, node,
+			hlist_for_each_entry(entry,
 					     &priv->lec_arp_tables[i], next) {
 				if (memcmp(to_remove->atm_addr,
 					   entry->atm_addr, ATM_ESA_LEN) == 0) {
@@ -1476,14 +1365,13 @@ static const char *get_status_string(unsigned char st)
 
 static void dump_arp_table(struct lec_priv *priv)
 {
-	struct hlist_node *node;
 	struct lec_arp_table *rulla;
 	char buf[256];
 	int i, j, offset;
 
 	pr_info("Dump %p:\n", priv);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(rulla, node,
+		hlist_for_each_entry(rulla,
 				     &priv->lec_arp_tables[i], next) {
 			offset = 0;
 			offset += sprintf(buf, "%d: %p\n", i, rulla);
@@ -1515,7 +1403,7 @@ static void dump_arp_table(struct lec_priv *priv)
 
 	if (!hlist_empty(&priv->lec_no_forward))
 		pr_info("No forward\n");
-	hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) {
+	hlist_for_each_entry(rulla, &priv->lec_no_forward, next) {
 		offset = 0;
 		offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
 		offset += sprintf(buf + offset, " Atm:");
@@ -1540,7 +1428,7 @@ static void dump_arp_table(struct lec_priv *priv)
 
 	if (!hlist_empty(&priv->lec_arp_empty_ones))
 		pr_info("Empty ones\n");
-	hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) {
+	hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) {
 		offset = 0;
 		offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
 		offset += sprintf(buf + offset, " Atm:");
@@ -1565,7 +1453,7 @@ static void dump_arp_table(struct lec_priv *priv)
 
 	if (!hlist_empty(&priv->mcast_fwds))
 		pr_info("Multicast Forward VCCs\n");
-	hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) {
+	hlist_for_each_entry(rulla, &priv->mcast_fwds, next) {
 		offset = 0;
 		offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
 		offset += sprintf(buf + offset, " Atm:");
@@ -1599,7 +1487,7 @@ static void dump_arp_table(struct lec_priv *priv)
 static void lec_arp_destroy(struct lec_priv *priv)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	int i;
 
@@ -1611,7 +1499,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			lec_arp_remove(priv, entry);
 			lec_arp_put(entry);
@@ -1619,7 +1507,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 		INIT_HLIST_HEAD(&priv->lec_arp_tables[i]);
 	}
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_arp_empty_ones, next) {
 		del_timer_sync(&entry->timer);
 		lec_arp_clear_vccs(entry);
@@ -1628,7 +1516,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 	}
 	INIT_HLIST_HEAD(&priv->lec_arp_empty_ones);
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_no_forward, next) {
 		del_timer_sync(&entry->timer);
 		lec_arp_clear_vccs(entry);
@@ -1637,7 +1525,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 	}
 	INIT_HLIST_HEAD(&priv->lec_no_forward);
 
-	hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) {
+	hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) {
 		/* No timer, LANEv2 7.1.20 and 2.3.5.3 */
 		lec_arp_clear_vccs(entry);
 		hlist_del(&entry->next);
@@ -1654,15 +1542,14 @@ static void lec_arp_destroy(struct lec_priv *priv)
 static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
 					  const unsigned char *mac_addr)
 {
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct lec_arp_table *entry;
 
 	pr_debug("%pM\n", mac_addr);
 
 	head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])];
-	hlist_for_each_entry(entry, node, head, next) {
-		if (!compare_ether_addr(mac_addr, entry->mac_addr))
+	hlist_for_each_entry(entry, head, next) {
+		if (ether_addr_equal(mac_addr, entry->mac_addr))
 			return entry;
 	}
 	return NULL;
@@ -1678,7 +1565,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
 		pr_info("LEC: Arp entry kmalloc failed\n");
 		return NULL;
 	}
-	memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
+	ether_addr_copy(to_return->mac_addr, mac_addr);
 	INIT_HLIST_NODE(&to_return->next);
 	setup_timer(&to_return->timer, lec_arp_expire_arp,
 			(unsigned long)to_return);
@@ -1716,7 +1603,7 @@ static void lec_arp_expire_vcc(unsigned long data)
 {
 	unsigned long flags;
 	struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
-	struct lec_priv *priv = (struct lec_priv *)to_remove->priv;
+	struct lec_priv *priv = to_remove->priv;
 
 	del_timer(&to_remove->timer);
 
@@ -1798,7 +1685,7 @@ static void lec_arp_check_expire(struct work_struct *work)
 	unsigned long flags;
 	struct lec_priv *priv =
 		container_of(work, struct lec_priv, lec_arp_work.work);
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	unsigned long now;
 	int i;
@@ -1808,7 +1695,7 @@ static void lec_arp_check_expire(struct work_struct *work)
 restart:
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			if (__lec_arp_check_expire(entry, now, priv)) {
 				struct sk_buff *skb;
@@ -1849,7 +1736,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
 		case 1:
 			return priv->mcast_vcc;
 		case 2:	/* LANE2 wants arp for multicast addresses */
-			if (!compare_ether_addr(mac_to_find, bus_mac))
+			if (ether_addr_equal(mac_to_find, bus_mac))
 				return priv->mcast_vcc;
 			break;
 		default:
@@ -1935,14 +1822,14 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr,
 		unsigned long permanent)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	int i;
 
 	pr_debug("\n");
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) &&
 			    (permanent ||
@@ -1967,7 +1854,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 	       unsigned int targetless_le_arp)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry, *tmp;
 	int i;
 
@@ -1982,7 +1869,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 				 * we have no entry in the cache. 7.1.30
 				 */
 	if (!hlist_empty(&priv->lec_arp_empty_ones)) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_empty_ones, next) {
 			if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) {
 				hlist_del(&entry->next);
@@ -2000,7 +1887,8 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 					entry = tmp;
 				} else {
 					entry->status = ESI_FORWARD_DIRECT;
-					memcpy(entry->mac_addr, mac_addr, ETH_ALEN);
+					ether_addr_copy(entry->mac_addr,
+							mac_addr);
 					entry->last_used = jiffies;
 					lec_arp_add(priv, entry);
 				}
@@ -2027,7 +1915,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 	memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN);
 	del_timer(&entry->timer);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(tmp, node,
+		hlist_for_each_entry(tmp,
 				     &priv->lec_arp_tables[i], next) {
 			if (entry != tmp &&
 			    !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) {
@@ -2068,7 +1956,6 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 	      void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb))
 {
 	unsigned long flags;
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i, found_entry = 0;
 
@@ -2138,7 +2025,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 		 ioc_data->atm_addr[16], ioc_data->atm_addr[17],
 		 ioc_data->atm_addr[18], ioc_data->atm_addr[19]);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(entry, node,
+		hlist_for_each_entry(entry,
 				     &priv->lec_arp_tables[i], next) {
 			if (memcmp
 			    (ioc_data->atm_addr, entry->atm_addr,
@@ -2215,7 +2102,6 @@ out:
 static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
 {
 	unsigned long flags;
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i;
 
@@ -2223,7 +2109,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
 restart:
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(entry, node,
+		hlist_for_each_entry(entry,
 				     &priv->lec_arp_tables[i], next) {
 			if (entry->flush_tran_id == tran_id &&
 			    entry->status == ESI_FLUSH_PENDING) {
@@ -2252,13 +2138,12 @@ lec_set_flush_tran_id(struct lec_priv *priv,
 		      const unsigned char *atm_addr, unsigned long tran_id)
 {
 	unsigned long flags;
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i;
 
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
-		hlist_for_each_entry(entry, node,
+		hlist_for_each_entry(entry,
 				     &priv->lec_arp_tables[i], next) {
 			if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) {
 				entry->flush_tran_id = tran_id;
@@ -2310,7 +2195,7 @@ out:
 static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	int i;
 
@@ -2320,7 +2205,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			if (vcc == entry->vcc) {
 				lec_arp_remove(priv, entry);
@@ -2331,7 +2216,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 		}
 	}
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_arp_empty_ones, next) {
 		if (entry->vcc == vcc) {
 			lec_arp_clear_vccs(entry);
@@ -2341,7 +2226,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 		}
 	}
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_no_forward, next) {
 		if (entry->recv_vcc == vcc) {
 			lec_arp_clear_vccs(entry);
@@ -2351,7 +2236,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 		}
 	}
 
-	hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) {
+	hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) {
 		if (entry->recv_vcc == vcc) {
 			lec_arp_clear_vccs(entry);
 			/* No timer, LANEv2 7.1.20 and 2.3.5.3 */
@@ -2369,25 +2254,17 @@ lec_arp_check_empties(struct lec_priv *priv,
 		      struct atm_vcc *vcc, struct sk_buff *skb)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry, *tmp;
 	struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data;
-	unsigned char *src;
-#ifdef CONFIG_TR
-	struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data;
-
-	if (priv->is_trdev)
-		src = tr_hdr->h_source;
-	else
-#endif
-		src = hdr->h_source;
+	unsigned char *src = hdr->h_source;
 
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_arp_empty_ones, next) {
 		if (vcc == entry->vcc) {
 			del_timer(&entry->timer);
-			memcpy(entry->mac_addr, src, ETH_ALEN);
+			ether_addr_copy(entry->mac_addr, src);
 			entry->status = ESI_FORWARD_DIRECT;
 			entry->last_used = jiffies;
 			/* We might have got an entry */
diff --git a/net/atm/lec.h b/net/atm/lec.h
index dfc07196646..4149db1b788 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -55,11 +55,11 @@ struct lane2_ops {
  * frames.
  *
  * 1. Dix Ethernet EtherType frames encoded by placing EtherType
- *    field in h_type field. Data follows immediatelly after header.
+ *    field in h_type field. Data follows immediately after header.
  * 2. LLC Data frames whose total length, including LLC field and data,
  *    but not padding required to meet the minimum data frame length,
- *    is less than 1536(0x0600) MUST be encoded by placing that length
- *    in the h_type field. The LLC field follows header immediatelly.
+ *    is less than ETH_P_802_3_MIN MUST be encoded by placing that length
+ *    in the h_type field. The LLC field follows header immediately.
  * 3. LLC data frames longer than this maximum MUST be encoded by placing
  *    the value 0 in the h_type field.
  *
@@ -142,7 +142,6 @@ struct lec_priv {
 	int itfnum;				/* e.g. 2 for lec2, 5 for lec5 */
 	struct lane2_ops *lane2_ops;		/* can be NULL for LANE v1 */
 	int is_proxy;				/* bridge between ATM and Ethernet */
-	int is_trdev;				/* Device type, 0 = Ethernet, 1 = TokenRing */
 };
 
 struct lec_vcc_priv {
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 3ccca42e6f9..e8e0e7a8a23 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -478,7 +478,7 @@ static const uint8_t *copy_macs(struct mpoa_client *mpc,
 			return NULL;
 		}
 	}
-	memcpy(mpc->mps_macs, router_mac, ETH_ALEN);
+	ether_addr_copy(mpc->mps_macs, router_mac);
 	tlvs += 20; if (device_type == MPS_AND_MPC) tlvs += 20;
 	if (mps_macs > 0)
 		memcpy(mpc->mps_macs, tlvs, mps_macs*ETH_ALEN);
@@ -592,8 +592,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
 		goto non_ip;
 
 	while (i < mpc->number_of_mps_macs) {
-		if (!compare_ether_addr(eth->h_dest,
-					(mpc->mps_macs + i*ETH_ALEN)))
+		if (ether_addr_equal(eth->h_dest, mpc->mps_macs + i * ETH_ALEN))
 			if (send_via_shortcut(skb, mpc) == 0) /* try shortcut */
 				return NETDEV_TX_OK;
 		i++;
@@ -707,7 +706,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		dprintk("(%s) control packet arrived\n", dev->name);
 		/* Pass control packets to daemon */
 		skb_queue_tail(&sk->sk_receive_queue, skb);
-		sk->sk_data_ready(sk, skb->len);
+		sk->sk_data_ready(sk);
 		return;
 	}
 
@@ -993,20 +992,18 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
 
 	sk = sk_atm(mpc->mpoad_vcc);
 	skb_queue_tail(&sk->sk_receive_queue, skb);
-	sk->sk_data_ready(sk, skb->len);
+	sk->sk_data_ready(sk);
 
 	return 0;
 }
 
 static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
-			       unsigned long event, void *dev_ptr)
+			       unsigned long event, void *ptr)
 {
-	struct net_device *dev;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct mpoa_client *mpc;
 	struct lec_priv *priv;
 
-	dev = (struct net_device *)dev_ptr;
-
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
@@ -1276,7 +1273,7 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
 
 	sk = sk_atm(vcc);
 	skb_queue_tail(&sk->sk_receive_queue, skb);
-	sk->sk_data_ready(sk, skb->len);
+	sk->sk_data_ready(sk);
 	dprintk("exiting\n");
 }
 
@@ -1495,7 +1492,7 @@ static void __exit atm_mpoa_cleanup(void)
 
 	mpc_proc_clean();
 
-	del_timer(&mpc_timer);
+	del_timer_sync(&mpc_timer);
 	unregister_netdevice_notifier(&mpoa_notifier);
 	deregister_atm_ioctl(&atm_ioctl_ops);
 
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 53e50029227..5bdd300db0f 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -207,7 +207,7 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
 			      size_t nbytes, loff_t *ppos)
 {
 	char *page, *p;
-	unsigned len;
+	unsigned int len;
 
 	if (nbytes == 0)
 		return 0;
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index e9aced0ec56..c4e09846d1d 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -37,13 +37,14 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/atm.h>
 #include <linux/atmdev.h>
 #include <linux/capability.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/ppp_channel.h>
 #include <linux/atmppp.h>
 
@@ -59,14 +60,29 @@ struct pppoatm_vcc {
 	struct atm_vcc	*atmvcc;	/* VCC descriptor */
 	void (*old_push)(struct atm_vcc *, struct sk_buff *);
 	void (*old_pop)(struct atm_vcc *, struct sk_buff *);
+	void (*old_release_cb)(struct atm_vcc *);
+	struct module *old_owner;
 					/* keep old push/pop for detaching */
 	enum pppoatm_encaps encaps;
+	atomic_t inflight;
+	unsigned long blocked;
 	int flags;			/* SC_COMP_PROT - compress protocol */
 	struct ppp_channel chan;	/* interface to generic ppp layer */
 	struct tasklet_struct wakeup_tasklet;
 };
 
 /*
+ * We want to allow two packets in the queue. The one that's currently in
+ * flight, and *one* queued up ready for the ATM device to send immediately
+ * from its TX done IRQ. We want to be able to use atomic_inc_not_zero(), so
+ * inflight == -2 represents an empty queue, -1 one packet, and zero means
+ * there are two packets in the queue.
+ */
+#define NONE_INFLIGHT -2
+
+#define BLOCKED 0
+
+/*
  * Header used for LLC Encapsulated PPP (4 bytes) followed by the LCP protocol
  * ID (0xC021) used in autodetection
  */
@@ -93,6 +109,24 @@ static void pppoatm_wakeup_sender(unsigned long arg)
 	ppp_output_wakeup((struct ppp_channel *) arg);
 }
 
+static void pppoatm_release_cb(struct atm_vcc *atmvcc)
+{
+	struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
+
+	/*
+	 * ->release_cb hook: wake the sender if pppoatm_send() set BLOCKED.
+	 * As in pppoatm_pop(), clearing the bit here is safe because the wakeup
+	 * *can't* race with pppoatm_send(): both hold the PPP channel's ->downl
+	 * lock. The race with *setting* it, which leads to the double-check in
+	 * pppoatm_may_send(), doesn't exist here. In the sock_owned_by_user()
+	 * case pppoatm_send() sets BLOCKED while the socket is still locked, so
+	 * ->release_cb() can't be called until that's done.
+	 */
+	if (test_and_clear_bit(BLOCKED, &pvcc->blocked))
+		tasklet_schedule(&pvcc->wakeup_tasklet);
+	if (pvcc->old_release_cb)	/* chain to the displaced callback */
+		pvcc->old_release_cb(atmvcc);
+}
 /*
  * This gets called every time the ATM card has finished sending our
  * skb.  The ->old_pop will take care up normal atm flow control,
@@ -101,16 +135,30 @@ static void pppoatm_wakeup_sender(unsigned long arg)
 static void pppoatm_pop(struct atm_vcc *atmvcc, struct sk_buff *skb)
 {
 	struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
+
 	pvcc->old_pop(atmvcc, skb);
+	atomic_dec(&pvcc->inflight);
+
 	/*
-	 * We don't really always want to do this since it's
-	 * really inefficient - it would be much better if we could
-	 * test if we had actually throttled the generic layer.
-	 * Unfortunately then there would be a nasty SMP race where
-	 * we could clear that flag just as we refuse another packet.
-	 * For now we do the safe thing.
+	 * We always used to run the wakeup tasklet unconditionally here, for
+	 * fear of race conditions where we clear the BLOCKED flag just as we
+	 * refuse another packet in pppoatm_send(). This was quite inefficient.
+	 *
+	 * In fact it's OK. The PPP core will only ever call pppoatm_send()
+	 * while holding the channel->downl lock. And ppp_output_wakeup() as
+	 * called by the tasklet will *also* grab that lock. So even if another
+	 * CPU is in pppoatm_send() right now, the tasklet isn't going to race
+	 * with it. The wakeup *will* happen after the other CPU is safely out
+	 * of pppoatm_send() again.
+	 *
+	 * So if the CPU in pppoatm_send() has already set the BLOCKED bit and
+	 * is about to return, that's fine. We trigger a wakeup which will
+	 * happen later. And if the CPU in pppoatm_send() *hasn't* set the
+	 * BLOCKED bit yet, that's fine too because of the double check in
+	 * pppoatm_may_send() which is commented there.
 	 */
-	tasklet_schedule(&pvcc->wakeup_tasklet);
+	if (test_and_clear_bit(BLOCKED, &pvcc->blocked))
+		tasklet_schedule(&pvcc->wakeup_tasklet);
 }
 
 /*
@@ -123,12 +171,11 @@ static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc)
 	pvcc = atmvcc_to_pvcc(atmvcc);
 	atmvcc->push = pvcc->old_push;
 	atmvcc->pop = pvcc->old_pop;
+	atmvcc->release_cb = pvcc->old_release_cb;
 	tasklet_kill(&pvcc->wakeup_tasklet);
 	ppp_unregister_channel(&pvcc->chan);
 	atmvcc->user_back = NULL;
 	kfree(pvcc);
-	/* Gee, I hope we have the big kernel lock here... */
-	module_put(THIS_MODULE);
 }
 
 /* Called when an AAL5 PDU comes in */
@@ -137,9 +184,13 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
 	pr_debug("\n");
 	if (skb == NULL) {			/* VCC was closed */
+		struct module *module;
+
 		pr_debug("removing ATMPPP VCC %p\n", pvcc);
+		module = pvcc->old_owner;
 		pppoatm_unassign_vcc(atmvcc);
 		atmvcc->push(atmvcc, NULL);	/* Pass along bad news */
+		module_put(module);
 		return;
 	}
 	atm_return(atmvcc, skb->truesize);
@@ -183,6 +234,51 @@ error:
 	ppp_input_error(&pvcc->chan, 0);
 }
 
+static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
+{
+	/*
+	 * Returns 1 once a queue slot is claimed, 0 (with BLOCKED set) if not.
+	 * It's not clear that we need to bother with using atm_may_send() to
+	 * check we don't exceed sk->sk_sndbuf. If userspace sets sk_sndbuf
+	 * lower than the MTU, we're going to block for ever. But the code
+	 * always did that before we introduced the packet count limit, so...
+	 */
+	if (atm_may_send(pvcc->atmvcc, size) &&
+	    atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
+		return 1;
+
+	/*
+	 * We use test_and_set_bit() rather than set_bit() here because
+	 * we need to ensure there's a memory barrier after it. The bit
+	 * *must* be set before we do the atomic_inc() on pvcc->inflight.
+	 * There's no smp_mb__after_set_bit(), so it's this or abuse
+	 * smp_mb__after_atomic().
+	 */
+	test_and_set_bit(BLOCKED, &pvcc->blocked);
+
+	/*
+	 * We may have raced with pppoatm_pop(). If it ran for the
+	 * last packet in the queue, *just* before we set the BLOCKED
+	 * bit, then it might never run again and the channel could
+	 * remain permanently blocked. Cope with that race by checking
+	 * *again*. If it did run in that window, we'll have space on
+	 * the queue now and can return success. It's harmless to leave
+	 * the BLOCKED flag set, since it's only used as a trigger to
+	 * run the wakeup tasklet. Another wakeup will never hurt.
+	 * If pppoatm_pop() is running but hasn't got as far as making
+	 * space on the queue yet, then it hasn't checked the BLOCKED
+	 * flag yet either, so we're safe in that case too. It'll issue
+	 * an "immediate" wakeup... where "immediate" actually involves
+	 * taking the PPP channel's ->downl lock, which is held by the
+	 * code path that calls pppoatm_send(), and is thus going to
+	 * wait for us to finish.
+	 */
+	if (atm_may_send(pvcc->atmvcc, size) &&
+	    atomic_inc_not_zero(&pvcc->inflight))
+		return 1;
+
+	return 0;	/* still full; BLOCKED stays set for a later wakeup */
+}
 /*
  * Called by the ppp_generic.c to send a packet - returns true if packet
  * was accepted.  If we return false, then it's our job to call
@@ -196,33 +292,59 @@ error:
 static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
 {
 	struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
+	struct atm_vcc *vcc;
+	int ret;
+
 	ATM_SKB(skb)->vcc = pvcc->atmvcc;
 	pr_debug("(skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc);
 	if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT))
 		(void) skb_pull(skb, 1);
+
+	vcc = ATM_SKB(skb)->vcc;
+	bh_lock_sock(sk_atm(vcc));
+	if (sock_owned_by_user(sk_atm(vcc))) {
+		/*
+		 * Needs to happen (and be flushed, hence test_and_) before we unlock
+		 * the socket. It needs to be seen by the time our ->release_cb gets
+		 * called.
+		 */
+		test_and_set_bit(BLOCKED, &pvcc->blocked);
+		goto nospace;
+	}
+	if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
+	    test_bit(ATM_VF_CLOSE, &vcc->flags) ||
+	    !test_bit(ATM_VF_READY, &vcc->flags)) {
+		bh_unlock_sock(sk_atm(vcc));
+		kfree_skb(skb);
+		return DROP_PACKET;
+	}
+
 	switch (pvcc->encaps) {		/* LLC encapsulation needed */
 	case e_llc:
 		if (skb_headroom(skb) < LLC_LEN) {
 			struct sk_buff *n;
 			n = skb_realloc_headroom(skb, LLC_LEN);
 			if (n != NULL &&
-			    !atm_may_send(pvcc->atmvcc, n->truesize)) {
+			    !pppoatm_may_send(pvcc, n->truesize)) {
 				kfree_skb(n);
 				goto nospace;
 			}
-			kfree_skb(skb);
+			consume_skb(skb);
 			skb = n;
-			if (skb == NULL)
+			if (skb == NULL) {
+				bh_unlock_sock(sk_atm(vcc));
 				return DROP_PACKET;
-		} else if (!atm_may_send(pvcc->atmvcc, skb->truesize))
+			}
+		} else if (!pppoatm_may_send(pvcc, skb->truesize))
 			goto nospace;
 		memcpy(skb_push(skb, LLC_LEN), pppllc, LLC_LEN);
 		break;
 	case e_vc:
-		if (!atm_may_send(pvcc->atmvcc, skb->truesize))
+		if (!pppoatm_may_send(pvcc, skb->truesize))
 			goto nospace;
 		break;
 	case e_autodetect:
+		bh_unlock_sock(sk_atm(vcc));
 		pr_debug("Trying to send without setting encaps!\n");
 		kfree_skb(skb);
 		return 1;
@@ -232,9 +354,12 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
 	ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
 		 skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
-	return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
+	ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
 	    ? DROP_PACKET : 1;
+	bh_unlock_sock(sk_atm(vcc));
+	return ret;
 nospace:
+	bh_unlock_sock(sk_atm(vcc));
 	/*
 	 * We don't have space to send this SKB now, but we might have
 	 * already applied SC_COMP_PROT compression, so may need to undo
@@ -284,8 +409,13 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
 	if (pvcc == NULL)
 		return -ENOMEM;
 	pvcc->atmvcc = atmvcc;
+
+	/* Maximum is zero, so that we can use atomic_inc_not_zero() */
+	atomic_set(&pvcc->inflight, NONE_INFLIGHT);
 	pvcc->old_push = atmvcc->push;
 	pvcc->old_pop = atmvcc->pop;
+	pvcc->old_owner = atmvcc->owner;
+	pvcc->old_release_cb = atmvcc->release_cb;
 	pvcc->encaps = (enum pppoatm_encaps) be.encaps;
 	pvcc->chan.private = pvcc;
 	pvcc->chan.ops = &pppoatm_ops;
@@ -301,7 +431,13 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
 	atmvcc->user_back = pvcc;
 	atmvcc->push = pppoatm_push;
 	atmvcc->pop = pppoatm_pop;
+	atmvcc->release_cb = pppoatm_release_cb;
 	__module_get(THIS_MODULE);
+	atmvcc->owner = THIS_MODULE;
+
+	/* re-process everything received between connection setup and
+	   backend setup */
+	vcc_process_recv_queue(atmvcc);
 	return 0;
 }
 
@@ -326,6 +462,8 @@ static int pppoatm_ioctl(struct socket *sock, unsigned int cmd,
 			return -ENOIOCTLCMD;
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
+		if (sock->state != SS_CONNECTED)
+			return -EINVAL;
 		return pppoatm_assign_vcc(atmvcc, argp);
 		}
 	case PPPIOCGCHAN:
diff --git a/net/atm/proc.c b/net/atm/proc.c
index be3afdefec5..bbb6461a4b7 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -27,7 +27,7 @@
 #include <net/atmclip.h>
 #include <linux/uaccess.h>
 #include <linux/param.h> /* for HZ */
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include "resources.h"
 #include "common.h" /* atm_proc_init prototype */
 #include "signaling.h" /* to get sigd - ugly too */
@@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 	page = get_zeroed_page(GFP_KERNEL);
 	if (!page)
 		return -ENOMEM;
-	dev = PDE(file->f_path.dentry->d_inode)->data;
+	dev = PDE_DATA(file_inode(file));
 	if (!dev->ops->proc_read)
 		length = -EINVAL;
 	else {
@@ -460,7 +460,7 @@ static void atm_proc_dirs_remove(void)
 		if (e->dirent)
 			remove_proc_entry(e->name, atm_proc_root);
 	}
-	proc_net_remove(&init_net, "atm");
+	remove_proc_entry("atm", init_net.proc_net);
 }
 
 int __init atm_proc_init(void)
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 437ee70c5e6..ae032402140 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <linux/bitops.h>
+#include <linux/export.h>
 #include <net/sock.h>		/* for sock_no_* */
 
 #include "resources.h"		/* devs and vccs */
@@ -94,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
 		return -ENOTCONN;
 	*sockaddr_len = sizeof(struct sockaddr_atmpvc);
 	addr = (struct sockaddr_atmpvc *)sockaddr;
+	memset(addr, 0, sizeof(*addr));
 	addr->sap_family = AF_ATMPVC;
 	addr->sap_addr.itf = vcc->dev->number;
 	addr->sap_addr.vpi = vcc->vpi;
diff --git a/net/atm/raw.c b/net/atm/raw.c
index b4f7b9ff3c7..2e17e97a7a8 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -25,7 +25,7 @@ static void atm_push_raw(struct atm_vcc *vcc, struct sk_buff *skb)
 		struct sock *sk = sk_atm(vcc);
 
 		skb_queue_tail(&sk->sk_receive_queue, skb);
-		sk->sk_data_ready(sk, skb->len);
+		sk->sk_data_ready(sk);
 	}
 }
 
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 23f45ce6f35..0447d5d0b63 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -432,7 +432,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 			size = dev->ops->ioctl(dev, cmd, buf);
 		}
 		if (size < 0) {
-			error = (size == -ENOIOCTLCMD ? -EINVAL : size);
+			error = (size == -ENOIOCTLCMD ? -ENOTTY : size);
 			goto done;
 		}
 	}
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 509c8ac02b6..523bce72f69 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -51,7 +51,7 @@ static void sigd_put_skb(struct sk_buff *skb)
 #endif
 	atm_force_charge(sigd, skb->truesize);
 	skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb);
-	sk_atm(sigd)->sk_data_ready(sk_atm(sigd), skb->len);
+	sk_atm(sigd)->sk_data_ready(sk_atm(sigd));
 }
 
 static void modify_qos(struct atm_vcc *vcc, struct atmsvc_msg *msg)
@@ -166,7 +166,7 @@ void sigd_enq2(struct atm_vcc *vcc, enum atmsvc_msg_type type,
 {
 	struct sk_buff *skb;
 	struct atmsvc_msg *msg;
-	static unsigned session = 0;
+	static unsigned int session = 0;
 
 	pr_debug("%d (0x%p)\n", (int)type, vcc);
 	while (!(skb = alloc_skb(sizeof(struct atmsvc_msg), GFP_KERNEL)))
@@ -217,7 +217,6 @@ static void purge_vcc(struct atm_vcc *vcc)
 
 static void sigd_close(struct atm_vcc *vcc)
 {
-	struct hlist_node *node;
 	struct sock *s;
 	int i;
 
@@ -231,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc)
 	for (i = 0; i < VCC_HTABLE_SIZE; ++i) {
 		struct hlist_head *head = &vcc_hash[i];
 
-		sk_for_each(s, node, head) {
+		sk_for_each(s, head) {
 			vcc = atm_sk(s);
 
 			purge_vcc(vcc);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 754ee4791d9..d8e5d0c2ebb 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -20,6 +20,7 @@
 #include <linux/bitops.h>
 #include <net/sock.h>		/* for sock_no_* */
 #include <linux/uaccess.h>
+#include <linux/export.h>
 
 #include "resources.h"
 #include "common.h"		/* common for PVCs and SVCs */
@@ -262,17 +263,11 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 			goto out;
 		}
 	}
-/*
- * Not supported yet
- *
- * #ifndef CONFIG_SINGLE_SIGITF
- */
+
 	vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp);
 	vcc->qos.txtp.pcr = 0;
 	vcc->qos.txtp.min_pcr = 0;
-/*
- * #endif
- */
+
 	error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci);
 	if (!error)
 		sock->state = SS_CONNECTED;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index e7c69f4619e..c35c3f48fc0 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -33,7 +33,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
 #include <linux/mm.h>
@@ -82,14 +81,13 @@ static void ax25_kill_by_device(struct net_device *dev)
 {
 	ax25_dev *ax25_dev;
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
 		return;
 
 	spin_lock_bh(&ax25_list_lock);
 again:
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->ax25_dev == ax25_dev) {
 			s->ax25_dev = NULL;
 			spin_unlock_bh(&ax25_list_lock);
@@ -113,9 +111,9 @@ again:
  *	Handle device status changes.
  */
 static int ax25_device_event(struct notifier_block *this, unsigned long event,
-	void *ptr)
+			     void *ptr)
 {
-	struct net_device *dev = (struct net_device *)ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
@@ -159,10 +157,9 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi,
 	struct net_device *dev, int type)
 {
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if ((s->iamdigi && !digi) || (!s->iamdigi && digi))
 			continue;
 		if (s->sk && !ax25cmp(&s->source_addr, addr) &&
@@ -188,10 +185,9 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
 {
 	struct sock *sk = NULL;
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->sk && !ax25cmp(&s->source_addr, my_addr) &&
 		    !ax25cmp(&s->dest_addr, dest_addr) &&
 		    s->sk->sk_type == type) {
@@ -214,10 +210,9 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr,
 	ax25_digi *digi, struct net_device *dev)
 {
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->sk && s->sk->sk_type != SOCK_SEQPACKET)
 			continue;
 		if (s->ax25_dev == NULL)
@@ -249,10 +244,9 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
 {
 	ax25_cb *s;
 	struct sk_buff *copy;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 &&
 		    s->sk->sk_type == SOCK_RAW &&
 		    s->sk->sk_protocol == proto &&
@@ -402,14 +396,14 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
 		break;
 
 	case AX25_T1:
-		if (ax25_ctl.arg < 1)
+		if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ)
 			goto einval_put;
 		ax25->rtt = (ax25_ctl.arg * HZ) / 2;
 		ax25->t1  = ax25_ctl.arg * HZ;
 		break;
 
 	case AX25_T2:
-		if (ax25_ctl.arg < 1)
+		if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ)
 			goto einval_put;
 		ax25->t2 = ax25_ctl.arg * HZ;
 		break;
@@ -422,10 +416,15 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
 		break;
 
 	case AX25_T3:
+		if (ax25_ctl.arg > ULONG_MAX / HZ)
+			goto einval_put;
 		ax25->t3 = ax25_ctl.arg * HZ;
 		break;
 
 	case AX25_IDLE:
+		if (ax25_ctl.arg > ULONG_MAX / (60 * HZ))
+			goto einval_put;
+
 		ax25->idle = ax25_ctl.arg * 60 * HZ;
 		break;
 
@@ -540,15 +539,16 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 	ax25_cb *ax25;
 	struct net_device *dev;
 	char devname[IFNAMSIZ];
-	int opt, res = 0;
+	unsigned long opt;
+	int res = 0;
 
 	if (level != SOL_AX25)
 		return -ENOPROTOOPT;
 
-	if (optlen < sizeof(int))
+	if (optlen < sizeof(unsigned int))
 		return -EINVAL;
 
-	if (get_user(opt, (int __user *)optval))
+	if (get_user(opt, (unsigned int __user *)optval))
 		return -EFAULT;
 
 	lock_sock(sk);
@@ -571,7 +571,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case AX25_T1:
-		if (opt < 1) {
+		if (opt < 1 || opt > ULONG_MAX / HZ) {
 			res = -EINVAL;
 			break;
 		}
@@ -580,7 +580,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case AX25_T2:
-		if (opt < 1) {
+		if (opt < 1 || opt > ULONG_MAX / HZ) {
 			res = -EINVAL;
 			break;
 		}
@@ -596,7 +596,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case AX25_T3:
-		if (opt < 1) {
+		if (opt < 1 || opt > ULONG_MAX / HZ) {
 			res = -EINVAL;
 			break;
 		}
@@ -604,7 +604,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case AX25_IDLE:
-		if (opt < 0) {
+		if (opt > ULONG_MAX / (60 * HZ)) {
 			res = -EINVAL;
 			break;
 		}
@@ -612,7 +612,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case AX25_BACKOFF:
-		if (opt < 0 || opt > 2) {
+		if (opt > 2) {
 			res = -EINVAL;
 			break;
 		}
@@ -837,6 +837,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 		case AX25_P_NETROM:
 			if (ax25_protocol_is_registered(AX25_P_NETROM))
 				return -ESOCKTNOSUPPORT;
+			break;
 #endif
 #ifdef CONFIG_ROSE_MODULE
 		case AX25_P_ROSE:
@@ -1434,7 +1435,7 @@ out:
 static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 			struct msghdr *msg, size_t len)
 {
-	struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)msg->msg_name;
+	DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name);
 	struct sock *sk = sock->sk;
 	struct sockaddr_ax25 sax;
 	struct sk_buff *skb;
@@ -1635,12 +1636,13 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
-	if (msg->msg_namelen != 0) {
-		struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
+	if (msg->msg_name) {
 		ax25_digi digi;
 		ax25_address src;
 		const unsigned char *mac = skb_mac_header(skb);
+		DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name);
 
+		memset(sax, 0, sizeof(struct full_sockaddr_ax25));
 		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
 				&digi, NULL, NULL);
 		sax->sax25_family = AF_AX25;
@@ -1733,7 +1735,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			res = -EFAULT;
 			break;
 		}
-		if (amount > AX25_NOUID_BLOCK) {
+		if (amount < 0 || amount > AX25_NOUID_BLOCK) {
 			res = -EINVAL;
 			break;
 		}
@@ -1972,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = {
 };
 
 static struct notifier_block ax25_dev_notifier = {
-	.notifier_call =ax25_device_event,
+	.notifier_call = ax25_device_event,
 };
 
 static int __init ax25_init(void)
@@ -1985,11 +1987,11 @@ static int __init ax25_init(void)
 	sock_register(&ax25_family_ops);
 	dev_add_pack(&ax25_packet_type);
 	register_netdevice_notifier(&ax25_dev_notifier);
-	ax25_register_sysctl();
 
-	proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops);
-	proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops);
-	proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops);
+	proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+		    &ax25_route_fops);
+	proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
+	proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
 out:
 	return rc;
 }
@@ -2003,19 +2005,19 @@ MODULE_ALIAS_NETPROTO(PF_AX25);
 
 static void __exit ax25_exit(void)
 {
-	proc_net_remove(&init_net, "ax25_route");
-	proc_net_remove(&init_net, "ax25");
-	proc_net_remove(&init_net, "ax25_calls");
-	ax25_rt_free();
-	ax25_uid_free();
-	ax25_dev_free();
+	remove_proc_entry("ax25_route", init_net.proc_net);
+	remove_proc_entry("ax25", init_net.proc_net);
+	remove_proc_entry("ax25_calls", init_net.proc_net);
 
-	ax25_unregister_sysctl();
 	unregister_netdevice_notifier(&ax25_dev_notifier);
 
 	dev_remove_pack(&ax25_packet_type);
 
 	sock_unregister(PF_AX25);
 	proto_unregister(&ax25_proto);
+
+	ax25_rt_free();
+	ax25_uid_free();
+	ax25_dev_free();
 }
 module_exit(ax25_exit);
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index 7e7964dd987..e7c9b0ea17a 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -22,7 +22,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -190,8 +189,10 @@ const unsigned char *ax25_addr_parse(const unsigned char *buf, int len,
 	digi->ndigi      = 0;
 
 	while (!(buf[-1] & AX25_EBIT)) {
-		if (d >= AX25_MAX_DIGIS)  return NULL;	/* Max of 6 digis */
-		if (len < 7) return NULL;	/* Short packet */
+		if (d >= AX25_MAX_DIGIS)
+			return NULL;
+		if (len < AX25_ADDR_LEN)
+			return NULL;
 
 		memcpy(&digi->calls[d], buf, AX25_ADDR_LEN);
 		digi->ndigi = d + 1;
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c1cb982f6e8..3d106767b27 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -24,7 +24,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -60,8 +59,6 @@ void ax25_dev_device_up(struct net_device *dev)
 		return;
 	}
 
-	ax25_unregister_sysctl();
-
 	dev->ax25_ptr     = ax25_dev;
 	ax25_dev->dev     = dev;
 	dev_hold(dev);
@@ -91,7 +88,7 @@ void ax25_dev_device_up(struct net_device *dev)
 	ax25_dev_list  = ax25_dev;
 	spin_unlock_bh(&ax25_dev_lock);
 
-	ax25_register_sysctl();
+	ax25_register_dev_sysctl(ax25_dev);
 }
 
 void ax25_dev_device_down(struct net_device *dev)
@@ -101,7 +98,7 @@ void ax25_dev_device_down(struct net_device *dev)
 	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
 		return;
 
-	ax25_unregister_sysctl();
+	ax25_unregister_dev_sysctl(ax25_dev);
 
 	spin_lock_bh(&ax25_dev_lock);
 
@@ -121,7 +118,6 @@ void ax25_dev_device_down(struct net_device *dev)
 		spin_unlock_bh(&ax25_dev_lock);
 		dev_put(dev);
 		kfree(ax25_dev);
-		ax25_register_sysctl();
 		return;
 	}
 
@@ -131,7 +127,6 @@ void ax25_dev_device_down(struct net_device *dev)
 			spin_unlock_bh(&ax25_dev_lock);
 			dev_put(dev);
 			kfree(ax25_dev);
-			ax25_register_sysctl();
 			return;
 		}
 
@@ -139,8 +134,6 @@ void ax25_dev_device_down(struct net_device *dev)
 	}
 	spin_unlock_bh(&ax25_dev_lock);
 	dev->ax25_ptr = NULL;
-
-	ax25_register_sysctl();
 }
 
 int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index 8273b1200ee..9bd31e88aec 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -23,7 +23,6 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 85816e612dc..e05bd57b5af 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -24,7 +24,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -40,7 +39,6 @@ void ax25_ds_nr_error_recovery(ax25_cb *ax25)
 void ax25_ds_enquiry_response(ax25_cb *ax25)
 {
 	ax25_cb *ax25o;
-	struct hlist_node *node;
 
 	/* Please note that neither DK4EG's nor DG2FEF's
 	 * DAMA spec mention the following behaviour as seen
@@ -81,7 +79,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25)
 	ax25_ds_set_timer(ax25->ax25_dev);
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25o, node, &ax25_list) {
+	ax25_for_each(ax25o, &ax25_list) {
 		if (ax25o == ax25)
 			continue;
 
@@ -160,10 +158,9 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev)
 {
 	ax25_cb *ax25;
 	int res = 0;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25, node, &ax25_list)
+	ax25_for_each(ax25, &ax25_list)
 		if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) {
 			res = 1;
 			break;
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index c7d81436213..951cd57bb07 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -25,7 +25,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -71,7 +70,6 @@ static void ax25_ds_timeout(unsigned long arg)
 {
 	ax25_dev *ax25_dev = (struct ax25_dev *) arg;
 	ax25_cb *ax25;
-	struct hlist_node *node;
 
 	if (ax25_dev == NULL || !ax25_dev->dama.slave)
 		return;			/* Yikes! */
@@ -82,7 +80,7 @@ static void ax25_ds_timeout(unsigned long arg)
 	}
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25, node, &ax25_list) {
+	ax25_for_each(ax25, &ax25_list) {
 		if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE))
 			continue;
 
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 60b545e2822..7f16e8a931b 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -24,7 +24,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -194,10 +193,9 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev)
 void ax25_link_failed(ax25_cb *ax25, int reason)
 {
 	struct ax25_linkfail *lf;
-	struct hlist_node *node;
 
 	spin_lock_bh(&linkfail_lock);
-	hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node)
+	hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node)
 		lf->func(ax25, reason);
 	spin_unlock_bh(&linkfail_lock);
 }
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 9bb77654120..7ed8ab72481 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -27,7 +27,6 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -423,7 +422,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	if (sk) {
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk, skb->len);
+			sk->sk_data_ready(sk);
 		sock_put(sk);
 	} else {
 free:
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index cf0c47a2653..67de6b33f2c 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -24,7 +24,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
 #include <linux/mm.h>
@@ -49,7 +48,7 @@
 
 int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
 		     unsigned short type, const void *daddr,
-		     const void *saddr, unsigned len)
+		     const void *saddr, unsigned int len)
 {
 	unsigned char *buff;
 
@@ -220,7 +219,7 @@ put:
 
 int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
 		     unsigned short type, const void *daddr,
-		     const void *saddr, unsigned len)
+		     const void *saddr, unsigned int len)
 {
 	return -AX25_HEADER_LEN;
 }
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 37507d806f6..be2acab9be9 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -27,7 +27,6 @@
 #include <linux/netfilter.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -351,7 +350,7 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 		if (skb->sk != NULL)
 			skb_set_owner_w(skbn, skb->sk);
 
-		kfree_skb(skb);
+		consume_skb(skb);
 		skb = skbn;
 	}
 
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index a1690845dc6..d39097737e3 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -32,12 +32,12 @@
 #include <linux/spinlock.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
+#include <linux/export.h>
 
 static ax25_route *ax25_route_list;
 static DEFINE_RWLOCK(ax25_route_lock);
@@ -474,7 +474,7 @@ struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
 		if (skb->sk != NULL)
 			skb_set_owner_w(skbn, skb->sk);
 
-		kfree_skb(skb);
+		consume_skb(skb);
 
 		skb = skbn;
 	}
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
index a8eef88d865..3fbf8f7b2cf 100644
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -30,7 +30,6 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
index 277f81bb979..8b66a41e538 100644
--- a/net/ax25/ax25_std_subr.c
+++ b/net/ax25/ax25_std_subr.c
@@ -21,7 +21,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 96e4b927325..004467c9e6e 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -25,7 +25,6 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index c6715ee4ab8..1997538a5d2 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -26,7 +26,6 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index db29ea71e80..c3cffa79baf 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -29,7 +29,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index d349be9578f..71c4badbc80 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -26,7 +26,6 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -37,6 +36,7 @@
 #include <linux/stat.h>
 #include <linux/netfilter.h>
 #include <linux/sysctl.h>
+#include <linux/export.h>
 #include <net/ip.h>
 #include <net/arp.h>
 
@@ -51,14 +51,13 @@ int ax25_uid_policy;
 
 EXPORT_SYMBOL(ax25_uid_policy);
 
-ax25_uid_assoc *ax25_findbyuid(uid_t uid)
+ax25_uid_assoc *ax25_findbyuid(kuid_t uid)
 {
 	ax25_uid_assoc *ax25_uid, *res = NULL;
-	struct hlist_node *node;
 
 	read_lock(&ax25_uid_lock);
-	ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
-		if (ax25_uid->uid == uid) {
+	ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
+		if (uid_eq(ax25_uid->uid, uid)) {
 			ax25_uid_hold(ax25_uid);
 			res = ax25_uid;
 			break;
@@ -74,7 +73,6 @@ EXPORT_SYMBOL(ax25_findbyuid);
 int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 {
 	ax25_uid_assoc *ax25_uid;
-	struct hlist_node *node;
 	ax25_uid_assoc *user;
 	unsigned long res;
 
@@ -82,9 +80,9 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 	case SIOCAX25GETUID:
 		res = -ENOENT;
 		read_lock(&ax25_uid_lock);
-		ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+		ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
-				res = ax25_uid->uid;
+				res = from_kuid_munged(current_user_ns(), ax25_uid->uid);
 				break;
 			}
 		}
@@ -93,9 +91,14 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 		return res;
 
 	case SIOCAX25ADDUID:
+	{
+		kuid_t sax25_kuid;
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		user = ax25_findbyuid(sax->sax25_uid);
+		sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid);
+		if (!uid_valid(sax25_kuid))
+			return -EINVAL;
+		user = ax25_findbyuid(sax25_kuid);
 		if (user) {
 			ax25_uid_put(user);
 			return -EEXIST;
@@ -106,7 +109,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 			return -ENOMEM;
 
 		atomic_set(&ax25_uid->refcount, 1);
-		ax25_uid->uid  = sax->sax25_uid;
+		ax25_uid->uid  = sax25_kuid;
 		ax25_uid->call = sax->sax25_call;
 
 		write_lock(&ax25_uid_lock);
@@ -114,14 +117,14 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 		write_unlock(&ax25_uid_lock);
 
 		return 0;
-
+	}
 	case SIOCAX25DELUID:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
 		ax25_uid = NULL;
 		write_lock(&ax25_uid_lock);
-		ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+		ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0)
 				break;
 		}
@@ -172,7 +175,9 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
 		struct ax25_uid_assoc *pt;
 
 		pt = hlist_entry(v, struct ax25_uid_assoc, uid_node);
-		seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(buf, &pt->call));
+		seq_printf(seq, "%6d %s\n",
+			from_kuid_munged(seq_user_ns(seq), pt->uid),
+			ax2asc(buf, &pt->call));
 	}
 	return 0;
 }
@@ -205,11 +210,10 @@ const struct file_operations ax25_uid_fops = {
 void __exit ax25_uid_free(void)
 {
 	ax25_uid_assoc *ax25_uid;
-	struct hlist_node *node;
 
 	write_lock(&ax25_uid_lock);
 again:
-	ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+	ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 		hlist_del_init(&ax25_uid->uid_node);
 		ax25_uid_put(ax25_uid);
 		goto again;
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index ebe0ef3f1d8..919a5ce4751 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -29,18 +29,7 @@ static int min_proto[1],		max_proto[] = { AX25_PROTO_MAX };
 static int min_ds_timeout[1],		max_ds_timeout[] = {65535000};
 #endif
 
-static struct ctl_table_header *ax25_table_header;
-
-static ctl_table *ax25_table;
-static int ax25_table_size;
-
-static struct ctl_path ax25_path[] = {
-	{ .procname = "net", },
-	{ .procname = "ax25", },
-	{ }
-};
-
-static const ctl_table ax25_param_table[] = {
+static const struct ctl_table ax25_param_table[] = {
 	{
 		.procname	= "ip_default_mode",
 		.maxlen		= sizeof(int),
@@ -159,52 +148,37 @@ static const ctl_table ax25_param_table[] = {
 	{ }	/* that's all, folks! */
 };
 
-void ax25_register_sysctl(void)
+int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
 {
-	ax25_dev *ax25_dev;
-	int n, k;
-
-	spin_lock_bh(&ax25_dev_lock);
-	for (ax25_table_size = sizeof(ctl_table), ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
-		ax25_table_size += sizeof(ctl_table);
-
-	if ((ax25_table = kzalloc(ax25_table_size, GFP_ATOMIC)) == NULL) {
-		spin_unlock_bh(&ax25_dev_lock);
-		return;
-	}
-
-	for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) {
-		struct ctl_table *child = kmemdup(ax25_param_table,
-						  sizeof(ax25_param_table),
-						  GFP_ATOMIC);
-		if (!child) {
-			while (n--)
-				kfree(ax25_table[n].child);
-			kfree(ax25_table);
-			spin_unlock_bh(&ax25_dev_lock);
-			return;
-		}
-		ax25_table[n].child = ax25_dev->systable = child;
-		ax25_table[n].procname     = ax25_dev->dev->name;
-		ax25_table[n].mode         = 0555;
-
-
-		for (k = 0; k < AX25_MAX_VALUES; k++)
-			child[k].data = &ax25_dev->values[k];
-
-		n++;
+	char path[sizeof("net/ax25/") + IFNAMSIZ];
+	int k;
+	struct ctl_table *table;
+
+	table = kmemdup(ax25_param_table, sizeof(ax25_param_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	for (k = 0; k < AX25_MAX_VALUES; k++)
+		table[k].data = &ax25_dev->values[k];
+
+	snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name);
+	ax25_dev->sysheader = register_net_sysctl(&init_net, path, table);
+	if (!ax25_dev->sysheader) {
+		kfree(table);
+		return -ENOMEM;
 	}
-	spin_unlock_bh(&ax25_dev_lock);
-
-	ax25_table_header = register_sysctl_paths(ax25_path, ax25_table);
+	return 0;
 }
 
-void ax25_unregister_sysctl(void)
+void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev)
 {
-	ctl_table *p;
-	unregister_sysctl_table(ax25_table_header);
-
-	for (p = ax25_table; p->procname; p++)
-		kfree(p->child);
-	kfree(ax25_table);
+	struct ctl_table_header *header = ax25_dev->sysheader;
+	struct ctl_table *table;
+
+	if (header) {
+		ax25_dev->sysheader = NULL;
+		table = header->ctl_table_arg;
+		unregister_net_sysctl_table(header);
+		kfree(table);
+	}
 }
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 6c051ad833e..11660a3aab5 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -5,20 +5,64 @@
 config BATMAN_ADV
 	tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
 	depends on NET
+	select CRC16
+	select LIBCRC32C
         default n
-	---help---
+	help
+          B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
+          a routing protocol for multi-hop ad-hoc mesh networks. The
+          networks may be wired or wireless. See
+          http://www.open-mesh.org/ for more information and user space
+          tools.
 
-        B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
-        a routing protocol for multi-hop ad-hoc mesh networks. The
-        networks may be wired or wireless. See
-        http://www.open-mesh.org/ for more information and user space
-        tools.
+config BATMAN_ADV_BLA
+	bool "Bridge Loop Avoidance"
+	depends on BATMAN_ADV && INET
+	default y
+	help
+	  This option enables BLA (Bridge Loop Avoidance), a mechanism
+	  to avoid Ethernet frames looping when mesh nodes are connected
+	  to both the same LAN and the same mesh. If you will never use
+	  more than one mesh node in the same LAN, you can safely remove
+	  this feature and save some space.
+
+config BATMAN_ADV_DAT
+	bool "Distributed ARP Table"
+	depends on BATMAN_ADV && INET
+	default n
+	help
+	  This option enables DAT (Distributed ARP Table), a DHT based
+	  mechanism that increases ARP reliability on sparse wireless
+	  mesh networks. If you think that your network does not need
+	  this option you can safely remove it and save some space.
+
+config BATMAN_ADV_NC
+	bool "Network Coding"
+	depends on BATMAN_ADV
+	default n
+	help
+	  This option enables network coding, a mechanism that aims to
+	  increase the overall network throughput by fusing multiple
+	  packets in one transmission.
+	  Note that interfaces controlled by batman-adv must be manually
+	  configured to have promiscuous mode enabled in order to make
+	  network coding work.
+	  If you think that your network does not need this feature you
+	  can safely disable it and save some space.
+
+config BATMAN_ADV_MCAST
+	bool "Multicast optimisation"
+	depends on BATMAN_ADV
+	default n
+	help
+	  This option enables the multicast optimisation which aims to
+	  reduce the air overhead while improving the reliability of
+	  multicast messages.
 
 config BATMAN_ADV_DEBUG
 	bool "B.A.T.M.A.N. debugging"
-	depends on BATMAN_ADV != n
-	---help---
-
+	depends on BATMAN_ADV
+	help
 	  This is an option for use by developers; most people should
 	  say N here. This enables compilation of support for
 	  outputting debugging information to the kernel log. The
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 2de93d00631..eb7d8c0388e 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
@@ -13,27 +13,27 @@
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-# 02110-1301, USA
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 
 obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
-batman-adv-y += aggregation.o
-batman-adv-y += bat_debugfs.o
-batman-adv-y += bat_sysfs.o
+batman-adv-y += bat_iv_ogm.o
 batman-adv-y += bitarray.o
+batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
+batman-adv-y += debugfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
+batman-adv-y += fragmentation.o
 batman-adv-y += gateway_client.o
 batman-adv-y += gateway_common.o
 batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
 batman-adv-y += icmp_socket.o
 batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
-batman-adv-y += ring_buffer.o
 batman-adv-y += routing.o
 batman-adv-y += send.o
 batman-adv-y += soft-interface.o
+batman-adv-y += sysfs.o
 batman-adv-y += translation-table.o
-batman-adv-y += unicast.o
-batman-adv-y += vis.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
deleted file mode 100644
index a8c32030527..00000000000
--- a/net/batman-adv/aggregation.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- *
- */
-
-#include "main.h"
-#include "aggregation.h"
-#include "send.h"
-#include "routing.h"
-#include "hard-interface.h"
-
-/* calculate the size of the tt information for a given packet */
-static int tt_len(struct batman_packet *batman_packet)
-{
-	return batman_packet->num_tt * ETH_ALEN;
-}
-
-/* return true if new_packet can be aggregated with forw_packet */
-static bool can_aggregate_with(struct batman_packet *new_batman_packet,
-			       int packet_len,
-			       unsigned long send_time,
-			       bool directlink,
-			       struct hard_iface *if_incoming,
-			       struct forw_packet *forw_packet)
-{
-	struct batman_packet *batman_packet =
-		(struct batman_packet *)forw_packet->skb->data;
-	int aggregated_bytes = forw_packet->packet_len + packet_len;
-
-	/**
-	 * we can aggregate the current packet to this aggregated packet
-	 * if:
-	 *
-	 * - the send time is within our MAX_AGGREGATION_MS time
-	 * - the resulting packet wont be bigger than
-	 *   MAX_AGGREGATION_BYTES
-	 */
-
-	if (time_before(send_time, forw_packet->send_time) &&
-	    time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
-					forw_packet->send_time) &&
-	    (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
-
-		/**
-		 * check aggregation compatibility
-		 * -> direct link packets are broadcasted on
-		 *    their interface only
-		 * -> aggregate packet if the current packet is
-		 *    a "global" packet as well as the base
-		 *    packet
-		 */
-
-		/* packets without direct link flag and high TTL
-		 * are flooded through the net  */
-		if ((!directlink) &&
-		    (!(batman_packet->flags & DIRECTLINK)) &&
-		    (batman_packet->ttl != 1) &&
-
-		    /* own packets originating non-primary
-		     * interfaces leave only that interface */
-		    ((!forw_packet->own) ||
-		     (forw_packet->if_incoming->if_num == 0)))
-			return true;
-
-		/* if the incoming packet is sent via this one
-		 * interface only - we still can aggregate */
-		if ((directlink) &&
-		    (new_batman_packet->ttl == 1) &&
-		    (forw_packet->if_incoming == if_incoming) &&
-
-		    /* packets from direct neighbors or
-		     * own secondary interface packets
-		     * (= secondary interface packets in general) */
-		    (batman_packet->flags & DIRECTLINK ||
-		     (forw_packet->own &&
-		      forw_packet->if_incoming->if_num != 0)))
-			return true;
-	}
-
-	return false;
-}
-
-/* create a new aggregated packet and add this packet to it */
-static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
-				  unsigned long send_time, bool direct_link,
-				  struct hard_iface *if_incoming,
-				  int own_packet)
-{
-	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
-	struct forw_packet *forw_packet_aggr;
-	unsigned char *skb_buff;
-
-	if (!atomic_inc_not_zero(&if_incoming->refcount))
-		return;
-
-	/* own packet should always be scheduled */
-	if (!own_packet) {
-		if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
-			bat_dbg(DBG_BATMAN, bat_priv,
-				"batman packet queue full\n");
-			goto out;
-		}
-	}
-
-	forw_packet_aggr = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC);
-	if (!forw_packet_aggr) {
-		if (!own_packet)
-			atomic_inc(&bat_priv->batman_queue_left);
-		goto out;
-	}
-
-	if ((atomic_read(&bat_priv->aggregated_ogms)) &&
-	    (packet_len < MAX_AGGREGATION_BYTES))
-		forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
-						      sizeof(struct ethhdr));
-	else
-		forw_packet_aggr->skb = dev_alloc_skb(packet_len +
-						      sizeof(struct ethhdr));
-
-	if (!forw_packet_aggr->skb) {
-		if (!own_packet)
-			atomic_inc(&bat_priv->batman_queue_left);
-		kfree(forw_packet_aggr);
-		goto out;
-	}
-	skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
-
-	INIT_HLIST_NODE(&forw_packet_aggr->list);
-
-	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
-	forw_packet_aggr->packet_len = packet_len;
-	memcpy(skb_buff, packet_buff, packet_len);
-
-	forw_packet_aggr->own = own_packet;
-	forw_packet_aggr->if_incoming = if_incoming;
-	forw_packet_aggr->num_packets = 0;
-	forw_packet_aggr->direct_link_flags = 0;
-	forw_packet_aggr->send_time = send_time;
-
-	/* save packet direct link flag status */
-	if (direct_link)
-		forw_packet_aggr->direct_link_flags |= 1;
-
-	/* add new packet to packet list */
-	spin_lock_bh(&bat_priv->forw_bat_list_lock);
-	hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
-	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
-	/* start timer for this packet */
-	INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
-			  send_outstanding_bat_packet);
-	queue_delayed_work(bat_event_workqueue,
-			   &forw_packet_aggr->delayed_work,
-			   send_time - jiffies);
-
-	return;
-out:
-	hardif_free_ref(if_incoming);
-}
-
-/* aggregate a new packet into the existing aggregation */
-static void aggregate(struct forw_packet *forw_packet_aggr,
-		      unsigned char *packet_buff,
-		      int packet_len,
-		      bool direct_link)
-{
-	unsigned char *skb_buff;
-
-	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
-	memcpy(skb_buff, packet_buff, packet_len);
-	forw_packet_aggr->packet_len += packet_len;
-	forw_packet_aggr->num_packets++;
-
-	/* save packet direct link flag status */
-	if (direct_link)
-		forw_packet_aggr->direct_link_flags |=
-			(1 << forw_packet_aggr->num_packets);
-}
-
-void add_bat_packet_to_list(struct bat_priv *bat_priv,
-			    unsigned char *packet_buff, int packet_len,
-			    struct hard_iface *if_incoming, char own_packet,
-			    unsigned long send_time)
-{
-	/**
-	 * _aggr -> pointer to the packet we want to aggregate with
-	 * _pos -> pointer to the position in the queue
-	 */
-	struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
-	struct hlist_node *tmp_node;
-	struct batman_packet *batman_packet =
-		(struct batman_packet *)packet_buff;
-	bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0;
-
-	/* find position for the packet in the forward queue */
-	spin_lock_bh(&bat_priv->forw_bat_list_lock);
-	/* own packets are not to be aggregated */
-	if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
-		hlist_for_each_entry(forw_packet_pos, tmp_node,
-				     &bat_priv->forw_bat_list, list) {
-			if (can_aggregate_with(batman_packet,
-					       packet_len,
-					       send_time,
-					       direct_link,
-					       if_incoming,
-					       forw_packet_pos)) {
-				forw_packet_aggr = forw_packet_pos;
-				break;
-			}
-		}
-	}
-
-	/* nothing to aggregate with - either aggregation disabled or no
-	 * suitable aggregation packet found */
-	if (!forw_packet_aggr) {
-		/* the following section can run without the lock */
-		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
-		/**
-		 * if we could not aggregate this packet with one of the others
-		 * we hold it back for a while, so that it might be aggregated
-		 * later on
-		 */
-		if ((!own_packet) &&
-		    (atomic_read(&bat_priv->aggregated_ogms)))
-			send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
-
-		new_aggregated_packet(packet_buff, packet_len,
-				      send_time, direct_link,
-				      if_incoming, own_packet);
-	} else {
-		aggregate(forw_packet_aggr,
-			  packet_buff, packet_len,
-			  direct_link);
-		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-	}
-}
-
-/* unpack the aggregated packets and process them one by one */
-void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
-			     int packet_len, struct hard_iface *if_incoming)
-{
-	struct batman_packet *batman_packet;
-	int buff_pos = 0;
-	unsigned char *tt_buff;
-
-	batman_packet = (struct batman_packet *)packet_buff;
-
-	do {
-		/* network to host order for our 32bit seqno, and the
-		   orig_interval. */
-		batman_packet->seqno = ntohl(batman_packet->seqno);
-
-		tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
-		receive_bat_packet(ethhdr, batman_packet,
-				   tt_buff, tt_len(batman_packet),
-				   if_incoming);
-
-		buff_pos += BAT_PACKET_LEN + tt_len(batman_packet);
-		batman_packet = (struct batman_packet *)
-			(packet_buff + buff_pos);
-	} while (aggregated_packet(buff_pos, packet_len,
-				   batman_packet->num_tt));
-}
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
deleted file mode 100644
index 7e6d72fbf54..00000000000
--- a/net/batman-adv/aggregation.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- *
- */
-
-#ifndef _NET_BATMAN_ADV_AGGREGATION_H_
-#define _NET_BATMAN_ADV_AGGREGATION_H_
-
-#include "main.h"
-
-/* is there another aggregated packet here? */
-static inline int aggregated_packet(int buff_pos, int packet_len, int num_tt)
-{
-	int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_tt * ETH_ALEN);
-
-	return (next_buff_pos <= packet_len) &&
-		(next_buff_pos <= MAX_AGGREGATION_BYTES);
-}
-
-void add_bat_packet_to_list(struct bat_priv *bat_priv,
-			    unsigned char *packet_buff, int packet_len,
-			    struct hard_iface *if_incoming, char own_packet,
-			    unsigned long send_time);
-void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
-			     int packet_len, struct hard_iface *if_incoming);
-
-#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
new file mode 100644
index 00000000000..4e49666f8c6
--- /dev/null
+++ b/net/batman-adv/bat_algo.h
@@ -0,0 +1,23 @@
+/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
+#define _NET_BATMAN_ADV_BAT_ALGO_H_
+
+int batadv_iv_init(void);
+
+#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
deleted file mode 100644
index abaeec5f624..00000000000
--- a/net/batman-adv/bat_debugfs.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- *
- */
-
-#include "main.h"
-
-#include <linux/debugfs.h>
-
-#include "bat_debugfs.h"
-#include "translation-table.h"
-#include "originator.h"
-#include "hard-interface.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "soft-interface.h"
-#include "vis.h"
-#include "icmp_socket.h"
-
-static struct dentry *bat_debugfs;
-
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-#define LOG_BUFF_MASK (log_buff_len-1)
-#define LOG_BUFF(idx) (debug_log->log_buff[(idx) & LOG_BUFF_MASK])
-
-static int log_buff_len = LOG_BUF_LEN;
-
-static void emit_log_char(struct debug_log *debug_log, char c)
-{
-	LOG_BUFF(debug_log->log_end) = c;
-	debug_log->log_end++;
-
-	if (debug_log->log_end - debug_log->log_start > log_buff_len)
-		debug_log->log_start = debug_log->log_end - log_buff_len;
-}
-
-static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
-{
-	va_list args;
-	static char debug_log_buf[256];
-	char *p;
-
-	if (!debug_log)
-		return 0;
-
-	spin_lock_bh(&debug_log->lock);
-	va_start(args, fmt);
-	vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
-	va_end(args);
-
-	for (p = debug_log_buf; *p != 0; p++)
-		emit_log_char(debug_log, *p);
-
-	spin_unlock_bh(&debug_log->lock);
-
-	wake_up(&debug_log->queue_wait);
-
-	return 0;
-}
-
-int debug_log(struct bat_priv *bat_priv, char *fmt, ...)
-{
-	va_list args;
-	char tmp_log_buf[256];
-
-	va_start(args, fmt);
-	vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
-	fdebug_log(bat_priv->debug_log, "[%10u] %s",
-		   (jiffies / HZ), tmp_log_buf);
-	va_end(args);
-
-	return 0;
-}
-
-static int log_open(struct inode *inode, struct file *file)
-{
-	nonseekable_open(inode, file);
-	file->private_data = inode->i_private;
-	inc_module_count();
-	return 0;
-}
-
-static int log_release(struct inode *inode, struct file *file)
-{
-	dec_module_count();
-	return 0;
-}
-
-static ssize_t log_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
-{
-	struct bat_priv *bat_priv = file->private_data;
-	struct debug_log *debug_log = bat_priv->debug_log;
-	int error, i = 0;
-	char c;
-
-	if ((file->f_flags & O_NONBLOCK) &&
-	    !(debug_log->log_end - debug_log->log_start))
-		return -EAGAIN;
-
-	if ((!buf) || (count < 0))
-		return -EINVAL;
-
-	if (count == 0)
-		return 0;
-
-	if (!access_ok(VERIFY_WRITE, buf, count))
-		return -EFAULT;
-
-	error = wait_event_interruptible(debug_log->queue_wait,
-				(debug_log->log_start - debug_log->log_end));
-
-	if (error)
-		return error;
-
-	spin_lock_bh(&debug_log->lock);
-
-	while ((!error) && (i < count) &&
-	       (debug_log->log_start != debug_log->log_end)) {
-		c = LOG_BUFF(debug_log->log_start);
-
-		debug_log->log_start++;
-
-		spin_unlock_bh(&debug_log->lock);
-
-		error = __put_user(c, buf);
-
-		spin_lock_bh(&debug_log->lock);
-
-		buf++;
-		i++;
-
-	}
-
-	spin_unlock_bh(&debug_log->lock);
-
-	if (!error)
-		return i;
-
-	return error;
-}
-
-static unsigned int log_poll(struct file *file, poll_table *wait)
-{
-	struct bat_priv *bat_priv = file->private_data;
-	struct debug_log *debug_log = bat_priv->debug_log;
-
-	poll_wait(file, &debug_log->queue_wait, wait);
-
-	if (debug_log->log_end - debug_log->log_start)
-		return POLLIN | POLLRDNORM;
-
-	return 0;
-}
-
-static const struct file_operations log_fops = {
-	.open           = log_open,
-	.release        = log_release,
-	.read           = log_read,
-	.poll           = log_poll,
-	.llseek         = no_llseek,
-};
-
-static int debug_log_setup(struct bat_priv *bat_priv)
-{
-	struct dentry *d;
-
-	if (!bat_priv->debug_dir)
-		goto err;
-
-	bat_priv->debug_log = kzalloc(sizeof(struct debug_log), GFP_ATOMIC);
-	if (!bat_priv->debug_log)
-		goto err;
-
-	spin_lock_init(&bat_priv->debug_log->lock);
-	init_waitqueue_head(&bat_priv->debug_log->queue_wait);
-
-	d = debugfs_create_file("log", S_IFREG | S_IRUSR,
-				bat_priv->debug_dir, bat_priv, &log_fops);
-	if (d)
-		goto err;
-
-	return 0;
-
-err:
-	return 1;
-}
-
-static void debug_log_cleanup(struct bat_priv *bat_priv)
-{
-	kfree(bat_priv->debug_log);
-	bat_priv->debug_log = NULL;
-}
-#else /* CONFIG_BATMAN_ADV_DEBUG */
-static int debug_log_setup(struct bat_priv *bat_priv)
-{
-	bat_priv->debug_log = NULL;
-	return 0;
-}
-
-static void debug_log_cleanup(struct bat_priv *bat_priv)
-{
-	return;
-}
-#endif
-
-static int originators_open(struct inode *inode, struct file *file)
-{
-	struct net_device *net_dev = (struct net_device *)inode->i_private;
-	return single_open(file, orig_seq_print_text, net_dev);
-}
-
-static int gateways_open(struct inode *inode, struct file *file)
-{
-	struct net_device *net_dev = (struct net_device *)inode->i_private;
-	return single_open(file, gw_client_seq_print_text, net_dev);
-}
-
-static int softif_neigh_open(struct inode *inode, struct file *file)
-{
-	struct net_device *net_dev = (struct net_device *)inode->i_private;
-	return single_open(file, softif_neigh_seq_print_text, net_dev);
-}
-
-static int transtable_global_open(struct inode *inode, struct file *file)
-{
-	struct net_device *net_dev = (struct net_device *)inode->i_private;
-	return single_open(file, tt_global_seq_print_text, net_dev);
-}
-
-static int transtable_local_open(struct inode *inode, struct file *file)
-{
-	struct net_device *net_dev = (struct net_device *)inode->i_private;
-	return single_open(file, tt_local_seq_print_text, net_dev);
-}
-
-static int vis_data_open(struct inode *inode, struct file *file)
-{
-	struct net_device *net_dev = (struct net_device *)inode->i_private;
-	return single_open(file, vis_seq_print_text, net_dev);
-}
-
-struct bat_debuginfo {
-	struct attribute attr;
-	const struct file_operations fops;
-};
-
-#define BAT_DEBUGINFO(_name, _mode, _open)	\
-struct bat_debuginfo bat_debuginfo_##_name = {	\
-	.attr = { .name = __stringify(_name),	\
-		  .mode = _mode, },		\
-	.fops = { .owner = THIS_MODULE,		\
-		  .open = _open,		\
-		  .read	= seq_read,		\
-		  .llseek = seq_lseek,		\
-		  .release = single_release,	\
-		}				\
-};
-
-static BAT_DEBUGINFO(originators, S_IRUGO, originators_open);
-static BAT_DEBUGINFO(gateways, S_IRUGO, gateways_open);
-static BAT_DEBUGINFO(softif_neigh, S_IRUGO, softif_neigh_open);
-static BAT_DEBUGINFO(transtable_global, S_IRUGO, transtable_global_open);
-static BAT_DEBUGINFO(transtable_local, S_IRUGO, transtable_local_open);
-static BAT_DEBUGINFO(vis_data, S_IRUGO, vis_data_open);
-
-static struct bat_debuginfo *mesh_debuginfos[] = {
-	&bat_debuginfo_originators,
-	&bat_debuginfo_gateways,
-	&bat_debuginfo_softif_neigh,
-	&bat_debuginfo_transtable_global,
-	&bat_debuginfo_transtable_local,
-	&bat_debuginfo_vis_data,
-	NULL,
-};
-
-void debugfs_init(void)
-{
-	bat_debugfs = debugfs_create_dir(DEBUGFS_BAT_SUBDIR, NULL);
-	if (bat_debugfs == ERR_PTR(-ENODEV))
-		bat_debugfs = NULL;
-}
-
-void debugfs_destroy(void)
-{
-	if (bat_debugfs) {
-		debugfs_remove_recursive(bat_debugfs);
-		bat_debugfs = NULL;
-	}
-}
-
-int debugfs_add_meshif(struct net_device *dev)
-{
-	struct bat_priv *bat_priv = netdev_priv(dev);
-	struct bat_debuginfo **bat_debug;
-	struct dentry *file;
-
-	if (!bat_debugfs)
-		goto out;
-
-	bat_priv->debug_dir = debugfs_create_dir(dev->name, bat_debugfs);
-	if (!bat_priv->debug_dir)
-		goto out;
-
-	bat_socket_setup(bat_priv);
-	debug_log_setup(bat_priv);
-
-	for (bat_debug = mesh_debuginfos; *bat_debug; ++bat_debug) {
-		file = debugfs_create_file(((*bat_debug)->attr).name,
-					  S_IFREG | ((*bat_debug)->attr).mode,
-					  bat_priv->debug_dir,
-					  dev, &(*bat_debug)->fops);
-		if (!file) {
-			bat_err(dev, "Can't add debugfs file: %s/%s\n",
-				dev->name, ((*bat_debug)->attr).name);
-			goto rem_attr;
-		}
-	}
-
-	return 0;
-rem_attr:
-	debugfs_remove_recursive(bat_priv->debug_dir);
-	bat_priv->debug_dir = NULL;
-out:
-#ifdef CONFIG_DEBUG_FS
-	return -ENOMEM;
-#else
-	return 0;
-#endif /* CONFIG_DEBUG_FS */
-}
-
-void debugfs_del_meshif(struct net_device *dev)
-{
-	struct bat_priv *bat_priv = netdev_priv(dev);
-
-	debug_log_cleanup(bat_priv);
-
-	if (bat_debugfs) {
-		debugfs_remove_recursive(bat_priv->debug_dir);
-		bat_priv->debug_dir = NULL;
-	}
-}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
new file mode 100644
index 00000000000..f04224c3200
--- /dev/null
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -0,0 +1,1979 @@
+/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+#include "translation-table.h"
+#include "originator.h"
+#include "routing.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+#include "hard-interface.h"
+#include "send.h"
+#include "bat_algo.h"
+#include "network-coding.h"
+
+
+/**
+ * batadv_dup_status - duplicate status
+ * @BATADV_NO_DUP: the packet is no duplicate
+ * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
+ *  neighbor)
+ * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
+ * @BATADV_PROTECTED: originator is currently protected (after reboot)
+ */
+enum batadv_dup_status {
+	BATADV_NO_DUP = 0,
+	BATADV_ORIG_DUP,
+	BATADV_NEIGH_DUP,
+	BATADV_PROTECTED,
+};
+
+/**
+ * batadv_ring_buffer_set - update the ring buffer with the given value
+ * @lq_recv: pointer to the ring buffer
+ * @lq_index: index to store the value at
+ * @value: value to store in the ring buffer
+ */
+static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
+				   uint8_t value)
+{
+	lq_recv[*lq_index] = value;
+	*lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;
+}
+
+/**
+ * batadv_ring_buffer_avg - compute the average of all non-zero values stored
+ * in the given ring buffer
+ * @lq_recv: pointer to the ring buffer
+ *
+ * Returns computed average value.
+ */
+static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
+{
+	const uint8_t *ptr;
+	uint16_t count = 0, i = 0, sum = 0;
+
+	ptr = lq_recv;
+
+	while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) {
+		if (*ptr != 0) {
+			count++;
+			sum += *ptr;
+		}
+
+		i++;
+		ptr++;
+	}
+
+	if (count == 0)
+		return 0;
+
+	return (uint8_t)(sum / count);
+}
+
+/**
+ * batadv_iv_ogm_orig_free - free the private resources allocated for this
+ *  orig_node
+ * @orig_node: the orig_node for which the resources have to be free'd
+ */
+static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
+{
+	kfree(orig_node->bat_iv.bcast_own);
+	kfree(orig_node->bat_iv.bcast_own_sum);
+}
+
+/**
+ * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to
+ *  include the new hard-interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the number of interfaces including the new one
+ *
+ * Returns 0 on success, -ENOMEM if one of the arrays could not be grown.
+ */
+static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
+				     int max_if_num)
+{
+	void *data_ptr;
+	size_t data_size, old_size;
+	int ret = -ENOMEM;
+
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	/* kzalloc() so that the slot of the new interface starts zeroed */
+	data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS;
+	old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
+	data_ptr = kzalloc(data_size, GFP_ATOMIC);
+	if (!data_ptr)
+		goto unlock;
+
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size);
+	kfree(orig_node->bat_iv.bcast_own);
+	orig_node->bat_iv.bcast_own = data_ptr;
+
+	data_ptr = kzalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+	if (!data_ptr) {
+		/* bcast_own now owns the grown buffer - it must stay valid */
+		goto unlock;
+	}
+
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
+	       (max_if_num - 1) * sizeof(uint8_t));
+	kfree(orig_node->bat_iv.bcast_own_sum);
+	orig_node->bat_iv.bcast_own_sum = data_ptr;
+
+	ret = 0;
+
+unlock:
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
+ *  exclude the removed interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the number of interfaces left after the removal
+ * @del_if_num: the index of the interface being removed
+ *
+ * Returns 0 on success, -ENOMEM if a replacement array cannot be allocated.
+ */
+static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
+				     int max_if_num, int del_if_num)
+{
+	int chunk_size, ret = -ENOMEM, if_offset;
+	void *data_ptr = NULL;
+
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	/* last interface was removed */
+	if (max_if_num == 0)
+		goto free_bcast_own;
+
+	chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
+	data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
+	if (!data_ptr)
+		goto unlock;
+
+	/* copy first part */
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
+
+	/* copy second part; bcast_own is not a byte pointer, so cast it */
+	if_offset = (del_if_num + 1) * chunk_size;
+	memcpy((char *)data_ptr + del_if_num * chunk_size,
+	       (char *)orig_node->bat_iv.bcast_own + if_offset,
+	       (max_if_num - del_if_num) * chunk_size);
+
+free_bcast_own:
+	kfree(orig_node->bat_iv.bcast_own);
+	orig_node->bat_iv.bcast_own = data_ptr;
+
+	if (max_if_num == 0)
+		goto free_own_sum;
+
+	/* bcast_own already holds the shrunk copy and has to stay valid */
+	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+	if (!data_ptr)
+		goto unlock;
+
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
+	       del_if_num * sizeof(uint8_t));
+
+	if_offset = (del_if_num + 1) * sizeof(uint8_t);
+	memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t),
+	       orig_node->bat_iv.bcast_own_sum + if_offset,
+	       (max_if_num - del_if_num) * sizeof(uint8_t));
+
+free_own_sum:
+	kfree(orig_node->bat_iv.bcast_own_sum);
+	orig_node->bat_iv.bcast_own_sum = data_ptr;
+
+	ret = 0;
+unlock:
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_orig_get - retrieve or create (if does not exist) an originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: mac address of the originator
+ *
+ * Returns the originator object corresponding to the passed mac address or NULL
+ * on failure.
+ * If the object does not exist it is created and initialised.
+ */
+static struct batadv_orig_node *
+batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
+{
+	struct batadv_orig_node *orig_node;
+	int size, hash_added;
+
+	orig_node = batadv_orig_hash_find(bat_priv, addr);
+	if (orig_node)
+		return orig_node;
+
+	orig_node = batadv_orig_node_new(bat_priv, addr);
+	if (!orig_node)
+		return NULL;
+
+	spin_lock_init(&orig_node->bat_iv.ogm_cnt_lock);
+
+	size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS;
+	orig_node->bat_iv.bcast_own = kzalloc(size, GFP_ATOMIC);
+	if (!orig_node->bat_iv.bcast_own)
+		goto free_orig_node;
+
+	size = bat_priv->num_ifaces * sizeof(uint8_t);
+	orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
+	if (!orig_node->bat_iv.bcast_own_sum)
+		goto free_orig_node;
+
+	hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
+				     batadv_choose_orig, orig_node,
+				     &orig_node->hash_entry);
+	if (hash_added != 0)
+		goto free_orig_node;
+
+	return orig_node;
+
+free_orig_node:
+	/* free twice, as batadv_orig_node_new sets refcount to 2 */
+	batadv_orig_node_free_ref(orig_node);
+	batadv_orig_node_free_ref(orig_node);
+
+	return NULL;
+}
+
+static struct batadv_neigh_node *
+batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
+			const uint8_t *neigh_addr,
+			struct batadv_orig_node *orig_node,
+			struct batadv_orig_node *orig_neigh)
+{
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+	struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
+
+	neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
+	if (!neigh_node)
+		goto out;
+
+	if (!atomic_inc_not_zero(&hard_iface->refcount)) {
+		kfree(neigh_node);
+		neigh_node = NULL;
+		goto out;
+	}
+
+	neigh_node->orig_node = orig_neigh;
+	neigh_node->if_incoming = hard_iface;
+
+	spin_lock_bh(&orig_node->neigh_list_lock);
+	tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface,
+					       neigh_addr);
+	if (!tmp_neigh_node) {
+		hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+	} else {
+		kfree(neigh_node);
+		batadv_hardif_free_ref(hard_iface);
+		neigh_node = tmp_neigh_node;
+	}
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+
+	if (!tmp_neigh_node)
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+			   neigh_addr, orig_node->orig,
+			   hard_iface->net_dev->name);
+
+out:
+	return neigh_node;
+}
+
+static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	unsigned char *ogm_buff;
+	uint32_t random_seqno;
+	int res = -ENOMEM;
+
+	/* randomize initial seqno to avoid collision */
+	get_random_bytes(&random_seqno, sizeof(random_seqno));
+	atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
+
+	hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
+	ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
+	if (!ogm_buff)
+		goto out;
+
+	hard_iface->bat_iv.ogm_buff = ogm_buff;
+
+	batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+	batadv_ogm_packet->packet_type = BATADV_IV_OGM;
+	batadv_ogm_packet->version = BATADV_COMPAT_VERSION;
+	batadv_ogm_packet->ttl = 2;
+	batadv_ogm_packet->flags = BATADV_NO_FLAGS;
+	batadv_ogm_packet->reserved = 0;
+	batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
+
+	res = 0;
+
+out:
+	return res;
+}
+
+static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
+{
+	kfree(hard_iface->bat_iv.ogm_buff);
+	hard_iface->bat_iv.ogm_buff = NULL;
+}
+
+static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+
+	batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+	ether_addr_copy(batadv_ogm_packet->orig,
+			hard_iface->net_dev->dev_addr);
+	ether_addr_copy(batadv_ogm_packet->prev_sender,
+			hard_iface->net_dev->dev_addr);
+}
+
+static void
+batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+
+	batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+	batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP;
+	batadv_ogm_packet->ttl = BATADV_TTL;
+}
+
+/* when do we schedule our own ogm to be sent */
+static unsigned long
+batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
+{
+	unsigned int msecs;
+
+	msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
+	msecs += prandom_u32() % (2 * BATADV_JITTER);
+
+	return jiffies + msecs_to_jiffies(msecs);
+}
+
+/* when do we schedule an ogm packet to be sent */
+static unsigned long batadv_iv_ogm_fwd_send_time(void)
+{
+	return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2));
+}
+
+/* apply hop penalty for a normal link */
+static uint8_t batadv_hop_penalty(uint8_t tq,
+				  const struct batadv_priv *bat_priv)
+{
+	int hop_penalty = atomic_read(&bat_priv->hop_penalty);
+	int new_tq;
+
+	new_tq = tq * (BATADV_TQ_MAX_VALUE - hop_penalty);
+	new_tq /= BATADV_TQ_MAX_VALUE;
+
+	return new_tq;
+}
+
+/* is there another aggregated packet here? */
+static int batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
+				     __be16 tvlv_len)
+{
+	int next_buff_pos = 0;
+
+	next_buff_pos += buff_pos + BATADV_OGM_HLEN;
+	next_buff_pos += ntohs(tvlv_len);
+
+	return (next_buff_pos <= packet_len) &&
+	       (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
+}
+
+/* send a batman ogm to a given interface */
+static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
+				     struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+	char *fwd_str;
+	uint8_t packet_num;
+	int16_t buff_pos;
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	struct sk_buff *skb;
+	uint8_t *packet_pos;
+
+	if (hard_iface->if_status != BATADV_IF_ACTIVE)
+		return;
+
+	packet_num = 0;
+	buff_pos = 0;
+	packet_pos = forw_packet->skb->data;
+	batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
+
+	/* adjust all flags and log packets */
+	while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
+					 batadv_ogm_packet->tvlv_len)) {
+		/* we might have aggregated direct link packets with an
+		 * ordinary base packet
+		 */
+		if (forw_packet->direct_link_flags & BIT(packet_num) &&
+		    forw_packet->if_incoming == hard_iface)
+			batadv_ogm_packet->flags |= BATADV_DIRECTLINK;
+		else
+			batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK;
+
+		if (packet_num > 0 || !forw_packet->own)
+			fwd_str = "Forwarding";
+		else
+			fwd_str = "Sending own";
+
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s) on interface %s [%pM]\n",
+			   fwd_str, (packet_num > 0 ? "aggregated " : ""),
+			   batadv_ogm_packet->orig,
+			   ntohl(batadv_ogm_packet->seqno),
+			   batadv_ogm_packet->tq, batadv_ogm_packet->ttl,
+			   (batadv_ogm_packet->flags & BATADV_DIRECTLINK ?
+			    "on" : "off"),
+			   hard_iface->net_dev->name,
+			   hard_iface->net_dev->dev_addr);
+
+		buff_pos += BATADV_OGM_HLEN;
+		buff_pos += ntohs(batadv_ogm_packet->tvlv_len);
+		packet_num++;
+		packet_pos = forw_packet->skb->data + buff_pos;
+		batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
+	}
+
+	/* create clone because function is called more than once */
+	skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
+	if (skb) {
+		batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
+		batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
+				   skb->len + ETH_HLEN);
+		batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr);
+	}
+}
+
+/* send a batman ogm packet */
+static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
+{
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+
+	if (!forw_packet->if_incoming) {
+		pr_err("Error - can't forward packet: incoming iface not specified\n");
+		goto out;
+	}
+
+	soft_iface = forw_packet->if_incoming->soft_iface;
+	bat_priv = netdev_priv(soft_iface);
+
+	if (WARN_ON(!forw_packet->if_outgoing))
+		goto out;
+
+	if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+		goto out;
+
+	if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
+		goto out;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if)
+		goto out;
+
+	/* only for one specific outgoing interface */
+	batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing);
+
+out:
+	if (primary_if)
+		batadv_hardif_free_ref(primary_if);
+}
+
+/**
+ * batadv_iv_ogm_can_aggregate - find out if an OGM can be aggregated on an
+ *  existing forward packet
+ * @new_bat_ogm_packet: OGM packet to be aggregated
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_len: (total) length of the OGM
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ * @directlink: true if this is a direct link packet
+ * @if_incoming: interface where the packet was received
+ * @if_outgoing: interface for which the retransmission should be considered
+ * @forw_packet: the forwarded packet which should be checked
+ *
+ * Returns true if new_bat_ogm_packet can be aggregated with forw_packet
+ */
+static bool
+batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
+			    struct batadv_priv *bat_priv,
+			    int packet_len, unsigned long send_time,
+			    bool directlink,
+			    const struct batadv_hard_iface *if_incoming,
+			    const struct batadv_hard_iface *if_outgoing,
+			    const struct batadv_forw_packet *forw_packet)
+{
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	int aggregated_bytes = forw_packet->packet_len + packet_len;
+	struct batadv_hard_iface *primary_if = NULL;
+	bool res = false;
+	unsigned long aggregation_end_time;
+
+	batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data;
+	aggregation_end_time = send_time;
+	aggregation_end_time += msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS);
+
+	/* we can aggregate the current packet to this aggregated packet
+	 * if:
+	 *
+	 * - the send time is within our MAX_AGGREGATION_MS time
+	 * - the resulting packet won't be bigger than
+	 *   MAX_AGGREGATION_BYTES
+	 */
+	if (time_before(send_time, forw_packet->send_time) &&
+	    time_after_eq(aggregation_end_time, forw_packet->send_time) &&
+	    (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) {
+		/* check aggregation compatibility
+		 * -> direct link packets are broadcasted on
+		 *    their interface only
+		 * -> aggregate packet if the current packet is
+		 *    a "global" packet as well as the base
+		 *    packet
+		 */
+		primary_if = batadv_primary_if_get_selected(bat_priv);
+		if (!primary_if)
+			goto out;
+
+		/* packet is not leaving on the same interface. */
+		if (forw_packet->if_outgoing != if_outgoing)
+			goto out;
+
+		/* packets without direct link flag and high TTL
+		 * are flooded through the net
+		 */
+		if ((!directlink) &&
+		    (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) &&
+		    (batadv_ogm_packet->ttl != 1) &&
+
+		    /* own packets originating non-primary
+		     * interfaces leave only that interface
+		     */
+		    ((!forw_packet->own) ||
+		     (forw_packet->if_incoming == primary_if))) {
+			res = true;
+			goto out;
+		}
+
+		/* if the incoming packet is sent via this one
+		 * interface only - we still can aggregate
+		 */
+		if ((directlink) &&
+		    (new_bat_ogm_packet->ttl == 1) &&
+		    (forw_packet->if_incoming == if_incoming) &&
+
+		    /* packets from direct neighbors or
+		     * own secondary interface packets
+		     * (= secondary interface packets in general)
+		     */
+		    (batadv_ogm_packet->flags & BATADV_DIRECTLINK ||
+		     (forw_packet->own &&
+		      forw_packet->if_incoming != primary_if))) {
+			res = true;
+			goto out;
+		}
+	}
+
+out:
+	if (primary_if)
+		batadv_hardif_free_ref(primary_if);
+	return res;
+}
+
+/**
+ * batadv_iv_ogm_aggregate_new - create a new aggregated packet and add this
+ *  packet to it.
+ * @packet_buff: pointer to the OGM
+ * @packet_len: (total) length of the OGM
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ * @direct_link: whether this OGM has direct link status
+ * @if_incoming: interface where the packet was received
+ * @if_outgoing: interface for which the retransmission should be considered
+ * @own_packet: true if it is a self-generated ogm
+ */
+static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
+					int packet_len, unsigned long send_time,
+					bool direct_link,
+					struct batadv_hard_iface *if_incoming,
+					struct batadv_hard_iface *if_outgoing,
+					int own_packet)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batadv_forw_packet *forw_packet_aggr;
+	unsigned char *skb_buff;
+	unsigned int skb_size;
+
+	if (!atomic_inc_not_zero(&if_incoming->refcount))
+		return;
+
+	if (!atomic_inc_not_zero(&if_outgoing->refcount))
+		goto out_free_incoming;
+
+	/* own packet should always be scheduled */
+	if (!own_packet) {
+		if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+				   "batman packet queue full\n");
+			goto out;
+		}
+	}
+
+	forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
+	if (!forw_packet_aggr) {
+		if (!own_packet)
+			atomic_inc(&bat_priv->batman_queue_left);
+		goto out;
+	}
+
+	if ((atomic_read(&bat_priv->aggregated_ogms)) &&
+	    (packet_len < BATADV_MAX_AGGREGATION_BYTES))
+		skb_size = BATADV_MAX_AGGREGATION_BYTES;
+	else
+		skb_size = packet_len;
+
+	skb_size += ETH_HLEN;
+
+	forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
+	if (!forw_packet_aggr->skb) {
+		if (!own_packet)
+			atomic_inc(&bat_priv->batman_queue_left);
+		kfree(forw_packet_aggr);
+		goto out;
+	}
+	forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
+	skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
+
+	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
+	forw_packet_aggr->packet_len = packet_len;
+	memcpy(skb_buff, packet_buff, packet_len);
+
+	forw_packet_aggr->own = own_packet;
+	forw_packet_aggr->if_incoming = if_incoming;
+	forw_packet_aggr->if_outgoing = if_outgoing;
+	forw_packet_aggr->num_packets = 0;
+	forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS;
+	forw_packet_aggr->send_time = send_time;
+
+	/* save packet direct link flag status */
+	if (direct_link)
+		forw_packet_aggr->direct_link_flags |= 1;
+
+	/* add new packet to packet list */
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
+	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+	/* start timer for this packet */
+	INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
+			  batadv_send_outstanding_bat_ogm_packet);
+	queue_delayed_work(batadv_event_workqueue,
+			   &forw_packet_aggr->delayed_work,
+			   send_time - jiffies);
+
+	return;
+out:
+	batadv_hardif_free_ref(if_outgoing);
+out_free_incoming:
+	batadv_hardif_free_ref(if_incoming);
+}
+
+/* aggregate a new packet into the existing ogm packet */
+static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
+				    const unsigned char *packet_buff,
+				    int packet_len, bool direct_link)
+{
+	unsigned char *skb_buff;
+	unsigned long new_direct_link_flag;
+
+	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
+	memcpy(skb_buff, packet_buff, packet_len);
+	forw_packet_aggr->packet_len += packet_len;
+	forw_packet_aggr->num_packets++;
+
+	/* save packet direct link flag status */
+	if (direct_link) {
+		new_direct_link_flag = BIT(forw_packet_aggr->num_packets);
+		forw_packet_aggr->direct_link_flags |= new_direct_link_flag;
+	}
+}
+
+/**
+ * batadv_iv_ogm_queue_add - queue up an OGM for transmission
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_buff: pointer to the OGM
+ * @packet_len: (total) length of the OGM
+ * @if_incoming: interface where the packet was received
+ * @if_outgoing: interface for which the retransmission should be considered
+ * @own_packet: true if it is a self-generated ogm
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ */
+static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
+				    unsigned char *packet_buff,
+				    int packet_len,
+				    struct batadv_hard_iface *if_incoming,
+				    struct batadv_hard_iface *if_outgoing,
+				    int own_packet, unsigned long send_time)
+{
+	/* _aggr -> pointer to the packet we want to aggregate with
+	 * _pos -> pointer to the position in the queue
+	 */
+	struct batadv_forw_packet *forw_packet_aggr = NULL;
+	struct batadv_forw_packet *forw_packet_pos = NULL;
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	bool direct_link;
+	unsigned long max_aggregation_jiffies;
+
+	batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff;
+	direct_link = batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0;
+	max_aggregation_jiffies = msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS);
+
+	/* find position for the packet in the forward queue */
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	/* own packets are not to be aggregated */
+	if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
+		hlist_for_each_entry(forw_packet_pos,
+				     &bat_priv->forw_bat_list, list) {
+			if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet,
+							bat_priv, packet_len,
+							send_time, direct_link,
+							if_incoming,
+							if_outgoing,
+							forw_packet_pos)) {
+				forw_packet_aggr = forw_packet_pos;
+				break;
+			}
+		}
+	}
+
+	/* nothing to aggregate with - either aggregation disabled or no
+	 * suitable aggregation packet found
+	 */
+	if (!forw_packet_aggr) {
+		/* the following section can run without the lock */
+		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+		/* if we could not aggregate this packet with one of the others
+		 * we hold it back for a while, so that it might be aggregated
+		 * later on
+		 */
+		if (!own_packet && atomic_read(&bat_priv->aggregated_ogms))
+			send_time += max_aggregation_jiffies;
+
+		batadv_iv_ogm_aggregate_new(packet_buff, packet_len,
+					    send_time, direct_link,
+					    if_incoming, if_outgoing,
+					    own_packet);
+	} else {
+		batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff,
+					packet_len, direct_link);
+		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+	}
+}
+
+static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
+				  const struct ethhdr *ethhdr,
+				  struct batadv_ogm_packet *batadv_ogm_packet,
+				  bool is_single_hop_neigh,
+				  bool is_from_best_next_hop,
+				  struct batadv_hard_iface *if_incoming,
+				  struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	uint16_t tvlv_len;
+
+	if (batadv_ogm_packet->ttl <= 1) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
+		return;
+	}
+
+	if (!is_from_best_next_hop) {
+		/* Mark the forwarded packet when it is not coming from our
+		 * best next hop. We still need to forward the packet for our
+		 * neighbor link quality detection to work in case the packet
+		 * originated from a single hop neighbor. Otherwise we can
+		 * simply drop the ogm.
+		 */
+		if (is_single_hop_neigh)
+			batadv_ogm_packet->flags |= BATADV_NOT_BEST_NEXT_HOP;
+		else
+			return;
+	}
+
+	tvlv_len = ntohs(batadv_ogm_packet->tvlv_len);
+
+	batadv_ogm_packet->ttl--;
+	ether_addr_copy(batadv_ogm_packet->prev_sender, ethhdr->h_source);
+
+	/* apply hop penalty */
+	batadv_ogm_packet->tq = batadv_hop_penalty(batadv_ogm_packet->tq,
+						   bat_priv);
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Forwarding packet: tq: %i, ttl: %i\n",
+		   batadv_ogm_packet->tq, batadv_ogm_packet->ttl);
+
+	/* switch off primaries first hop flag when forwarding */
+	batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP;
+	if (is_single_hop_neigh)
+		batadv_ogm_packet->flags |= BATADV_DIRECTLINK;
+	else
+		batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK;
+
+	batadv_iv_ogm_queue_add(bat_priv, (unsigned char *)batadv_ogm_packet,
+				BATADV_OGM_HLEN + tvlv_len,
+				if_incoming, if_outgoing, 0,
+				batadv_iv_ogm_fwd_send_time());
+}
+
+/**
+ * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for
+ * the given interface
+ * @hard_iface: the interface for which the windows have to be shifted
+ */
+static void
+batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	struct batadv_orig_node *orig_node;
+	unsigned long *word;
+	uint32_t i;
+	size_t word_index;
+	uint8_t *w;
+	int if_num;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+			spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+			word_index = hard_iface->if_num * BATADV_NUM_WORDS;
+			word = &(orig_node->bat_iv.bcast_own[word_index]);
+
+			batadv_bit_get_packet(bat_priv, word, 1, 0);
+			if_num = hard_iface->if_num;
+			w = &orig_node->bat_iv.bcast_own_sum[if_num];
+			*w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
+			spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+		}
+		rcu_read_unlock();
+	}
+}
+
+static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+	unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
+	struct batadv_ogm_packet *batadv_ogm_packet;
+	struct batadv_hard_iface *primary_if, *tmp_hard_iface;
+	int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len;
+	uint32_t seqno;
+	uint16_t tvlv_len = 0;
+	unsigned long send_time;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+
+	if (hard_iface == primary_if) {
+		/* tt changes have to be committed before the tvlv data is
+		 * appended as it may alter the tt tvlv container
+		 */
+		batadv_tt_local_commit_changes(bat_priv);
+		tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
+							    ogm_buff_len,
+							    BATADV_OGM_HLEN);
+	}
+
+	batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff);
+	batadv_ogm_packet->tvlv_len = htons(tvlv_len);
+
+	/* change sequence number to network order */
+	seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno);
+	batadv_ogm_packet->seqno = htonl(seqno);
+	atomic_inc(&hard_iface->bat_iv.ogm_seqno);
+
+	batadv_iv_ogm_slide_own_bcast_window(hard_iface);
+
+	send_time = batadv_iv_ogm_emit_send_time(bat_priv);
+
+	if (hard_iface != primary_if) {
+		/* OGMs from secondary interfaces are only scheduled on their
+		 * respective interfaces.
+		 */
+		batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len,
+					hard_iface, hard_iface, 1, send_time);
+		goto out;
+	}
+
+	/* OGMs from primary interfaces are scheduled on all
+	 * interfaces.
+	 */
+	rcu_read_lock();
+	list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
+		if (tmp_hard_iface->soft_iface != hard_iface->soft_iface)
+				continue;
+		batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
+					*ogm_buff_len, hard_iface,
+					tmp_hard_iface, 1, send_time);
+	}
+	rcu_read_unlock();
+
+out:
+	if (primary_if)
+		batadv_hardif_free_ref(primary_if);
+}
+
+/**
+ * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an
+ *  originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: the orig node who originally emitted the ogm packet
+ * @orig_ifinfo: ifinfo for the outgoing interface of the orig_node
+ * @ethhdr: Ethernet header of the OGM
+ * @batadv_ogm_packet: the ogm packet
+ * @if_incoming: interface where the packet was received
+ * @if_outgoing: interface for which the retransmission should be considered
+ * @dup_status: the duplicate status of this ogm packet.
+ */
+static void
+batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
+			  struct batadv_orig_node *orig_node,
+			  struct batadv_orig_ifinfo *orig_ifinfo,
+			  const struct ethhdr *ethhdr,
+			  const struct batadv_ogm_packet *batadv_ogm_packet,
+			  struct batadv_hard_iface *if_incoming,
+			  struct batadv_hard_iface *if_outgoing,
+			  enum batadv_dup_status dup_status)
+{
+	struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+	struct batadv_neigh_node *router = NULL;
+	struct batadv_orig_node *orig_node_tmp;
+	int if_num;
+	uint8_t sum_orig, sum_neigh;
+	uint8_t *neigh_addr;
+	uint8_t tq_avg;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "update_originator(): Searching and updating originator entry of received packet\n");
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_neigh_node,
+				 &orig_node->neigh_list, list) {
+		neigh_addr = tmp_neigh_node->addr;
+		if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
+		    tmp_neigh_node->if_incoming == if_incoming &&
+		    atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
+			if (WARN(neigh_node, "too many matching neigh_nodes"))
+				batadv_neigh_node_free_ref(neigh_node);
+			neigh_node = tmp_neigh_node;
+			continue;
+		}
+
+		if (dup_status != BATADV_NO_DUP)
+			continue;
+
+		/* every other neighbor gets a 0 sample for this outgoing if */
+		neigh_ifinfo = batadv_neigh_ifinfo_get(tmp_neigh_node,
+						       if_outgoing);
+		if (!neigh_ifinfo)
+			continue;
+
+		spin_lock_bh(&tmp_neigh_node->ifinfo_lock);
+		batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv,
+				       &neigh_ifinfo->bat_iv.tq_index, 0);
+		tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv);
+		neigh_ifinfo->bat_iv.tq_avg = tq_avg;
+		spin_unlock_bh(&tmp_neigh_node->ifinfo_lock);
+
+		batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+		neigh_ifinfo = NULL;
+	}
+
+	if (!neigh_node) {
+		struct batadv_orig_node *orig_tmp;
+
+		orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source);
+		if (!orig_tmp)
+			goto unlock;
+
+		neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
+						     ethhdr->h_source,
+						     orig_node, orig_tmp);
+
+		batadv_orig_node_free_ref(orig_tmp);
+		if (!neigh_node)
+			goto unlock;
+	} else
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Updating existing last-hop neighbor of originator\n");
+
+	rcu_read_unlock();
+	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
+	if (!neigh_ifinfo)
+		goto out;
+
+	neigh_node->last_seen = jiffies;
+
+	spin_lock_bh(&neigh_node->ifinfo_lock);
+	batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv,
+			       &neigh_ifinfo->bat_iv.tq_index,
+			       batadv_ogm_packet->tq);
+	tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv);
+	neigh_ifinfo->bat_iv.tq_avg = tq_avg;
+	spin_unlock_bh(&neigh_node->ifinfo_lock);
+
+	if (dup_status == BATADV_NO_DUP) {
+		orig_ifinfo->last_ttl = batadv_ogm_packet->ttl;
+		neigh_ifinfo->last_ttl = batadv_ogm_packet->ttl;
+	}
+
+	/* if this neighbor already is our next hop for if_outgoing there
+	 * is nothing to change
+	 */
+	router = batadv_orig_router_get(orig_node, if_outgoing);
+	if (router == neigh_node)
+		goto out;
+
+	if (router) {
+		router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
+		if (!router_ifinfo)
+			goto out;
+
+		/* keep the current router while its average TQ is strictly
+		 * higher; equal values are decided by the check below
+		 */
+		if (router_ifinfo->bat_iv.tq_avg > neigh_ifinfo->bat_iv.tq_avg)
+			goto out;
+	}
+
+	/* if the TQ is the same, only switch when bcast_own_sum of the
+	 * new neighbor's link is higher than the one of the router's link
+	 */
+	if (router_ifinfo &&
+	    (neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg)) {
+		orig_node_tmp = router->orig_node;
+		spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
+		if_num = router->if_incoming->if_num;
+		sum_orig = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
+		spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
+
+		orig_node_tmp = neigh_node->orig_node;
+		spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
+		if_num = neigh_node->if_incoming->if_num;
+		sum_neigh = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
+		spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
+
+		if (sum_orig >= sum_neigh)
+			goto out;
+	}
+
+	batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
+	goto out;
+
+unlock:
+	rcu_read_unlock();
+out:
+	if (neigh_node)
+		batadv_neigh_node_free_ref(neigh_node);
+	if (router)
+		batadv_neigh_node_free_ref(router);
+	if (neigh_ifinfo)
+		batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_free_ref(router_ifinfo);
+}
+
+/**
+ * batadv_iv_ogm_calc_tq - calculate tq for current received ogm packet
+ * @orig_node: the orig node who originally emitted the ogm packet
+ * @orig_neigh_node: the orig node struct of the neighbor who sent the packet
+ * @batadv_ogm_packet: the ogm packet; its tq field is replaced by the result
+ * @if_incoming: interface where the packet was received
+ * @if_outgoing: interface for which the retransmission should be considered
+ *
+ * The received tq is scaled by the local link quality, an asymmetry penalty
+ * and, for WiFi, a same-interface penalty. last_seen values are refreshed.
+ *
+ * Returns 1 if the link can be considered bidirectional, 0 otherwise
+ */
+static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
+				 struct batadv_orig_node *orig_neigh_node,
+				 struct batadv_ogm_packet *batadv_ogm_packet,
+				 struct batadv_hard_iface *if_incoming,
+				 struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node;
+	struct batadv_neigh_ifinfo *neigh_ifinfo;
+	uint8_t total_count;
+	uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
+	unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
+	unsigned int tq_asym_penalty, inv_asym_penalty, tq_iface_penalty;
+	unsigned int combined_tq;
+	int if_num, ret = 0;
+
+	/* find corresponding one hop neighbor */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_neigh_node,
+				 &orig_neigh_node->neigh_list, list) {
+		if (!batadv_compare_eth(tmp_neigh_node->addr,
+					orig_neigh_node->orig))
+			continue;
+
+		if (tmp_neigh_node->if_incoming != if_incoming)
+			continue;
+
+		if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+			continue;
+
+		neigh_node = tmp_neigh_node;
+		break;
+	}
+	rcu_read_unlock();
+
+	if (!neigh_node)
+		neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
+						     orig_neigh_node->orig,
+						     orig_neigh_node,
+						     orig_neigh_node);
+
+	if (!neigh_node)
+		goto out;
+
+	/* if orig_node is direct neighbor update neigh_node last_seen */
+	if (orig_node == orig_neigh_node)
+		neigh_node->last_seen = jiffies;
+
+	orig_node->last_seen = jiffies;
+
+	/* packet counts of the one hop neighbor, guarded by its ogm_cnt_lock */
+	spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
+	if_num = if_incoming->if_num;
+	orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
+	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
+	if (neigh_ifinfo) {
+		neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count;
+		batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+	} else {
+		neigh_rq_count = 0;
+	}
+	spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
+
+	/* pay attention to not get a value bigger than 100 % */
+	if (orig_eq_count > neigh_rq_count)
+		total_count = neigh_rq_count;
+	else
+		total_count = orig_eq_count;
+
+	/* if we have too few packets (too less data) we set tq_own to zero
+	 * if we receive too few packets it is not considered bidirectional
+	 */
+	if (total_count < BATADV_TQ_LOCAL_BIDRECT_SEND_MINIMUM ||
+	    neigh_rq_count < BATADV_TQ_LOCAL_BIDRECT_RECV_MINIMUM)
+		tq_own = 0;
+	else
+		/* neigh_node->real_packet_count is never zero as we only
+		 * purge old information when getting new information
+		 */
+		tq_own = (BATADV_TQ_MAX_VALUE * total_count) / neigh_rq_count;
+
+	/* 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does affect
+	 * the nearly-symmetric links only a little, but punishes asymmetric
+	 * links more.  This will give a value between 0 and TQ_MAX_VALUE
+	 */
+	neigh_rq_inv = BATADV_TQ_LOCAL_WINDOW_SIZE - neigh_rq_count;
+	neigh_rq_inv_cube = neigh_rq_inv * neigh_rq_inv * neigh_rq_inv;
+	neigh_rq_max_cube = BATADV_TQ_LOCAL_WINDOW_SIZE *
+			    BATADV_TQ_LOCAL_WINDOW_SIZE *
+			    BATADV_TQ_LOCAL_WINDOW_SIZE;
+	inv_asym_penalty = BATADV_TQ_MAX_VALUE * neigh_rq_inv_cube;
+	inv_asym_penalty /= neigh_rq_max_cube;
+	tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty;
+
+	/* penalize if the OGM is forwarded on the same interface. WiFi
+	 * interfaces and other half duplex devices suffer from throughput
+	 * drops as they can't send and receive at the same time.
+	 */
+	tq_iface_penalty = BATADV_TQ_MAX_VALUE;
+	if (if_outgoing && (if_incoming == if_outgoing) &&
+	    batadv_is_wifi_netdev(if_outgoing->net_dev))
+		tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
+						      bat_priv);
+
+	/* unsigned math: the product of the four factors may exceed INT_MAX */
+	combined_tq = batadv_ogm_packet->tq *
+		      tq_own *
+		      tq_asym_penalty *
+		      tq_iface_penalty;
+	combined_tq /= BATADV_TQ_MAX_VALUE *
+		       BATADV_TQ_MAX_VALUE *
+		       BATADV_TQ_MAX_VALUE;
+	batadv_ogm_packet->tq = combined_tq;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+		   orig_node->orig, orig_neigh_node->orig, total_count,
+		   neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
+		   batadv_ogm_packet->tq, if_incoming->net_dev->name,
+		   if_outgoing ? if_outgoing->net_dev->name : "DEFAULT");
+
+	/* a link with the minimum required tq is considered bidirectional */
+	if (batadv_ogm_packet->tq >= BATADV_TQ_TOTAL_BIDRECT_LIMIT)
+		ret = 1;
+
+out:
+	if (neigh_node)
+		batadv_neigh_node_free_ref(neigh_node);
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_update_seqnos -  process a batman packet for all interfaces,
+ *  adjust the sequence number and find out whether it is a duplicate
+ * @ethhdr: ethernet header of the packet
+ * @batadv_ogm_packet: OGM packet to be considered
+ * @if_incoming: interface on which the OGM packet was received
+ * @if_outgoing: interface for which the retransmission should be considered
+ *
+ * Returns duplicate status as enum batadv_dup_status (BATADV_NO_DUP on error)
+ */
+static enum batadv_dup_status
+batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
+			    const struct batadv_ogm_packet *batadv_ogm_packet,
+			    const struct batadv_hard_iface *if_incoming,
+			    struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batadv_orig_node *orig_node;
+	struct batadv_orig_ifinfo *orig_ifinfo = NULL;
+	struct batadv_neigh_node *neigh_node;
+	struct batadv_neigh_ifinfo *neigh_ifinfo;
+	int is_dup;
+	int32_t seq_diff;
+	int need_update = 0;
+	int set_mark;
+	enum batadv_dup_status ret = BATADV_NO_DUP;
+	uint32_t seqno = ntohl(batadv_ogm_packet->seqno);
+	uint8_t packet_count;
+	unsigned long *bitmap;
+
+	orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig);
+	if (!orig_node)
+		return BATADV_NO_DUP;
+
+	orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
+	if (WARN_ON(!orig_ifinfo)) {
+		batadv_orig_node_free_ref(orig_node);
+		return BATADV_NO_DUP;
+	}
+
+	/* ogm_cnt_lock is held while the seqno window state is evaluated */
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	seq_diff = seqno - orig_ifinfo->last_real_seqno;
+
+	/* signalize caller that the packet is to be dropped. */
+	if (!hlist_empty(&orig_node->neigh_list) &&
+	    batadv_window_protected(bat_priv, seq_diff,
+				    &orig_ifinfo->batman_seqno_reset)) {
+		ret = BATADV_PROTECTED;
+		goto out;
+	}
+
+	/* update the receive window (real_bits) of each known neighbor */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node,
+						       if_outgoing);
+		if (!neigh_ifinfo)
+			continue;
+
+		is_dup = batadv_test_bit(neigh_ifinfo->bat_iv.real_bits,
+					 orig_ifinfo->last_real_seqno,
+					 seqno);
+
+		if (batadv_compare_eth(neigh_node->addr, ethhdr->h_source) &&
+		    neigh_node->if_incoming == if_incoming) {
+			set_mark = 1;
+			if (is_dup)
+				ret = BATADV_NEIGH_DUP;
+		} else {
+			set_mark = 0;
+			if (is_dup && (ret != BATADV_NEIGH_DUP))
+				ret = BATADV_ORIG_DUP;
+		}
+
+		/* if the window moved, set the update flag. */
+		bitmap = neigh_ifinfo->bat_iv.real_bits;
+		need_update |= batadv_bit_get_packet(bat_priv, bitmap,
+						     seq_diff, set_mark);
+
+		packet_count = bitmap_weight(bitmap,
+					     BATADV_TQ_LOCAL_WINDOW_SIZE);
+		neigh_ifinfo->bat_iv.real_packet_count = packet_count;
+		batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+	}
+	rcu_read_unlock();
+
+	if (need_update) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "%s updating last_seqno: old %u, new %u\n",
+			   if_outgoing ? if_outgoing->net_dev->name : "DEFAULT",
+			   orig_ifinfo->last_real_seqno, seqno);
+		orig_ifinfo->last_real_seqno = seqno;
+	}
+
+out:
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	batadv_orig_node_free_ref(orig_node);
+	if (orig_ifinfo)
+		batadv_orig_ifinfo_free_ref(orig_ifinfo);
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if
+ * @skb: the skb containing the OGM
+ * @ogm_offset: offset from skb->data to start of ogm header
+ * @orig_node: the (cached) orig node for the originator of this OGM
+ * @if_incoming: the interface where this packet was received
+ * @if_outgoing: the interface for which the packet should be considered
+ */
+static void
+batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
+				struct batadv_orig_node *orig_node,
+				struct batadv_hard_iface *if_incoming,
+				struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batadv_neigh_node *router = NULL, *router_router = NULL;
+	struct batadv_orig_node *orig_neigh_node;
+	struct batadv_orig_ifinfo *orig_ifinfo;
+	struct batadv_neigh_node *orig_neigh_router = NULL;
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_ogm_packet *ogm_packet;
+	enum batadv_dup_status dup_status;
+	bool is_from_best_next_hop = false;
+	bool is_single_hop_neigh = false;
+	bool sameseq, similar_ttl;
+	struct sk_buff *skb_priv;
+	struct ethhdr *ethhdr;
+	uint8_t *prev_sender;
+	int is_bidirect;
+
+	/* work on a private copy: tq value and/or flags get changed below */
+	skb_priv = skb_copy(skb, GFP_ATOMIC);
+	if (!skb_priv)
+		return;
+
+	ethhdr = eth_hdr(skb_priv);
+	ogm_packet = (struct batadv_ogm_packet *)(skb_priv->data + ogm_offset);
+
+	dup_status = batadv_iv_ogm_update_seqnos(ethhdr, ogm_packet,
+						 if_incoming, if_outgoing);
+	if (batadv_compare_eth(ethhdr->h_source, ogm_packet->orig))
+		is_single_hop_neigh = true;
+
+	if (dup_status == BATADV_PROTECTED) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: packet within seqno protection time (sender: %pM)\n",
+			   ethhdr->h_source);
+		goto out;
+	}
+
+	if (ogm_packet->tq == 0) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: originator packet with tq equal 0\n");
+		goto out;
+	}
+
+	router = batadv_orig_router_get(orig_node, if_outgoing);
+	if (router) {
+		router_router = batadv_orig_router_get(router->orig_node,
+						       if_outgoing);
+		router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
+	}
+
+	if ((router_ifinfo && router_ifinfo->bat_iv.tq_avg != 0) &&
+	    (batadv_compare_eth(router->addr, ethhdr->h_source)))
+		is_from_best_next_hop = true;
+
+	prev_sender = ogm_packet->prev_sender;
+	/* avoid temporary routing loops */
+	if (router && router_router &&
+	    (batadv_compare_eth(router->addr, prev_sender)) &&
+	    !(batadv_compare_eth(ogm_packet->orig, prev_sender)) &&
+	    (batadv_compare_eth(router->addr, router_router->addr))) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: ignoring all rebroadcast packets that may make me loop (sender: %pM)\n",
+			   ethhdr->h_source);
+		goto out;
+	}
+
+	/* TVLVs are handled once per OGM, during the default interface run */
+	if (if_outgoing == BATADV_IF_DEFAULT)
+		batadv_tvlv_ogm_receive(bat_priv, ogm_packet, orig_node);
+
+	/* if sender is a direct neighbor the sender mac equals
+	 * originator mac
+	 */
+	if (is_single_hop_neigh)
+		orig_neigh_node = orig_node;
+	else
+		orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
+							 ethhdr->h_source);
+
+	if (!orig_neigh_node)
+		goto out;
+
+	/* Update nc_nodes of the originator */
+	batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node,
+				 ogm_packet, is_single_hop_neigh);
+
+	orig_neigh_router = batadv_orig_router_get(orig_neigh_node,
+						   if_outgoing);
+
+	/* drop packet if sender is not a direct neighbor and if we
+	 * don't route towards it
+	 */
+	if (!is_single_hop_neigh && (!orig_neigh_router)) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: OGM via unknown neighbor!\n");
+		goto out_neigh;
+	}
+
+	is_bidirect = batadv_iv_ogm_calc_tq(orig_node, orig_neigh_node,
+					    ogm_packet, if_incoming,
+					    if_outgoing);
+
+	/* update ranking if it is not a duplicate or has the same
+	 * seqno and similar ttl as the non-duplicate
+	 */
+	orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
+	if (!orig_ifinfo)
+		goto out_neigh;
+
+	sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
+	similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
+
+	if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+			    (sameseq && similar_ttl))) {
+		batadv_iv_ogm_orig_update(bat_priv, orig_node,
+					  orig_ifinfo, ethhdr,
+					  ogm_packet, if_incoming,
+					  if_outgoing, dup_status);
+	}
+	batadv_orig_ifinfo_free_ref(orig_ifinfo);
+
+	/* only forward for specific interface, not for the default one. */
+	if (if_outgoing == BATADV_IF_DEFAULT)
+		goto out_neigh;
+
+	/* is single hop (direct) neighbor */
+	if (is_single_hop_neigh) {
+		/* OGMs from secondary interfaces should only scheduled once
+		 * per interface where it has been received, not multiple times
+		 */
+		if ((ogm_packet->ttl <= 2) &&
+		    (if_incoming != if_outgoing)) {
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+				   "Drop packet: OGM from secondary interface and wrong outgoing interface\n");
+			goto out_neigh;
+		}
+		/* mark direct link on incoming interface */
+		batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet,
+				      is_single_hop_neigh,
+				      is_from_best_next_hop, if_incoming,
+				      if_outgoing);
+
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Forwarding packet: rebroadcast neighbor packet with direct link flag\n");
+		goto out_neigh;
+	}
+
+	/* multihop originator */
+	if (!is_bidirect) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: not received via bidirectional link\n");
+		goto out_neigh;
+	}
+
+	if (dup_status == BATADV_NEIGH_DUP) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: duplicate packet received\n");
+		goto out_neigh;
+	}
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Forwarding packet: rebroadcast originator packet\n");
+	batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet,
+			      is_single_hop_neigh, is_from_best_next_hop,
+			      if_incoming, if_outgoing);
+
+out_neigh:
+	if ((orig_neigh_node) && (!is_single_hop_neigh))
+		batadv_orig_node_free_ref(orig_neigh_node);
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_free_ref(router_ifinfo);
+	if (router)
+		batadv_neigh_node_free_ref(router);
+	if (router_router)
+		batadv_neigh_node_free_ref(router_router);
+	if (orig_neigh_router)
+		batadv_neigh_node_free_ref(orig_neigh_router);
+
+	/* the private copy was processed, it is not a dropped packet */
+	consume_skb(skb_priv);
+}
+
+/**
+ * batadv_iv_ogm_process - process an incoming batman iv OGM
+ * @skb: the skb containing the OGM
+ * @ogm_offset: offset to the OGM which should be processed (for aggregates)
+ * @if_incoming: the interface where this packet was received
+ *
+ * Own, echoed and not-best-next-hop OGMs are dropped; everything else is
+ * evaluated for the default and for every active outgoing interface.
+ */
+static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
+				  struct batadv_hard_iface *if_incoming)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batadv_orig_node *orig_neigh_node, *orig_node;
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_ogm_packet *ogm_packet;
+	uint32_t if_incoming_seqno;
+	bool has_directlink_flag;
+	struct ethhdr *ethhdr;
+	bool is_my_oldorig = false;
+	bool is_my_addr = false;
+	bool is_my_orig = false;
+
+	ogm_packet = (struct batadv_ogm_packet *)(skb->data + ogm_offset);
+	ethhdr = eth_hdr(skb);
+
+	/* Silently drop when the batman packet is actually not a correct
+	 * packet. This might happen if a packet is padded (e.g. Ethernet has
+	 * a minimum frame length of 64 byte) and the aggregation interprets
+	 * the padding as an additional length.
+	 *
+	 * TODO: A more sane solution would be to have a bit in the
+	 * batadv_ogm_packet to detect whether the packet is the last
+	 * packet in an aggregation.  Here we expect that the padding
+	 * is always zero (or not 0x01)
+	 */
+	if (ogm_packet->packet_type != BATADV_IV_OGM)
+		return;
+
+	/* could be changed by schedule_own_packet() */
+	if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno);
+
+	if (ogm_packet->flags & BATADV_DIRECTLINK)
+		has_directlink_flag = true;
+	else
+		has_directlink_flag = false;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n",
+		   ethhdr->h_source, if_incoming->net_dev->name,
+		   if_incoming->net_dev->dev_addr, ogm_packet->orig,
+		   ogm_packet->prev_sender, ntohl(ogm_packet->seqno),
+		   ogm_packet->tq, ogm_packet->ttl,
+		   ogm_packet->version, has_directlink_flag);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->if_status != BATADV_IF_ACTIVE)
+			continue;
+
+		if (hard_iface->soft_iface != if_incoming->soft_iface)
+			continue;
+
+		if (batadv_compare_eth(ethhdr->h_source,
+				       hard_iface->net_dev->dev_addr))
+			is_my_addr = true;
+
+		if (batadv_compare_eth(ogm_packet->orig,
+				       hard_iface->net_dev->dev_addr))
+			is_my_orig = true;
+
+		if (batadv_compare_eth(ogm_packet->prev_sender,
+				       hard_iface->net_dev->dev_addr))
+			is_my_oldorig = true;
+	}
+	rcu_read_unlock();
+
+	if (is_my_addr) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: received my own broadcast (sender: %pM)\n",
+			   ethhdr->h_source);
+		return;
+	}
+
+	if (is_my_orig) {
+		unsigned long *word;
+		int offset;
+		int32_t bit_pos;
+		int16_t if_num;
+		uint8_t *weight;
+
+		orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
+							 ethhdr->h_source);
+		if (!orig_neigh_node)
+			return;
+
+		/* needs the direct link flag and the matching interface:
+		 * save packet seqno for bidirectional check
+		 */
+		if (has_directlink_flag &&
+		    batadv_compare_eth(if_incoming->net_dev->dev_addr,
+				       ogm_packet->orig)) {
+			if_num = if_incoming->if_num;
+			offset = if_num * BATADV_NUM_WORDS;
+
+			spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
+			word = &(orig_neigh_node->bat_iv.bcast_own[offset]);
+			bit_pos = if_incoming_seqno - 2;
+			bit_pos -= ntohl(ogm_packet->seqno);
+			batadv_set_bit(word, bit_pos);
+			weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num];
+			*weight = bitmap_weight(word,
+						BATADV_TQ_LOCAL_WINDOW_SIZE);
+			spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
+		}
+
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: originator packet from myself (via neighbor)\n");
+		batadv_orig_node_free_ref(orig_neigh_node);
+		return;
+	}
+
+	if (is_my_oldorig) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: ignoring all rebroadcast echos (sender: %pM)\n",
+			   ethhdr->h_source);
+		return;
+	}
+
+	if (ogm_packet->flags & BATADV_NOT_BEST_NEXT_HOP) {
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Drop packet: ignoring all packets not forwarded from the best next hop (sender: %pM)\n",
+			   ethhdr->h_source);
+		return;
+	}
+
+	orig_node = batadv_iv_ogm_orig_get(bat_priv, ogm_packet->orig);
+	if (!orig_node)
+		return;
+
+	batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node,
+					if_incoming, BATADV_IF_DEFAULT);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->if_status != BATADV_IF_ACTIVE)
+			continue;
+
+		if (hard_iface->soft_iface != bat_priv->soft_iface)
+			continue;
+
+		batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node,
+						if_incoming, hard_iface);
+	}
+	rcu_read_unlock();
+
+	batadv_orig_node_free_ref(orig_node);
+}
+
+static int batadv_iv_ogm_receive(struct sk_buff *skb,
+				 struct batadv_hard_iface *if_incoming)
+{
+	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batadv_ogm_packet *ogm_packet;
+	uint8_t *packet_pos;
+	int ogm_offset;
+	bool ret;
+
+	ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
+	if (!ret)
+		return NET_RX_DROP;
+
+	/* drop OGMs received on a mesh that does not run B.A.T.M.A.N. IV */
+	if (bat_priv->bat_algo_ops->bat_ogm_emit != batadv_iv_ogm_emit)
+		return NET_RX_DROP;
+
+	batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
+	batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
+			   skb->len + ETH_HLEN);
+
+	ogm_offset = 0;
+	ogm_packet = (struct batadv_ogm_packet *)skb->data;
+
+	/* unpack the aggregated packets and process them one by one; a
+	 * header has to be inside the buffer before its tvlv_len is read
+	 */
+	while (ogm_offset + BATADV_OGM_HLEN <= skb_headlen(skb) &&
+	       batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb),
+					 ogm_packet->tvlv_len)) {
+		batadv_iv_ogm_process(skb, ogm_offset, if_incoming);
+
+		ogm_offset += BATADV_OGM_HLEN + ntohs(ogm_packet->tvlv_len);
+
+		packet_pos = skb->data + ogm_offset;
+		ogm_packet = (struct batadv_ogm_packet *)packet_pos;
+	}
+
+	consume_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table
+ * @orig_node: originator whose neighbor list is walked
+ * @if_outgoing: outgoing interface whose per-neighbor metrics are printed
+ * @seq: debugfs table seq_file struct receiving the output
+ *
+ * Appends one " <address> (<tq_avg>)" item per neighbor to @seq.
+ *
+ * The caller has to hold the rcu read lock.
+ */
+static void
+batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node,
+			       struct batadv_hard_iface *if_outgoing,
+			       struct seq_file *seq)
+{
+	struct batadv_neigh_ifinfo *ifinfo;
+	struct batadv_neigh_node *neigh;
+
+	hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) {
+		/* neighbors without data for if_outgoing are left out */
+		ifinfo = batadv_neigh_ifinfo_get(neigh, if_outgoing);
+		if (ifinfo) {
+			seq_printf(seq, " %pM (%3i)", neigh->addr,
+				   ifinfo->bat_iv.tq_avg);
+			batadv_neigh_ifinfo_free_ref(ifinfo);
+		}
+	}
+}
+
+/**
+ * batadv_iv_ogm_orig_print - print the originator table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: debugfs table seq_file struct
+ * @if_outgoing: the outgoing interface for which this should be printed
+ *
+ * Walks the originator hash under the rcu read lock and emits one line per
+ * originator whose router towards @if_outgoing has a non-zero average TQ.
+ * A placeholder message is printed if no such originator exists.
+ */
+static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
+				     struct seq_file *seq,
+				     struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct batadv_neigh_ifinfo *ifinfo;
+	struct batadv_orig_node *orig_node;
+	struct batadv_neigh_node *router;
+	struct hlist_head *bucket;
+	int secs, msecs;
+	int printed = 0;
+	uint32_t idx;
+
+	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
+		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
+		   "Nexthop", "outgoingIF", "Potential nexthops");
+
+	for (idx = 0; idx < hash->size; idx++) {
+		bucket = &hash->table[idx];
+
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, bucket, hash_entry) {
+			router = batadv_orig_router_get(orig_node,
+							if_outgoing);
+			if (!router)
+				continue;
+
+			/* nothing to show without a non-zero router metric */
+			ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
+			if (!ifinfo || ifinfo->bat_iv.tq_avg == 0)
+				goto next;
+
+			msecs = jiffies_to_msecs(jiffies -
+						 orig_node->last_seen);
+			secs = msecs / 1000;
+			msecs %= 1000;
+
+			seq_printf(seq, "%pM %4i.%03is   (%3i) %pM [%10s]:",
+				   orig_node->orig, secs, msecs,
+				   ifinfo->bat_iv.tq_avg, router->addr,
+				   router->if_incoming->net_dev->name);
+
+			batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing,
+						       seq);
+			seq_puts(seq, "\n");
+			printed++;
+
+next:
+			/* hand back what was obtained for this originator */
+			batadv_neigh_node_free_ref(router);
+			if (ifinfo)
+				batadv_neigh_ifinfo_free_ref(ifinfo);
+		}
+		rcu_read_unlock();
+	}
+
+	if (!printed)
+		seq_puts(seq, "No batman nodes in range ...\n");
+}
+
+/**
+ * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @if_outgoing1: outgoing interface for the first neighbor
+ * @neigh2: the second neighbor object of the comparison
+ * @if_outgoing2: outgoing interface for the second neighbor
+ *
+ * The metric is the average TQ stored for the given outgoing interface.
+ *
+ * Returns a value less, equal to or greater than 0 if the metric via neigh1 is
+ * lower, the same as or higher than the metric via neigh2. 0 is also returned
+ * if the metric of one of the neighbors is not available.
+ */
+static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
+				   struct batadv_hard_iface *if_outgoing1,
+				   struct batadv_neigh_node *neigh2,
+				   struct batadv_hard_iface *if_outgoing2)
+{
+	struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2;
+	uint8_t tq_a, tq_b;
+	int diff = 0;
+
+	ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
+	ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+
+	/* diff stays 0 unless both metrics could be obtained */
+	if (ifinfo1 && ifinfo2) {
+		tq_a = ifinfo1->bat_iv.tq_avg;
+		tq_b = ifinfo2->bat_iv.tq_avg;
+		diff = tq_a - tq_b;
+	}
+
+	if (ifinfo1)
+		batadv_neigh_ifinfo_free_ref(ifinfo1);
+	if (ifinfo2)
+		batadv_neigh_ifinfo_free_ref(ifinfo2);
+
+	return diff;
+}
+
+/**
+ * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than
+ *  neigh2 from the metric perspective
+ * @neigh1: the first neighbor object of the comparison
+ * @if_outgoing1: outgoing interface for the first neighbor
+ * @neigh2: the second neighbor object of the comparison
+ * @if_outgoing2: outgoing interface for the second neighbor
+ *
+ * neigh1 counts as equally good as long as its average TQ is not lower than
+ * the one of neigh2 by BATADV_TQ_SIMILARITY_THRESHOLD or more.
+ *
+ * Returns true if the metric via neigh1 is equally good or better than
+ * the metric via neigh2, false otherwise.
+ */
+static bool
+batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1,
+			   struct batadv_hard_iface *if_outgoing1,
+			   struct batadv_neigh_node *neigh2,
+			   struct batadv_hard_iface *if_outgoing2)
+{
+	struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2;
+	uint8_t tq_a, tq_b;
+	bool ret = false;
+
+	ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
+	ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+
+	/* without both metrics we can't say that the metric is better */
+	if (ifinfo1 && ifinfo2) {
+		tq_a = ifinfo1->bat_iv.tq_avg;
+		tq_b = ifinfo2->bat_iv.tq_avg;
+		ret = (tq_a - tq_b) > -BATADV_TQ_SIMILARITY_THRESHOLD;
+	}
+
+	/* hand back the ifinfo objects obtained above */
+	if (ifinfo1)
+		batadv_neigh_ifinfo_free_ref(ifinfo1);
+	if (ifinfo2)
+		batadv_neigh_ifinfo_free_ref(ifinfo2);
+
+	return ret;
+}
+
+static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
+	.name = "BATMAN_IV",
+	.bat_iface_enable = batadv_iv_ogm_iface_enable,
+	.bat_iface_disable = batadv_iv_ogm_iface_disable,
+	.bat_iface_update_mac = batadv_iv_ogm_iface_update_mac,
+	.bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
+	.bat_ogm_schedule = batadv_iv_ogm_schedule,
+	.bat_ogm_emit = batadv_iv_ogm_emit, /* also compared on OGM receive */
+	.bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, /* average TQ difference */
+	.bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob,
+	.bat_orig_print = batadv_iv_ogm_orig_print, /* originator table dump */
+	.bat_orig_free = batadv_iv_ogm_orig_free,
+	.bat_orig_add_if = batadv_iv_ogm_orig_add_if,
+	.bat_orig_del_if = batadv_iv_ogm_orig_del_if,
+};
+
+/**
+ * batadv_iv_init - register the B.A.T.M.A.N. IV OGM handler and algorithm
+ *
+ * Returns a negative value on failure, else batadv_algo_register()'s result
+ */
+int __init batadv_iv_init(void)
+{
+	int ret;
+
+	/* batman originator packet */
+	ret = batadv_recv_handler_register(BATADV_IV_OGM,
+					   batadv_iv_ogm_receive);
+	if (ret < 0)
+		return ret;
+
+	ret = batadv_algo_register(&batadv_batman_iv);
+	if (ret < 0)
+		batadv_recv_handler_unregister(BATADV_IV_OGM);
+
+	return ret;
+}
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
deleted file mode 100644
index 497a0700cc3..00000000000
--- a/net/batman-adv/bat_sysfs.c
+++ /dev/null
@@ -1,596 +0,0 @@
-/*
- * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- *
- */
-
-#include "main.h"
-#include "bat_sysfs.h"
-#include "translation-table.h"
-#include "originator.h"
-#include "hard-interface.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "vis.h"
-
-#define to_dev(obj)		container_of(obj, struct device, kobj)
-#define kobj_to_netdev(obj)	to_net_dev(to_dev(obj->parent))
-#define kobj_to_batpriv(obj)	netdev_priv(kobj_to_netdev(obj))
-
-/* Use this, if you have customized show and store functions */
-#define BAT_ATTR(_name, _mode, _show, _store)	\
-struct bat_attribute bat_attr_##_name = {	\
-	.attr = {.name = __stringify(_name),	\
-		 .mode = _mode },		\
-	.show   = _show,			\
-	.store  = _store,			\
-};
-
-#define BAT_ATTR_STORE_BOOL(_name, _post_func)				\
-ssize_t store_##_name(struct kobject *kobj, struct attribute *attr,	\
-		      char *buff, size_t count)				\
-{									\
-	struct net_device *net_dev = kobj_to_netdev(kobj);		\
-	struct bat_priv *bat_priv = netdev_priv(net_dev);		\
-	return __store_bool_attr(buff, count, _post_func, attr,		\
-				 &bat_priv->_name, net_dev);		\
-}
-
-#define BAT_ATTR_SHOW_BOOL(_name)					\
-ssize_t show_##_name(struct kobject *kobj, struct attribute *attr,	\
-			    char *buff)					\
-{									\
-	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);		\
-	return sprintf(buff, "%s\n",					\
-		       atomic_read(&bat_priv->_name) == 0 ?		\
-		       "disabled" : "enabled");				\
-}									\
-
-/* Use this, if you are going to turn a [name] in bat_priv on or off */
-#define BAT_ATTR_BOOL(_name, _mode, _post_func)				\
-	static BAT_ATTR_STORE_BOOL(_name, _post_func)			\
-	static BAT_ATTR_SHOW_BOOL(_name)				\
-	static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
-
-
-#define BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func)		\
-ssize_t store_##_name(struct kobject *kobj, struct attribute *attr,	\
-			     char *buff, size_t count)			\
-{									\
-	struct net_device *net_dev = kobj_to_netdev(kobj);		\
-	struct bat_priv *bat_priv = netdev_priv(net_dev);		\
-	return __store_uint_attr(buff, count, _min, _max, _post_func,	\
-				 attr, &bat_priv->_name, net_dev);	\
-}
-
-#define BAT_ATTR_SHOW_UINT(_name)					\
-ssize_t show_##_name(struct kobject *kobj, struct attribute *attr,	\
-			    char *buff)					\
-{									\
-	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);		\
-	return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name));	\
-}									\
-
-/* Use this, if you are going to set [name] in bat_priv to unsigned integer
- * values only */
-#define BAT_ATTR_UINT(_name, _mode, _min, _max, _post_func)		\
-	static BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func)	\
-	static BAT_ATTR_SHOW_UINT(_name)				\
-	static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
-
-
-static int store_bool_attr(char *buff, size_t count,
-			   struct net_device *net_dev,
-			   char *attr_name, atomic_t *attr)
-{
-	int enabled = -1;
-
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	if ((strncmp(buff, "1", 2) == 0) ||
-	    (strncmp(buff, "enable", 7) == 0) ||
-	    (strncmp(buff, "enabled", 8) == 0))
-		enabled = 1;
-
-	if ((strncmp(buff, "0", 2) == 0) ||
-	    (strncmp(buff, "disable", 8) == 0) ||
-	    (strncmp(buff, "disabled", 9) == 0))
-		enabled = 0;
-
-	if (enabled < 0) {
-		bat_info(net_dev,
-			 "%s: Invalid parameter received: %s\n",
-			 attr_name, buff);
-		return -EINVAL;
-	}
-
-	if (atomic_read(attr) == enabled)
-		return count;
-
-	bat_info(net_dev, "%s: Changing from: %s to: %s\n", attr_name,
-		 atomic_read(attr) == 1 ? "enabled" : "disabled",
-		 enabled == 1 ? "enabled" : "disabled");
-
-	atomic_set(attr, (unsigned)enabled);
-	return count;
-}
-
-static inline ssize_t __store_bool_attr(char *buff, size_t count,
-			void (*post_func)(struct net_device *),
-			struct attribute *attr,
-			atomic_t *attr_store, struct net_device *net_dev)
-{
-	int ret;
-
-	ret = store_bool_attr(buff, count, net_dev, (char *)attr->name,
-			      attr_store);
-	if (post_func && ret)
-		post_func(net_dev);
-
-	return ret;
-}
-
-static int store_uint_attr(char *buff, size_t count,
-			   struct net_device *net_dev, char *attr_name,
-			   unsigned int min, unsigned int max, atomic_t *attr)
-{
-	unsigned long uint_val;
-	int ret;
-
-	ret = strict_strtoul(buff, 10, &uint_val);
-	if (ret) {
-		bat_info(net_dev,
-			 "%s: Invalid parameter received: %s\n",
-			 attr_name, buff);
-		return -EINVAL;
-	}
-
-	if (uint_val < min) {
-		bat_info(net_dev, "%s: Value is too small: %lu min: %u\n",
-			 attr_name, uint_val, min);
-		return -EINVAL;
-	}
-
-	if (uint_val > max) {
-		bat_info(net_dev, "%s: Value is too big: %lu max: %u\n",
-			 attr_name, uint_val, max);
-		return -EINVAL;
-	}
-
-	if (atomic_read(attr) == uint_val)
-		return count;
-
-	bat_info(net_dev, "%s: Changing from: %i to: %lu\n",
-		 attr_name, atomic_read(attr), uint_val);
-
-	atomic_set(attr, uint_val);
-	return count;
-}
-
-static inline ssize_t __store_uint_attr(char *buff, size_t count,
-			int min, int max,
-			void (*post_func)(struct net_device *),
-			struct attribute *attr,
-			atomic_t *attr_store, struct net_device *net_dev)
-{
-	int ret;
-
-	ret = store_uint_attr(buff, count, net_dev, (char *)attr->name,
-			      min, max, attr_store);
-	if (post_func && ret)
-		post_func(net_dev);
-
-	return ret;
-}
-
-static ssize_t show_vis_mode(struct kobject *kobj, struct attribute *attr,
-			     char *buff)
-{
-	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
-	int vis_mode = atomic_read(&bat_priv->vis_mode);
-
-	return sprintf(buff, "%s\n",
-		       vis_mode == VIS_TYPE_CLIENT_UPDATE ?
-							"client" : "server");
-}
-
-static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr,
-			      char *buff, size_t count)
-{
-	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct bat_priv *bat_priv = netdev_priv(net_dev);
-	unsigned long val;
-	int ret, vis_mode_tmp = -1;
-
-	ret = strict_strtoul(buff, 10, &val);
-
-	if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) ||
-	    (strncmp(buff, "client", 6) == 0) ||
-	    (strncmp(buff, "off", 3) == 0))
-		vis_mode_tmp = VIS_TYPE_CLIENT_UPDATE;
-
-	if (((count == 2) && (!ret) && (val == VIS_TYPE_SERVER_SYNC)) ||
-	    (strncmp(buff, "server", 6) == 0))
-		vis_mode_tmp = VIS_TYPE_SERVER_SYNC;
-
-	if (vis_mode_tmp < 0) {
-		if (buff[count - 1] == '\n')
-			buff[count - 1] = '\0';
-
-		bat_info(net_dev,
-			 "Invalid parameter for 'vis mode' setting received: "
-			 "%s\n", buff);
-		return -EINVAL;
-	}
-
-	if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp)
-		return count;
-
-	bat_info(net_dev, "Changing vis mode from: %s to: %s\n",
-		 atomic_read(&bat_priv->vis_mode) == VIS_TYPE_CLIENT_UPDATE ?
-		 "client" : "server", vis_mode_tmp == VIS_TYPE_CLIENT_UPDATE ?
-		 "client" : "server");
-
-	atomic_set(&bat_priv->vis_mode, (unsigned)vis_mode_tmp);
-	return count;
-}
-
-static void post_gw_deselect(struct net_device *net_dev)
-{
-	struct bat_priv *bat_priv = netdev_priv(net_dev);
-	gw_deselect(bat_priv);
-}
-
-static ssize_t show_gw_mode(struct kobject *kobj, struct attribute *attr,
-			    char *buff)
-{
-	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
-	int bytes_written;
-
-	switch (atomic_read(&bat_priv->gw_mode)) {
-	case GW_MODE_CLIENT:
-		bytes_written = sprintf(buff, "%s\n", GW_MODE_CLIENT_NAME);
-		break;
-	case GW_MODE_SERVER:
-		bytes_written = sprintf(buff, "%s\n", GW_MODE_SERVER_NAME);
-		break;
-	default:
-		bytes_written = sprintf(buff, "%s\n", GW_MODE_OFF_NAME);
-		break;
-	}
-
-	return bytes_written;
-}
-
-static ssize_t store_gw_mode(struct kobject *kobj, struct attribute *attr,
-			     char *buff, size_t count)
-{
-	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct bat_priv *bat_priv = netdev_priv(net_dev);
-	char *curr_gw_mode_str;
-	int gw_mode_tmp = -1;
-
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	if (strncmp(buff, GW_MODE_OFF_NAME, strlen(GW_MODE_OFF_NAME)) == 0)
-		gw_mode_tmp = GW_MODE_OFF;
-
-	if (strncmp(buff, GW_MODE_CLIENT_NAME,
-		   strlen(GW_MODE_CLIENT_NAME)) == 0)
-		gw_mode_tmp = GW_MODE_CLIENT;
-
-	if (strncmp(buff, GW_MODE_SERVER_NAME,
-		   strlen(GW_MODE_SERVER_NAME)) == 0)
-		gw_mode_tmp = GW_MODE_SERVER;
-
-	if (gw_mode_tmp < 0) {
-		bat_info(net_dev,
-			 "Invalid parameter for 'gw mode' setting received: "
-			 "%s\n", buff);
-		return -EINVAL;
-	}
-
-	if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp)
-		return count;
-
-	switch (atomic_read(&bat_priv->gw_mode)) {
-	case GW_MODE_CLIENT:
-		curr_gw_mode_str = GW_MODE_CLIENT_NAME;
-		break;
-	case GW_MODE_SERVER:
-		curr_gw_mode_str = GW_MODE_SERVER_NAME;
-		break;
-	default:
-		curr_gw_mode_str = GW_MODE_OFF_NAME;
-		break;
-	}
-
-	bat_info(net_dev, "Changing gw mode from: %s to: %s\n",
-		 curr_gw_mode_str, buff);
-
-	gw_deselect(bat_priv);
-	atomic_set(&bat_priv->gw_mode, (unsigned)gw_mode_tmp);
-	return count;
-}
-
-static ssize_t show_gw_bwidth(struct kobject *kobj, struct attribute *attr,
-			      char *buff)
-{
-	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
-	int down, up;
-	int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth);
-
-	gw_bandwidth_to_kbit(gw_bandwidth, &down, &up);
-	return sprintf(buff, "%i%s/%i%s\n",
-		       (down > 2048 ? down / 1024 : down),
-		       (down > 2048 ? "MBit" : "KBit"),
-		       (up > 2048 ? up / 1024 : up),
-		       (up > 2048 ? "MBit" : "KBit"));
-}
-
-static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr,
-			       char *buff, size_t count)
-{
-	struct net_device *net_dev = kobj_to_netdev(kobj);
-
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	return gw_bandwidth_set(net_dev, buff, count);
-}
-
-BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
-BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
-BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu);
-static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode);
-static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode);
-BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL);
-BAT_ATTR_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, TQ_MAX_VALUE, NULL);
-BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE,
-	      post_gw_deselect);
-static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth,
-		store_gw_bwidth);
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 3, NULL);
-#endif
-
-static struct bat_attribute *mesh_attrs[] = {
-	&bat_attr_aggregated_ogms,
-	&bat_attr_bonding,
-	&bat_attr_fragmentation,
-	&bat_attr_vis_mode,
-	&bat_attr_gw_mode,
-	&bat_attr_orig_interval,
-	&bat_attr_hop_penalty,
-	&bat_attr_gw_sel_class,
-	&bat_attr_gw_bandwidth,
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-	&bat_attr_log_level,
-#endif
-	NULL,
-};
-
-int sysfs_add_meshif(struct net_device *dev)
-{
-	struct kobject *batif_kobject = &dev->dev.kobj;
-	struct bat_priv *bat_priv = netdev_priv(dev);
-	struct bat_attribute **bat_attr;
-	int err;
-
-	bat_priv->mesh_obj = kobject_create_and_add(SYSFS_IF_MESH_SUBDIR,
-						    batif_kobject);
-	if (!bat_priv->mesh_obj) {
-		bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
-			SYSFS_IF_MESH_SUBDIR);
-		goto out;
-	}
-
-	for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) {
-		err = sysfs_create_file(bat_priv->mesh_obj,
-					&((*bat_attr)->attr));
-		if (err) {
-			bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
-				dev->name, SYSFS_IF_MESH_SUBDIR,
-				((*bat_attr)->attr).name);
-			goto rem_attr;
-		}
-	}
-
-	return 0;
-
-rem_attr:
-	for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
-		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
-
-	kobject_put(bat_priv->mesh_obj);
-	bat_priv->mesh_obj = NULL;
-out:
-	return -ENOMEM;
-}
-
-void sysfs_del_meshif(struct net_device *dev)
-{
-	struct bat_priv *bat_priv = netdev_priv(dev);
-	struct bat_attribute **bat_attr;
-
-	for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
-		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
-
-	kobject_put(bat_priv->mesh_obj);
-	bat_priv->mesh_obj = NULL;
-}
-
-static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
-			       char *buff)
-{
-	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
-	ssize_t length;
-
-	if (!hard_iface)
-		return 0;
-
-	length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ?
-			 "none" : hard_iface->soft_iface->name);
-
-	hardif_free_ref(hard_iface);
-
-	return length;
-}
-
-static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
-				char *buff, size_t count)
-{
-	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
-	int status_tmp = -1;
-	int ret = count;
-
-	if (!hard_iface)
-		return count;
-
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	if (strlen(buff) >= IFNAMSIZ) {
-		pr_err("Invalid parameter for 'mesh_iface' setting received: "
-		       "interface name too long '%s'\n", buff);
-		hardif_free_ref(hard_iface);
-		return -EINVAL;
-	}
-
-	if (strncmp(buff, "none", 4) == 0)
-		status_tmp = IF_NOT_IN_USE;
-	else
-		status_tmp = IF_I_WANT_YOU;
-
-	if (hard_iface->if_status == status_tmp)
-		goto out;
-
-	if ((hard_iface->soft_iface) &&
-	    (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
-		goto out;
-
-	if (!rtnl_trylock()) {
-		ret = -ERESTARTSYS;
-		goto out;
-	}
-
-	if (status_tmp == IF_NOT_IN_USE) {
-		hardif_disable_interface(hard_iface);
-		goto unlock;
-	}
-
-	/* if the interface already is in use */
-	if (hard_iface->if_status != IF_NOT_IN_USE)
-		hardif_disable_interface(hard_iface);
-
-	ret = hardif_enable_interface(hard_iface, buff);
-
-unlock:
-	rtnl_unlock();
-out:
-	hardif_free_ref(hard_iface);
-	return ret;
-}
-
-static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
-				 char *buff)
-{
-	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
-	ssize_t length;
-
-	if (!hard_iface)
-		return 0;
-
-	switch (hard_iface->if_status) {
-	case IF_TO_BE_REMOVED:
-		length = sprintf(buff, "disabling\n");
-		break;
-	case IF_INACTIVE:
-		length = sprintf(buff, "inactive\n");
-		break;
-	case IF_ACTIVE:
-		length = sprintf(buff, "active\n");
-		break;
-	case IF_TO_BE_ACTIVATED:
-		length = sprintf(buff, "enabling\n");
-		break;
-	case IF_NOT_IN_USE:
-	default:
-		length = sprintf(buff, "not in use\n");
-		break;
-	}
-
-	hardif_free_ref(hard_iface);
-
-	return length;
-}
-
-static BAT_ATTR(mesh_iface, S_IRUGO | S_IWUSR,
-		show_mesh_iface, store_mesh_iface);
-static BAT_ATTR(iface_status, S_IRUGO, show_iface_status, NULL);
-
-static struct bat_attribute *batman_attrs[] = {
-	&bat_attr_mesh_iface,
-	&bat_attr_iface_status,
-	NULL,
-};
-
-int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev)
-{
-	struct kobject *hardif_kobject = &dev->dev.kobj;
-	struct bat_attribute **bat_attr;
-	int err;
-
-	*hardif_obj = kobject_create_and_add(SYSFS_IF_BAT_SUBDIR,
-						    hardif_kobject);
-
-	if (!*hardif_obj) {
-		bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
-			SYSFS_IF_BAT_SUBDIR);
-		goto out;
-	}
-
-	for (bat_attr = batman_attrs; *bat_attr; ++bat_attr) {
-		err = sysfs_create_file(*hardif_obj, &((*bat_attr)->attr));
-		if (err) {
-			bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
-				dev->name, SYSFS_IF_BAT_SUBDIR,
-				((*bat_attr)->attr).name);
-			goto rem_attr;
-		}
-	}
-
-	return 0;
-
-rem_attr:
-	for (bat_attr = batman_attrs; *bat_attr; ++bat_attr)
-		sysfs_remove_file(*hardif_obj, &((*bat_attr)->attr));
-out:
-	return -ENOMEM;
-}
-
-void sysfs_del_hardif(struct kobject **hardif_obj)
-{
-	kobject_put(*hardif_obj);
-	*hardif_obj = NULL;
-}
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
deleted file mode 100644