4 files changed, 299 insertions, 71 deletions
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index 22e4e8d4eb2..964b93974d8 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -24,6 +24,15 @@
 #define SKIP_GENERIC_SYNC 0
 #define SYNC_START_ERROR -1
 #define DO_GENERIC_SYNC 1
+#define SPUS_PER_NODE   8
+#define DEFAULT_TIMER_EXPIRE  (HZ / 10)
+
+extern struct delayed_work spu_work;
+extern int spu_prof_running;
+
+#define TRACE_ARRAY_SIZE 1024
+
+extern spinlock_t oprof_spu_smpl_arry_lck;
 
 struct spu_overlay_info {	/* map of sections within an SPU overlay */
 	unsigned int vma;	/* SPU virtual memory address from elf */
@@ -62,11 +71,19 @@ struct vma_to_fileoffset_map {	/* map of sections within an SPU program */
 
 };
 
+struct spu_buffer {	/* per SPU staging buffer for OProfile data */
+	int last_guard_val;	/* overlay guard value last seen for this SPU */
+	int ctx_sw_seen;	/* set once a context switch record is queued */
+	unsigned long *buff;	/* circular buffer; NULL if not allocated */
+	unsigned int head, tail; /* head: next slot to fill; tail: next to flush */
+};
+
+
 /* The three functions below are for maintaining and accessing
  * the vma-to-fileoffset map.
  */
 struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
-					     u64 objectid);
+					     unsigned long objectid);
 unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
 			    unsigned int vma, const struct spu *aSpu,
 			    int *grd_val);
@@ -76,10 +93,11 @@ void vma_map_free(struct vma_to_fileoffset_map *map);
  * Entry point for SPU profiling.
  * cycles_reset is the SPU_CYCLES count value specified by the user.
  */
-int start_spu_profiling(unsigned int cycles_reset);
-
-void stop_spu_profiling(void);
+int start_spu_profiling_cycles(unsigned int cycles_reset);
+void start_spu_profiling_events(void);
 
+void stop_spu_profiling_cycles(void);
+void stop_spu_profiling_events(void);
 
 /* add the necessary profiling hooks */
 int spu_sync_start(void);
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index 380d7e21753..b129d007e7f 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -16,24 +16,34 @@
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <asm/cell-pmu.h>
+#include <asm/time.h>
 #include "pr_util.h"
 
-#define TRACE_ARRAY_SIZE 1024
 #define SCALE_SHIFT 14
 
 static u32 *samples;
 
-static int spu_prof_running;
+/* spu_prof_running is a flag used to indicate if spu profiling is enabled
+ * or not.  It is set by the routines start_spu_profiling_cycles() and
+ * start_spu_profiling_events().  The flag is cleared by the routines
+ * stop_spu_profiling_cycles() and stop_spu_profiling_events().  These
+ * routines are called via global_start() and global_stop() which are called in
+ * op_powerpc_start() and op_powerpc_stop().  These routines are called once
+ * per system as a result of the user starting/stopping oprofile.  Hence, only
+ * one CPU per user at a time will be changing the value of spu_prof_running.
+ * In general, OProfile does not protect against multiple users trying to run
+ * OProfile at a time.
+ */
+int spu_prof_running;
 static unsigned int profiling_interval;
 
 #define NUM_SPU_BITS_TRBUF 16
 #define SPUS_PER_TB_ENTRY   4
-#define SPUS_PER_NODE	     8
 
 #define SPU_PC_MASK	     0xFFFF
 
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
+DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+unsigned long oprof_spu_smpl_arry_lck_flags;
 
 void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
 {
@@ -50,7 +60,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese
 	 * of precision.  This is close enough for the purpose at hand.
 	 *
 	 * The value of the timeout should be small enough that the hw
-	 * trace buffer will not get more then about 1/3 full for the
+	 * trace buffer will not get more than about 1/3 full for the
 	 * maximum user specified (the LFSR value) hw sampling frequency.
 	 * This is to ensure the trace buffer will never fill even if the
 	 * kernel thread scheduling varies under a heavy system load.
@@ -146,13 +156,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 		 * sample array must be loaded and then processed for a given
 		 * cpu.	 The sample array is not per cpu.
 		 */
-		spin_lock_irqsave(&sample_array_lock,
-				  sample_array_lock_flags);
+		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+				  oprof_spu_smpl_arry_lck_flags);
 		num_samples = cell_spu_pc_collection(cpu);
 
 		if (num_samples == 0) {
-			spin_unlock_irqrestore(&sample_array_lock,
-					       sample_array_lock_flags);
+			spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+					       oprof_spu_smpl_arry_lck_flags);
 			continue;
 		}
 
@@ -163,8 +173,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 					num_samples);
 		}
 
-		spin_unlock_irqrestore(&sample_array_lock,
-				       sample_array_lock_flags);
+		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+				       oprof_spu_smpl_arry_lck_flags);
 
 	}
 	smp_wmb();	/* insure spu event buffer updates are written */
@@ -183,20 +193,20 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 
 static struct hrtimer timer;
 /*
- * Entry point for SPU profiling.
+ * Entry point for SPU cycle profiling.
  * NOTE:  SPU profiling is done system-wide, not per-CPU.
  *
  * cycles_reset is the count value specified by the user when
  * setting up OProfile to count SPU_CYCLES.
  */
-int start_spu_profiling(unsigned int cycles_reset)
+int start_spu_profiling_cycles(unsigned int cycles_reset)
 {
 	ktime_t kt;
 
 	pr_debug("timer resolution: %lu\n", TICK_NSEC);
 	kt = ktime_set(0, profiling_interval);
 	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	timer.expires = kt;
+	hrtimer_set_expires(&timer, kt);
 	timer.function = profile_spus;
 
 	/* Allocate arrays for collecting SPU PC samples */
@@ -208,14 +218,35 @@ int start_spu_profiling(unsigned int cycles_reset)
 
 	spu_prof_running = 1;
 	hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
+	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
 
 	return 0;
 }
 
-void stop_spu_profiling(void)
+/*
+ * Entry point for SPU event profiling.
+ * NOTE:  SPU profiling is done system-wide, not per-CPU.
+ *
+ * Takes no arguments: it only marks SPU profiling as running and
+ * schedules spu_work with a delay of DEFAULT_TIMER_EXPIRE.
+ */
+void start_spu_profiling_events(void)
+{
+	spu_prof_running = 1;
+	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+
+	return;
+}
+
+void stop_spu_profiling_cycles(void)
 {
 	spu_prof_running = 0;
 	hrtimer_cancel(&timer);
 	kfree(samples);
-	pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+	pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
+}
+
+void stop_spu_profiling_events(void)
+{
+	spu_prof_running = 0;
 }
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 257b13cb18a..28f1af2db1f 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -26,6 +26,7 @@
 #include <linux/notifier.h>
 #include <linux/numa.h>
 #include <linux/oprofile.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include "pr_util.h"
 
@@ -35,7 +36,102 @@ static DEFINE_SPINLOCK(buffer_lock);
 static DEFINE_SPINLOCK(cache_lock);
 static int num_spu_nodes;
 int spu_prof_num_nodes;
-int last_guard_val[MAX_NUMNODES * 8];
+
+struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
+struct delayed_work spu_work;
+static unsigned max_spu_buff;
+
+static void spu_buff_add(unsigned long int value, int spu)
+{
+	/* spu_buff[spu] is a circular buffer.  Entries are added at
+	 * head, which is the index where the next value is stored.
+	 * The buffer is treated as full when only one free slot is
+	 * left, so a write never makes head equal to tail.
+	 * That way head == tail always means the buffer is empty
+	 * rather than full.
+	 *
+	 *  ASSUMPTION: the buffer_lock is held when this function
+	 *              is called to lock the buffer, head and tail.
+	 */
+	int full = 1;
+
+	if (spu_buff[spu].head >= spu_buff[spu].tail) {
+		if ((spu_buff[spu].head - spu_buff[spu].tail)
+		    <  (max_spu_buff - 1))
+			full = 0;
+
+	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
+		if ((spu_buff[spu].tail - spu_buff[spu].head)
+		    > 1)
+			full = 0;
+	}
+
+	if (!full) {
+		spu_buff[spu].buff[spu_buff[spu].head] = value;
+		spu_buff[spu].head++;
+
+		if (spu_buff[spu].head >= max_spu_buff)
+			spu_buff[spu].head = 0;
+	} else {
+		/* From the user's perspective make the SPU buffer
+		 * size management/overflow look like we are using
+		 * per cpu buffers.  The user uses the same
+		 * per cpu parameter to adjust the SPU buffer size.
+		 * The value is dropped and the lost-sample count is
+		 * incremented to inform the user that the buffer
+		 * size needs to be increased.
+		 */
+		oprofile_cpu_buffer_inc_smpl_lost();
+	}
+}
+
+/* Hand the pending entries of each per SPU circular buffer (tail up
+ * to the current head) to oprofile_put_buff() for the kernel buffer.
+ */
+void sync_spu_buff(void)
+{
+	int spu;
+	unsigned long flags;
+	int curr_head;
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* In case there was an issue and the buffer didn't
+		 * get created skip it.
+		 */
+		if (spu_buff[spu].buff == NULL)
+			continue;
+
+		/* Hold the lock to make sure the head/tail
+		 * doesn't change while spu_buff_add() is
+		 * deciding if the buffer is full or not.
+		 * Being a little paranoid.
+		 */
+		spin_lock_irqsave(&buffer_lock, flags);
+		curr_head = spu_buff[spu].head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+
+		/* Transfer the entries between tail and curr_head to the
+		 * kernel buffer.  The lock is not held during this call,
+		 * so data can still be added to the head of the buffer.
+		 */
+		oprofile_put_buff(spu_buff[spu].buff,
+				  spu_buff[spu].tail,
+				  curr_head, max_spu_buff);
+
+		spin_lock_irqsave(&buffer_lock, flags);
+		spu_buff[spu].tail = curr_head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+	}
+
+}
+
+static void wq_sync_spu_buff(struct work_struct *work)
+{
+	/* spu_work handler: move data from spu buffers to kernel buffer */
+	sync_spu_buff();
+
+	/* re-arm spu_work only while spu_prof_running is still set */
+	if (spu_prof_running)
+		schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+}
 
 /* Container for caching information about an active SPU task. */
 struct cached_info {
@@ -68,7 +164,7 @@ static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
 	if (spu_num >= num_spu_nodes) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: Invalid index %d into spu info cache\n",
-		       __FUNCTION__, __LINE__, spu_num);
+		       __func__, __LINE__, spu_num);
 		ret_info = NULL;
 		goto out;
 	}
@@ -115,7 +211,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
 	if (!info) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: create vma_map failed\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		retval = -ENOMEM;
 		goto err_alloc;
 	}
@@ -123,7 +219,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
 	if (!new_map) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: create vma_map failed\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		retval = -ENOMEM;
 		goto err_alloc;
 	}
@@ -171,7 +267,7 @@ static int release_cached_info(int spu_index)
 			printk(KERN_ERR "SPU_PROF: "
 				"%s, line %d: "
 				"Invalid index %d into spu info cache\n",
-				__FUNCTION__, __LINE__, spu_index);
+				__func__, __LINE__, spu_index);
 			goto out;
 		}
 		end = spu_index + 1;
@@ -202,13 +298,13 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
 }
 
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+/* Look up the dcookie for the task's mm->exe_file,
  * which corresponds loosely to "application name". Also, determine
  * the offset for the SPU ELF object.  If computed offset is
  * non-zero, it implies an embedded SPU object; otherwise, it's a
@@ -225,7 +321,6 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 {
 	unsigned long app_cookie = 0;
 	unsigned int my_offset = 0;
-	struct file *app = NULL;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = spu->mm;
 
@@ -234,16 +329,10 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 
 	down_read(&mm->mmap_sem);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (!vma->vm_file)
-			continue;
-		if (!(vma->vm_flags & VM_EXECUTABLE))
-			continue;
-		app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
+	if (mm->exe_file) {
+		app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
 		pr_debug("got dcookie for %s\n",
-			 vma->vm_file->f_dentry->d_name.name);
-		app = vma->vm_file;
-		break;
+			 mm->exe_file->f_dentry->d_name.name);
 	}
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -273,7 +362,7 @@ fail_no_image_cookie:
 
 	printk(KERN_ERR "SPU_PROF: "
 		"%s, line %d: Cannot find dcookie for SPU binary\n",
-		__FUNCTION__, __LINE__);
+		__func__, __LINE__);
 	goto out;
 }
 
@@ -305,14 +394,21 @@ static int process_context_switch(struct spu *spu, unsigned long objectId)
 
 	/* Record context info in event buffer */
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_CTX_SWITCH_CODE);
-	add_event_entry(spu->number);
-	add_event_entry(spu->pid);
-	add_event_entry(spu->tgid);
-	add_event_entry(app_dcookie);
-	add_event_entry(spu_cookie);
-	add_event_entry(offset);
+	spu_buff_add(ESCAPE_CODE, spu->number);
+	spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
+	spu_buff_add(spu->number, spu->number);
+	spu_buff_add(spu->pid, spu->number);
+	spu_buff_add(spu->tgid, spu->number);
+	spu_buff_add(app_dcookie, spu->number);
+	spu_buff_add(spu_cookie, spu->number);
+	spu_buff_add(offset, spu->number);
+
+	/* Set flag to indicate SPU PC data can now be written out.  If
+	 * the SPU program counter data is seen before an SPU context
+	 * record is seen, the postprocessing will fail.
+	 */
+	spu_buff[spu->number].ctx_sw_seen = 1;
+
 	spin_unlock_irqrestore(&buffer_lock, flags);
 	smp_wmb();	/* insure spu event buffer updates are written */
 			/* don't want entries intermingled... */
@@ -360,6 +456,47 @@ static int number_of_online_nodes(void)
         return nodes;
 }
 
+static int oprofile_spu_buff_create(void)
+{
+	int spu;
+
+	max_spu_buff = oprofile_get_cpu_buffer_size();
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* Start each circular buffer out empty (head == tail).
+		 * Use locks to manage accessing the buffers.
+		 */
+		spu_buff[spu].head = 0;
+		spu_buff[spu].tail = 0;
+
+		/*
+		 * Create a zeroed buffer of max_spu_buff entries for each
+		 * SPU.  Can't reliably create a single buffer for all spus
+		 * due to not enough contiguous kernel memory.  kcalloc()
+		 * is the array form and checks the count * size product.
+		 */
+		spu_buff[spu].buff = kcalloc(max_spu_buff,
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+
+		if (!spu_buff[spu].buff) {
+			printk(KERN_ERR "SPU_PROF: "
+			       "%s, line %d:  oprofile_spu_buff_create "
+			       "failed to allocate spu buffer %d.\n",
+			       __func__, __LINE__, spu);
+
+			/* undo: free every buffer allocated so far, then fail */
+			while (spu >= 0) {
+				kfree(spu_buff[spu].buff);
+				spu_buff[spu].buff = NULL;
+				spu--;
+			}
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
 /* The main purpose of this function is to synchronize
  * OProfile with SPUFS by registering to be notified of
  * SPU task switches.
@@ -372,20 +509,35 @@ static int number_of_online_nodes(void)
  */
 int spu_sync_start(void)
 {
-	int k;
+	int spu;
 	int ret = SKIP_GENERIC_SYNC;
 	int register_ret;
 	unsigned long flags = 0;
 
 	spu_prof_num_nodes = number_of_online_nodes();
 	num_spu_nodes = spu_prof_num_nodes * 8;
+	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
+
+	/* create buffer for storing the SPU data to put in
+	 * the kernel buffer.
+	 */
+	ret = oprofile_spu_buff_create();
+	if (ret)
+		goto out;
 
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_PROFILING_CODE);
-	add_event_entry(num_spu_nodes);
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff_add(ESCAPE_CODE, spu);
+		spu_buff_add(SPU_PROFILING_CODE, spu);
+		spu_buff_add(num_spu_nodes, spu);
+	}
 	spin_unlock_irqrestore(&buffer_lock, flags);
 
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff[spu].ctx_sw_seen = 0;
+		spu_buff[spu].last_guard_val = 0;
+	}
+
 	/* Register for SPU events  */
 	register_ret = spu_switch_event_register(&spu_active);
 	if (register_ret) {
@@ -393,8 +545,6 @@ int spu_sync_start(void)
 		goto out;
 	}
 
-	for (k = 0; k < (MAX_NUMNODES * 8); k++)
-		last_guard_val[k] = 0;
 	pr_debug("spu_sync_start -- running.\n");
 out:
 	return ret;
@@ -446,13 +596,20 @@ void spu_sync_buffer(int spu_num, unsigned int *samples,
 		 * use.	 We need to discard samples taken during the time
 		 * period which an overlay occurs (i.e., guard value changes).
 		 */
-		if (grd_val && grd_val != last_guard_val[spu_num]) {
-			last_guard_val[spu_num] = grd_val;
+		if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
+			spu_buff[spu_num].last_guard_val = grd_val;
 			/* Drop the rest of the samples. */
 			break;
 		}
 
-		add_event_entry(file_offset | spu_num_shifted);
+		/* We must ensure that the SPU context switch has been written
+		 * out before samples for the SPU.  Otherwise, the SPU context
+		 * information is not available and the postprocessing of the
+		 * SPU PC will fail with no available anonymous map information.
+		 */
+		if (spu_buff[spu_num].ctx_sw_seen)
+			spu_buff_add((file_offset | spu_num_shifted),
+					 spu_num);
 	}
 	spin_unlock(&buffer_lock);
 out:
@@ -463,20 +620,41 @@ out:
 int spu_sync_stop(void)
 {
 	unsigned long flags = 0;
-	int ret = spu_switch_event_unregister(&spu_active);
-	if (ret) {
+	int ret;
+	int k;
+
+	ret = spu_switch_event_unregister(&spu_active);
+
+	if (ret)
 		printk(KERN_ERR "SPU_PROF: "
-			"%s, line %d: spu_switch_event_unregister returned %d\n",
-			__FUNCTION__, __LINE__, ret);
-		goto out;
-	}
+		       "%s, line %d: spu_switch_event_unregister "	\
+		       "returned %d\n",
+		       __func__, __LINE__, ret);
+
+	/* flush any remaining data in the per SPU buffers */
+	sync_spu_buff();
 
 	spin_lock_irqsave(&cache_lock, flags);
 	ret = release_cached_info(RELEASE_ALL);
 	spin_unlock_irqrestore(&cache_lock, flags);
-out:
+
+	/* Cancel the delayed work and wait for a running instance to
+	 * finish rather than just removing the queued item: the
+	 * handler walks the per SPU buffers that are freed below.
+	 */
+	cancel_delayed_work_sync(&spu_work);
+
+	for (k = 0; k < num_spu_nodes; k++) {
+		spu_buff[k].ctx_sw_seen = 0;
+
+		/*
+		 * spu_buff[k].buff will be NULL if there was a problem
+		 * allocating the buffer; kfree(NULL) is a no-op.
+		 */
+		kfree(spu_buff[k].buff);
+		spu_buff[k].buff = NULL;
+	}
 	pr_debug("spu_sync_stop -- done.\n");
 	return ret;
 }
 
-
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
index 9a932177e70..c579b16845d 100644
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/elf.h>
+#include <linux/slab.h>
 #include "pr_util.h"
 
 
@@ -72,7 +73,7 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
 		kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
 	if (!new) {
 		printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		vma_map_free(map);
 		return NULL;
 	}
@@ -134,19 +135,19 @@ struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
 	if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		goto fail;
 	}
 	if (ehdr.e_machine != EM_SPU) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
-		       __FUNCTION__,  __LINE__);
+		       __func__,  __LINE__);
 		goto fail;
 	}
 	if (ehdr.e_type != ET_EXEC) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: Unexpected e_type parsing SPU ELF\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		goto fail;
 	}
 	phdr_start = spu_elf_start + ehdr.e_phoff;
@@ -185,7 +186,7 @@ struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
 			goto fail;
 
 		if (shdr_str.sh_type != SHT_STRTAB)
-			goto fail;;
+			goto fail;
 
 		for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
 			if (copy_from_user(&sym, spu_elf_start +
@@ -229,10 +230,10 @@ struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
 	 */
 	overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
 					    aSpu, &grd_val);
-	if (overlay_tbl_offset < 0) {
+	if (overlay_tbl_offset > 0x10000000) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: Error finding SPU overlay table\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		goto fail;
 	}
 	ovly_table = spu_elf_start + overlay_tbl_offset;