aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/splashpres.pdfbin0 -> 131072 bytes
-rw-r--r--src/jsifier.js2
-rw-r--r--src/library.js5
-rw-r--r--src/library_browser.js33
-rw-r--r--src/library_gc.js2
-rw-r--r--src/library_glut.js10
-rw-r--r--src/library_sdl.js2
-rw-r--r--src/parseTools.js30
-rw-r--r--src/postamble.js2
-rw-r--r--src/preamble.js2
-rw-r--r--tests/cases/oob_ta2.ll25
-rw-r--r--tests/linpack.c1153
-rwxr-xr-xtests/runner.py7
-rw-r--r--tests/time/src.c5
14 files changed, 1249 insertions, 29 deletions
diff --git a/docs/splashpres.pdf b/docs/splashpres.pdf
new file mode 100644
index 00000000..02fa4ea0
--- /dev/null
+++ b/docs/splashpres.pdf
Binary files differ
diff --git a/src/jsifier.js b/src/jsifier.js
index 8ab96a25..2c83d036 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -1357,7 +1357,7 @@ function JSify(data, functionsOnly, givenFunctions) {
var ignoreFunctionIndexizing = [];
var useJSArgs = (simpleIdent + '__jsargs') in LibraryManager.library;
var hasVarArgs = isVarArgsFunctionType(type);
- var normalArgs = (hasVarArgs && !useJSArgs) ? countNormalArgs(type) : -1;
+ var normalArgs = (hasVarArgs && !useJSArgs) ? countNormalArgs(type, null, true) : -1;
var byPointer = getVarData(funcData, ident);
var byPointerForced = false;
diff --git a/src/library.js b/src/library.js
index 04976b92..d897556f 100644
--- a/src/library.js
+++ b/src/library.js
@@ -6186,8 +6186,9 @@ LibraryManager.library = {
clock_gettime__deps: ['__timespec_struct_layout'],
clock_gettime: function(clk_id, tp) {
// int clock_gettime(clockid_t clk_id, struct timespec *tp);
- {{{ makeSetValue('tp', '___timespec_struct_layout.tv_sec', '0', 'i32') }}}
- {{{ makeSetValue('tp', '___timespec_struct_layout.tv_nsec', '0', 'i32') }}}
+ var now = Date.now();
+ {{{ makeSetValue('tp', '___timespec_struct_layout.tv_sec', 'Math.floor(now/1000)', 'i32') }}}; // seconds
+ {{{ makeSetValue('tp', '___timespec_struct_layout.tv_nsec', '0', 'i32') }}}; // nanoseconds - not supported
return 0;
},
clock_settime: function(clk_id, tp) {
diff --git a/src/library_browser.js b/src/library_browser.js
index 0ed04e19..2e6c9150 100644
--- a/src/library_browser.js
+++ b/src/library_browser.js
@@ -184,7 +184,7 @@ mergeInto(LibraryManager.library, {
};
audio.src = url;
// workaround for chrome bug 124926 - we do not always get oncanplaythrough or onerror
- setTimeout(function() {
+ Browser.safeSetTimeout(function() {
finish(audio); // try to use it even though it is not necessarily ready to play
}, 10000);
} else {
@@ -361,6 +361,30 @@ mergeInto(LibraryManager.library, {
window.requestAnimationFrame(func);
},
+ // generic abort-aware wrapper for an async callback
+ safeCallback: function(func) {
+ return function() {
+ if (!ABORT) return func.apply(null, arguments);
+ };
+ },
+
+ // abort-aware versions
+ safeRequestAnimationFrame: function(func) {
+ Browser.requestAnimationFrame(function() {
+ if (!ABORT) func();
+ });
+ },
+ safeSetTimeout: function(func, timeout) {
+ setTimeout(function() {
+ if (!ABORT) func();
+ }, timeout);
+ },
+ safeSetInterval: function(func, timeout) {
+ setInterval(function() {
+ if (!ABORT) func();
+ }, timeout);
+ },
+
getMovementX: function(event) {
return event['movementX'] ||
event['mozMovementX'] ||
@@ -612,7 +636,7 @@ mergeInto(LibraryManager.library, {
Module['noExitRuntime'] = true;
// TODO: cache these to avoid generating garbage
- setTimeout(function() {
+ Browser.safeSetTimeout(function() {
_emscripten_run_script(script);
}, millis);
},
@@ -621,6 +645,7 @@ mergeInto(LibraryManager.library, {
Module['noExitRuntime'] = true;
Browser.mainLoop.runner = function() {
+ if (ABORT) return;
if (Browser.mainLoop.queue.length > 0) {
var start = Date.now();
var blocker = Browser.mainLoop.queue.shift();
@@ -723,9 +748,9 @@ mergeInto(LibraryManager.library, {
}
if (millis >= 0) {
- setTimeout(wrapper, millis);
+ Browser.safeSetTimeout(wrapper, millis);
} else {
- Browser.requestAnimationFrame(wrapper);
+ Browser.safeRequestAnimationFrame(wrapper);
}
},
diff --git a/src/library_gc.js b/src/library_gc.js
index 2a164250..b3dae0e9 100644
--- a/src/library_gc.js
+++ b/src/library_gc.js
@@ -26,7 +26,7 @@ if (GC_SUPPORT) {
_GC_finalizer_notifier = _malloc(4); setValue(_GC_finalizer_notifier, 0, 'i32');
if (ENVIRONMENT_IS_WEB) {
- setInterval(function() {
+ Browser.safeSetInterval(function() {
GC.maybeCollect();
}, 1000);
} else {
diff --git a/src/library_glut.js b/src/library_glut.js
index 35348028..38cfe55b 100644
--- a/src/library_glut.js
+++ b/src/library_glut.js
@@ -322,16 +322,17 @@ var LibraryGLUT = {
var callback = function() {
if (GLUT.idleFunc) {
Runtime.dynCall('v', GLUT.idleFunc);
- window.setTimeout(callback, 0);
+ Browser.safeSetTimeout(callback, 0);
}
}
- if (!GLUT.idleFunc)
- window.setTimeout(callback, 0);
+ if (!GLUT.idleFunc) {
+ Browser.safeSetTimeout(callback, 0);
+ }
GLUT.idleFunc = func;
},
glutTimerFunc: function(msec, func, value) {
- window.setTimeout(function() { Runtime.dynCall('vi', func, [value]); }, msec);
+ Browser.safeSetTimeout(function() { Runtime.dynCall('vi', func, [value]); }, msec);
},
glutDisplayFunc: function(func) {
@@ -419,6 +420,7 @@ var LibraryGLUT = {
glutPostRedisplay: function() {
if (GLUT.displayFunc) {
Browser.requestAnimationFrame(function() {
+ if (ABORT) return;
Runtime.dynCall('v', GLUT.displayFunc);
});
}
diff --git a/src/library_sdl.js b/src/library_sdl.js
index a5080a99..d31c37f5 100644
--- a/src/library_sdl.js
+++ b/src/library_sdl.js
@@ -1197,7 +1197,7 @@ var LibrarySDL = {
SDL_PauseAudio: function(pauseOn) {
if (SDL.audio.paused !== pauseOn) {
- SDL.audio.timer = pauseOn ? SDL.audio.timer && clearInterval(SDL.audio.timer) : setInterval(SDL.audio.caller, 1/35);
+ SDL.audio.timer = pauseOn ? SDL.audio.timer && clearInterval(SDL.audio.timer) : Browser.safeSetInterval(SDL.audio.caller, 1/35);
}
SDL.audio.paused = pauseOn;
},
diff --git a/src/parseTools.js b/src/parseTools.js
index f30883b5..45046558 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -129,9 +129,13 @@ function isPointerType(type) {
return type[type.length-1] == '*';
}
+function isArrayType(type) {
+ return /^\[\d+\ x\ (.*)\]/.test(type);
+}
+
function isStructType(type) {
if (isPointerType(type)) return false;
- if (/^\[\d+\ x\ (.*)\]/.test(type)) return true; // [15 x ?] blocks. Like structs
+ if (isArrayType(type)) return true;
if (/<?{ ?[^}]* ?}>?/.test(type)) return true; // { i32, i8 } etc. - anonymous struct types
// See comment in isStructPointerType()
return type[0] == '%';
@@ -286,18 +290,18 @@ function isVarArgsFunctionType(type) {
return type.substr(-varArgsSuffix.length) == varArgsSuffix;
}
-function getNumVars(type) { // how many variables are needed to represent this type
+function getNumLegalizedVars(type) { // how many legalized variables are needed to represent this type
if (type in Runtime.FLOAT_TYPES) return 1;
return Math.max(getNumIntChunks(type), 1);
}
-function countNormalArgs(type, out) {
+function countNormalArgs(type, out, legalized) {
out = out || {};
if (!isFunctionType(type, out)) return -1;
var ret = 0;
if (out.segments) {
for (var i = 0; i < out.segments.length; i++) {
- ret += getNumVars(out.segments[i][0].text);
+ ret += legalized ? getNumLegalizedVars(out.segments[i][0].text) : 1;
}
}
if (isVarArgsFunctionType(type)) ret--;
@@ -1754,10 +1758,11 @@ function getGetElementPtrIndexes(item) {
indexes.push(getFastValue(Runtime.getNativeTypeSize(type), '*', offset, 'i32'));
}
}
- item.params.slice(2, item.params.length).forEach(function(arg) {
+ item.params.slice(2, item.params.length).forEach(function(arg, i) {
var curr = arg;
// TODO: If index is constant, optimize
var typeData = Types.types[type];
+ assert(typeData || i == item.params.length - 3); // can be null, when we get to the end (a basic type)
if (isStructType(type) && typeData.needsFlattening) {
if (typeData.flatFactor) {
indexes.push(getFastValue(curr, '*', typeData.flatFactor, 'i32'));
@@ -1773,16 +1778,15 @@ function getGetElementPtrIndexes(item) {
indexes.push(curr);
}
}
- if (!isNumber(curr) || parseInt(curr) < 0) {
- // We have a *variable* to index with, or a negative number. In both
- // cases, in theory we might need to do something dynamic here. FIXME?
- // But, most likely all the possible types are the same, so do that case here now...
- for (var i = 1; i < typeData.fields.length; i++) {
- assert(typeData.fields[0] === typeData.fields[i]);
+ if (typeData) {
+ if (isArrayType(type)) {
+ type = typeData.fields[0]; // all the same, so accept even out-of-bounds this way
+ } else {
+ assert(isNumber(curr)); // cannot be dynamic
+ type = typeData.fields[curr];
}
- curr = 0;
+ assert(type);
}
- type = typeData && typeData.fields[curr] ? typeData.fields[curr] : '';
});
var ret = getFastValues(indexes, '+', 'i32');
diff --git a/src/postamble.js b/src/postamble.js
index d0b737f8..49fd9b3e 100644
--- a/src/postamble.js
+++ b/src/postamble.js
@@ -104,7 +104,7 @@ function run(args) {
setTimeout(function() {
Module['setStatus']('');
}, 1);
- doRun();
+ if (!ABORT) doRun();
}, 1);
return 0;
} else {
diff --git a/src/preamble.js b/src/preamble.js
index 659b3869..35dfeba9 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -231,7 +231,7 @@ var setjmpId = 1; // Used in setjmp/longjmp
var setjmpLabels = {};
#endif
-var ABORT = false;
+var ABORT = false; // whether we are quitting the application. no code should run after this. set in exit() and abort()
var undef = 0;
// tempInt is used for 32-bit signed values or smaller. tempBigInt is used
diff --git a/tests/cases/oob_ta2.ll b/tests/cases/oob_ta2.ll
new file mode 100644
index 00000000..3c94c13c
--- /dev/null
+++ b/tests/cases/oob_ta2.ll
@@ -0,0 +1,25 @@
+; ModuleID = 'tests/hello_world.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%structy = type { [2 x [10 x i8]] }
+
+@.str1 = private unnamed_addr constant [10 x i8] c"1234567890", align 1
+@.str2 = private unnamed_addr constant [10 x i8] c"wakawaka\0A\00", align 1
+@.stry = private unnamed_addr constant [2 x %structy] { %structy { [10 x i8] @.str1, [10 x i8] @.str2 }, %structy { [10 x i8] @.str1, [10 x i8] @.str2 } }
+
+@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 ; [#uses=1 type=[15 x i8]*]
+
+; [#uses=0]
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+ %retval = alloca i32, align 4 ; [#uses=1 type=i32*]
+ store i32 0, i32* %retval
+ %ind = add i32 %argc, 13
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x %structy]* @.stry, i32 0, i32 2, i32 0, i32 %ind))
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32]
+ ret i32 1 ret i32 1
+}
+
+; [#uses=1]
+declare i32 @printf(i8*, ...)
diff --git a/tests/linpack.c b/tests/linpack.c
new file mode 100644
index 00000000..363415c3
--- /dev/null
+++ b/tests/linpack.c
@@ -0,0 +1,1153 @@
+/* gcc linpack.c cpuidc64.o cpuida64.o -m64 -lrt -lc -lm -o linpack
+ *
+ * Linpack 100x100 Benchmark In C/C++ For PCs
+ *
+ * Different compilers can produce different floating point numeric
+ * results, probably due to compiling instructions in a different
+ * sequence. As the program checks these, they may need to be changed.
+ * The log file indicates non-standard results and these values can
+ * be copied and pasted into this program. See // Values near the
+ * end of main().
+ *
+ * Different compilers do not optimise the code in the same way.
+ * This can lead to wide variations in benchmark speeds. See results
+ * with MS6 compiler ID and compare with those from same CPUs from
+ * the Watcom compiler generated code.
+ *
+ ***************************************************************************
+*/
+
+#define _CRT_SECURE_NO_WARNINGS 1
+#ifdef WIN32
+#include <Windows.h>
+#else
+#include <sys/time.h>
+#endif
+
+#define UNROLL
+#define DP
+
+#ifdef SP
+#define REAL float
+#define ZERO 0.0
+#define ONE 1.0
+#define PREC "Single"
+#endif
+
+#ifdef DP
+#define REAL double
+#define ZERO 0.0e0
+#define ONE 1.0e0
+#define PREC "Double"
+#endif
+
+#ifdef ROLL
+#define ROLLING "Rolled"
+#endif
+#ifdef UNROLL
+#define ROLLING "Unrolled"
+#endif
+
+ // VERSION
+
+ #ifdef CNNT
+ #define options "Non-optimised"
+ #define opt "0"
+ #else
+// #define options "Optimised"
+ #define options "Opt 3 64 Bit"
+ #define opt "1"
+ #endif
+
+#define NTIMES 10
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+
+
+/* this is truly rank, but it's minimally invasive, and lifted in part from the STREAM scores */
+
+static double secs;
+
+#ifndef WIN32
+
+double mysecond()
+{
+ struct timeval tp;
+ struct timezone tzp;
+ int i;
+
+ i = gettimeofday(&tp,&tzp);
+ return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
+}
+#else
+
+double mysecond()
+{
+ static LARGE_INTEGER freq = {0};
+ LARGE_INTEGER count = {0};
+ if(freq.QuadPart == 0LL) {
+ QueryPerformanceFrequency(&freq);
+ }
+ QueryPerformanceCounter(&count);
+ return (double)count.QuadPart / (double)freq.QuadPart;
+}
+
+#endif
+
+void start_time()
+{
+ secs = mysecond();
+}
+
+void end_time()
+{
+ secs = mysecond() - secs;
+}
+
+void print_time (int row);
+void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma);
+void dgefa (REAL a[], int lda, int n, int ipvt[], int *info);
+void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job);
+void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
+void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy);
+REAL epslon (REAL x);
+int idamax (int n, REAL dx[], int incx);
+void dscal (int n, REAL da, REAL dx[], int incx);
+REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy);
+
+static REAL atime[9][15];
+double runSecs = 1;
+
+
+int main (int argc, char *argv[])
+{
+ static REAL aa[200*200],a[200*201],b[200],x[200];
+ REAL cray,ops,total,norma,normx;
+ REAL resid,residn,eps,tm2,epsn,x1,x2;
+ REAL mflops;
+ static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
+ int endit, pass, loop;
+ REAL overhead1, overhead2, time2;
+ REAL max1, max2;
+ char was[5][20];
+ char expect[5][20];
+ char title[5][20];
+ int errors;
+
+
+ printf("\n");
+
+ printf("##########################################\n");
+
+
+
+ lda = 201;
+ ldaa = 200;
+ cray = .056;
+ n = 100;
+
+ fprintf(stdout, "%s ", ROLLING);
+ fprintf(stdout, "%s ", PREC);
+ fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n");
+
+ fprintf(stdout,"Optimisation %s\n\n",options);
+
+ ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
+
+ matgen(a,lda,n,b,&norma);
+ start_time();
+ dgefa(a,lda,n,ipvt,&info);
+ end_time();
+ atime[0][0] = secs;
+ start_time();
+ dgesl(a,lda,n,ipvt,b,0);
+ end_time();
+ atime[1][0] = secs;
+ total = atime[0][0] + atime[1][0];
+
+/* compute a residual to verify results. */
+
+ for (i = 0; i < n; i++) {
+ x[i] = b[i];
+ }
+ matgen(a,lda,n,b,&norma);
+ for (i = 0; i < n; i++) {
+ b[i] = -b[i];
+ }
+ dmxpy(n,b,n,lda,x,a);
+ resid = 0.0;
+ normx = 0.0;
+ for (i = 0; i < n; i++) {
+ resid = (resid > fabs((double)b[i]))
+ ? resid : fabs((double)b[i]);
+ normx = (normx > fabs((double)x[i]))
+ ? normx : fabs((double)x[i]);
+ }
+ eps = epslon(ONE);
+ residn = resid/( n*norma*normx*eps );
+ epsn = eps;
+ x1 = x[0] - 1;
+ x2 = x[n-1] - 1;
+
+ printf("norm resid resid machep");
+ printf(" x[0]-1 x[n-1]-1\n");
+ printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
+ (double)residn, (double)resid, (double)epsn,
+ (double)x1, (double)x2);
+
+ printf("Times are reported for matrices of order %5d\n",n);
+ printf("1 pass times for array with leading dimension of%5d\n\n",lda);
+ printf(" dgefa dgesl total Mflops unit");
+ printf(" ratio\n");
+
+ atime[2][0] = total;
+ if (total > 0.0)
+ {
+ atime[3][0] = ops/(1.0e6*total);
+ atime[4][0] = 2.0/atime[3][0];
+ }
+ else
+ {
+ atime[3][0] = 0.0;
+ atime[4][0] = 0.0;
+ }
+ atime[5][0] = total/cray;
+
+ print_time(0);
+
+/************************************************************************
+ * Calculate overhead of executing matgen procedure *
+ ************************************************************************/
+
+ printf("\nCalculating matgen overhead\n");
+ pass = -20;
+ loop = NTIMES;
+ do
+ {
+ start_time();
+ pass = pass + 1;
+ for ( i = 0 ; i < loop ; i++)
+ {
+ matgen(a,lda,n,b,&norma);
+ }
+ end_time();
+ overhead1 = secs;
+ printf("%10d times %6.2f seconds\n", loop, overhead1);
+ if (overhead1 > runSecs)
+ {
+ pass = 0;
+ }
+ if (pass < 0)
+ {
+ if (overhead1 < 0.1)
+ {
+ loop = loop * 10;
+ }
+ else
+ {
+ loop = loop * 2;
+ }
+ }
+ }
+ while (pass < 0);
+
+ overhead1 = overhead1 / (double)loop;
+
+ printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1);
+
+/************************************************************************
+ * Calculate matgen/dgefa passes for runSecs seconds *
+ ************************************************************************/
+
+ printf("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs);
+ pass = -20;
+ ntimes = NTIMES;
+ do
+ {
+ start_time();
+ pass = pass + 1;
+ for ( i = 0 ; i < ntimes ; i++)
+ {
+ matgen(a,lda,n,b,&norma);
+ dgefa(a,lda,n,ipvt,&info );
+ }
+ end_time();
+ time2 = secs;
+ printf("%10d times %6.2f seconds\n", ntimes, time2);
+ if (time2 > runSecs)
+ {
+ pass = 0;
+ }
+ if (pass < 0)
+ {
+ if (time2 < 0.1)
+ {
+ ntimes = ntimes * 10;
+ }
+ else
+ {
+ ntimes = ntimes * 2;
+ }
+ }
+ }
+ while (pass < 0);
+
+ ntimes = (int)(runSecs * (double)ntimes / time2);
+ if (ntimes == 0) ntimes = 1;
+
+ printf("Passes used %10d \n\n", ntimes);
+ printf("Times for array with leading dimension of%4d\n\n",lda);
+ printf(" dgefa dgesl total Mflops unit");
+ printf(" ratio\n");
+
+/************************************************************************
+ * Execute 5 passes *
+ ************************************************************************/
+
+ tm2 = ntimes * overhead1;
+ atime[3][6] = 0;
+
+ for (j=1 ; j<6 ; j++)
+ {
+ start_time();
+ for (i = 0; i < ntimes; i++)
+ {
+ matgen(a,lda,n,b,&norma);
+ dgefa(a,lda,n,ipvt,&info );
+ }
+ end_time();
+ atime[0][j] = (secs - tm2)/ntimes;
+
+ start_time();
+ for (i = 0; i < ntimes; i++)
+ {
+ dgesl(a,lda,n,ipvt,b,0);
+ }
+ end_time();
+
+ atime[1][j] = secs/ntimes;
+ total = atime[0][j] + atime[1][j];
+ atime[2][j] = total;
+ atime[3][j] = ops/(1.0e6*total);
+ atime[4][j] = 2.0/atime[3][j];
+ atime[5][j] = total/cray;
+ atime[3][6] = atime[3][6] + atime[3][j];
+
+ print_time(j);
+ }
+ atime[3][6] = atime[3][6] / 5.0;
+ printf("Average %11.2f\n",
+ (double)atime[3][6]);
+
+ printf("\nCalculating matgen2 overhead\n");
+
+/************************************************************************
+ * Calculate overhead of executing matgen procedure *
+ ************************************************************************/
+
+ start_time();
+ for ( i = 0 ; i < loop ; i++)
+ {
+ matgen(aa,ldaa,n,b,&norma);
+ }
+ end_time();
+ overhead2 = secs;
+ overhead2 = overhead2 / (double)loop;
+
+ printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
+ printf("Times for array with leading dimension of%4d\n\n",ldaa);
+ printf(" dgefa dgesl total Mflops unit");
+ printf(" ratio\n");
+
+/************************************************************************
+ * Execute 5 passes *
+ ************************************************************************/
+
+ tm2 = ntimes * overhead2;
+ atime[3][12] = 0;
+
+ for (j=7 ; j<12 ; j++)
+ {
+ start_time();
+ for (i = 0; i < ntimes; i++)
+ {
+ matgen(aa,ldaa,n,b,&norma);
+ dgefa(aa,ldaa,n,ipvt,&info );
+ }
+ end_time();
+ atime[0][j] = (secs - tm2)/ntimes;
+
+ start_time();
+ for (i = 0; i < ntimes; i++)
+ {
+ dgesl(aa,ldaa,n,ipvt,b,0);
+ }
+ end_time();
+ atime[1][j] = secs/ntimes;
+ total = atime[0][j] + atime[1][j];
+ atime[2][j] = total;
+ atime[3][j] = ops/(1.0e6*total);
+ atime[4][j] = 2.0/atime[3][j];
+ atime[5][j] = total/cray;
+ atime[3][12] = atime[3][12] + atime[3][j];
+
+ print_time(j);
+ }
+ atime[3][12] = atime[3][12] / 5.0;
+ printf("Average %11.2f\n",
+ (double)atime[3][12]);
+
+/************************************************************************
+ * Use minimum average as overall Mflops rating *
+ ************************************************************************/
+
+ mflops = atime[3][6];
+ if (atime[3][12] < mflops) mflops = atime[3][12];
+
+ printf("\n");
+ printf( "%s ", ROLLING);
+ printf( "%s ", PREC);
+ printf(" Precision %11.2f Mflops \n\n",mflops);
+
+
+ max1 = 0;
+ for (i=1 ; i<6 ; i++)
+ {
+ if (atime[3][i] > max1) max1 = atime[3][i];
+ }
+
+ max2 = 0;
+ for (i=7 ; i<12 ; i++)
+ {
+ if (atime[3][i] > max2) max2 = atime[3][i];
+ }
+ if (max1 < max2) max2 = max1;
+
+ sprintf(was[0], "%16.1f",(double)residn);
+ sprintf(was[1], "%16.8e",(double)resid);
+ sprintf(was[2], "%16.8e",(double)epsn);
+ sprintf(was[3], "%16.8e",(double)x1);
+ sprintf(was[4], "%16.8e",(double)x2);
+
+/*
+ // Values for Watcom
+
+ sprintf(expect[0], " 0.4");
+ sprintf(expect[1], " 7.41628980e-014");
+ sprintf(expect[2], " 1.00000000e-015");
+ sprintf(expect[3], "-1.49880108e-014");
+ sprintf(expect[4], "-1.89848137e-014");
+ // Values for Visual C++
+
+ sprintf(expect[0], " 1.7");
+ sprintf(expect[1], " 7.41628980e-014");
+ sprintf(expect[2], " 2.22044605e-016");
+ sprintf(expect[3], "-1.49880108e-014");
+ sprintf(expect[4], "-1.89848137e-014");
+
+ // Values for Ubuntu GCC 32 Bit
+
+ sprintf(expect[0], " 1.9");
+ sprintf(expect[1], " 8.39915160e-14");
+ sprintf(expect[2], " 2.22044605e-16");
+ sprintf(expect[3], " -6.22835117e-14");
+ sprintf(expect[4], " -4.16333634e-14");
+*/
+
+ // Values for Ubuntu GCC 32 Bit
+
+ sprintf(expect[0], " 1.7");
+ sprintf(expect[1], " 7.41628980e-14");
+ sprintf(expect[2], " 2.22044605e-16");
+ sprintf(expect[3], " -1.49880108e-14");
+ sprintf(expect[4], " -1.89848137e-14");
+
+ sprintf(title[0], "norm. resid");
+ sprintf(title[1], "resid ");
+ sprintf(title[2], "machep ");
+ sprintf(title[3], "x[0]-1 ");
+ sprintf(title[4], "x[n-1]-1 ");
+
+ if (strtol(opt, NULL, 10) == 0)
+ {
+ sprintf(expect[2], " 8.88178420e-016");
+ }
+ errors = 0;
+
+ printf ("\n");
+}
+
+/*----------------------*/
+void print_time (int row)
+
+{
+printf("%11.5f%11.5f%11.5f%11.2f%11.4f%11.4f\n", (double)atime[0][row],
+ (double)atime[1][row], (double)atime[2][row], (double)atime[3][row],
+ (double)atime[4][row], (double)atime[5][row]);
+ return;
+}
+
+/*----------------------*/
+
+void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma)
+
+
+/* We would like to declare a[][lda], but c does not allow it. In this
+function, references to a[i][j] are written a[lda*i+j]. */
+
+{
+ int init, i, j;
+
+ init = 1325;
+ *norma = 0.0;
+ for (j = 0; j < n; j++) {
+ for (i = 0; i < n; i++) {
+ init = 3125*init % 65536;
+ a[lda*j+i] = (init - 32768.0)/16384.0;
+ *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
+
+ /* alternative for some compilers
+ if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]);
+ */
+ }
+ }
+ for (i = 0; i < n; i++) {
+ b[i] = 0.0;
+ }
+ for (j = 0; j < n; j++) {
+ for (i = 0; i < n; i++) {
+ b[i] = b[i] + a[lda*j+i];
+ }
+ }
+ return;
+}
+
+/*----------------------*/
+void dgefa(REAL a[], int lda, int n, int ipvt[], int *info)
+
+
+/* We would like to declare a[][lda], but c does not allow it. In this
+function, references to a[i][j] are written a[lda*i+j]. */
+/*
+ dgefa factors a double precision matrix by gaussian elimination.
+
+ dgefa is usually called by dgeco, but it can be called
+ directly with a saving in time if rcond is not needed.
+ (time for dgeco) = (1 + 9/n)*(time for dgefa) .
+
+ on entry
+
+ a REAL precision[n][lda]
+ the matrix to be factored.
+
+ lda integer
+ the leading dimension of the array a .
+
+ n integer
+ the order of the matrix a .
+
+ on return
+
+ a an upper triangular matrix and the multipliers
+ which were used to obtain it.
+ the factorization can be written a = l*u where
+ l is a product of permutation and unit lower
+ triangular matrices and u is upper triangular.
+
+ ipvt integer[n]
+ an integer vector of pivot indices.
+
+ info integer
+ = 0 normal value.
+ = k if u[k][k] .eq. 0.0 . this is not an error
+ condition for this subroutine, but it does
+ indicate that dgesl or dgedi will divide by zero
+ if called. use rcond in dgeco for a reliable
+ indication of singularity.
+
+ linpack. this version dated 08/14/78 .
+ cleve moler, university of new mexico, argonne national lab.
+
+ functions
+
+ blas daxpy,dscal,idamax
+*/
+
+{
+/* internal variables */
+
+REAL t;
+int j,k,kp1,l,nm1;
+
+
+/* gaussian elimination with partial pivoting */
+
+ *info = 0;
+ nm1 = n - 1;
+ if (nm1 >= 0) {
+ for (k = 0; k < nm1; k++) {
+ kp1 = k + 1;
+
+ /* find l = pivot index */
+
+ l = idamax(n-k,&a[lda*k+k],1) + k;
+ ipvt[k] = l;
+
+ /* zero pivot implies this column already
+ triangularized */
+
+ if (a[lda*k+l] != ZERO) {
+
+ /* interchange if necessary */
+
+ if (l != k) {
+ t = a[lda*k+l];
+ a[lda*k+l] = a[lda*k+k];
+ a[lda*k+k] = t;
+ }
+
+ /* compute multipliers */
+
+ t = -ONE/a[lda*k+k];
+ dscal(n-(k+1),t,&a[lda*k+k+1],1);
+
+ /* row elimination with column indexing */
+
+ for (j = kp1; j < n; j++) {
+ t = a[lda*j+l];
+ if (l != k) {
+ a[lda*j+l] = a[lda*j+k];
+ a[lda*j+k] = t;
+ }
+ daxpy(n-(k+1),t,&a[lda*k+k+1],1,
+ &a[lda*j+k+1],1);
+ }
+ }
+ else {
+ *info = k;
+ }
+ }
+ }
+ ipvt[n-1] = n-1;
+ if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1;
+ return;
+}
+
+/*----------------------*/
+
+void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job )
+
+
+/* We would like to declare a[][lda], but c does not allow it. In this
+function, references to a[i][j] are written a[lda*i+j]. */
+
+/*
+ dgesl solves the double precision system
+ a * x = b or trans(a) * x = b
+ using the factors computed by dgeco or dgefa.
+
+ on entry
+