diff options
-rw-r--r-- | docs/splashpres.pdf | bin | 0 -> 131072 bytes | |||
-rw-r--r-- | src/jsifier.js | 2 | ||||
-rw-r--r-- | src/library.js | 5 | ||||
-rw-r--r-- | src/library_browser.js | 33 | ||||
-rw-r--r-- | src/library_gc.js | 2 | ||||
-rw-r--r-- | src/library_glut.js | 10 | ||||
-rw-r--r-- | src/library_sdl.js | 2 | ||||
-rw-r--r-- | src/parseTools.js | 30 | ||||
-rw-r--r-- | src/postamble.js | 2 | ||||
-rw-r--r-- | src/preamble.js | 2 | ||||
-rw-r--r-- | tests/cases/oob_ta2.ll | 25 | ||||
-rw-r--r-- | tests/linpack.c | 1153 | ||||
-rwxr-xr-x | tests/runner.py | 7 | ||||
-rw-r--r-- | tests/time/src.c | 5 |
14 files changed, 1249 insertions, 29 deletions
diff --git a/docs/splashpres.pdf b/docs/splashpres.pdf Binary files differnew file mode 100644 index 00000000..02fa4ea0 --- /dev/null +++ b/docs/splashpres.pdf diff --git a/src/jsifier.js b/src/jsifier.js index 8ab96a25..2c83d036 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -1357,7 +1357,7 @@ function JSify(data, functionsOnly, givenFunctions) { var ignoreFunctionIndexizing = []; var useJSArgs = (simpleIdent + '__jsargs') in LibraryManager.library; var hasVarArgs = isVarArgsFunctionType(type); - var normalArgs = (hasVarArgs && !useJSArgs) ? countNormalArgs(type) : -1; + var normalArgs = (hasVarArgs && !useJSArgs) ? countNormalArgs(type, null, true) : -1; var byPointer = getVarData(funcData, ident); var byPointerForced = false; diff --git a/src/library.js b/src/library.js index 04976b92..d897556f 100644 --- a/src/library.js +++ b/src/library.js @@ -6186,8 +6186,9 @@ LibraryManager.library = { clock_gettime__deps: ['__timespec_struct_layout'], clock_gettime: function(clk_id, tp) { // int clock_gettime(clockid_t clk_id, struct timespec *tp); - {{{ makeSetValue('tp', '___timespec_struct_layout.tv_sec', '0', 'i32') }}} - {{{ makeSetValue('tp', '___timespec_struct_layout.tv_nsec', '0', 'i32') }}} + var now = Date.now(); + {{{ makeSetValue('tp', '___timespec_struct_layout.tv_sec', 'Math.floor(now/1000)', 'i32') }}}; // seconds + {{{ makeSetValue('tp', '___timespec_struct_layout.tv_nsec', '0', 'i32') }}}; // nanoseconds - not supported return 0; }, clock_settime: function(clk_id, tp) { diff --git a/src/library_browser.js b/src/library_browser.js index 0ed04e19..2e6c9150 100644 --- a/src/library_browser.js +++ b/src/library_browser.js @@ -184,7 +184,7 @@ mergeInto(LibraryManager.library, { }; audio.src = url; // workaround for chrome bug 124926 - we do not always get oncanplaythrough or onerror - setTimeout(function() { + Browser.safeSetTimeout(function() { finish(audio); // try to use it even though it is not necessarily ready to play }, 10000); } else { @@ -361,6 +361,30 @@ mergeInto(LibraryManager.library, { window.requestAnimationFrame(func); }, + // generic abort-aware wrapper for an async callback + safeCallback: function(func) { + return function() { + if (!ABORT) return func.apply(null, arguments); + }; + }, + + // abort-aware versions + safeRequestAnimationFrame: function(func) { + Browser.requestAnimationFrame(function() { + if (!ABORT) func(); + }); + }, + safeSetTimeout: function(func, timeout) { + setTimeout(function() { + if (!ABORT) func(); + }, timeout); + }, + safeSetInterval: function(func, timeout) { + setInterval(function() { + if (!ABORT) func(); + }, timeout); + }, + getMovementX: function(event) { return event['movementX'] || event['mozMovementX'] || @@ -612,7 +636,7 @@ mergeInto(LibraryManager.library, { Module['noExitRuntime'] = true; // TODO: cache these to avoid generating garbage - setTimeout(function() { + Browser.safeSetTimeout(function() { _emscripten_run_script(script); }, millis); }, @@ -621,6 +645,7 @@ mergeInto(LibraryManager.library, { Module['noExitRuntime'] = true; Browser.mainLoop.runner = function() { + if (ABORT) return; if (Browser.mainLoop.queue.length > 0) { var start = Date.now(); var blocker = Browser.mainLoop.queue.shift(); @@ -723,9 +748,9 @@ mergeInto(LibraryManager.library, { } if (millis >= 0) { - setTimeout(wrapper, millis); + Browser.safeSetTimeout(wrapper, millis); } else { - Browser.requestAnimationFrame(wrapper); + Browser.safeRequestAnimationFrame(wrapper); } }, diff --git a/src/library_gc.js b/src/library_gc.js index 2a164250..b3dae0e9 100644 --- a/src/library_gc.js +++ b/src/library_gc.js @@ -26,7 +26,7 @@ if (GC_SUPPORT) { _GC_finalizer_notifier = _malloc(4); setValue(_GC_finalizer_notifier, 0, 'i32'); if (ENVIRONMENT_IS_WEB) { - setInterval(function() { + Browser.safeSetInterval(function() { GC.maybeCollect(); }, 1000); } else { diff --git a/src/library_glut.js b/src/library_glut.js index 35348028..38cfe55b 100644 --- a/src/library_glut.js +++ b/src/library_glut.js @@ -322,16 +322,17 @@ var LibraryGLUT = { var callback = function() { if (GLUT.idleFunc) { Runtime.dynCall('v', GLUT.idleFunc); - window.setTimeout(callback, 0); + Browser.safeSetTimeout(callback, 0); } } - if (!GLUT.idleFunc) - window.setTimeout(callback, 0); + if (!GLUT.idleFunc) { + Browser.safeSetTimeout(callback, 0); + } GLUT.idleFunc = func; }, glutTimerFunc: function(msec, func, value) { - window.setTimeout(function() { Runtime.dynCall('vi', func, [value]); }, msec); + Browser.safeSetTimeout(function() { Runtime.dynCall('vi', func, [value]); }, msec); }, glutDisplayFunc: function(func) { @@ -419,6 +420,7 @@ var LibraryGLUT = { glutPostRedisplay: function() { if (GLUT.displayFunc) { Browser.requestAnimationFrame(function() { + if (ABORT) return; Runtime.dynCall('v', GLUT.displayFunc); }); } diff --git a/src/library_sdl.js b/src/library_sdl.js index a5080a99..d31c37f5 100644 --- a/src/library_sdl.js +++ b/src/library_sdl.js @@ -1197,7 +1197,7 @@ var LibrarySDL = { SDL_PauseAudio: function(pauseOn) { if (SDL.audio.paused !== pauseOn) { - SDL.audio.timer = pauseOn ? SDL.audio.timer && clearInterval(SDL.audio.timer) : setInterval(SDL.audio.caller, 1/35); + SDL.audio.timer = pauseOn ? SDL.audio.timer && clearInterval(SDL.audio.timer) : Browser.safeSetInterval(SDL.audio.caller, 1/35); } SDL.audio.paused = pauseOn; }, diff --git a/src/parseTools.js b/src/parseTools.js index f30883b5..45046558 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -129,9 +129,13 @@ function isPointerType(type) { return type[type.length-1] == '*'; } +function isArrayType(type) { + return /^\[\d+\ x\ (.*)\]/.test(type); +} + function isStructType(type) { if (isPointerType(type)) return false; - if (/^\[\d+\ x\ (.*)\]/.test(type)) return true; // [15 x ?] blocks. Like structs + if (isArrayType(type)) return true; if (/<?{ ?[^}]* ?}>?/.test(type)) return true; // { i32, i8 } etc. - anonymous struct types // See comment in isStructPointerType() return type[0] == '%'; @@ -286,18 +290,18 @@ function isVarArgsFunctionType(type) { return type.substr(-varArgsSuffix.length) == varArgsSuffix; } -function getNumVars(type) { // how many variables are needed to represent this type +function getNumLegalizedVars(type) { // how many legalized variables are needed to represent this type if (type in Runtime.FLOAT_TYPES) return 1; return Math.max(getNumIntChunks(type), 1); } -function countNormalArgs(type, out) { +function countNormalArgs(type, out, legalized) { out = out || {}; if (!isFunctionType(type, out)) return -1; var ret = 0; if (out.segments) { for (var i = 0; i < out.segments.length; i++) { - ret += getNumVars(out.segments[i][0].text); + ret += legalized ? getNumLegalizedVars(out.segments[i][0].text) : 1; } } if (isVarArgsFunctionType(type)) ret--; @@ -1754,10 +1758,11 @@ function getGetElementPtrIndexes(item) { indexes.push(getFastValue(Runtime.getNativeTypeSize(type), '*', offset, 'i32')); } } - item.params.slice(2, item.params.length).forEach(function(arg) { + item.params.slice(2, item.params.length).forEach(function(arg, i) { var curr = arg; // TODO: If index is constant, optimize var typeData = Types.types[type]; + assert(typeData || i == item.params.length - 3); // can be null, when we get to the end (a basic type) if (isStructType(type) && typeData.needsFlattening) { if (typeData.flatFactor) { indexes.push(getFastValue(curr, '*', typeData.flatFactor, 'i32')); @@ -1773,16 +1778,15 @@ function getGetElementPtrIndexes(item) { indexes.push(curr); } } - if (!isNumber(curr) || parseInt(curr) < 0) { - // We have a *variable* to index with, or a negative number. In both - // cases, in theory we might need to do something dynamic here. FIXME? - // But, most likely all the possible types are the same, so do that case here now... - for (var i = 1; i < typeData.fields.length; i++) { - assert(typeData.fields[0] === typeData.fields[i]); + if (typeData) { + if (isArrayType(type)) { + type = typeData.fields[0]; // all the same, so accept even out-of-bounds this way + } else { + assert(isNumber(curr)); // cannot be dynamic + type = typeData.fields[curr]; } - curr = 0; + assert(type); } - type = typeData && typeData.fields[curr] ? typeData.fields[curr] : ''; }); var ret = getFastValues(indexes, '+', 'i32'); diff --git a/src/postamble.js b/src/postamble.js index d0b737f8..49fd9b3e 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -104,7 +104,7 @@ function run(args) { setTimeout(function() { Module['setStatus'](''); }, 1); - doRun(); + if (!ABORT) doRun(); }, 1); return 0; } else { diff --git a/src/preamble.js b/src/preamble.js index 659b3869..35dfeba9 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -231,7 +231,7 @@ var setjmpId = 1; // Used in setjmp/longjmp var setjmpLabels = {}; #endif -var ABORT = false; +var ABORT = false; // whether we are quitting the application. no code should run after this. set in exit() and abort() var undef = 0; // tempInt is used for 32-bit signed values or smaller. tempBigInt is used diff --git a/tests/cases/oob_ta2.ll b/tests/cases/oob_ta2.ll new file mode 100644 index 00000000..3c94c13c --- /dev/null +++ b/tests/cases/oob_ta2.ll @@ -0,0 +1,25 @@ +; ModuleID = 'tests/hello_world.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-pc-linux-gnu" + +%structy = type { [2 x [10 x i8]] } + +@.str1 = private unnamed_addr constant [10 x i8] c"1234567890", align 1 +@.str2 = private unnamed_addr constant [10 x i8] c"wakawaka\0A\00", align 1 +@.stry = private unnamed_addr constant [2 x %structy] { %structy { [10 x i8] @.str1, [10 x i8] @.str2 }, %structy { [10 x i8] @.str1, [10 x i8] @.str2 } } + +@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 ; [#uses=1 type=[15 x i8]*] + +; [#uses=0] +define i32 @main(i32 %argc, i8** %argv) { +entry: + %retval = alloca i32, align 4 ; [#uses=1 type=i32*] + store i32 0, i32* %retval + %ind = add i32 %argc, 13 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x %structy]* @.stry, i32 0, i32 2, i32 0, i32 %ind)) + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32] + ret i32 1 ret i32 1 +} + +; [#uses=1] +declare i32 @printf(i8*, ...) diff --git a/tests/linpack.c b/tests/linpack.c new file mode 100644 index 00000000..363415c3 --- /dev/null +++ b/tests/linpack.c @@ -0,0 +1,1153 @@ +/* gcc linpack.c cpuidc64.o cpuida64.o -m64 -lrt -lc -lm -o linpack + * + * Linpack 100x100 Benchmark In C/C++ For PCs + * + * Different compilers can produce different floating point numeric + * results, probably due to compiling instructions in a different + * sequence. As the program checks these, they may need to be changed. + * The log file indicates non-standard results and these values can + * be copied and pasted into this program. See // Values near the + * end of main(). + * + * Different compilers do not optimise the code in the same way. + * This can lead to wide variations in benchmark speeds. See results + * with MS6 compiler ID and compare with those from same CPUs from + * the Watcom compiler generated code. + * + *************************************************************************** +*/ + +#define _CRT_SECURE_NO_WARNINGS 1 +#ifdef WIN32 +#include <Windows.h> +#else +#include <sys/time.h> +#endif + +#define UNROLL +#define DP + +#ifdef SP +#define REAL float +#define ZERO 0.0 +#define ONE 1.0 +#define PREC "Single" +#endif + +#ifdef DP +#define REAL double +#define ZERO 0.0e0 +#define ONE 1.0e0 +#define PREC "Double" +#endif + +#ifdef ROLL +#define ROLLING "Rolled" +#endif +#ifdef UNROLL +#define ROLLING "Unrolled" +#endif + + // VERSION + + #ifdef CNNT + #define options "Non-optimised" + #define opt "0" + #else +// #define options "Optimised" + #define options "Opt 3 64 Bit" + #define opt "1" + #endif + +#define NTIMES 10 + +#include <stdio.h> +#include <math.h> +#include <stdlib.h> +#include <time.h> + + +/* this is truly rank, but it's minimally invasive, and lifted in part from the STREAM scores */ + +static double secs; + +#ifndef WIN32 + +double mysecond() +{ + struct timeval tp; + struct timezone tzp; + int i; + + i = gettimeofday(&tp,&tzp); + return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); +} +#else + +double mysecond() +{ + static LARGE_INTEGER freq = {0}; + LARGE_INTEGER count = {0}; + if(freq.QuadPart == 0LL) { + QueryPerformanceFrequency(&freq); + } + QueryPerformanceCounter(&count); + return (double)count.QuadPart / (double)freq.QuadPart; +} + +#endif + +void start_time() +{ + secs = mysecond(); +} + +void end_time() +{ + secs = mysecond() - secs; +} + +void print_time (int row); +void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma); +void dgefa (REAL a[], int lda, int n, int ipvt[], int *info); +void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job); +void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); +void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy); +REAL epslon (REAL x); +int idamax (int n, REAL dx[], int incx); +void dscal (int n, REAL da, REAL dx[], int incx); +REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy); + +static REAL atime[9][15]; +double runSecs = 1; + + +int main (int argc, char *argv[]) +{ + static REAL aa[200*200],a[200*201],b[200],x[200]; + REAL cray,ops,total,norma,normx; + REAL resid,residn,eps,tm2,epsn,x1,x2; + REAL mflops; + static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; + int endit, pass, loop; + REAL overhead1, overhead2, time2; + REAL max1, max2; + char was[5][20]; + char expect[5][20]; + char title[5][20]; + int errors; + + + printf("\n"); + + printf("##########################################\n"); + + + + lda = 201; + ldaa = 200; + cray = .056; + n = 100; + + fprintf(stdout, "%s ", ROLLING); + fprintf(stdout, "%s ", PREC); + fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); + + fprintf(stdout,"Optimisation %s\n\n",options); + + ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); + + matgen(a,lda,n,b,&norma); + start_time(); + dgefa(a,lda,n,ipvt,&info); + end_time(); + atime[0][0] = secs; + start_time(); + dgesl(a,lda,n,ipvt,b,0); + end_time(); + atime[1][0] = secs; + total = atime[0][0] + atime[1][0]; + +/* compute a residual to verify results. */ + + for (i = 0; i < n; i++) { + x[i] = b[i]; + } + matgen(a,lda,n,b,&norma); + for (i = 0; i < n; i++) { + b[i] = -b[i]; + } + dmxpy(n,b,n,lda,x,a); + resid = 0.0; + normx = 0.0; + for (i = 0; i < n; i++) { + resid = (resid > fabs((double)b[i])) + ? resid : fabs((double)b[i]); + normx = (normx > fabs((double)x[i])) + ? normx : fabs((double)x[i]); + } + eps = epslon(ONE); + residn = resid/( n*norma*normx*eps ); + epsn = eps; + x1 = x[0] - 1; + x2 = x[n-1] - 1; + + printf("norm resid resid machep"); + printf(" x[0]-1 x[n-1]-1\n"); + printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", + (double)residn, (double)resid, (double)epsn, + (double)x1, (double)x2); + + printf("Times are reported for matrices of order %5d\n",n); + printf("1 pass times for array with leading dimension of%5d\n\n",lda); + printf(" dgefa dgesl total Mflops unit"); + printf(" ratio\n"); + + atime[2][0] = total; + if (total > 0.0) + { + atime[3][0] = ops/(1.0e6*total); + atime[4][0] = 2.0/atime[3][0]; + } + else + { + atime[3][0] = 0.0; + atime[4][0] = 0.0; + } + atime[5][0] = total/cray; + + print_time(0); + +/************************************************************************ + * Calculate overhead of executing matgen procedure * + ************************************************************************/ + + printf("\nCalculating matgen overhead\n"); + pass = -20; + loop = NTIMES; + do + { + start_time(); + pass = pass + 1; + for ( i = 0 ; i < loop ; i++) + { + matgen(a,lda,n,b,&norma); + } + end_time(); + overhead1 = secs; + printf("%10d times %6.2f seconds\n", loop, overhead1); + if (overhead1 > runSecs) + { + pass = 0; + } + if (pass < 0) + { + if (overhead1 < 0.1) + { + loop = loop * 10; + } + else + { + loop = loop * 2; + } + } + } + while (pass < 0); + + overhead1 = overhead1 / (double)loop; + + printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1); + +/************************************************************************ + * Calculate matgen/dgefa passes for runSecs seconds * + ************************************************************************/ + + printf("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); + pass = -20; + ntimes = NTIMES; + do + { + start_time(); + pass = pass + 1; + for ( i = 0 ; i < ntimes ; i++) + { + matgen(a,lda,n,b,&norma); + dgefa(a,lda,n,ipvt,&info ); + } + end_time(); + time2 = secs; + printf("%10d times %6.2f seconds\n", ntimes, time2); + if (time2 > runSecs) + { + pass = 0; + } + if (pass < 0) + { + if (time2 < 0.1) + { + ntimes = ntimes * 10; + } + else + { + ntimes = ntimes * 2; + } + } + } + while (pass < 0); + + ntimes = (int)(runSecs * (double)ntimes / time2); + if (ntimes == 0) ntimes = 1; + + printf("Passes used %10d \n\n", ntimes); + printf("Times for array with leading dimension of%4d\n\n",lda); + printf(" dgefa dgesl total Mflops unit"); + printf(" ratio\n"); + +/************************************************************************ + * Execute 5 passes * + ************************************************************************/ + + tm2 = ntimes * overhead1; + atime[3][6] = 0; + + for (j=1 ; j<6 ; j++) + { + start_time(); + for (i = 0; i < ntimes; i++) + { + matgen(a,lda,n,b,&norma); + dgefa(a,lda,n,ipvt,&info ); + } + end_time(); + atime[0][j] = (secs - tm2)/ntimes; + + start_time(); + for (i = 0; i < ntimes; i++) + { + dgesl(a,lda,n,ipvt,b,0); + } + end_time(); + + atime[1][j] = secs/ntimes; + total = atime[0][j] + atime[1][j]; + atime[2][j] = total; + atime[3][j] = ops/(1.0e6*total); + atime[4][j] = 2.0/atime[3][j]; + atime[5][j] = total/cray; + atime[3][6] = atime[3][6] + atime[3][j]; + + print_time(j); + } + atime[3][6] = atime[3][6] / 5.0; + printf("Average %11.2f\n", + (double)atime[3][6]); + + printf("\nCalculating matgen2 overhead\n"); + +/************************************************************************ + * Calculate overhead of executing matgen procedure * + ************************************************************************/ + + start_time(); + for ( i = 0 ; i < loop ; i++) + { + matgen(aa,ldaa,n,b,&norma); + } + end_time(); + overhead2 = secs; + overhead2 = overhead2 / (double)loop; + + printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2); + printf("Times for array with leading dimension of%4d\n\n",ldaa); + printf(" dgefa dgesl total Mflops unit"); + printf(" ratio\n"); + +/************************************************************************ + * Execute 5 passes * + ************************************************************************/ + + tm2 = ntimes * overhead2; + atime[3][12] = 0; + + for (j=7 ; j<12 ; j++) + { + start_time(); + for (i = 0; i < ntimes; i++) + { + matgen(aa,ldaa,n,b,&norma); + dgefa(aa,ldaa,n,ipvt,&info ); + } + end_time(); + atime[0][j] = (secs - tm2)/ntimes; + + start_time(); + for (i = 0; i < ntimes; i++) + { + dgesl(aa,ldaa,n,ipvt,b,0); + } + end_time(); + atime[1][j] = secs/ntimes; + total = atime[0][j] + atime[1][j]; + atime[2][j] = total; + atime[3][j] = ops/(1.0e6*total); + atime[4][j] = 2.0/atime[3][j]; + atime[5][j] = total/cray; + atime[3][12] = atime[3][12] + atime[3][j]; + + print_time(j); + } + atime[3][12] = atime[3][12] / 5.0; + printf("Average %11.2f\n", + (double)atime[3][12]); + +/************************************************************************ + * Use minimum average as overall Mflops rating * + ************************************************************************/ + + mflops = atime[3][6]; + if (atime[3][12] < mflops) mflops = atime[3][12]; + + printf("\n"); + printf( "%s ", ROLLING); + printf( "%s ", PREC); + printf(" Precision %11.2f Mflops \n\n",mflops); + + + max1 = 0; + for (i=1 ; i<6 ; i++) + { + if (atime[3][i] > max1) max1 = atime[3][i]; + } + + max2 = 0; + for (i=7 ; i<12 ; i++) + { + if (atime[3][i] > max2) max2 = atime[3][i]; + } + if (max1 < max2) max2 = max1; + + sprintf(was[0], "%16.1f",(double)residn); + sprintf(was[1], "%16.8e",(double)resid); + sprintf(was[2], "%16.8e",(double)epsn); + sprintf(was[3], "%16.8e",(double)x1); + sprintf(was[4], "%16.8e",(double)x2); + +/* + // Values for Watcom + + sprintf(expect[0], " 0.4"); + sprintf(expect[1], " 7.41628980e-014"); + sprintf(expect[2], " 1.00000000e-015"); + sprintf(expect[3], "-1.49880108e-014"); + sprintf(expect[4], "-1.89848137e-014"); + // Values for Visual C++ + + sprintf(expect[0], " 1.7"); + sprintf(expect[1], " 7.41628980e-014"); + sprintf(expect[2], " 2.22044605e-016"); + sprintf(expect[3], "-1.49880108e-014"); + sprintf(expect[4], "-1.89848137e-014"); + + // Values for Ubuntu GCC 32 Bit + + sprintf(expect[0], " 1.9"); + sprintf(expect[1], " 8.39915160e-14"); + sprintf(expect[2], " 2.22044605e-16"); + sprintf(expect[3], " -6.22835117e-14"); + sprintf(expect[4], " -4.16333634e-14"); +*/ + + // Values for Ubuntu GCC 32 Bit + + sprintf(expect[0], " 1.7"); + sprintf(expect[1], " 7.41628980e-14"); + sprintf(expect[2], " 2.22044605e-16"); + sprintf(expect[3], " -1.49880108e-14"); + sprintf(expect[4], " -1.89848137e-14"); + + sprintf(title[0], "norm. resid"); + sprintf(title[1], "resid "); + sprintf(title[2], "machep "); + sprintf(title[3], "x[0]-1 "); + sprintf(title[4], "x[n-1]-1 "); + + if (strtol(opt, NULL, 10) == 0) + { + sprintf(expect[2], " 8.88178420e-016"); + } + errors = 0; + + printf ("\n"); +} + +/*----------------------*/ +void print_time (int row) + +{ +printf("%11.5f%11.5f%11.5f%11.2f%11.4f%11.4f\n", (double)atime[0][row], + (double)atime[1][row], (double)atime[2][row], (double)atime[3][row], + (double)atime[4][row], (double)atime[5][row]); + return; +} + +/*----------------------*/ + +void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma) + + +/* We would like to declare a[][lda], but c does not allow it. In this +function, references to a[i][j] are written a[lda*i+j]. */ + +{ + int init, i, j; + + init = 1325; + *norma = 0.0; + for (j = 0; j < n; j++) { + for (i = 0; i < n; i++) { + init = 3125*init % 65536; + a[lda*j+i] = (init - 32768.0)/16384.0; + *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; + + /* alternative for some compilers + if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]); + */ + } + } + for (i = 0; i < n; i++) { + b[i] = 0.0; + } + for (j = 0; j < n; j++) { + for (i = 0; i < n; i++) { + b[i] = b[i] + a[lda*j+i]; + } + } + return; +} + +/*----------------------*/ +void dgefa(REAL a[], int lda, int n, int ipvt[], int *info) + + +/* We would like to declare a[][lda], but c does not allow it. In this +function, references to a[i][j] are written a[lda*i+j]. */ +/* + dgefa factors a double precision matrix by gaussian elimination. + + dgefa is usually called by dgeco, but it can be called + directly with a saving in time if rcond is not needed. + (time for dgeco) = (1 + 9/n)*(time for dgefa) . + + on entry + + a REAL precision[n][lda] + the matrix to be factored. + + lda integer + the leading dimension of the array a . + + n integer + the order of the matrix a . + + on return + + a an upper triangular matrix and the multipliers + which were used to obtain it. + the factorization can be written a = l*u where + l is a product of permutation and unit lower + triangular matrices and u is upper triangular. + + ipvt integer[n] + an integer vector of pivot indices. + + info integer + = 0 normal value. + = k if u[k][k] .eq. 0.0 . this is not an error + condition for this subroutine, but it does + indicate that dgesl or dgedi will divide by zero + if called. use rcond in dgeco for a reliable + indication of singularity. + + linpack. this version dated 08/14/78 . + cleve moler, university of new mexico, argonne national lab. + + functions + + blas daxpy,dscal,idamax +*/ + +{ +/* internal variables */ + +REAL t; +int j,k,kp1,l,nm1; + + +/* gaussian elimination with partial pivoting */ + + *info = 0; + nm1 = n - 1; + if (nm1 >= 0) { + for (k = 0; k < nm1; k++) { + kp1 = k + 1; + + /* find l = pivot index */ + + l = idamax(n-k,&a[lda*k+k],1) + k; + ipvt[k] = l; + + /* zero pivot implies this column already + triangularized */ + + if (a[lda*k+l] != ZERO) { + + /* interchange if necessary */ + + if (l != k) { + t = a[lda*k+l]; + a[lda*k+l] = a[lda*k+k]; + a[lda*k+k] = t; + } + + /* compute multipliers */ + + t = -ONE/a[lda*k+k]; + dscal(n-(k+1),t,&a[lda*k+k+1],1); + + /* row elimination with column indexing */ + + for (j = kp1; j < n; j++) { + t = a[lda*j+l]; + if (l != k) { + a[lda*j+l] = a[lda*j+k]; + a[lda*j+k] = t; + } + daxpy(n-(k+1),t,&a[lda*k+k+1],1, + &a[lda*j+k+1],1); + } + } + else { + *info = k; + } + } + } + ipvt[n-1] = n-1; + if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1; + return; +} + +/*----------------------*/ + +void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job ) + + +/* We would like to declare a[][lda], but c does not allow it. In this +function, references to a[i][j] are written a[lda*i+j]. */ + +/* + dgesl solves the double precision system + a * x = b or trans(a) * x = b + using the factors computed by dgeco or dgefa. + + on entry + |