14 роки тому · 38c72a01ec
--- a/benchmarks/Makefile
+++ b/benchmarks/Makefile
@@ -0,0 +1,21 @@
 
				+CC=xgcc
			
 
				+CFLAGS=-O0
			
 
				+
			
 
				+%.s: %.c
			
 
				+	$(CC) $(CFLAGS) -S $<
			
 
				+%.o: %.s
			
 
				+	$(CC) $(CFLAGS) -c $<
			
 
				+
			
 
				+%: %.o
			
 
				+	$(CC) $(CFLAGS) $<
			
 
				+
			
 
				+all: acron clinpack dhrystone pi slalom whet
			
 
				+
			
 
				+asm: acron.s clinpack.s dhrystone.s pi.s slalom.s whet.s
			
 
				+
			
 
				+clean:
			
 
				+	rm -f acron clinpack dhrystone pi slalom whet
			
 
				+	rm -f *.o
			
 
				+	rm -f *.s
			
 
				+	rm -f *.cycles *.output
			
 
				+
			
--- a/benchmarks/acron.c
+++ b/benchmarks/acron.c
@@ -0,0 +1,63 @@
 
#include <ctype.h>
#include <stdio.h>
			
 
				+
			
 
				+#define N	6
			
 
				+
			
 
				+char *w[] = {"Vertalerbouw", "Ertalerbouw", "Practicum", "Optimization", "Peephole", "Eephole"};
			
 
				+char acron[N*2], command[100];
			
 
				+int  done[N], pindex[N+1];
			
 
				+
			
 
				+int is_vowel(char c)
			
 
				+{
			
 
				+   return (c==65 || c==69 || c==73 || c==79 || c==85 || c==89)? 1 : 0;
			
 
				+}
			
 
				+
			
 
				+void do_perm(int n, int done[], int index, int size)
			
 
				+{
			
 
				+   int j, i, nrv = 0, k;
			
 
				+
			
 
				+   if (index == 1 && (!is_vowel(w[pindex[0]][0]) && !is_vowel(w[n][0])))
			
 
				+       return;
			
 
				+   if (index > 1) {
			
 
				+      nrv = is_vowel(w[pindex[index-2]][0]) +
			
 
				+            is_vowel(w[pindex[index-1]][0]) +
			
 
				+            is_vowel(w[n][0]);
			
 
				+      if (nrv == 0 || nrv == 3)
			
 
				+       return;
			
 
				+   }
			
 
				+   pindex[index++] = n;
			
 
				+   if (index < N && --size) {
			
 
				+      for (j = 0; j<N; j++) {
			
 
				+         if (done[j] == 0) {
			
 
				+            done[j] = 1;
			
 
				+            do_perm(j, done, index, size);
			
 
				+            done[j] = 0;
			
 
				+         }
			
 
				+      }
			
 
				+   } else {
			
 
				+      k = 0;
			
 
				+      for (i=0; i < index; i++) {
			
 
				+	 int t = 0;
			
 
				+	 while (isupper(w[pindex[i]][t]))
			
 
				+	    acron[k++] = w[pindex[i]][t++];
			
 
				+      }
			
 
				+      acron[k] = 0;
			
 
				+      printf("%s", acron);
			
 
				+      for (i=0; i < index; i++) 
			
 
				+	    printf(" %s", w[pindex[i]]);
			
 
				+      printf("\n");
			
 
				+/*      fflush(stdout); */
			
 
				+   }
			
 
				+}
			
 
				+
			
 
				+int main()
			
 
				+{
			
 
				+   int i, j;
			
 
				+
			
 
				+   for (j = 4; j <= N; j++) {
			
 
				+      for (i = 0; i < N; i++) {
			
 
				+        done[i] = 1;
			
 
				+        do_perm(i, done, 0, j);
			
 
				+        done[i] = 0;
			
 
				+      }
			
 
				+   }
			
 
				+}
			
--- a/benchmarks/clinpack.c
+++ b/benchmarks/clinpack.c
@@ -0,0 +1,1229 @@
 
				+/*
			
 
				+Translated to C by Bonnie Toy 5/88
			
 
				+
			
 
				+You MUST specify one of -DSP   or -DDP     to compile correctly.
			
 
				+You MUST specify one of -DROLL or -DUNROLL to compile correctly.
			
 
				+You MUST specify a timer option(see below) to compile correctly.
			
 
				+
			
 
				+To compile double precision version for Sun-4:
			
 
				+   cc -DUNIX -DDP -DROLL -O4 clinpack.c
			
 
				+
			
 
				+To compile single precision version for Sun-4:
			
 
				+   cc -DUNIX -DSP -DROLL -O4 -fsingle -fsingle2 clinpack.c
			
 
				+
			
 
				+To obtain   rolled source BLAS, add -DROLL   to the command lines.
			
 
				+To obtain unrolled source BLAS, add -DUNROLL to the command lines.
			
 
				+
			
 
				+PLEASE NOTE: You can also just 'uncomment' one of the options below.
			
 
				+*/
			
 
				+
			
 
				+/* #define SP     */
			
 
				+#define DP    
			
 
				+/*#define ROLL  */
			
 
				+#define UNROLL 
			
 
				+
			
 
				+/***************************************************************/
			
 
				+/* Timer options. You MUST uncomment one of the options below  */
			
 
				+/* or compile, for example, with the '-DUNIX' option.          */
			
 
				+/***************************************************************/
			
 
				+/* #define Amiga       */
			
 
				+#define UNIX        
			
 
				+/* #define UNIX_Old    */
			
 
				+/* #define VMS         */
			
 
				+/* #define BORLAND_C   */
			
 
				+/* #define MSC         */
			
 
				+/* #define MAC         */
			
 
				+/* #define IPSC        */
			
 
				+/* #define FORTRAN_SEC */
			
 
				+/* #define GTODay      */
			
 
				+/* #define CTimer      */
			
 
				+/* #define UXPM        */
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <math.h>
			
 
				+
			
 
				+#ifdef SP
			
 
				+#define REAL float
			
 
				+#define ZERO 0.0
			
 
				+#define ONE  1.0
			
 
				+#define PREC "Single "
			
 
				+#endif
			
 
				+
			
 
				+#ifdef DP
			
 
				+#define REAL double
			
 
				+#define ZERO 0.0e0
			
 
				+#define ONE  1.0e0
			
 
				+#define PREC "Double "
			
 
				+#endif
			
 
				+
			
 
				+#define NTIMES 1
			
 
				+
			
 
				+#ifdef ROLL
			
 
				+#define ROLLING "Rolled "
			
 
				+#endif
			
 
				+
			
 
				+#ifdef UNROLL
			
 
				+#define ROLLING "Unrolled "
			
 
				+#endif
			
 
				+
			
 
				+static double st[8][6];
			
 
				+
			
 
				+main ()
			
 
				+{
			
 
				+   static REAL aa[200][200],a[200][201],b[200],x[200];
			
 
				+   REAL cray,ops,total,norma,normx;
			
 
				+   REAL resid,residn,eps;
			
 
				+   REAL epslon(),kf;
			
 
				+   double t1,tm,tm2,dtime();
			
 
				+   static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
			
 
				+
			
 
				+   lda = 201;
			
 
				+   ldaa = 200;
			
 
				+   cray = .056; 
			
 
				+   n = 25;
			
 
				+
			
 
				+   printf(ROLLING); printf(PREC);
			
 
				+   printf("Precision Linpack\n\n");
			
 
				+
			
 
				+	ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
			
 
				+
			
 
				+	matgen(a,lda,n,b,&norma);
			
 
				+	t1 = dtime();
			
 
				+	dgefa(a,lda,n,ipvt,&info);
			
 
				+	st[0][0] = dtime() - t1;
			
 
				+	
			
 
				+	t1 = dtime();
			
 
				+	dgesl(a,lda,n,ipvt,b,0);
			
 
				+	st[1][0] = dtime() - t1;
			
 
				+	total = st[0][0] + st[1][0];
			
 
				+
			
 
				+/*     compute a residual to verify results.  */ 
			
 
				+
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	   {
			
 
				+	       x[i] = b[i];
			
 
				+	   }
			
 
				+	matgen(a,lda,n,b,&norma);
			
 
				+	for (i = 0; i < n; i++) 
			
 
				+	   {
			
 
				+	       b[i] = -b[i];
			
 
				+	   }
			
 
				+	dmxpy(n,b,n,lda,x,a);
			
 
				+	resid = 0.0;
			
 
				+	normx = 0.0;
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	 {
			
 
				+	       resid = (resid > fabs((double)b[i])) 
			
 
				+	 ? resid : fabs((double)b[i]);
			
 
				+	       normx = (normx > fabs((double)x[i])) 
			
 
				+	 ? normx : fabs((double)x[i]);
			
 
				+	 }
			
 
				+	eps = epslon((REAL)ONE);
			
 
				+	residn = resid/( n*norma*normx*eps );
			
 
				+   
			
 
				+   printf("   norm. resid      resid           machep");
			
 
				+   printf("         x[0]-1        x[n-1]-1\n");
			
 
				+   printf("%8.1f      %16.8e%16.8e%16.8e%16.8e\n",
			
 
				+	  (double)residn, (double)resid, (double)eps, 
			
 
				+	       (double)x[0]-1, (double)x[n-1]-1);
			
 
				+
			
 
				+printf(" times are reported for matrices of order %5d\n",n);
			
 
				+printf("      dgefa      dgesl      total       kflops     unit");
			
 
				+printf("      ratio\n");
			
 
				+
			
 
				+	st[2][0] = total;
			
 
				+	st[3][0] = ops/(1.0e3*total);
			
 
				+	st[4][0] = 2.0e3/st[3][0];
			
 
				+	st[5][0] = total/cray;
			
 
				+
			
 
				+   printf(" times for array with leading dimension of%5d\n",lda);
			
 
				+   print_time(0);
			
 
				+
			
 
				+	matgen(a,lda,n,b,&norma);
			
 
				+	t1 = dtime();
			
 
				+	dgefa(a,lda,n,ipvt,&info);
			
 
				+	st[0][1] = dtime() - t1;
			
 
				+	
			
 
				+	t1 = dtime();
			
 
				+	dgesl(a,lda,n,ipvt,b,0);
			
 
				+	st[1][1] = dtime() - t1;
			
 
				+	total = st[0][1] + st[1][1];
			
 
				+	
			
 
				+	st[2][1] = total;
			
 
				+	st[3][1] = ops/(1.0e3*total);
			
 
				+	st[4][1] = 2.0e3/st[3][1];
			
 
				+	st[5][1] = total/cray;
			
 
				+
			
 
				+	matgen(a,lda,n,b,&norma);
			
 
				+	
			
 
				+	t1 = dtime();
			
 
				+	dgefa(a,lda,n,ipvt,&info);
			
 
				+	st[0][2] = dtime() - t1;
			
 
				+	
			
 
				+	t1 = dtime();
			
 
				+	dgesl(a,lda,n,ipvt,b,0);
			
 
				+	st[1][2] = dtime() - t1;
			
 
				+	
			
 
				+	total = st[0][2] + st[1][2];
			
 
				+	st[2][2] = total;
			
 
				+	st[3][2] = ops/(1.0e3*total);
			
 
				+	st[4][2] = 2.0e3/st[3][2];
			
 
				+	st[5][2] = total/cray;
			
 
				+
			
 
				+	ntimes = NTIMES;
			
 
				+	tm2 = 0.0;
			
 
				+	t1 = dtime();
			
 
				+
			
 
				+   for (i = 0; i < ntimes; i++) {
			
 
				+	       tm = dtime();
			
 
				+      matgen(a,lda,n,b,&norma);
			
 
				+      tm2 = tm2 + dtime() - tm;
			
 
				+      dgefa(a,lda,n,ipvt,&info);
			
 
				+      }
			
 
				+
			
 
				+	st[0][3] = (dtime() - t1 - tm2)/ntimes;
			
 
				+	t1 = dtime();
			
 
				+
			
 
				+   for (i = 0; i < ntimes; i++) {
			
 
				+	       dgesl(a,lda,n,ipvt,b,0);
			
 
				+      }
			
 
				+
			
 
				+	st[1][3] = (dtime() - t1)/ntimes;
			
 
				+	total = st[0][3] + st[1][3];
			
 
				+	st[2][3] = total;
			
 
				+	st[3][3] = ops/(1.0e3*total);
			
 
				+	st[4][3] = 2.0e3/st[3][3];
			
 
				+	st[5][3] = total/cray;
			
 
				+
			
 
				+   print_time(1);
			
 
				+   print_time(2);
			
 
				+   print_time(3);
			
 
				+
			
 
				+	matgen(aa,ldaa,n,b,&norma);
			
 
				+	t1 = dtime();
			
 
				+	dgefa(aa,ldaa,n,ipvt,&info);
			
 
				+	st[0][4] = dtime() - t1;
			
 
				+	
			
 
				+	t1 = dtime();
			
 
				+	dgesl(aa,ldaa,n,ipvt,b,0);
			
 
				+	st[1][4] = dtime() - t1;
			
 
				+
			
 
				+	total = st[0][4] + st[1][4];
			
 
				+	st[2][4] = total;
			
 
				+	st[3][4] = ops/(1.0e3*total);
			
 
				+	st[4][4] = 2.0e3/st[3][4];
			
 
				+	st[5][4] = total/cray;
			
 
				+
			
 
				+	matgen(aa,ldaa,n,b,&norma);
			
 
				+	t1 = dtime();
			
 
				+	dgefa(aa,ldaa,n,ipvt,&info);
			
 
				+	st[0][5] = dtime() - t1;
			
 
				+
			
 
				+	t1 = dtime();
			
 
				+	dgesl(aa,ldaa,n,ipvt,b,0);
			
 
				+	st[1][5] = dtime() - t1;
			
 
				+
			
 
				+	total = st[0][5] + st[1][5];
			
 
				+	st[2][5] = total;
			
 
				+	st[3][5] = ops/(1.0e3*total);
			
 
				+	st[4][5] = 2.0e3/st[3][5];
			
 
				+	st[5][5] = total/cray;
			
 
				+
			
 
				+   matgen(aa,ldaa,n,b,&norma);
			
 
				+   t1 = dtime();
			
 
				+   dgefa(aa,ldaa,n,ipvt,&info);
			
 
				+   st[0][6] = dtime() - t1;
			
 
				+
			
 
				+   t1 = dtime();
			
 
				+   dgesl(aa,ldaa,n,ipvt,b,0);
			
 
				+   st[1][6] = dtime() - t1;
			
 
				+
			
 
				+   total = st[0][6] + st[1][6];
			
 
				+   st[2][6] = total;
			
 
				+   st[3][6] = ops/(1.0e3*total);
			
 
				+   st[4][6] = 2.0e3/st[3][6];
			
 
				+   st[5][6] = total/cray;
			
 
				+
			
 
				+   ntimes = NTIMES;
			
 
				+   tm2 = 0;
			
 
				+   t1 = dtime();
			
 
				+   for (i = 0; i < ntimes; i++) {
			
 
				+      tm = dtime();
			
 
				+      matgen(aa,ldaa,n,b,&norma);
			
 
				+      tm2 = tm2 + dtime() - tm;
			
 
				+      dgefa(aa,ldaa,n,ipvt,&info);
			
 
				+      }
			
 
				+
			
 
				+   st[0][7] = (dtime() - t1 - tm2)/ntimes;
			
 
				+   
			
 
				+   t1 = dtime();
			
 
				+   for (i = 0; i < ntimes; i++) {
			
 
				+      dgesl(aa,ldaa,n,ipvt,b,0);
			
 
				+      }
			
 
				+
			
 
				+   st[1][7] = (dtime() - t1)/ntimes;
			
 
				+   total = st[0][7] + st[1][7];
			
 
				+   st[2][7] = total;
			
 
				+   st[3][7] = ops/(1.0e3*total);
			
 
				+   st[4][7] = 2.0e3/st[3][7];
			
 
				+   st[5][7] = total/cray;
			
 
				+
			
 
				+   /* the following code sequence implements the semantics of
			
 
				+      the Fortran intrinsics "nint(min(st[3][3],st[3][7]))"   */
			
 
				+/*
			
 
				+   kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
			
 
				+   kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
			
 
				+   if (fabs((double)kf) < ONE) 
			
 
				+      kflops = 0;
			
 
				+   else {
			
 
				+      kflops = floor(fabs((double)kf));
			
 
				+      if (kf < ZERO) kflops = -kflops;
			
 
				+   }
			
 
				+*/
			
 
				+   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
			
 
				+   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
			
 
				+   kf = st[3][3];
			
 
				+   if ( st[3][7] < st[3][3] ) kf = st[3][7];
			
 
				+   kflops = (int)(kf + 0.5);
			
 
				+
			
 
				+   printf(" times for array with leading dimension of%4d\n",ldaa);
			
 
				+   print_time(4);
			
 
				+   print_time(5);
			
 
				+   print_time(6);
			
 
				+   print_time(7);
			
 
				+   printf(ROLLING); printf(PREC);
			
 
				+   printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
			
 
				+}
			
 
				+     
			
 
				+/*----------------------*/ 
			
 
				+print_time (row)
			
 
				+int row;
			
 
				+{
			
 
				+printf("%11.2f%11.2f%11.2f%11.0f%11.2f%11.2f\n",
			
 
				+       (double)st[0][row], (double)st[1][row], (double)st[2][row], 
			
 
				+       (double)st[3][row], (double)st[4][row], (double)st[5][row]);
			
 
				+}
			
 
				+      
			
 
				+/*----------------------*/ 
			
 
				+matgen(a,lda,n,b,norma)
			
 
				+REAL a[],b[],*norma;
			
 
				+int lda, n;
			
 
				+
			
 
				+/* We would like to declare a[][lda], but c does not allow it.  In this
			
 
				+function, references to a[i][j] are written a[lda*i+j].  */
			
 
				+
			
 
				+{
			
 
				+   int init, i, j;
			
 
				+
			
 
				+   init = 1325;
			
 
				+   *norma = 0.0;
			
 
				+   for (j = 0; j < n; j++) {
			
 
				+      for (i = 0; i < n; i++) {
			
 
				+	 init = 3125*init % 65536;
			
 
				+	 a[lda*j+i] = (init - 32768.0)/16384.0;
			
 
				+	 *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
			
 
				+      }
			
 
				+   }
			
 
				+   for (i = 0; i < n; i++) {
			
 
				+	  b[i] = 0.0;
			
 
				+   }
			
 
				+   for (j = 0; j < n; j++) {
			
 
				+      for (i = 0; i < n; i++) {
			
 
				+	 b[i] = b[i] + a[lda*j+i];
			
 
				+      }
			
 
				+   }
			
 
				+}
			
 
				+
			
 
				+/*----------------------*/ 
			
 
				+dgefa(a,lda,n,ipvt,info)
			
 
				+REAL a[];
			
 
				+int lda,n,ipvt[],*info;
			
 
				+
			
 
				+/* We would like to declare a[][lda], but c does not allow it.  In this
			
 
				+function, references to a[i][j] are written a[lda*i+j].  
			
 
				+*/
			
 
				+
			
 
				+/*
			
 
				+     dgefa factors a double precision matrix by gaussian elimination.
			
 
				+
			
 
				+     dgefa is usually called by dgeco, but it can be called
			
 
				+     directly with a saving in time if  rcond  is not needed.
			
 
				+     (time for dgeco) = (1 + 9/n)*(time for dgefa) .
			
 
				+
			
 
				+     on entry
			
 
				+
			
 
				+	a       REAL precision[n][lda]
			
 
				+		the matrix to be factored.
			
 
				+
			
 
				+	lda     integer
			
 
				+		the leading dimension of the array  a .
			
 
				+
			
 
				+	n       integer
			
 
				+		the order of the matrix  a .
			
 
				+
			
 
				+     on return
			
 
				+
			
 
				+	a       an upper triangular matrix and the multipliers
			
 
				+		which were used to obtain it.
			
 
				+		the factorization can be written  a = l*u  where
			
 
				+		l  is a product of permutation and unit lower
			
 
				+		triangular matrices and  u  is upper triangular.
			
 
				+
			
 
				+	ipvt    integer[n]
			
 
				+		an integer vector of pivot indices.
			
 
				+
			
 
				+	info    integer
			
 
				+		= 0  normal value.
			
 
				+		= k  if  u[k][k] .eq. 0.0 .  this is not an error
			
 
				+		     condition for this subroutine, but it does
			
 
				+		     indicate that dgesl or dgedi will divide by zero
			
 
				+		     if called.  use  rcond  in dgeco for a reliable
			
 
				+		     indication of singularity.
			
 
				+
			
 
				+     linpack. this version dated 08/14/78 .
			
 
				+     cleve moler, university of new mexico, argonne national lab.
			
 
				+
			
 
				+     functions
			
 
				+
			
 
				+     blas daxpy,dscal,idamax
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+{
			
 
				+/*     internal variables   */
			
 
				+
			
 
				+REAL t;
			
 
				+int idamax(),j,k,kp1,l,nm1;
			
 
				+
			
 
				+
			
 
				+/*     gaussian elimination with partial pivoting   */
			
 
				+
			
 
				+   *info = 0;
			
 
				+   nm1 = n - 1;
			
 
				+   if (nm1 >=  0) {
			
 
				+      for (k = 0; k < nm1; k++) {
			
 
				+	 kp1 = k + 1;
			
 
				+
			
 
				+		/* find l = pivot index   */
			
 
				+
			
 
				+	 l = idamax(n-k,&a[lda*k+k],1) + k;
			
 
				+	 ipvt[k] = l;
			
 
				+
			
 
				+	 /* zero pivot implies this column already 
			
 
				+	    triangularized */
			
 
				+
			
 
				+	 if (a[lda*k+l] != ZERO) {
			
 
				+
			
 
				+	    /* interchange if necessary */
			
 
				+
			
 
				+	    if (l != k) {
			
 
				+	       t = a[lda*k+l];
			
 
				+	       a[lda*k+l] = a[lda*k+k];
			
 
				+	       a[lda*k+k] = t; 
			
 
				+	    }
			
 
				+
			
 
				+	    /* compute multipliers */
			
 
				+
			
 
				+	    t = -ONE/a[lda*k+k];
			
 
				+	    dscal(n-(k+1),t,&a[lda*k+k+1],1);
			
 
				+
			
 
				+	    /* row elimination with column indexing */
			
 
				+
			
 
				+	    for (j = kp1; j < n; j++) {
			
 
				+	       t = a[lda*j+l];
			
 
				+	       if (l != k) {
			
 
				+		  a[lda*j+l] = a[lda*j+k];
			
 
				+		  a[lda*j+k] = t;
			
 
				+	       }
			
 
				+	       daxpy(n-(k+1),t,&a[lda*k+k+1],1,
			
 
				+		     &a[lda*j+k+1],1);
			
 
				+	      } 
			
 
				+	   }
			
 
				+	 else { 
			
 
				+		     *info = k;
			
 
				+	 }
			
 
				+      } 
			
 
				+   }
			
 
				+   ipvt[n-1] = n-1;
			
 
				+   if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/*----------------------*/ 
			
 
				+
			
 
				+dgesl(a,lda,n,ipvt,b,job)
			
 
				+int lda,n,ipvt[],job;
			
 
				+REAL a[],b[];
			
 
				+
			
 
				+/* We would like to declare a[][lda], but c does not allow it.  In this
			
 
				+function, references to a[i][j] are written a[lda*i+j].  */
			
 
				+
			
 
				+/*
			
 
				+     dgesl solves the double precision system
			
 
				+     a * x = b  or  trans(a) * x = b
			
 
				+     using the factors computed by dgeco or dgefa.
			
 
				+
			
 
				+     on entry
			
 
				+
			
 
				+	a       double precision[n][lda]
			
 
				+		the output from dgeco or dgefa.
			
 
				+
			
 
				+	lda     integer
			
 
				+		the leading dimension of the array  a .
			
 
				+
			
 
				+	n       integer
			
 
				+		the order of the matrix  a .
			
 
				+
			
 
				+	ipvt    integer[n]
			
 
				+		the pivot vector from dgeco or dgefa.
			
 
				+
			
 
				+	b       double precision[n]
			
 
				+		the right hand side vector.
			
 
				+
			
 
				+	job     integer
			
 
				+		= 0         to solve  a*x = b ,
			
 
				+		= nonzero   to solve  trans(a)*x = b  where
			
 
				+			    trans(a)  is the transpose.
			
 
				+
			
 
				+    on return
			
 
				+
			
 
				+	b       the solution vector  x .
			
 
				+
			
 
				+     error condition
			
 
				+
			
 
				+	a division by zero will occur if the input factor contains a
			
 
				+	zero on the diagonal.  technically this indicates singularity
			
 
				+	but it is often caused by improper arguments or improper
			
 
				+	setting of lda .  it will not occur if the subroutines are
			
 
				+	called correctly and if dgeco has set rcond .gt. 0.0
			
 
				+	or dgefa has set info .eq. 0 .
			
 
				+
			
 
				+     to compute  inverse(a) * c  where  c  is a matrix
			
 
				+     with  p  columns
			
 
				+	   dgeco(a,lda,n,ipvt,rcond,z)
			
 
				+	   if (!rcond is too small){
			
 
				+	      for (j=0,j<p,j++)
			
 
				+		    dgesl(a,lda,n,ipvt,c[j][0],0);
			
 
				+      }
			
 
				+
			
 
				+     linpack. this version dated 08/14/78 .
			
 
				+     cleve moler, university of new mexico, argonne national lab.
			
 
				+
			
 
				+     functions
			
 
				+
			
 
				+     blas daxpy,ddot
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+{
			
 
				+/*     internal variables   */
			
 
				+
			
 
				+   REAL ddot(),t;
			
 
				+   int k,kb,l,nm1;
			
 
				+
			
 
				+   nm1 = n - 1;
			
 
				+   if (job == 0) {
			
 
				+
			
 
				+      /* job = 0 , solve  a * x = b
			
 
				+	 first solve  l*y = b       */
			
 
				+
			
 
				+      if (nm1 >= 1) {
			
 
				+	 for (k = 0; k < nm1; k++) {
			
 
				+	    l = ipvt[k];
			
 
				+	    t = b[l];
			
 
				+	    if (l != k){ 
			
 
				+	       b[l] = b[k];
			
 
				+	       b[k] = t;
			
 
				+	    }   
			
 
				+	    daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
			
 
				+	 }
			
 
				+      } 
			
 
				+
			
 
				+      /* now solve  u*x = y */
			
 
				+
			
 
				+      for (kb = 0; kb < n; kb++) {
			
 
				+	  k = n - (kb + 1);
			
 
				+	  b[k] = b[k]/a[lda*k+k];
			
 
				+	  t = -b[k];
			
 
				+	  daxpy(k,t,&a[lda*k+0],1,&b[0],1);
			
 
				+      }
			
 
				+   }
			
 
				+   else { 
			
 
				+
			
 
				+      /* job = nonzero, solve  trans(a) * x = b
			
 
				+	 first solve  trans(u)*y = b          */
			
 
				+
			
 
				+      for (k = 0; k < n; k++) {
			
 
				+	 t = ddot(k,&a[lda*k+0],1,&b[0],1);
			
 
				+	 b[k] = (b[k] - t)/a[lda*k+k];
			
 
				+      }
			
 
				+
			
 
				+      /* now solve trans(l)*x = y   */
			
 
				+
			
 
				+      if (nm1 >= 1) {
			
 
				+	 for (kb = 1; kb < nm1; kb++) {
			
 
				+	    k = n - (kb+1);
			
 
				+	    b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
			
 
				+	    l = ipvt[k];
			
 
				+	    if (l != k) {
			
 
				+	       t = b[l];
			
 
				+	       b[l] = b[k];
			
 
				+	       b[k] = t;
			
 
				+	    }
			
 
				+	 }
			
 
				+      }
			
 
				+   }
			
 
				+}
			
 
				+
			
 
				+/*----------------------*/ 
			
 
				+
			
 
				+daxpy(n,da,dx,incx,dy,incy)
			
 
				+/*
			
 
				+     constant times a vector plus a vector.
			
 
				+     jack dongarra, linpack, 3/11/78.
			
 
				+*/
			
 
				+REAL dx[],dy[],da;
			
 
				+int incx,incy,n;
			
 
				+{
			
 
				+   int i,ix,iy,m,mp1;
			
 
				+
			
 
				+   if(n <= 0) return;
			
 
				+   if (da == ZERO) return;
			
 
				+
			
 
				+   if(incx != 1 || incy != 1) {
			
 
				+
			
 
				+      /* code for unequal increments or equal increments
			
 
				+	 not equal to 1                */
			
 
				+
			
 
				+      ix = 1;
			
 
				+      iy = 1;
			
 
				+      if(incx < 0) ix = (-n+1)*incx + 1;
			
 
				+      if(incy < 0) iy = (-n+1)*incy + 1;
			
 
				+      for (i = 0;i < n; i++) {
			
 
				+	 dy[iy] = dy[iy] + da*dx[ix];
			
 
				+	 ix = ix + incx;
			
 
				+	 iy = iy + incy;
			
 
				+      }
			
 
				+	    return;
			
 
				+   }
			
 
				+
			
 
				+   /* code for both increments equal to 1 */
			
 
				+
			
 
				+#ifdef ROLL
			
 
				+   for (i = 0;i < n; i++) {
			
 
				+      dy[i] = dy[i] + da*dx[i];
			
 
				+   }
			
 
				+#endif
			
 
				+#ifdef UNROLL
			
 
				+
			
 
				+   m = n % 4;
			
 
				+   if ( m != 0) {
			
 
				+      for (i = 0; i < m; i++) 
			
 
				+	 dy[i] = dy[i] + da*dx[i];
			
 
				+      if (n < 4) return;
			
 
				+   }
			
 
				+   for (i = m; i < n; i = i + 4) {
			
 
				+      dy[i]   = dy[i]   + da*dx[i];
			
 
				+      dy[i+1] = dy[i+1] + da*dx[i+1];
			
 
				+      dy[i+2] = dy[i+2] + da*dx[i+2];
			
 
				+      dy[i+3] = dy[i+3] + da*dx[i+3];
			
 
				+   }
			
 
				+#endif
			
 
				+}
			
 
				+   
			
 
				+/*----------------------*/ 
			
 
				+
			
 
				+REAL ddot(n,dx,incx,dy,incy)
			
 
				+/*
			
 
				+     forms the dot product of two vectors.
			
 
				+     jack dongarra, linpack, 3/11/78.
			
 
				+*/
			
 
				+REAL dx[],dy[];
			
 
				+
			
 
				+int incx,incy,n;
			
 
				+{
			
 
				+   REAL dtemp;
			
 
				+   int i,ix,iy,m,mp1;
			
 
				+
			
 
				+   dtemp = ZERO;
			
 
				+
			
 
				+   if(n <= 0) return(ZERO);
			
 
				+
			
 
				+   if(incx != 1 || incy != 1) {
			
 
				+
			
 
				+      /* code for unequal increments or equal increments
			
 
				+	 not equal to 1               */
			
 
				+
			
 
				+      ix = 0;
			
 
				+      iy = 0;
			
 
				+      if (incx < 0) ix = (-n+1)*incx;
			
 
				+      if (incy < 0) iy = (-n+1)*incy;
			
 
				+      for (i = 0;i < n; i++) {
			
 
				+	 dtemp = dtemp + dx[ix]*dy[iy];
			
 
				+	 ix = ix + incx;
			
 
				+	 iy = iy + incy;
			
 
				+      }
			
 
				+      return(dtemp);
			
 
				+   }
			
 
				+
			
 
				+   /* code for both increments equal to 1 */
			
 
				+
			
 
				+#ifdef ROLL
			
 
				+   for (i=0;i < n; i++)
			
 
				+      dtemp = dtemp + dx[i]*dy[i];
			
 
				+   return(dtemp);
			
 
				+#endif
			
 
				+#ifdef UNROLL
			
 
				+
			
 
				+   m = n % 5;
			
 
				+   if (m != 0) {
			
 
				+      for (i = 0; i < m; i++)
			
 
				+	 dtemp = dtemp + dx[i]*dy[i];
			
 
				+      if (n < 5) return(dtemp);
			
 
				+   }
			
 
				+   for (i = m; i < n; i = i + 5) {
			
 
				+      dtemp = dtemp + dx[i]*dy[i] +
			
 
				+      dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
			
 
				+      dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
			
 
				+   }
			
 
				+   return(dtemp);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/*----------------------*/ 
			
 
				+dscal(n,da,dx,incx)
			
 
				+
			
 
				+/*     scales a vector by a constant.
			
 
				+      jack dongarra, linpack, 3/11/78.
			
 
				+*/
			
 
				+REAL da,dx[];
			
 
				+int n, incx;
			
 
				+{
			
 
				+   int i,m,mp1,nincx;
			
 
				+
			
 
				+   if(n <= 0)return;
			
 
				+   if(incx != 1) {
			
 
				+
			
 
				+      /* code for increment not equal to 1 */
			
 
				+
			
 
				+      nincx = n*incx;
			
 
				+      for (i = 0; i < nincx; i = i + incx)
			
 
				+	 dx[i] = da*dx[i];
			
 
				+      return;
			
 
				+   }
			
 
				+
			
 
				+   /* code for increment equal to 1 */
			
 
				+
			
 
				+#ifdef ROLL
			
 
				+   for (i = 0; i < n; i++)
			
 
				+      dx[i] = da*dx[i];
			
 
				+#endif
			
 
				+#ifdef UNROLL
			
 
				+
			
 
				+   m = n % 5;
			
 
				+   if (m != 0) {
			
 
				+      for (i = 0; i < m; i++)
			
 
				+	 dx[i] = da*dx[i];
			
 
				+      if (n < 5) return;
			
 
				+   }
			
 
				+   for (i = m; i < n; i = i + 5){
			
 
				+      dx[i] = da*dx[i];
			
 
				+      dx[i+1] = da*dx[i+1];
			
 
				+      dx[i+2] = da*dx[i+2];
			
 
				+      dx[i+3] = da*dx[i+3];
			
 
				+      dx[i+4] = da*dx[i+4];
			
 
				+   }
			
 
				+#endif
			
 
				+
			
 
				+}
			
 
				+
			
 
				+/*----------------------*/ 
			
 
				+int idamax(n,dx,incx)
			
 
				+
			
 
				+/*
			
 
				+     finds the index of element having max. absolute value.
			
 
				+     jack dongarra, linpack, 3/11/78.
			
 
				+*/
			
 
				+
			
 
				+REAL dx[];
			
 
				+int incx,n;
			
 
				+{
			
 
				+   REAL dmax;
			
 
				+   int i, ix, itemp;
			
 
				+
			
 
				+   if( n < 1 ) return(-1);
			
 
				+   if(n ==1 ) return(0);
			
 
				+   if(incx != 1) {
			
 
				+
			
 
				+      /* code for increment not equal to 1 */
			
 
				+
			
 
				+      ix = 1;
			
 
				+      dmax = fabs((double)dx[0]);
			
 
				+      ix = ix + incx;
			
 
				+      for (i = 1; i < n; i++) {
			
 
				+	 if(fabs((double)dx[ix]) > dmax)  {
			
 
				+	    itemp = i;
			
 
				+	    dmax = fabs((double)dx[ix]);
			
 
				+	 }
			
 
				+	 ix = ix + incx;
			
 
				+      }
			
 
				+   }
			
 
				+   else {
			
 
				+
			
 
				+      /* code for increment equal to 1 */
			
 
				+
			
 
				+      itemp = 0;
			
 
				+      dmax = fabs((double)dx[0]);
			
 
				+      for (i = 1; i < n; i++) {
			
 
				+	 if(fabs((double)dx[i]) > dmax) {
			
 
				+	    itemp = i;
			
 
				+	    dmax = fabs((double)dx[i]);
			
 
				+	 }
			
 
				+      }
			
 
				+   }
			
 
				+   return (itemp);
			
 
				+}
			
 
				+
			
 
				+/*----------------------*/ 
			
 
				+REAL epslon (x)
			
 
				+REAL x;
			
 
				+/*
			
 
				+     estimate unit roundoff in quantities of size x.
			
 
				+*/
			
 
				+
			
 
				+{
			
 
				+   REAL a,b,c,eps;
			
 
				+/*
			
 
				+     this program should function properly on all systems
			
 
				+     satisfying the following two assumptions,
			
 
				+	1.  the base used in representing dfloating point
			
 
				+	    numbers is not a power of three.
			
 
				+	2.  the quantity  a  in statement 10 is represented to 
			
 
				+	    the accuracy used in dfloating point variables
			
 
				+	    that are stored in memory.
			
 
				+     the statement number 10 and the go to 10 are intended to
			
 
				+     force optimizing compilers to generate code satisfying 
			
 
				+     assumption 2.
			
 
				+     under these assumptions, it should be true that,
			
 
				+	    a  is not exactly equal to four-thirds,
			
 
				+	    b  has a zero for its last bit or digit,
			
 
				+	    c  is not exactly equal to one,
			
 
				+	    eps  measures the separation of 1.0 from
			
 
				+		 the next larger dfloating point number.
			
 
				+     the developers of eispack would appreciate being informed
			
 
				+     about any systems where these assumptions do not hold.
			
 
				+
			
 
				+     *****************************************************************
			
 
				+     this routine is one of the auxiliary routines used by eispack iii
			
 
				+     to avoid machine dependencies.
			
 
				+     *****************************************************************
			
 
				+
			
 
				+     this version dated 4/6/83.
			
 
				+*/
			
 
				+
			
 
				+   a = 4.0e0/3.0e0;
			
 
				+   eps = ZERO;
			
 
				+   while (eps == ZERO) {
			
 
				+      b = a - ONE;
			
 
				+      c = b + b + b;
			
 
				+      eps = fabs((double)(c-ONE));
			
 
				+   }
			
 
				+   return(eps*fabs((double)x));
			
 
				+}
			
 
				+ 
			
 
				+/*----------------------*/ 
			
 
				+dmxpy (n1, y, n2, ldm, x, m)   /* K&R definition: no return type given (implicit int) and no value is returned */
			
 
				+REAL y[], x[], m[];
			
 
				+int n1, n2, ldm;
			
 
				+
			
 
				+/* We would like to declare m[][ldm], but c does not allow it.  In this
			
 
				+function, references to m[i][j] are written m[ldm*i+j].  */
			
 
				+
			
 
				+/*
			
 
				+   purpose:
			
 
				+     multiply matrix m times vector x and add the result to vector y.
			
 
				+
			
 
				+   parameters:
			
 
				+
			
 
				+     n1 integer, number of elements in vector y, and number of rows in
			
 
				+	 matrix m
			
 
				+
			
 
				+     y double [n1], vector of length n1 to which is added 
			
 
				+	 the product m*x
			
 
				+
			
 
				+     n2 integer, number of elements in vector x, and number of columns
			
 
				+	 in matrix m
			
 
				+
			
 
				+     ldm integer, leading dimension of array m
			
 
				+
			
 
				+     x double [n2], vector of length n2
			
 
				+
			
 
				+     m double [ldm][n2], matrix of n1 rows and n2 columns
			
 
				+
			
 
				+ ----------------------------------------------------------------------
			
 
				+*/
			
 
				+{
			
 
				+   int j,i,jmin;
			
 
				+   /* cleanup odd vector */
			
 
				+
			
 
				+   j = n2 % 2;   /* n2 odd: column 0 is added on its own */
			
 
				+   if (j >= 1) {
			
 
				+      j = j - 1;
			
 
				+      for (i = 0; i < n1; i++) 
			
 
				+		  y[i] = (y[i]) + x[j]*m[ldm*j+i];   /* m is addressed as m[ldm*column + row] */
			
 
				+   } 
			
 
				+
			
 
				+   /* cleanup odd group of two vectors */
			
 
				+
			
 
				+   j = n2 % 4;
			
 
				+   if (j >= 2) {
			
 
				+      j = j - 1;   /* j = index of the second column of the pair */
			
 
				+      for (i = 0; i < n1; i++)
			
 
				+		  y[i] = ( (y[i])
			
 
				+			    + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
			
 
				+   } 
			
 
				+
			
 
				+   /* cleanup odd group of four vectors */
			
 
				+
			
 
				+   j = n2 % 8;
			
 
				+   if (j >= 4) {
			
 
				+      j = j - 1;   /* j = index of the last of the four columns */
			
 
				+      for (i = 0; i < n1; i++)
			
 
				+	 y[i] = ((( (y[i])
			
 
				+		+ x[j-3]*m[ldm*(j-3)+i]) 
			
 
				+		+ x[j-2]*m[ldm*(j-2)+i])
			
 
				+		+ x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
			
 
				+   } 
			
 
				+
			
 
				+   /* cleanup odd group of eight vectors */
			
 
				+
			
 
				+   j = n2 % 16;
			
 
				+   if (j >= 8) {
			
 
				+      j = j - 1;   /* j = index of the last of the eight columns */
			
 
				+      for (i = 0; i < n1; i++)
			
 
				+	 y[i] = ((((((( (y[i])
			
 
				+		+ x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i])
			
 
				+		  + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i])
			
 
				+		+ x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i])
			
 
				+		+ x[j-1]*m[ldm*(j-1)+i]) + x[j]  *m[ldm*j+i];
			
 
				+   } 
			
 
				+   
			
 
				+   /* main loop - groups of sixteen vectors */
			
 
				+
			
 
				+   jmin = (n2%16)+16;   /* columns 0 .. n2%16-1 were consumed by the cleanup steps above */
			
 
				+   for (j = jmin-1; j < n2; j = j + 16) {   /* j = last column of each group of sixteen */
			
 
				+      for (i = 0; i < n1; i++) 
			
 
				+	 y[i] = ((((((((((((((( (y[i])
			
 
				+		   + x[j-15]*m[ldm*(j-15)+i]) 
			
 
				+	    + x[j-14]*m[ldm*(j-14)+i])
			
 
				+		 + x[j-13]*m[ldm*(j-13)+i]) 
			
 
				+	    + x[j-12]*m[ldm*(j-12)+i])
			
 
				+		 + x[j-11]*m[ldm*(j-11)+i]) 
			
 
				+	    + x[j-10]*m[ldm*(j-10)+i])
			
 
				+		 + x[j- 9]*m[ldm*(j- 9)+i]) 
			
 
				+	    + x[j- 8]*m[ldm*(j- 8)+i])
			
 
				+		 + x[j- 7]*m[ldm*(j- 7)+i]) 
			
 
				+	    + x[j- 6]*m[ldm*(j- 6)+i])
			
 
				+		 + x[j- 5]*m[ldm*(j- 5)+i]) 
			
 
				+	    + x[j- 4]*m[ldm*(j- 4)+i])
			
 
				+		 + x[j- 3]*m[ldm*(j- 3)+i]) 
			
 
				+	    + x[j- 2]*m[ldm*(j- 2)+i])
			
 
				+		 + x[j- 1]*m[ldm*(j- 1)+i]) 
			
 
				+	    + x[j]   *m[ldm*j+i];
			
 
				+   }
			
 
				+} 
			
 
				+
			
 
				+/*****************************************************/
			
 
				+/* Various timer routines.                           */
			
 
				+/* Al Aburto, aburto@marlin.nosc.mil, 26 Sep 1992    */
			
 
				+/*                                                   */
			
 
				+/* t = dtime() outputs the current time in seconds.  */
			
 
				+/* Use CAUTION as some of these routines will mess   */
			
 
				+/* up when timing across the hour mark!!!            */
			
 
				+/*                                                   */
			
 
				+/* For timing I use the 'user' time whenever         */
			
 
				+/* possible. Using 'user+sys' time is a separate     */
			
 
				+/* issue.                                            */
			
 
				+/*                                                   */
			
 
				+/*****************************************************/
			
 
				+
			
 
				+/*********************************/
			
 
				+/* Timer code.                   */
			
 
				+/*********************************/
			
 
				+/*******************/
			
 
				+/*  Amiga dtime()  */
			
 
				+/*******************/
			
 
				+#ifdef Amiga
			
 
				+#include <ctype.h>
			
 
				+#define HZ 50
			
 
				+
			
 
				+double dtime()   /* Amiga: returns (ticks + minutes*60*50) / HZ from the stamp filled in by DateStamp() */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   struct   tt {
			
 
				+      long  days;   /* never read below, so the result does not account for the day count */
			
 
				+      long  minutes;
			
 
				+      long  ticks;
			
 
				+   } tt;
			
 
				+
			
 
				+   DateStamp(&tt);
			
 
				+
			
 
				+   q = ((double)(tt.ticks + (tt.minutes * 60L * 50L))) / (double)HZ;   /* NOTE(review): literal 50L duplicates HZ (defined as 50 above) */
			
 
				+
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*****************************************************/
			
 
				+/*  UNIX dtime(). This is the preferred UNIX timer.  */
			
 
				+/*  Provided by: Markku Kolkka, mk59200@cc.tut.fi    */
			
 
				+/*  HP-UX Addition by: Bo Thide', bt@irfu.se         */
			
 
				+/*****************************************************/
			
 
				+#ifdef UNIX
			
 
				+#include <sys/time.h>
			
 
				+#include <sys/resource.h>
			
 
				+
			
 
				+#ifdef __hpux
			
 
				+#include <sys/syscall.h>
			
 
				+#define getrusage(a,b) syscall(SYS_getrusage,a,b)
			
 
				+#endif
			
 
				+
			
 
				+struct rusage rusage;
			
 
				+
			
 
				+double dtime()   /* UNIX: returns ru_utime of RUSAGE_SELF as seconds (tv_sec + tv_usec * 1e-6) */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   getrusage(RUSAGE_SELF,&rusage);   /* fills the file-scope 'rusage'; the return value is not checked */
			
 
				+
			
 
				+   q = (double)(rusage.ru_utime.tv_sec);
			
 
				+   q = q + (double)(rusage.ru_utime.tv_usec) * 1.0e-06;
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/***************************************************/
			
 
				+/*  UNIX_Old dtime(). This is the old UNIX timer.  */
			
 
				+/*  Use only if absolutely necessary as HZ may be  */
			
 
				+/*  ill defined on your system.                    */
			
 
				+/***************************************************/
			
 
				+#ifdef UNIX_Old
			
 
				+#include <sys/types.h>
			
 
				+#include <sys/times.h>
			
 
				+#include <sys/param.h>
			
 
				+
			
 
				+#ifndef HZ
			
 
				+#define HZ 60
			
 
				+#endif
			
 
				+
			
 
				+struct tms tms;
			
 
				+
			
 
				+double dtime()   /* UNIX_Old: returns tms_utime / HZ */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   times(&tms);   /* fills the file-scope 'tms'; the return value is not checked */
			
 
				+
			
 
				+   q = (double)(tms.tms_utime) / (double)HZ;   /* HZ falls back to 60 above when no header defines it */
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*********************************************************/
			
 
				+/*  VMS dtime() for VMS systems.                         */
			
 
				+/*  Provided by: RAMO@uvphys.phys.UVic.CA                */
			
 
				+/*  Some people have run into problems with this timer.  */
			
 
				+/*********************************************************/
			
 
				+#ifdef VMS
			
 
				+#include time
			
 
				+
			
 
				+#ifndef HZ
			
 
				+#define HZ 100
			
 
				+#endif
			
 
				+
			
 
				+struct tbuffer_t   /* buffer handed to times() by the VMS dtime() */
			
 
				+       {
			
 
				+	int proc_user_time;   /* the only field dtime() reads */
			
 
				+	int proc_system_time;
			
 
				+	int child_user_time;
			
 
				+	int child_system_time;
			
 
				+       };
			
 
				+struct tbuffer_t tms;
			
 
				+
			
 
				+double dtime()   /* VMS: returns proc_user_time / HZ */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   times(&tms);   /* fills the file-scope 'tms' (struct tbuffer_t) */
			
 
				+
			
 
				+   q = (double)(tms.proc_user_time) / (double)HZ;   /* HZ falls back to 100 above when not already defined */
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/******************************/
			
 
				+/*  BORLAND C dtime() for DOS */
			
 
				+/******************************/
			
 
				+#ifdef BORLAND_C
			
 
				+#include <ctype.h>
			
 
				+#include <dos.h>
			
 
				+#include <time.h>
			
 
				+
			
 
				+#define HZ 100
			
 
				+struct time tnow;
			
 
				+
			
 
				+double dtime()   /* Borland C: returns 60*ti_min + ti_sec + ti_hund/HZ; the hour is not used */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   gettime(&tnow);   /* fills the file-scope 'tnow' */
			
 
				+
			
 
				+   q = 60.0 * (double)(tnow.ti_min);
			
 
				+   q = q + (double)(tnow.ti_sec);
			
 
				+   q = q + (double)(tnow.ti_hund)/(double)HZ;   /* HZ is 100 here */
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/**************************************/
			
 
				+/*  Microsoft C (MSC) dtime() for DOS */
			
 
				+/**************************************/
			
 
				+#ifdef MSC
			
 
				+#include <time.h>
			
 
				+#include <ctype.h>
			
 
				+
			
 
				+#define HZ CLK_TCK
			
 
				+clock_t tnow;
			
 
				+
			
 
				+double dtime()   /* Microsoft C: returns clock() / HZ, with HZ defined as CLK_TCK above */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   tnow = clock();   /* stored in the file-scope 'tnow' */
			
 
				+
			
 
				+   q = (double)tnow / (double)HZ;
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*************************************/
			
 
				+/*  Macintosh (MAC) Think C dtime()  */
			
 
				+/*************************************/
			
 
				+#ifdef MAC
			
 
				+#include <time.h>
			
 
				+
			
 
				+#define HZ 60
			
 
				+
			
 
				+double dtime()   /* Think C (MAC): returns clock() / HZ, with HZ fixed at 60 above */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   q = (double)clock() / (double)HZ;
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/************************************************************/
			
 
				+/*  iPSC/860 (IPSC) dtime() for i860.                       */
			
 
				+/*  Provided by: Dan Yergeau, yergeau@gloworm.Stanford.EDU  */
			
 
				+/************************************************************/
			
 
				+#ifdef IPSC
			
 
				+extern double dclock();
			
 
				+
			
 
				+double dtime()   /* iPSC/860: returns the value of the external dclock() unchanged */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   q = dclock();
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/**************************************************/
			
 
				+/*  FORTRAN dtime() for Cray type systems.        */
			
 
				+/*  This is the preferred timer for Cray systems. */
			
 
				+/**************************************************/
			
 
				+#ifdef FORTRAN_SEC
			
 
				+
			
 
				+fortran double second();
			
 
				+
			
 
				+double dtime()   /* Cray FORTRAN timer: returns q after passing its address to second() */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   second(&q);   /* presumably writes the time into q; the declared double return value is ignored - TODO confirm */
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/***********************************************************/
			
 
				+/*  UNICOS C dtime() for Cray UNICOS systems.  Don't use   */
			
 
				+/*  unless absolutely necessary as returned time includes  */
			
 
				+/*  'user+system' time.  Provided by: R. Mike Dority,      */
			
 
				+/*  dority@craysea.cray.com                                */
			
 
				+/***********************************************************/
			
 
				+#ifdef CTimer
			
 
				+#include <time.h>
			
 
				+
			
 
				+double dtime()   /* UNICOS (CTimer): returns clock() / CLOCKS_PER_SEC */
			
 
				+{
			
 
				+   double    q;
			
 
				+   clock_t   t;
			
 
				+
			
 
				+       t = clock();
			
 
				+
			
 
				+       q = (double)t / (double)CLOCKS_PER_SEC;
			
 
				+
			
 
				+       return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/********************************************/
			
 
				+/* Another UNIX timer using gettimeofday(). */
			
 
				+/* However, getrusage() is preferred.       */
			
 
				+/********************************************/
			
 
				+#ifdef GTODay
			
 
				+#include <sys/time.h>
			
 
				+
			
 
				+struct timeval tnow;
			
 
				+
			
 
				+double dtime()   /* GTODay: returns tv_sec + tv_usec * 1e-6 as reported by gettimeofday() */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   gettimeofday(&tnow,NULL);   /* fills the file-scope 'tnow'; the return value is not checked */
			
 
				+   q = (double)tnow.tv_sec + (double)tnow.tv_usec * 1.0e-6;
			
 
				+
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*****************************************************/
			
 
				+/*  Fujitsu UXP/M timer.                             */
			
 
				+/*  Provided by: Mathew Lim, ANUSF, M.Lim@anu.edu.au */
			
 
				+/*****************************************************/
			
 
				+#ifdef UXPM
			
 
				+#include <sys/types.h>
			
 
				+#include <sys/timesu.h>
			
 
				+struct tmsu rusage;
			
 
				+
			
 
				+double dtime()   /* Fujitsu UXP/M: returns tms_utime * 1e-6 */
			
 
				+{
			
 
				+   double q;
			
 
				+
			
 
				+   timesu(&rusage);   /* fills the file-scope 'rusage' (a struct tmsu here) */
			
 
				+
			
 
				+   q = (double)(rusage.tms_utime) * 1.0e-06;
			
 
				+   
			
 
				+   return q;
			
 
				+}
			
 
				+#endif
			
--- a/benchmarks/dhrystone.c
+++ b/benchmarks/dhrystone.c
@@ -0,0 +1,742 @@
 
				+/*	EVERYBODY:	Please read "APOLOGY" below. -rick 01/06/86
			
 
				+ *
			
 
				+ *	"DHRYSTONE" Benchmark Program
			
 
				+ *
			
 
				+ *	Version:	C/1.1, 12/01/84
			
 
				+ *
			
 
				+ *	Date:		PROGRAM updated 01/06/86, RESULTS updated 02/17/86
			
 
				+ *
			
 
				+ *	Author:		Reinhold P. Weicker,  CACM Vol 27, No 10, 10/84 pg. 1013
			
 
				+ *			Translated from ADA by Rick Richardson
			
 
				+ *			Every method to preserve ADA-likeness has been used,
			
 
				+ *			at the expense of C-ness.
			
 
				+ *
			
 
				+ *	Compile:	cc -O dry.c -o drynr			: No registers
			
 
				+ *			cc -O -DREG=register dry.c -o dryr	: Registers
			
 
				+ *
			
 
				+ *	Defines:	Defines are provided for old C compilers
			
 
				+ *			which don't have enums, and can't assign structures.
			
 
				+ *			The time(2) function is library dependent; Most
			
 
				+ *			return the time in seconds, but beware of some, like
			
 
				+ *			Aztec C, which return other units.
			
 
				+ *			The LOOPS define is initially set for 50000 loops.
			
 
				+ *			If you have a machine with large integers and is
			
 
				+ *			very fast, please change this number to 500000 to
			
 
				+ *			get better accuracy.  Please select the way to
			
 
				+ *			measure the execution time using the TIME define.
			
 
				+ *			For single user machines, time(2) is adequate. For
			
 
				+ *			multi-user machines where you cannot get single-user
			
 
				+ *			access, use the times(2) function.  If you have
			
 
				+ *			neither, use a stopwatch in the dead of night.
			
 
				+ *			Use a "printf" at the point marked "start timer"
			
 
				+ *			to begin your timings. DO NOT use the UNIX "time(1)"
			
 
				+ *			command, as this will measure the total time to
			
 
				+ *			run this program, which will (erroneously) include
			
 
				+ *			the time to malloc(3) storage and to compute the
			
 
				+ *			time it takes to do nothing.
			
 
				+ *
			
 
				+ *	Run:		drynr; dryr
			
 
				+ *
			
 
				+ *	Results:	If you get any new machine/OS results, please send to:
			
 
				+ *
			
 
				+ *				{ihnp4,vax135,..}!houxm!castor!pcrat!rick
			
 
				+ *
			
 
				+ *			and thanks to all that do.  Space prevents listing
			
 
				+ *			the names of those who have provided some of these
			
 
				+ *			results.  I'll be forwarding these results to
			
 
				+ *			Reinhold Weicker.
			
 
				+ *
			
 
				+ *	Note:		I order the list in increasing performance of the
			
 
				+ *			"with registers" benchmark.  If the compiler doesn't
			
 
				+ *			provide register variables, then the benchmark
			
 
				+ *			is the same for both REG and NOREG.
			
 
				+ *
			
 
				+ *	PLEASE:		Send complete information about the machine type,
			
 
				+ *			clock speed, OS and C manufacturer/version.  If
			
 
				+ *			the machine is modified, tell me what was done.
			
 
				+ *			On UNIX, execute uname -a and cc -V to get this info.
			
 
				+ *
			
 
				+ *	80x8x NOTE:	80x8x benchers: please try to do all memory models
			
 
				+ *			for a particular compiler.
			
 
				+ *
			
 
				+ *	APOLOGY (1/30/86):
			
 
				+ *		Well, I goofed things up!  As pointed out by Haakon Bugge,
			
 
				+ *		the line of code marked "GOOF" below was missing from the
			
 
				+ *		Dhrystone distribution for the last several months.  It
			
 
				+ *		*WAS* in a backup copy I made last winter, so no doubt it
			
 
				+ *		was victimized by sleepy fingers operating vi!
			
 
				+ *
			
 
				+ *		The effect of the line missing is that the reported benchmarks
			
 
				+ *		are 15% too fast (at least on a 80286).  Now, this creates
			
 
				+ *		a dilemma - do I throw out ALL the data so far collected
			
 
				+ *		and use only results from this (corrected) version, or
			
 
				+ *		do I just keep collecting data for the old version?
			
 
				+ *
			
 
				+ *		Since the data collected so far *is* valid as long as it
			
 
				+ *		is compared with like data, I have decided to keep
			
 
				+ *		TWO lists- one for the old benchmark, and one for the
			
 
				+ *		new.  This also gives me an opportunity to correct one
			
 
				+ *		other error I made in the instructions for this benchmark.
			
 
				+ *		My experience with C compilers has been mostly with
			
 
				+ *		UNIX 'pcc' derived compilers, where the 'optimizer' simply
			
 
				+ *		fixes sloppy code generation (peephole optimization).
			
 
				+ *		But today, there exist C compiler optimizers that will actually
			
 
				+ *		perform optimization in the Computer Science sense of the word,
			
 
				+ *		by removing, for example, assignments to a variable whose
			
 
				+ *		value is never used.  Dhrystone, unfortunately, provides
			
 
				+ *		lots of opportunities for this sort of optimization.
			
 
				+ *
			
 
				+ *		I request that benchmarkers re-run this new, corrected
			
 
				+ *		version of Dhrystone, turning off or bypassing optimizers
			
 
				+ *		which perform more than peephole optimization.  Please
			
 
				+ *		indicate the version of Dhrystone used when reporting the
			
 
				+ *		results to me.
			
 
				+ *		
			
 
				+ * RESULTS BEGIN HERE
			
 
				+ *
			
 
				+ *----------------DHRYSTONE VERSION 1.1 RESULTS BEGIN--------------------------
			
 
				+ *
			
 
				+ * MACHINE	MICROPROCESSOR	OPERATING	COMPILER	DHRYSTONES/SEC.
			
 
				+ * TYPE				SYSTEM				NO REG	REGS
			
 
				+ * --------------------------	------------	-----------	---------------
			
 
				+ * IBM PC/AT    80286-7.5Mhz    Venix/286 SVR2  cc              1159    1254 *15
			
 
				+ *
			
 
				+ *
			
 
				+ *----------------DHRYSTONE VERSION 1.0 RESULTS BEGIN--------------------------
			
 
				+ *
			
 
				+ * MACHINE	MICROPROCESSOR	OPERATING	COMPILER	DHRYSTONES/SEC.
			
 
				+ * TYPE				SYSTEM				NO REG	REGS
			
 
				+ * --------------------------	------------	-----------	---------------
			
 
				+ * Commodore 64	6510-1MHz	C64 ROM		C Power 2.8	  36	  36
			
 
				+ * HP-110	8086-5.33Mhz	MSDOS 2.11	Lattice 2.14	 284	 284
			
 
				+ * IBM PC/XT	8088-4.77Mhz	PC/IX		cc		 271	 294
			
 
				+ * CCC 3205	?		Xelos(SVR2) 	cc		 279	 296
			
 
				+ * Perq-II	2901 bitslice	Accent S5c 	cc (CMU)	 301	 301
			
 
				+ * IBM PC/XT	8088-4.77Mhz	COHERENT 2.3.43	MarkWilliams cc  296	 317
			
 
				+ * Cosmos	68000-8Mhz	UniSoft		cc		 305	 322
			
 
				+ * IBM PC/XT	8088-4.77Mhz	Venix/86 2.0	cc		 297	 324
			
 
				+ * DEC PRO 350  11/23           Venix/PRO SVR2  cc               299     325
			
 
				+ * IBM PC	8088-4.77Mhz	MSDOS 2.0	b16cc 2.0	 310	 340
			
 
				+ * PDP11/23	11/23           Venix (V7)      cc               320     358
			
 
				+ * Commodore Amiga		?		Lattice 3.02	 368	 371
			
 
				+ * PC/XT        8088-4.77Mhz    Venix/86 SYS V  cc               339     377
			
 
				+ * IBM PC	8088-4.77Mhz	MSDOS 2.0	CI-C86 2.20M	 390	 390
			
 
				+ * IBM PC/XT	8088-4.77Mhz	PCDOS 2.1	Wizard 2.1	 367	 403
			
 
				+ * IBM PC/XT	8088-4.77Mhz	PCDOS 3.1	Lattice 2.15	 403	 403 @
			
 
				+ * Colex DM-6	68010-8Mhz	Unisoft SYSV	cc		 378	 410
			
 
				+ * IBM PC	8088-4.77Mhz	PCDOS 3.1	Datalight 1.10	 416	 416
			
 
				+ * IBM PC	NEC V20-4.77Mhz	MSDOS 3.1	MS 3.1 		 387	 420
			
 
				+ * IBM PC/XT	8088-4.77Mhz	PCDOS 2.1	Microsoft 3.0	 390	 427
			
 
				+ * IBM PC	NEC V20-4.77Mhz	MSDOS 3.1	MS 3.1 (186) 	 393	 427
			
 
				+ * PDP-11/34	-		UNIX V7M	cc		 387	 438
			
 
				+ * IBM PC	8088, 4.77mhz	PC-DOS 2.1	Aztec C v3.2d	 423	 454
			
 
				+ * Tandy 1000	V20, 4.77mhz	MS-DOS 2.11	Aztec C v3.2d	 423	 458
			
 
				+ * Tandy TRS-16B 68000-6Mhz	Xenix 1.3.5	cc		 438	 458
			
 
				+ * PDP-11/34	-		RSTS/E		decus c		 438	 495
			
 
				+ * Onyx C8002	Z8000-4Mhz	IS/1 1.1 (V7)	cc		 476	 511
			
 
				+ * CCC 3230			Xelos (SysV.2)	cc		 507	 565
			
 
				+ * Tandy TRS-16B 68000-6Mhz	Xenix 1.3.5	Green Hills	 609	 617
			
 
				+ * DEC PRO 380  11/73           Venix/PRO SVR2  cc               577     628
			
 
				+ * FHL QT+	68000-10Mhz	Os9/68000	version 1.3	 603	 649 FH
			
 
				+ * Apollo DN550	68010-?Mhz	AegisSR9/IX	cc 3.12		 666	 666
			
 
				+ * HP-110	8086-5.33Mhz	MSDOS 2.11	Aztec-C		 641	 676 
			
 
				+ * ATT PC6300	8086-8Mhz	MSDOS 2.11	b16cc 2.0	 632	 684
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.0	CI-C86 2.1	 666	 684
			
 
				+ * Tandy 6000	68000-8Mhz	Xenix 3.0	cc		 694	 694
			
 
				+ * IBM PC/AT	80286-6Mhz	Xenix 3.0	cc		 684	 704 MM
			
 
				+ * Macintosh	68000-7.8Mhz 2M	Mac Rom		Mac C 32 bit int 694	 704
			
 
				+ * Macintosh	68000-7.7Mhz	-		MegaMax C 2.0	 661	 709
			
 
				+ * IBM PC/AT	80286-6Mhz	Xenix 3.0	cc		 704	 714 LM
			
 
				+ * Codata 3300	68000-8Mhz	UniPlus+ (v7)	cc		 678	 725
			
 
				+ * WICAT MB	68000-8Mhz	System V	WICAT C 4.1	 585	 731 ~
			
 
				+ * Cadmus 9000	68010-10Mhz	UNIX		cc		 714	 735
			
 
				+ * AT&T 6300    8086-8Mhz       Venix/86 SVR2   cc               668     743
			
 
				+ * Cadmus 9790	68010-10Mhz 1MB	SVR0,Cadmus3.7	cc		 720	 747
			
 
				+ * NEC PC9801F	8086-8Mhz	PCDOS 2.11	Lattice 2.15	 768	  -  @
			
 
				+ * ATT PC6300	8086-8Mhz	MSDOS 2.11	CI-C86 2.20M	 769	 769
			
 
				+ * Burroughs XE550 68010-10Mhz	Centix 2.10	cc		 769	 769 CT1
			
 
				+ * EAGLE/TURBO  8086-8Mhz       Venix/86 SVR2   cc               696     779
			
 
				+ * ALTOS 586	8086-10Mhz	Xenix 3.0b	cc 		 724	 793
			
 
				+ * DEC 11/73	J-11 micro	Ultrix-11 V3.0	System V	 735	 793
			
 
				+ * ATT 3B2/300	WE32000-?Mhz	UNIX 5.0.2	cc		 735	 806
			
 
				+ * Apollo DN320	68010-?Mhz	AegisSR9/IX	cc 3.12		 806	 806
			
 
				+ * IRIS-2400	68010-10Mhz	UNIX System V	cc		 772	 829
			
 
				+ * Atari 520ST  68000-8Mhz      TOS             DigResearch      839     846
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.0	MS 3.0(large)	 833	 847 LM
			
 
				+ * WICAT MB	68000-8Mhz	System V	WICAT C 4.1	 675	 853 S~
			
 
				+ * VAX 11/750	-		Ultrix 1.1	4.2BSD cc	 781	 862
			
 
				+ * CCC  7350A	68000-8MHz	UniSoft V.2	cc		 821	 875
			
 
				+ * VAX 11/750	-		UNIX 4.2bsd	cc		 862	 877
			
 
				+ * Fast Mac	68000-7.7Mhz	-		MegaMax C 2.0	 839	 904 +
			
 
				+ * IBM PC/XT	8086-9.54Mhz	PCDOS 3.1	Microsoft 3.0	 833	 909 C1
			
 
				+ * DEC 11/44			Ultrix-11 V3.0	System V	 862	 909
			
 
				+ * Macintosh	68000-7.8Mhz 2M	Mac Rom		Mac C 16 bit int 877	 909 S
			
 
				+ * CCC 3210	?		Xelos R01(SVR2)	cc		 849	 924
			
 
				+ * CCC 3220	?               Ed. 7 v2.3      cc		 892	 925
			
 
				+ * IBM PC/AT	80286-6Mhz	Xenix 3.0	cc -i		 909	 925
			
 
				+ * AT&T 6300	8086, 8mhz	MS-DOS 2.11	Aztec C v3.2d	 862	 943
			
 
				+ * IBM PC/AT	80286-6Mhz	Xenix 3.0	cc		 892	 961
			
 
				+ * VAX 11/750	w/FPA		Eunice 3.2	cc		 914	 976
			
 
				+ * IBM PC/XT	8086-9.54Mhz	PCDOS 3.1	Wizard 2.1	 892	 980 C1
			
 
				+ * IBM PC/XT	8086-9.54Mhz	PCDOS 3.1	Lattice 2.15	 980	 980 C1
			
 
				+ * Plexus P35	68000-10Mhz	UNIX System III cc		 984	 980
			
 
				+ * PDP-11/73	KDJ11-AA 15Mhz	UNIX V7M 2.1	cc		 862     981
			
 
				+ * VAX 11/750	w/FPA		UNIX 4.3bsd	cc		 994	 997
			
 
				+ * IRIS-1400	68010-10Mhz	UNIX System V	cc		 909	1000
			
 
				+ * IBM PC/AT	80286-6Mhz	Venix/86 2.1	cc		 961	1000
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.0	b16cc 2.0	 943	1063
			
 
				+ * Zilog S8000/11 Z8001-5.5Mhz	Zeus 3.2	cc		1011	1084
			
 
				+ * NSC ICM-3216 NSC 32016-10Mhz	UNIX SVR2	cc		1041	1084
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.0	MS 3.0(small)	1063	1086
			
 
				+ * VAX 11/750	w/FPA		VMS		VAX-11 C 2.0	 958	1091
			
 
				+ * Stride	68000-10Mhz	System-V/68	cc		1041	1111
			
 
				+ * Plexus P/60  MC68000-12.5Mhz	UNIX SYSIII	Plexus		1111	1111
			
 
				+ * ATT PC7300	68010-10Mhz	UNIX 5.2	cc		1041	1111
			
 
				+ * CCC 3230	?		Xelos R01(SVR2)	cc		1040	1126
			
 
				+ * Stride	68000-12Mhz	System-V/68	cc		1063	1136
			
 
				+ * IBM PC/AT    80286-6Mhz      Venix/286 SVR2  cc              1056    1149
			
 
				+ * Plexus P/60  MC68000-12.5Mhz	UNIX SYSIII	Plexus		1111	1163 T
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.0	Datalight 1.10	1190	1190
			
 
				+ * ATT PC6300+	80286-6Mhz	MSDOS 3.1	b16cc 2.0	1111	1219
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.1	Wizard 2.1	1136	1219
			
 
				+ * Sun2/120	68010-10Mhz	Sun 4.2BSD	cc		1136	1219
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.0	CI-C86 2.20M	1219	1219
			
 
				+ * WICAT PB	68000-8Mhz	System V	WICAT C 4.1	 998	1226 ~
			
 
				+ * MASSCOMP 500	68010-10MHz	RTU V3.0	cc (V3.2)	1156	1238
			
 
				+ * Alliant FX/8 IP (68012-12Mhz) Concentrix	cc -ip;exec -i 	1170	1243 FX
			
 
				+ * Cyb DataMate	68010-12.5Mhz	Uniplus 5.0	Unisoft cc	1162	1250
			
 
				+ * PDP 11/70	-		UNIX 5.2	cc		1162	1250
			
 
				+ * IBM PC/AT	80286-6Mhz	PCDOS 3.1	Lattice 2.15	1250	1250
			
 
				+ * IBM PC/AT	80286-7.5Mhz	Venix/86 2.1	cc		1190	1315 *15
			
 
				+ * Sun2/120	68010-10Mhz	Standalone	cc		1219	1315
			
 
				+ * Intel 380	80286-8Mhz	Xenix R3.0up1	cc		1250	1315 *16
			
 
				+ * Sequent Balance 8000	NS32032-10MHz	Dynix 2.0	cc	1250	1315 N12
			
 
				+ * IBM PC/DSI-32 32032-10Mhz	MSDOS 3.1	GreenHills 2.14	1282	1315 C3
			
 
				+ * ATT 3B2/400	WE32100-?Mhz	UNIX 5.2	cc		1315	1315
			
 
				+ * CCC 3250XP	-		Xelos R01(SVR2)	cc		1215	1318
			
 
				+ * IBM PC/RT 032 RISC(801?)?Mhz BSD 4.2         cc              1248    1333 RT
			
 
				+ * DG MV4000	-		AOS/VS 5.00	cc		1333	1333
			
 
				+ * IBM PC/AT	80286-8Mhz	Venix/86 2.1	cc		1275	1380 *16
			
 
				+ * IBM PC/AT	80286-6Mhz	MSDOS 3.0	Microsoft 3.0	1250	1388
			
 
				+ * ATT PC6300+	80286-6Mhz	MSDOS 3.1	CI-C86 2.20M	1428	1428
			
 
				+ * COMPAQ/286   80286-8Mhz      Venix/286 SVR2  cc              1326    1443
			
 
				+ * IBM PC/AT    80286-7.5Mhz    Venix/286 SVR2  cc              1333    1449 *15
			
 
				+ * WICAT PB	68000-8Mhz	System V	WICAT C 4.1	1169	1464 S~
			
 
				+ * Tandy II/6000 68000-8Mhz	Xenix 3.0	cc      	1384	1477
			
 
				+ * WICAT MB	68000-12.5Mhz	System V	WICAT C 4.1	1246	1537 ~
			
 
				+ * IBM PC/AT    80286-9Mhz      SCO Xenix V     cc              1540    1556 *18
			
 
				+ * Cyb DataMate	68010-12.5Mhz	Uniplus 5.0	Unisoft cc	1470	1562 S
			
 
				+ * VAX 11/780	-		UNIX 5.2	cc		1515	1562
			
 
				+ * MicroVAX-II	-		-		-		1562	1612
			
 
				+ * VAX 11/780	-		UNIX 4.3bsd	cc		1646	1662
			
 
				+ * Apollo DN660	-		AegisSR9/IX	cc 3.12		1666	1666
			
 
				+ * ATT 3B20	-		UNIX 5.2	cc		1515	1724
			
 
				+ * NEC PC-98XA	80286-8Mhz	PCDOS 3.1	Lattice 2.15	1724	1724 @
			
 
				+ * HP9000-500	B series CPU	HP-UX 4.02	cc		1724	-
			
 
				+ * IBM PC/STD	80286-8Mhz	MSDOS 3.0 	Microsoft 3.0	1724	1785 C2
			
 
				+ * WICAT MB	68000-12.5Mhz	System V	WICAT C 4.1	1450	1814 S~
			
 
				+ * WICAT PB	68000-12.5Mhz	System V	WICAT C 4.1	1530	1898 ~
			
 
				+ * DEC-2065	KL10-Model B	TOPS-20 6.1FT5	Port. C Comp.	1937	1946
			
 
				+ * Gould PN6005	-		UTX 1.1(4.2BSD)	cc		1675	1964
			
 
				+ * DEC2060	KL-10		TOPS-20		cc		2000	2000 &
			
 
				+ * VAX 11/785	-		UNIX 5.2	cc		2083	2083
			
 
				+ * VAX 11/785	-		VMS		VAX-11 C 2.0	2083	2083
			
 
				+ * VAX 11/785	-		UNIX SVR2	cc		2123	2083
			
 
				+ * VAX 11/785   -               ULTRIX-32 1.1   cc		2083    2091 
			
 
				+ * VAX 11/785	-		UNIX 4.3bsd	cc		2135	2136
			
 
				+ * WICAT PB	68000-12.5Mhz	System V	WICAT C 4.1	1780	2233 S~
			
 
				+ * Pyramid 90x	-		OSx 2.3		cc		2272	2272
			
 
				+ * Pyramid 90x	FPA,cache,4Mb	OSx 2.5		cc no -O	2777	2777
			
 
				+ * Pyramid 90x	w/cache		OSx 2.5		cc w/-O		3333	3333
			
 
				+ * IBM-4341-II	-		VM/SP3		Waterloo C 1.2  3333	3333
			
 
				+ * IRIS-2400T	68020-16.67Mhz	UNIX System V	cc		3105	3401
			
 
				+ * Celerity C-1200 ?		UNIX 4.2BSD	cc		3485	3468
			
 
				+ * SUN 3/75	68020-16.67Mhz	SUN 4.2 V3	cc		3333	3571
			
 
				+ * IBM-4341	Model 12	UTS 5.0		?		3685	3685
			
 
				+ * SUN-3/160    68020-16.67Mhz  Sun 4.2 V3.0A   cc		3381    3764
			
 
				+ * Sun 3/180	68020-16.67Mhz	Sun 4.2		cc		3333	3846
			
 
				+ * IBM-4341	Model 12	UTS 5.0		?		3910	3910 MN
			
 
				+ * MC 5400	68020-16.67MHz	RTU V3.0	cc (V4.0)	3952	4054
			
 
				+ * NCR Tower32  68020-16.67Mhz  SYS 5.0 Rel 2.0 cc              3846	4545
			
 
				+ * Gould PN9080	-		UTX-32 1.1c	cc		-	4629
			
 
				+ * MC 5600/5700	68020-16.67MHz	RTU V3.0	cc (V4.0)	4504	4746 %
			
 
				+ * Gould 1460-342 ECL proc      UTX/32 1.1/c    cc              5342    5677 G1
			
 
				+ * VAX 8600	-		UNIX 4.3bsd	cc		7024	7088
			
 
				+ * VAX 8600	-		VMS		VAX-11 C 2.0	7142	7142
			
 
				+ * Alliant FX/8 CE		Concentrix	cc -ce;exec -c 	6952	6655 FX
			
 
				+ * CCI POWER 6/32		COS(SV+4.2)	cc		7500	7800
			
 
				+ * CCI POWER 6/32		POWER 6 UNIX/V	cc		8236	8498
			
 
				+ * CCI POWER 6/32		4.2 Rel. 1.2b	cc		8963	9544
			
 
				+ * Sperry (CCI Power 6)		4.2BSD		cc		9345   10000
			
 
				+ * CRAY-X-MP/12	   105Mhz	COS 1.14	Cray C         10204   10204
			
 
				+ * IBM-3083	-		UTS 5.0 Rel 1	cc	       16666   12500
			
 
				+ * CRAY-1A	    80Mhz	CTSS		Cray C 2.0     12100   13888
			
 
				+ * IBM-3083	-		VM/CMS HPO 3.4	Waterloo C 1.2 13889   13889
			
 
				+ * Amdahl 470 V/8 		UTS/V 5.2       cc v1.23       15560   15560
			
 
				+ * CRAY-X-MP/48	   105Mhz	CTSS		Cray C 2.0     15625   17857
			
 
				+ * Amdahl 580	-		UTS 5.0 Rel 1.2	cc v1.5        23076   23076
			
 
				+ * Amdahl 5860	 		UTS/V 5.2       cc v1.23       28970   28970
			
 
				+ *
			
 
				+ * NOTE
			
 
				+ *   *   Crystal changed from 'stock' to listed value.
			
 
				+ *   +   This Macintosh was upgraded from 128K to 512K in such a way that
			
 
				+ *       the new 384K of memory is not slowed down by video generator accesses.
			
 
				+ *   %   Single processor; MC == MASSCOMP
			
 
				+ *   &   A version 7 C compiler written at New Mexico Tech.
			
 
				+ *   @   vanilla Lattice compiler used with MicroPro standard library
			
 
				+ *   S   Shorts used instead of ints
			
 
				+ *   T	 with Chris Torek's patches (whatever they are).
			
 
				+ *   ~   For WICAT Systems: MB=MultiBus, PB=Proprietary Bus
			
 
				+ *   LM  Large Memory Model. (Otherwise, all 80x8x results are small model)
			
 
				+ *   MM  Medium Memory Model. (Otherwise, all 80x8x results are small model)
			
 
				+ *   C1  Univation PC TURBO Co-processor; 9.54Mhz 8086, 640K RAM
			
 
				+ *   C2  Seattle Telecom STD-286 board
			
 
				+ *   C3  Definicon DSI-32 coprocessor
			
 
				+ *   C?  Unknown co-processor board?
			
 
				+ *   CT1 Convergent Technologies MegaFrame, 1 processor.
			
 
				+ *   MN  Using Mike Newtons 'optimizer' (see net.sources).
			
 
				+ *   G1  This Gould machine has 2 processors and was able to run 2 dhrystone
			
 
				+ *       Benchmarks in parallel with no slowdown.
			
 
				+ *   FH  FHC == Frank Hogg Labs (Hazelwood Uniquad 2 in an FHL box).
			
 
				+ *   FX  The Alliant FX/8 is a system consisting of 1-8 CEs (computation
			
 
				+ *	 engines) and 1-12 IPs (interactive processors). Note N8 applies.
			
 
				+ *   RT  This is one of the RT's that CMU has been using for awhile.  I'm
			
 
				+ *	 not sure that this is identical to the machine that IBM is selling
			
 
				+ *	 to the public.
			
 
				+ *   Nnn This machine has multiple processors, allowing "nn" copies of the
			
 
				+ *	 benchmark to run in the same time as 1 copy.
			
 
				+ *   ?   I don't trust results marked with '?'.  These were sent to me with
			
 
				+ *       either incomplete info, or with times that just don't make sense.
			
 
				+ *	 ?? means I think the performance is too poor, ?! means too good.
			
 
				+ *       If anybody can confirm these figures, please respond.
			
 
				+ *
			
 
				+ *  ABBREVIATIONS
			
 
				+ *	CCC	Concurrent Computer Corp. (was Perkin-Elmer)
			
 
				+ *	MC	Masscomp
			
 
				+ *
			
 
				+ *--------------------------------RESULTS END----------------------------------
			
 
				+ *
			
 
				+ *	The following program contains statements of a high-level programming
			
 
				+ *	language (C) in a distribution considered representative:
			
 
				+ *
			
 
				+ *	assignments			53%
			
 
				+ *	control statements		32%
			
 
				+ *	procedure, function calls	15%
			
 
				+ *
			
 
				+ *	100 statements are dynamically executed.  The program is balanced with
			
 
				+ *	respect to the three aspects:
			
 
				+ *		- statement type
			
 
				+ *		- operand type (for simple data types)
			
 
				+ *		- operand access
			
 
				+ *			operand global, local, parameter, or constant.
			
 
				+ *
			
 
				+ *	The combination of these three aspects is balanced only approximately.
			
 
				+ *
			
 
				+ *	The program does not compute anything meaningful, but it is
			
 
				+ *	syntactically and semantically correct.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+/* Accuracy of timings and human fatigue controlled by next two lines */
			
 
				+/*#define LOOPS	50000		/* Use this for slow or 16 bit machines */
			
 
				+#define LOOPS	5000		/* Use this for faster machines */
			
 
				+
			
 
				+/* Compiler dependent options */
			
 
				+#undef	NOENUM			/* Define if compiler has no enum's */
			
 
				+#undef	NOSTRUCTASSIGN		/* Define if compiler can't assign structures */
			
 
				+
			
 
				+/* define only one of the next two defines */
			
 
				+#define TIMES			/* Use times(2) time function */
			
 
				+/*#define TIME			/* Use time(2) time function */
			
 
				+
			
 
				+/* define the granularity of your times(2) function (when used) */
			
 
				+/*#define HZ	60		/* times(2) returns 1/60 second (most) */
			
 
				+#define HZ	100		/* times(2) returns 1/100 second (WECo) */
			
 
				+
			
 
				+/* for compatibility with goofed up version */
			
 
				+/*#define GOOF			/* Define if you want the goofed up version */
			
 
				+
			
 
				+#ifdef GOOF
			
 
				+char	Version[] = "1.0";
			
 
				+#else
			
 
				+char	Version[] = "1.1";
			
 
				+#endif
			
 
				+
			
 
				+#ifdef	NOSTRUCTASSIGN
			
 
				+#define	structassign(d, s)	memcpy(&(d), &(s), sizeof(d))
			
 
				+#else
			
 
				+#define	structassign(d, s)	d = s
			
 
				+#endif
			
 
				+
			
 
				+#ifdef	NOENUM
			
 
				+#define	Ident1	1
			
 
				+#define	Ident2	2
			
 
				+#define	Ident3	3
			
 
				+#define	Ident4	4
			
 
				+#define	Ident5	5
			
 
				+typedef int	Enumeration;
			
 
				+#else
			
 
				+typedef enum	{Ident1, Ident2, Ident3, Ident4, Ident5} Enumeration;
			
 
				+#endif
			
 
				+
			
 
				+typedef int	OneToThirty;
			
 
				+typedef int	OneToFifty;
			
 
				+typedef char	CapitalLetter;
			
 
				+typedef char	String30[31];
			
 
				+typedef int	Array1Dim[51];
			
 
				+typedef int	Array2Dim[51][51];
			
 
				+
			
 
				+struct	Record	/* record node manipulated by Proc0, Proc1 and Proc3 */
				+{
				+	struct Record		*PtrComp;	/* link to another Record; Proc0 points PtrGlb->PtrComp at PtrGlbNext */
				+	Enumeration		Discr;	/* tested against Ident1 in Proc1 */
				+	Enumeration		EnumComp;	/* passed to / written through Proc6 by Proc1 */
				+	OneToFifty		IntComp;	/* integer payload; set to 40 in Proc0, rewritten in Proc1 and Proc3 */
				+	String30		StringComp;	/* 31-byte character array filled by strcpy in Proc0 */
				+};
			
 
				+
			
 
				+typedef struct Record 	RecordType;
			
 
				+typedef RecordType *	RecordPtr;
			
 
				+typedef int		boolean;
			
 
				+
			
 
				+#define	NULL		0
			
 
				+#define	TRUE		1
			
 
				+#define	FALSE		0
			
 
				+
			
 
				+#ifndef REG
			
 
				+#define	REG
			
 
				+#endif
			
 
				+
			
 
				+extern Enumeration	Func1();
			
 
				+extern boolean		Func2();
			
 
				+
			
 
				+#ifdef TIMES
			
 
				+#include <sys/types.h>
			
 
				+#include <sys/times.h>
			
 
				+#endif
			
 
				+
			
 
				+main()	/* program entry point: run the benchmark driver once */
				+{
				+	Proc0();	/* performs setup, the timed loop and result printing */
				+	exit(0);	/* terminate with status 0 after the results are printed */
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Package 1
			
 
				+ */
			
 
				+int		IntGlob;	/* written by Proc3 (100) and Proc8 (5); read by Proc2, Proc3 and Proc6 */
				+boolean		BoolGlob;	/* written by Proc0 and Proc5; read by Proc4 */
				+char		Char1Glob;	/* set to 'A' by Proc5; tested by Proc2 and Proc4 */
				+char		Char2Glob;	/* set to 'B' by Proc4; upper bound of the CharIndex loop in Proc0 */
				+Array1Dim	Array1Glob;	/* passed to Proc8 by Proc0 */
				+Array2Dim	Array2Glob;	/* passed to Proc8 by Proc0; element [8][7] is preset in Proc0 */
				+RecordPtr	PtrGlb;	/* record allocated in Proc0 and handed to Proc1 */
				+RecordPtr	PtrGlbNext;	/* second record allocated in Proc0; linked as PtrGlb->PtrComp */
			
 
				+
			
 
				+/*
				+ * Proc0 -- benchmark driver.
				+ *
				+ * Measures the cost of an empty LOOPS-iteration loop (nulltime), allocates
				+ * and initialises the two global records, runs the benchmark body LOOPS
				+ * times between two clock readings and prints the elapsed time and the
				+ * resulting dhrystones/second figure.
				+ *
				+ * The clock source is chosen at compile time: TIME uses time(2), TIMES uses
				+ * the user time reported by times(2) in units of 1/HZ second.
				+ *
				+ * Changes relative to the historical code (the timed loop is untouched):
				+ *   - both malloc() results are checked before they are dereferenced;
				+ *   - the dhrystones/second division is skipped when the measured time is
				+ *     not positive, which happens whenever the whole run fits inside one
				+ *     clock tick and used to end in a division by zero;
				+ *   - values printed with %ld are cast to long explicitly.
				+ */
				+Proc0()
				+{
				+	OneToFifty		IntLoc1;
				+	REG OneToFifty		IntLoc2;
				+	OneToFifty		IntLoc3;
				+	REG char		CharLoc;
				+	REG char		CharIndex;
				+	Enumeration	 	EnumLoc;
				+	String30		String1Loc;
				+	String30		String2Loc;
				+	extern char		*malloc();
				+
				+#ifdef TIME
				+	long			time();
				+	long			starttime;
				+	long			benchtime;
				+	long			nulltime;
				+	register unsigned int	i;
				+
				+	starttime = time( (long *) 0);
				+	for (i = 0; i < LOOPS; ++i);
				+	nulltime = time( (long *) 0) - starttime; /* Computes o'head of loop */
				+#endif
				+#ifdef TIMES
				+	time_t			starttime;
				+	time_t			benchtime;
				+	time_t			nulltime;
				+	struct tms		tms;
				+	register unsigned int	i;
				+
				+	times(&tms); starttime = tms.tms_utime;
				+	for (i = 0; i < LOOPS; ++i);
				+	times(&tms);
				+	nulltime = tms.tms_utime - starttime; /* Computes overhead of looping */
				+#endif
				+
				+	PtrGlbNext = (RecordPtr) malloc(sizeof(RecordType));
				+	PtrGlb = (RecordPtr) malloc(sizeof(RecordType));
				+	/* Both records are dereferenced below and in Proc1/Proc3. */
				+	if (PtrGlb == NULL || PtrGlbNext == NULL)
				+	{
				+		printf("Dhrystone(%s): out of memory\n", Version);
				+		exit(1);
				+	}
				+	PtrGlb->PtrComp = PtrGlbNext;
				+	PtrGlb->Discr = Ident1;
				+	PtrGlb->EnumComp = Ident3;
				+	PtrGlb->IntComp = 40;
				+	strcpy(PtrGlb->StringComp, "DHRYSTONE PROGRAM, SOME STRING");
				+#ifndef	GOOF
				+	strcpy(String1Loc, "DHRYSTONE PROGRAM, 1'ST STRING");	/*GOOF*/
				+#endif
				+	Array2Glob[8][7] = 10;	/* Was missing in published program */
				+
				+/*****************
				+-- Start Timer --
				+*****************/
				+#ifdef TIME
				+	starttime = time( (long *) 0);
				+#endif
				+#ifdef TIMES
				+	times(&tms); starttime = tms.tms_utime;
				+#endif
				+	for (i = 0; i < LOOPS; ++i)
				+	{
				+
				+		Proc5();
				+		Proc4();
				+		IntLoc1 = 2;
				+		IntLoc2 = 3;
				+		strcpy(String2Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
				+		EnumLoc = Ident2;
				+		BoolGlob = ! Func2(String1Loc, String2Loc);
				+		while (IntLoc1 < IntLoc2)
				+		{
				+			IntLoc3 = 5 * IntLoc1 - IntLoc2;
				+			Proc7(IntLoc1, IntLoc2, &IntLoc3);
				+			++IntLoc1;
				+		}
				+		Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3);
				+		Proc1(PtrGlb);
				+		for (CharIndex = 'A'; CharIndex <= Char2Glob; ++CharIndex)
				+			if (EnumLoc == Func1(CharIndex, 'C'))
				+				Proc6(Ident1, &EnumLoc);
				+		IntLoc3 = IntLoc2 * IntLoc1;
				+		IntLoc2 = IntLoc3 / IntLoc1;
				+		IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1;
				+		Proc2(&IntLoc1);
				+	}
				+
				+/*****************
				+-- Stop Timer --
				+*****************/
				+
				+#ifdef TIME
				+	benchtime = time( (long *) 0) - starttime - nulltime;
				+	printf("Dhrystone(%s) time for %ld passes = %ld\n",
				+		Version,
				+		(long) LOOPS, benchtime);
				+	/* A run shorter than the clock resolution yields benchtime <= 0. */
				+	if (benchtime > 0)
				+		printf("This machine benchmarks at %ld dhrystones/second\n",
				+			((long) LOOPS) / benchtime);
				+	else
				+		printf("Measured time too small for a dhrystones/second figure; increase LOOPS\n");
				+#endif
				+#ifdef TIMES
				+	times(&tms);
				+	benchtime = tms.tms_utime - starttime - nulltime;
				+	printf("Dhrystone(%s) time for %ld passes = %ld\n",
				+		Version,
				+		(long) LOOPS, (long) (benchtime/HZ));
				+	/* A run shorter than one 1/HZ tick yields benchtime <= 0. */
				+	if (benchtime > 0)
				+		printf("This machine benchmarks at %ld dhrystones/second\n",
				+			(long) (((long) LOOPS) * HZ / benchtime));
				+	else
				+		printf("Measured time too small for a dhrystones/second figure; increase LOOPS\n");
				+#endif
				+
				+}
			
 
				+
			
 
				+/*
				+ * Proc1 -- record copying and field updates through pointers.
				+ *
				+ * PtrParIn points at a record whose PtrComp member links to a second
				+ * record, referred to below as NextRecord.  The routine copies *PtrGlb into
				+ * NextRecord, patches some of its fields, lets Proc3 rewrite its link and
				+ * then either completes NextRecord (Discr == Ident1) or copies NextRecord
				+ * back over *PtrParIn.
				+ */
				+Proc1(PtrParIn)
				+REG RecordPtr	PtrParIn;
				+{
				+#define	NextRecord	(*(PtrParIn->PtrComp))
				+
				+	structassign(NextRecord, *PtrGlb);
				+	PtrParIn->IntComp = 5;
				+	NextRecord.IntComp = PtrParIn->IntComp;
				+	NextRecord.PtrComp = PtrParIn->PtrComp;
				+	/*
				+	 * Proc3 takes a RecordPtr * and stores through it, so it must be
				+	 * given the address of the link member.  The previous call passed
				+	 * the link value itself; that only worked because the link had just
				+	 * been set to NextRecord's own address and PtrComp is the first
				+	 * member of the record, making both expressions the same address.
				+	 */
				+	Proc3(&NextRecord.PtrComp);
				+	if (NextRecord.Discr == Ident1)
				+	{
				+		NextRecord.IntComp = 6;
				+		Proc6(PtrParIn->EnumComp, &NextRecord.EnumComp);
				+		NextRecord.PtrComp = PtrGlb->PtrComp;
				+		Proc7(NextRecord.IntComp, 10, &NextRecord.IntComp);
				+	}
				+	else
				+		structassign(*PtrParIn, NextRecord);
				+
				+#undef	NextRecord
				+}
			
 
				+
			
 
				+/*
				+ * Proc2 -- in/out integer parameter update.
				+ *
				+ * IntParIO: pointer to the integer to update.  When Char1Glob is 'A' the
				+ * pointed-to value becomes (old value + 10 - 1) - IntGlob and the routine
				+ * returns.  While Char1Glob is anything else the loop keeps spinning.
				+ */
				+Proc2(IntParIO)
				+OneToFifty	*IntParIO;
				+{
				+	REG OneToFifty		IntLoc;
				+	REG Enumeration		EnumLoc;
				+
				+	IntLoc = *IntParIO + 10;
				+	/*
				+	 * Start from a value other than Ident1: the exit test below used to
				+	 * read EnumLoc uninitialised whenever Char1Glob was not 'A'.
				+	 */
				+	EnumLoc = Ident2;
				+	for(;;)
				+	{
				+		if (Char1Glob == 'A')
				+		{
				+			--IntLoc;
				+			*IntParIO = IntLoc - IntGlob;
				+			EnumLoc = Ident1;
				+		}
				+		if (EnumLoc == Ident1)
				+			break;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Proc3 -- pointer output parameter.
				+ *
				+ * PtrParOut: receives PtrGlb->PtrComp when PtrGlb is non-null; otherwise
				+ * IntGlob is set to 100 instead.  In both cases Proc7 is then asked to
				+ * store 10 + 2 + IntGlob into PtrGlb->IntComp.
				+ */
				+Proc3(PtrParOut)
				+RecordPtr	*PtrParOut;
				+{
				+	if (PtrGlb == NULL)
				+	{
				+		IntGlob = 100;
				+	}
				+	else
				+	{
				+		*PtrParOut = PtrGlb->PtrComp;
				+	}
				+	Proc7(10, IntGlob, &(PtrGlb->IntComp));
				+}
			
 
				+
			
 
				+/*
				+ * Proc4 -- boolean expression work on globals.
				+ *
				+ * Combines (Char1Glob == 'A') with BoolGlob in a local that is not used
				+ * afterwards, then sets Char2Glob to 'B'.
				+ */
				+Proc4()
				+{
				+	REG boolean	IsA;
				+
				+	IsA = (Char1Glob == 'A');
				+	IsA = IsA | BoolGlob;
				+	Char2Glob = 'B';
				+}
			
 
				+
			
 
				+/*
				+ * Proc5 -- reset the global flag and character used by the other routines:
				+ * BoolGlob becomes FALSE and Char1Glob becomes 'A'.
				+ */
				+Proc5()
				+{
				+	BoolGlob = FALSE;
				+	Char1Glob = 'A';
				+}
			
 
				+
			
 
				+extern boolean Func3();
			
 
				+
			
 
				+/*
				+ * Proc6 -- map one enumeration value to another.
				+ *
				+ * EnumParIn:  value to translate.
				+ * EnumParOut: receives the result.  It is first set to EnumParIn (or to
				+ *             Ident4 when Func3 rejects EnumParIn) and then overridden
				+ *             per case: Ident1 -> Ident1; Ident2 -> Ident1 if
				+ *             IntGlob > 100, else Ident4; Ident3 -> Ident2;
				+ *             Ident4 -> unchanged; Ident5 -> Ident3.
				+ */
				+Proc6(EnumParIn, EnumParOut)
				+REG Enumeration	EnumParIn;
				+REG Enumeration	*EnumParOut;
				+{
				+	*EnumParOut = EnumParIn;
				+	if (Func3(EnumParIn) == FALSE)
				+	{
				+		*EnumParOut = Ident4;
				+	}
				+
				+	switch (EnumParIn)
				+	{
				+	case Ident1:
				+		*EnumParOut = Ident1;
				+		break;
				+	case Ident2:
				+		*EnumParOut = (IntGlob > 100) ? Ident1 : Ident4;
				+		break;
				+	case Ident3:
				+		*EnumParOut = Ident2;
				+		break;
				+	case Ident4:
				+		break;
				+	case Ident5:
				+		*EnumParOut = Ident3;
				+		break;
				+	default:
				+		/* any other value: keep what was stored above */
				+		break;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Proc7 -- simple integer arithmetic with an output parameter.
				+ *
				+ * Stores IntParI1 + 2 + IntParI2 through IntParOut.
				+ */
				+Proc7(IntParI1, IntParI2, IntParOut)
				+OneToFifty	IntParI1;
				+OneToFifty	IntParI2;
				+OneToFifty	*IntParOut;
				+{
				+	REG OneToFifty	Bumped;
				+
				+	Bumped = 2 + IntParI1;
				+	*IntParOut = Bumped + IntParI2;
				+}
			
 
				+
			
 
				+/*
				+ * Proc8 -- one- and two-dimensional array accesses.
				+ *
				+ * With Base = IntParI1 + 5 the routine writes IntParI2 into
				+ * Array1Par[Base] and Array1Par[Base+1], Base into Array1Par[Base+30] and
				+ * into Array2Par[Base][Base] and [Base][Base+1], increments
				+ * Array2Par[Base][Base-1], copies Array1Par[Base] into
				+ * Array2Par[Base+20][Base] and finally sets IntGlob to 5.
				+ */
				+Proc8(Array1Par, Array2Par, IntParI1, IntParI2)
				+Array1Dim	Array1Par;
				+Array2Dim	Array2Par;
				+OneToFifty	IntParI1;
				+OneToFifty	IntParI2;
				+{
				+	REG OneToFifty	Base;
				+	REG OneToFifty	Col;
				+
				+	Base = IntParI1 + 5;
				+	Array1Par[Base] = IntParI2;
				+	Array1Par[Base+1] = Array1Par[Base];
				+	Array1Par[Base+30] = Base;
				+
				+	Col = Base;
				+	while (Col <= (Base+1))
				+	{
				+		Array2Par[Base][Col] = Base;
				+		++Col;
				+	}
				+
				+	Array2Par[Base][Base-1] += 1;
				+	Array2Par[Base+20][Base] = Array1Par[Base];
				+	IntGlob = 5;
				+}
			
 
				+
			
 
				+/*
				+ * Func1 -- compare two characters via local copies.
				+ *
				+ * Returns Ident2 when CharPar1 equals CharPar2 and Ident1 otherwise.
				+ */
				+Enumeration Func1(CharPar1, CharPar2)
				+CapitalLetter	CharPar1;
				+CapitalLetter	CharPar2;
				+{
				+	REG CapitalLetter	First;
				+	REG CapitalLetter	Copy;
				+
				+	First = CharPar1;
				+	Copy = First;
				+	if (Copy == CharPar2)
				+	{
				+		return (Ident2);
				+	}
				+	return (Ident1);
				+}
			
 
				+
			
 
				+/*
				+ * Func2 -- string comparison with some character work in front of it.
				+ *
				+ * The loop repeats until Func1 reports that StrParI1[1] and StrParI2[2]
				+ * differ, at which point Mark becomes 'A'.  Returns TRUE when Mark is 'X'
				+ * or when strcmp() orders StrParI1 after StrParI2, FALSE otherwise.
				+ */
				+boolean Func2(StrParI1, StrParI2)
				+String30	StrParI1;
				+String30	StrParI2;
				+{
				+	REG OneToThirty		Pos;
				+	REG CapitalLetter	Mark;
				+
				+	Pos = 1;
				+	while (Pos <= 1)
				+	{
				+		if (Func1(StrParI1[Pos], StrParI2[Pos+1]) == Ident1)
				+		{
				+			Mark = 'A';
				+			++Pos;
				+		}
				+	}
				+
				+	if (Mark >= 'W' && Mark <= 'Z')
				+	{
				+		Pos = 7;
				+	}
				+
				+	if (Mark == 'X')
				+	{
				+		return (TRUE);
				+	}
				+
				+	if (strcmp(StrParI1, StrParI2) > 0)
				+	{
				+		Pos += 7;
				+		return (TRUE);
				+	}
				+	return (FALSE);
				+}
			
 
				+
			
 
				+/*
				+ * Func3 -- returns TRUE when EnumParIn is Ident3 and FALSE for every other
				+ * value.  The test is made on a local copy of the parameter.
				+ */
				+boolean Func3(EnumParIn)
				+REG Enumeration	EnumParIn;
				+{
				+	REG Enumeration	Copy;
				+
				+	Copy = EnumParIn;
				+	if (Copy != Ident3)
				+	{
				+		return (FALSE);
				+	}
				+	return (TRUE);
				+}
			
 
				+
			
 
				+#ifdef	NOSTRUCTASSIGN
				+/*
				+ * Byte-wise copy used by the structassign() macro when the compiler cannot
				+ * assign structures directly.  Copies l bytes from s to d, lowest address
				+ * first.
				+ */
				+memcpy(d, s, l)
				+register char	*d;
				+register char	*s;
				+register int	l;
				+{
				+	while (l != 0)
				+	{
				+		*d = *s;
				+		++d;
				+		++s;
				+		--l;
				+	}
				+}
				+#endif
			
 
				+
			
 
				+#if 0
			
 
				+IntLoc;
			
 
				+		}
			
 
				+	if (CharLoc >= 'W' && CharLoc <= 'Z')
			
 
				+		IntLoc = 7;
			
 
				+	if (CharLoc == 'X')
			
 
				+		return(TRUE);
			
 
				+	else
			
 
				+	{
			
 
				+		if (strcmp(StrParI1, StrParI2) > 0)
			
 
				+		{
			
 
				+			IntLoc += 7;
			
 
				+			return (TRUE);
			
 
				+		}
			
 
				+		else
			
 
				+			return (FALSE);
			
 
				+	}
			
 
				+}
			
 
				+#endif
			
--- a/benchmarks/geom
+++ b/benchmarks/geom
@@ -0,0 +1,8 @@
 
				+ 13.50e+0   9.00e+0   8.00e+0                               Box dimensions
			
 
				+  0.80e+0   0.99e+0   0.54e+0   0.84e+0   0.01e+0   0.84e+0
			
 
				+  0.80e+0   0.01e+0   0.54e+0   0.84e+0   0.01e+0   0.84e+0
			
 
				+  0.80e+0   0.01e+0   0.54e+0   0.84e+0   0.99e+0   0.84e+0 Reflectivities (RGB)
			
 
				+  1.27e+0   0.00e+0   0.00e+0   0.00e+0   0.00e+0   0.00e+0
			
 
				+  1.27e+0   0.00e+0   0.00e+0   0.00e+0   0.00e+0   0.00e+0
			
 
				+  1.27e+0   0.00e+0   0.00e+0   0.00e+0   0.00e+0   0.00e+0 Emissivities (RGB)
			
 
				+
			
--- a/benchmarks/pi.c
+++ b/benchmarks/pi.c
@@ -0,0 +1,25 @@
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+/*
				+ * Monte Carlo estimate of pi.
				+ *
				+ * Usage: pi <iterations>
				+ *
				+ * Draws <iterations> pseudo-random points in the unit square (fixed seed,
				+ * so every run produces the same sequence), counts those that fall inside
				+ * the quarter circle of radius 1 and prints 4 * hits / iterations.
				+ *
				+ * Returns 0 on success.  A wrong argument count or an iteration count that
				+ * is not a positive int is reported on stderr with a failure status; the
				+ * old code exited 0 on a usage error, returned 1 on success and divided
				+ * 0 by 0 when the count was zero or not a number.  Trailing characters
				+ * after the digits are still ignored, as atoi() ignored them.
				+ */
				+int main(int argc, char *argv[])
				+{
				+   int i, its, hits = 0;
				+   double d1, d2;
				+   long parsed;
				+   char *end;
				+
				+   if (argc != 2) {
				+      fprintf(stderr, "Usage: %s <iterations>\n", argv[0]);
				+      exit(EXIT_FAILURE);
				+   }
				+
				+   parsed = strtol(argv[1], &end, 10);
				+   /* no digits, non-positive, or too large to fit an int */
				+   if (end == argv[1] || parsed <= 0 || (long)(int)parsed != parsed) {
				+      fprintf(stderr, "%s: <iterations> must be a positive integer\n", argv[0]);
				+      exit(EXIT_FAILURE);
				+   }
				+   its = (int)parsed;
				+
				+   srandom(1);
				+   for (i = 0; i < its; i++) {
				+      d1 = ((double)random())/2147483647.0;
				+      d2 = ((double)random())/2147483647.0;
				+      if (((d1*d1) + (d2*d2)) <= 1)
				+	 hits++;
				+   }
				+   printf("%.10f\n", (double)4.0 * (double) ((double)hits / (double)its));
				+   return 0;
				+}
			
 
				+
			
--- a/benchmarks/slalom.c
+++ b/benchmarks/slalom.c
@@ -0,0 +1,1138 @@
 
				+/******************************************************************************
			
 
				+                               S L A L O M
			
 
				+
			
 
				+    Scalable Language-independent Ames Laboratory One-minute Measurement
			
 
				+
			
 
				+     The following program is the first benchmark based on fixed time rather
			
 
				+  than fixed problem comparison.  Not only is fixed time more representative
			
 
				+  of the way people use computers, it also greatly increases the scope and
			
 
				+  longevity of the benchmark.  SLALOM is very scalable, and can be used to
			
 
				+  compare computers as slow as 126 floating-point operations per second
			
 
				+  (FLOPS) to computers running a trillion times faster.  The scalability can
			
 
				+  be used to compare single processors to massively parallel collections
			
 
				+  of processors, and to study the space of problem size vs. ensemble size
			
 
				+  in fine detail.  It resembles the LINPACK benchmark since it involves
			
 
				+  factoring and backsolving a (nearly) dense matrix, but incorporates a
			
 
				+  number of improvements to that benchmark that we hope will make SLALOM
			
 
				+  a better reflection of general system performance.
			
 
				+
			
 
				+     The SLALOM benchmark solves a complete, real problem (optical radiosity
			
 
				+  on the interior of a box), not a contrived kernel or a synthetic mixture of
			
 
				+  sample operations.  SLALOM is unusual since it times input, problem setup,
			
 
				+  solution, and output, not just the solution.  For slower computers, the
			
 
				+  problem setup will take the majority of the time; it grows as the square of
			
 
				+  the problem size.  The solver grows as the cube of the problem size, and
			
 
				+  dominates the time for large values of n.
			
 
				+
			
 
				+     While the following is C, you are free to translate it into any
			
 
				+  language you like, including assembly language specific to one computer.
			
 
				+  You may use compiler directives, hand-tuned library calls, loop unrolling,
			
 
				+  and even change the algorithm, if you can provide a convincing argument
			
 
				+  that the program still works for the full range of possible inputs.  For
			
 
				+  example, if you replace the direct solver with an iterative one, you must
			
 
				+  make sure your method is correct even when the geometry is quite eccentric
			
 
				+  and the box faces are highly reflective. (rho = .999)
			
 
				+
			
 
				+     The main() driver should be used with the value of 60 seconds for the
			
 
				+  SLALOM benchmark.  The work done for a particular problem size is figured
			
 
				+  after timing has ceased, so there is no overhead for work assessment.  The
			
 
				+  residual check ||Ax - b|| is also done after timing has ceased.  Two
			
 
				+  computers may be compared either by their problem size n, or by their MFLOPS
			
 
				+  rate, never by the ratio of execution times.  Times will always be near one
			
 
				+  minute in SLALOM.  We have used the following weights for floating-point
			
 
				+  operation counting, based on the weights used by Lawrence Livermore National
			
 
				+  Laboratory:
			
 
				+
			
 
				+                        OPERATION                       WEIGHT
			
 
				+                    a=b, a=(constant)                      0
			
 
				+            a<0, a<=0, a==0, a!=0, a>0, a>=0               0
			
 
				+                 -a, fabs(a), fsgn(a, b)                   0
			
 
				+                   a+b, a-b, a*b, a^2                      1
			
 
				+            a<b, a<=b, a==b, a!=b, a>b, a>=b               1
			
 
				+                   (int) a, (double)b                      1
			
 
				+                        1/a, -1/a                          3
			
 
				+                           a/b                             4
			
 
				+                          sqrt(a)                          4
			
 
				+               Format to or from ASCII string              6
			
 
				+       sin(a), cos(a), tan(a), log(a), atan(a), exp(a)     8
			
 
				+
			
 
				+     We invite you to share with us the results of any measurements that you
			
 
				+  make with SLALOM.  We do NOT accept anonymous data; machine timings will be
			
 
				+  referenced and dated.
			
 
				+
			
 
				+     The least you need to do to adapt SLALOM to your computer is:
			
 
				+
			
 
				+        1.  In the "Measure" routine, set NMAX to a value large enough to keep
			
 
				+            the computer working for a minute.  Vary it slightly if it helps
			
 
				+            (for reasons of cache size, interleaving, etc.)
			
 
				+
			
 
				+        2.  Replace the timer call in "When" with the most accurate wall-clock
			
 
				+            timer at your disposal.  If only CPU time is available, try to run
			
 
				+            the job standalone or at high priority, since we are ultimately
			
 
				+            interested in the top of the statistical range of performance.
			
 
				+
			
 
				+        3.  Edit in the information specific to your test in the "What"
			
 
				+            routine, so that final output will be automatically annotated.
			
 
				+
			
 
				+        4.  Compile, link, and run the program, interacting to select values
			
 
				+            of n that bracket a time of one minute.  Once everything is
			
 
				+            running, run it as a batch job so as to record the session.
			
 
				+
			
 
				+     Examples of ways you may optimize performance:
			
 
				+
			
 
				+        1.  Unroll the loops in SetUp1 and SetUp2; it is possible to
			
 
				+            vectorize both SetUp1 and SetUp2 at the cost of some extra
			
 
				+            operations, program complexity, and storage.
			
 
				+
			
 
				+        2.  Replace the innermost loops of Solver with calls to well-tuned
			
 
				+            libraries of linear algebra routines, such as DDOT from the
			
 
				+            Basic Linear Algebra Subroutines (level 1 BLAS).  Better still,
			
 
				+            use a tuned library routine for all of Solver; the sparsity
			
 
				+            exploited in Solver is only a few percent, so you will usually
			
 
				+            gain more than you lose by applying a dense symmetric solver.
			
 
				+
			
 
				+        3.  Parallelize the SetUp and Solver routines; all are highly
			
 
				+            parallel.  Each element of the matrix can be constructed
			
 
				+            independently, once each processor knows the geometry and part of
			
 
				+            the partitioning into regions.  A substantial body of literature
			
 
				+            now exists for performing the types of operations in Solver in
			
 
				+            parallel.
			
 
				+
			
 
				+        4.  Overlap computation with output.  Once the Region routine is done,
			
 
				+            the first part of the output file (patch geometry) can be written
			
 
				+            while the radiosities are being calculated.
			
 
				+
			
 
				+     Examples of what you may NOT do:
			
 
				+
			
 
				+        1.  The tuning must not be made specific to the particular input
			
 
				+            provided.  For example, you may not eliminate IF tests simply
			
 
				+            because they always come out the same way for this input; you
			
 
				+            may not use precomputed answers or table look-up unless those
			
 
				+            answers and tables cover the full range of possible inputs; and
			
 
				+            you may not exploit symmetry for even values of the problem size.
			
 
				+
			
 
				+        2.  You may not disable the self-consistency tests in SetUp3 and
			
 
				+            Verify, nor alter their tolerance constants.
			
 
				+
			
 
				+        3.  You may not change the input or output files to unformatted
			
 
				+            binary or other format that would render them difficult to create
			
 
				+            or read for humans.
			
 
				+
			
 
				+        4.  You may not eliminate the reading of the "geom" file by putting
			
 
				+            its data directly into the compiled program.
			
 
				+
			
 
				+        5.  You may not change any of the work assessments in Meter.  If you
			
 
				+            use more floating-point operations than indicated, you must still
			
 
				+            use the assessments provided.  If you find a way to use fewer
			
 
				+            operations and still get the job done for arbitrary input
			
 
				+            parameters, please tell us!
			
 
				+
			
 
				+                          -John Gustafson, Diane Rover, Michael Carter,
			
 
				+                           and Stephen Elbert
			
 
				+                           Ames Laboratory, Ames, Iowa 50011
			
 
				+******************************************************************************/
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/*  The following program finds a value n such that a problem of size n      */
			
 
				+/*  takes just under "goal" seconds to execute.                              */
			
 
				+/*                                                                           */
			
 
				+/*  John Gustafson, Diane Rover, Michael Carter, and Stephen Elbert          */
			
 
				+/*  Ames Laboratory, 3/18/90                                                 */
			
 
				+/*                                                                           */
			
 
				+/*  Calls:  Meter   Measures execution time for some application.            */
			
 
				+/*          What    Prints work-timing statistics and system information.    */
			
 
				+/*****************************************************************************/
			
 
				+
			
 
				+#include				<stdio.h>
			
 
				+#include				<math.h>
			
 
				+#include				<sys/time.h>
			
 
				+
			
 
				+/* NMAX = Largest npatch for your computer; adjust as needed. */
			
 
				+#define		NMAX		2048
			
 
				+#define		EPS			(0.5e-8)
			
 
				+#define		FALSE		(1==0)
			
 
				+#define		TRUE		(!FALSE)
			
 
				+#define		MAX(a,b)	(((a) > (b)) ? (a) : (b))
			
 
				+
			
 
				+/* Global variables and function return types: */
			
 
				+double 	goal,		/* User input, fixed-time benchmark goal, in seconds. */
			
 
				+		timing,		/* Elapsed time returned by Meter routine, in seconds.*/
			
 
				+		work,		/* In this case, number of FLOPs performed.           */
			
 
				+		When(),		/* Wall clock in seconds.                             */
			
 
				+		Ddot();		/* Double dot product.                                */
			
 
				+int		mean,		/* Avg between upper and lower bounds for bisection   */
			
 
				+					/* method.                                            */
			
 
				+		n,			/* The problem size.                                  */
			
 
				+		nupper,		/* Upper bound on problem size, used in iterating     */
			
 
				+					/* toward goal.                                       */
			
 
				+		Meter(),	/* Driver for following benchmark functions.          */
			
 
				+		Reader (),	/* Reads problem description from 'geom' file.        */
			
 
				+		Region (),	/* Subdivides box faces into patches.                 */
			
 
				+		SetUp3 (),	/* Set up matrix to solve.                            */
			
 
				+		Storer (),	/* Write result to 'answer' file.                     */
			
 
				+		Verify ();	/* Verify the radiosity solution from solver.         */
			
 
				+void	SetUp1 (),	/* Set up matrix to solve.                            */
			
 
				+		SetUp2 (),	/* Set up matrix to solve.                            */
			
 
				+		Solver ();	/* Solve the radiosity matrix.                        */
			
 
				+
			
 
				+/*
				+ * Interactive driver: asks for the time goal and for a lower and an upper
				+ * bound on the problem size, then bisects [n, nupper] with Meter until the
				+ * interval has width 1 and reports the statistics for n through What.
				+ *
				+ * Entering a bound <= 0 quits with status 0.  Input that cannot be read
				+ * (end of file or non-numeric text) also ends the program: the scanf
				+ * results used to be ignored, which left the previous value in place and
				+ * re-ran Meter with it forever.
				+ */
				+main ()
				+{
				+	int		ok;			/* Return code temporary storage.       */
				+
				+	/* Get desired number of seconds: */
				+	printf ("Enter the number of seconds that is the goal: ");
				+	if (scanf ("%lg", &goal) != 1) {
				+		printf ("\nCould not read the goal time.\n");
				+		exit(1);
				+	}
				+
				+	/* Get lower and upper bounds for n from the standard input device: */
				+	do {
				+		printf ("Enter a lower bound for n: ");
				+		/* Unreadable input is treated like the "n <= 0" quit request. */
				+		if (scanf ("%d", &n) != 1 || n <= 0)
				+			exit(0);
				+		ok = Meter (n, &timing, &work);
				+		if (timing >= goal)
				+			printf ("Must take less than %g seconds.  Took %g.\n",
				+			  goal, timing);
				+	} while (!ok || timing >= goal);
				+
				+	do {
				+		printf ("Enter an upper bound for n: ");
				+		/* Unreadable input is treated like the "nupper <= 0" quit request. */
				+		if (scanf ("%d", &nupper) != 1 || nupper <= 0)
				+			exit(0);
				+		ok = Meter (nupper, &timing, &work);
				+		if (timing < goal) {
				+			printf ("Must take at least %g seconds.  Took %g.\n",
				+			  goal, timing);
				+			n = MAX(nupper, n);
				+		}
				+	} while (!ok || timing < goal);
				+
				+	/*
				+	 *  While the [n, nupper] interval is larger than 1, bisect it and
				+	 *  pick a half:
				+	 */
				+	while (nupper - n > 1) {
				+		mean = (n + nupper) / 2;
				+		ok = Meter (mean, &timing, &work);
				+		if (timing < goal)
				+			n = mean;
				+		else
				+			nupper = mean;
				+		printf ("New interval: [%d,%d]\n", n, nupper);
				+	}
				+
				+	/* Ensure that most recent run was for n, not nupper. */
				+	ok = Meter (n, &timing, &work);
				+
				+	/* Print out final statistics. */
				+	What (n, timing, work);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* This routine should be edited to contain information for your system.     */
			
 
				+/*****************************************************************************/
			
 
				+/*
				+ * Prints the final report: a blank line, the fixed system description
				+ * lines, then the work in millions of operations, the elapsed time, the
				+ * problem size, the MFLOPS rate and an estimate of the data memory used.
				+ *
				+ * n:      problem size of the reported run.
				+ * timing: elapsed seconds for that run.
				+ * work:   floating-point operations counted for that run.
				+ */
				+What (n, timing, work)
				+int n;
				+double timing, work;
				+{
				+	static char *info[] = {
				+		"Machine:  SUN 4/370GX          Processor:  SPARC",
				+		"Memory:   32 MB                # of procs: 1",
				+		"Cache:    128 KB               # used:     1",
				+		"NMAX:     512                  Clock:      25 MHz",
				+		"Disk:     .3GB SCSI+.7GB SMD   Node name:  amssun2",
				+		"OS:       SUNOS 4.0.3          Timer:      Wall, gettimeofday()",
				+		"Language: C                    Alone:      yes",
				+		"Compiler: cc                   Run by:     M. Carter",
				+		"Options:  -O                   Date:       23 May 1990",
				+		NULL
				+	};
				+	char		**line;		/* Walks the NULL-terminated info table. */
				+	double		mops;		/* Work in millions of operations.       */
				+	double		mflops;		/* Millions of operations per second.    */
				+	int			bytes;		/* Estimated data memory use.            */
				+
				+	printf ("\n");
				+	for (line = info ; *line != NULL ; ++line)
				+		puts (*line);
				+
				+	mops = work * 1e-6;
				+	printf ("M ops:    %-13lg        Time:       %-.3lf seconds\n",
				+	  mops, timing);
				+
				+	mflops = (work / timing) * 1e-6;
				+	printf ("n:        %-6d               MFLOPS:     %-.5lg\n",
				+	  n, mflops);
				+
				+	bytes = 8 * n * n + 120 * n + 800;
				+	printf ("Approximate data memory use: %d bytes.\n",
				+	  bytes);
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/*  This routine measures time required on a revised LINPACK-type benchmark, */
			
 
				+/*  including input, matrix generation, solution, and output.                */
			
 
				+/*                                                                           */
			
 
				+/*  John Gustafson, Diane Rover, Michael Carter, and Stephen Elbert          */
			
 
				+/*  Ames Laboratory, 3/18/90                                                 */
			
 
				+/*                                                                           */
			
 
				+/*  Calls: Reader  Reads the problem description from secondary storage.     */
			
 
				+/*         Region  Partitions box surface into rectangular regions (patches).*/
			
 
				+/*         SetUp1  Sets up equations from patch geometries-parallel faces.   */
			
 
				+/*         SetUp2  Sets up equations from patch geometries-orthogonal faces. */
			
 
				+/*         SetUp3  Sets up equations-row normalization and radiant props.    */
			
 
				+/*         Solver  Solves the equations by LDL factorization.                */
			
 
				+/*         Storer  Stores solution (patch radiosities) on secondary storage. */
			
 
				+/*         When    Returns wall-clock time, in seconds.                      */
			
 
				+/*****************************************************************************/
			
 
				+
			
 
				+Meter (npatch, timing, work)
			
 
				+int		npatch;		/* In, problem size, here the number of equations. */
			
 
				+double	*timing,	/* Out, number of seconds measured.                */
			
 
				+		*work;		/* Out, work done, here the number of FLOPs.       */
			
 
				+{
			
 
				+	static
			
 
				+	double	area[NMAX],			/* Areas of patches * 8 * pi.                */
			
 
				+			box[7],				/* Dimensions of box in x, y, z directions.  */
			
 
				+			coeff[NMAX][NMAX],	/* The coefficients of the eqns to solve.    */
			
 
				+			diag[3][NMAX],		/* Diag terms of the eqns to solve. (RGB)    */
			
 
				+			emiss[6][3],		/* (RGB) emissivities of patches.            */
			
 
				+			place[3][NMAX],		/* Width-height-depth position of patches.   */
			
 
				+			result[3][NMAX],	/* Answer radiosities (RGB).                 */
			
 
				+			rho[6][3],			/* (RGB) Reflectivities of patches.          */
			
 
				+			rhs[3][NMAX],		/* Right-hand sides of eqns to solve (RGB).  */
			
 
				+			size[2][NMAX];		/* Width-height sizes of patches.            */
			
 
				+	double	ops[8],				/* Floating-point operation counts.          */
			
 
				+			p[6],				/* Number of patches in faces.               */
			
 
				+			sec[8],				/* Times for routines, in seconds.           */
			
 
				+			tmp1, tmp2;			/* Double temporary variables.               */
			
 
				+	int		i,					/* Loop counter.                             */
			
 
				+			itmp1,				/* Integer temporary variable.               */
			
 
				+			non0;				/* Index of first nonzero off-diagonal elem. */
			
 
				+	static
			
 
				+	int		loop[6][2];			/* Patch number ranges for faces.            */
			
 
				+	static char *tasks[] = {	/* Names of all the functions in benchmark.  */
			
 
				+		"Reader", "Region",
			
 
				+		"SetUp1", "SetUp2",
			
 
				+		"SetUp3", "Solver",
			
 
				+		"Storer"
			
 
				+	};
			
 
				+	static char *format =		/* Output line format.                       */
			
 
				+		"%6.6s%8.3f%17.0f%14.6f%10.1f %%\n";
			
 
				+
			
 
				+	/* First check that npatch lies between 6 and NMAX: */
			
 
				+	if (npatch < 6) {
			
 
				+		printf ("Must be at least 6, the number of faces.\n");
			
 
				+		return (FALSE);
			
 
				+	}
			
 
				+	else if (npatch > NMAX) {
			
 
				+		printf ("Exceeds %d = maximum for this system.\n", NMAX);
			
 
				+		return (FALSE);
			
 
				+	}
			
 
				+
			
 
				+	/* Ensure that previous 'answer' file is deleted: */
			
 
				+	unlink ("answer");
			
 
				+
			
 
				+	/* Time the tasks, individually and collectively.  */
			
 
				+	sec[0] = When();
			
 
				+	if (!Reader (box, rho, emiss))
			
 
				+		return (FALSE);
			
 
				+	sec[1] = When();
			
 
				+	if (!Region (npatch, loop, box, place, size, area))
			
 
				+		return (FALSE);
			
 
				+	sec[2] = When();
			
 
				+	SetUp1 (npatch, loop, coeff, place, size);
			
 
				+	sec[3] = When();
			
 
				+	SetUp2 (npatch, loop, coeff, place, size);
			
 
				+	sec[4] = When();
			
 
				+	if (!SetUp3 (npatch, loop, area, rho, emiss, coeff, diag, rhs))
			
 
				+		return (FALSE);
			
 
				+	sec[5] = When();
			
 
				+	non0 = loop[1][0];
			
 
				+	Solver (npatch, non0, coeff, diag, rhs, result);
			
 
				+	sec[6] = When();
			
 
				+	Storer (npatch, loop, place, size, result);
			
 
				+	sec[7] = When();
			
 
				+	*timing = sec[7] - sec[0];
			
 
				+	for (i = 0 ; i < 7 ; i++)
			
 
				+		sec[i] = sec[i+1] - sec[i];
			
 
				+		
			
 
				+	/* Assess floating-point work done by each routine called, and total: */
			
 
				+	/* Note the ops counts are talleyed into a double array, and there    */
			
 
				+	/* some strange casts to double in some equations.  This is to        */
			
 
				+	/* prevent integer overflow.                                          */
			
 
				+	itmp1 = 0;
			
 
				+	tmp1 = 0.0;
			
 
				+	for (i = 0 ; i < 6 ; i++) {
			
 
				+        p[i] = loop[i][1] - loop[i][0] + 1;
			
 
				+        tmp1 += p[i] * p[i];
			
 
				+        itmp1 += sqrt(p[i] * box[i] / box[i + 1]) + 0.5;
			
 
				+	}
			
 
				+	tmp2 = p[0] * p[3] + p[1] * p[4] + p[2] * p[5];
			
 
				+	ops[0] = 258;
			
 
				+	ops[1] = 154 + (double) 8 * itmp1 + npatch;
			
 
				+	ops[2] = 6 + 532 * tmp2;
			
 
				+	ops[3] = 8*npatch + 370 * ((double) npatch * npatch - tmp1 - 2*tmp2) / 2.0;
			
 
				+	ops[4] = 72 + (double) 9 * npatch + (double) npatch * npatch - tmp1;
			
 
				+	ops[5] = npatch * (npatch * ((double) npatch + 7.5) - 2.5) - 21
			
 
				+			+ (non0+1) * ((non0+1) * (2 * ((double) non0+1) - 16.5) + 35.5)
			
 
				+			+ (non0+1) * npatch * (9 - 3 * ((double) non0+1));
			
 
				+	ops[6] = 48 * npatch;
			
 
				+	*work = ops[0] + ops[1] + ops[2] + ops[3] + ops[4] + ops[5] + ops[6];
			
 
				+
			
 
				+	/* Display timing-work-speed breakdown by routine. */
			
 
				+	printf ("%d patches:\n", npatch);
			
 
				+	printf (" Task  Seconds       Operations        MFLOPS    %% of Time\n");
			
 
				+	for (i = 0 ; i < 7 ; i++) {
			
 
				+		if (sec[i] == 0.0)
			
 
				+			sec[i] = 0.001;
			
 
				+		printf (format, tasks[i], sec[i], ops[i], (ops[i] / sec[i]) * 1e-6,
			
 
				+			100.0 * sec[i] / *timing);
			
 
				+	}
			
 
				+	printf (format, "TOTALS", *timing, *work, (*work / *timing) * 1e-6, 100.0);
			
 
				+	Verify (npatch, coeff, diag, rhs, result);
			
 
				+
			
 
				+	return (TRUE);
			
 
				+}
			
 
				+
			
 
				/*****************************************************************************/
				/*  This function should return the actual, wall clock time (not CPU time)   */
				/*  in seconds as accurately as possible.  Change it to your system timer.   */
				/*                                                                           */
				/*  This version combines the seconds and microseconds reported by           */
				/*  gettimeofday() into a single double.                                     */
				/*****************************************************************************/
				double
				When()
				{
					struct timeval tp;

					/* The time-zone argument is obsolete and the result for a non-null  */
					/* pointer is unspecified by POSIX, so none is requested.            */
					gettimeofday (&tp, NULL);
					return ((double) tp.tv_sec + (double) tp.tv_usec * 1e-6);
				}
			
 
				+
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* The following routine reads in the problem description from secondary     */
			
 
				+/* storage, and checks that numbers are in reasonable ranges.                */
			
 
				+/*****************************************************************************/
			
 
				+Reader (box, rho, emiss)
			
 
				+double	box[],			/* Out: Dimensions of box in x, y, z directions.  */
			
 
				+		rho[][3],		/* Out: (RGB) Reflectivities of patches.          */
			
 
				+		emiss[][3];		/* Out: (RGB) emissivities of patches.            */
			
 
				+{
			
 
				+	/*
			
 
				+	 *  Local variables:
			
 
				+	 *    infile  Device number for input file.
			
 
				+	 *    i, j    Loop counters.
			
 
				+	 *    tmp1    Maximum emissivity, to check that emissivities are not all 0.
			
 
				+	 */
			
 
				+	int		i, j,		/* Loop counters.                            */
			
 
				+			n;			/* Number of args fscanf()'ed from file.     */
			
 
				+	double	tmp1;		/* Maximum emissivity.                       */
			
 
				+	FILE	*infile;	/* Input file pointer.                       */
			
 
				+	char	buff[81];	/* Buffer used to eat a line of input.       */
			
 
				+
			
 
				+	/* Open the input file and read in the data. */
			
 
				+	if ((infile = fopen ("geom", "r")) == NULL) {
			
 
				+		printf ("slalom:  'geom' geometry file not found.\n");
			
 
				+		exit (1);
			
 
				+	}
			
 
				+
			
 
				+	/* Read the box coordinates and error check. */
			
 
				+	n = 0;
			
 
				+	for (i = 0 ; i < 3 ; i++) {
			
 
				+		n += fscanf (infile, "%lg", &box[i]);
			
 
				+	}
			
 
				+	fgets (buff, 80, infile);		/* Eat the rest of the line. */
			
 
				+	if (n != 3) {
			
 
				+		printf ("Must specify exactly 3 box coordinates.\n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+
			
 
				+	/* Read the reflectivities and error check. */
			
 
				+	n = 0;
			
 
				+	for (j = 0 ; j < 3 ; j++) {
			
 
				+		for (i = 0 ; i < 6 ; i++) {
			
 
				+			n += fscanf (infile, "%lg", &rho[i][j]);
			
 
				+		}
			
 
				+	}
			
 
				+	fgets (buff, 80, infile);		/* Eat the rest of the line. */
			
 
				+	if (n != 18) {
			
 
				+		printf ("Must specify exactly 18 box coordinates.\n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+
			
 
				+	/* Read the emissivities and error check. */
			
 
				+	n = 0;
			
 
				+	for (j = 0 ; j < 3 ; j++) {
			
 
				+		for (i = 0 ; i < 6 ; i++) {
			
 
				+			n += fscanf (infile, "%lg", &emiss[i][j]);
			
 
				+		}
			
 
				+	}
			
 
				+	fgets (buff, 80, infile);		/* Eat the rest of the line. */
			
 
				+	if (n != 18) {
			
 
				+		printf ("Must specify exactly 18 box coordinates.\n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+	fclose (infile);
			
 
				+
			
 
				+	/* Now sanity-check the values that were just read. */
			
 
				+	for (j = 0 ; j < 3 ; j++) {
			
 
				+		if (box[j] < 1.0 || box[j] >= 100.0) {
			
 
				+			printf ("Box dimensions must be between 1 and 100.\n");
			
 
				+			return (FALSE);
			
 
				+		}
			
 
				+		box[j+3] = box[j];
			
 
				+
			
 
				+		tmp1 = 0.0;
			
 
				+		for (i = 0 ; i < 6 ; i++) {
			
 
				+			if (rho[i][j] < 0.000 || rho[i][j] > 0.999) {
			
 
				+				printf ("Reflectivities must be between .000 and .999.\n");
			
 
				+				return (FALSE);
			
 
				+			}
			
 
				+			if (emiss[i][j] < 0.0) {
			
 
				+				printf ("Emissivity cannot be negative.\n");
			
 
				+				return (FALSE);
			
 
				+			}
			
 
				+			if (tmp1 < emiss[i][j])
			
 
				+				tmp1 = emiss[i][j];
			
 
				+		}
			
 
				+		if (tmp1 == 0.0) {
			
 
				+			printf ("Emissivities are zero.  Problem is trivial.\n");
			
 
				+			return (FALSE);
			
 
				+		}
			
 
				+	}
			
 
				+	box[6] = box[3];
			
 
				+	return (TRUE);
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* The following routine decomposes the surface of a variable-sized box      */
			
 
				+/* into patches that are as nearly equal in size and square as possible.     */
			
 
				+/*****************************************************************************/
			
 
				+Region (npatch, loop, box, place, size, area)
			
 
				+int		npatch,			/* In: Problem size.                             */
			
 
				+		loop[][2];		/* Out: Patch number ranges for faces.           */
			
 
				+double	area[],			/* Out: 8pi * areas of the patches.              */
			
 
				+		box[],			/* In: Dimensions of box in x, y, z directions.  */
			
 
				+		place[][NMAX],	/* Out: Width-height-depth positions of patches. */
			
 
				+		size[][NMAX];	/* Out: Width-height sizes of patches.           */
			
 
				+{
			
 
				+
			
 
				+
			
 
				+	int		icol,	/* Loop counter over the number of columns. */
			
 
				+			ipatch,	/* Loop counter over the number of patches. */
			
 
				+			iface,	/* Loop counter over the number of faces.   */
			
 
				+			itmp1,	/* Integer temporary variables.             */
			
 
				+			itmp2,	/* Integer temporary variables.             */
			
 
				+			last,	/* Inner loop ending value.                 */
			
 
				+			lead,	/* Inner loop starting value.               */
			
 
				+			numcol,	/* Number of columns on faces.              */
			
 
				+			numpat,	/* Number of patches on a face.             */
			
 
				+			numrow;	/* Number of rows of patches in a column.   */
			
 
				+	double	height,	/* Height of a patch within a column.       */
			
 
				+			tmp1,	/* double temporary variables.              */
			
 
				+			tmp2,	/* double temporary variables.              */
			
 
				+			tmp3,	/* double temporary variables.              */
			
 
				+			tmp4,	/* double temporary variables.              */
			
 
				+			width;	/* Width of a column of patches.            */
			
 
				+
			
 
				+	/* Allocate patches to each face, proportionate to area of each face. */
			
 
				+	tmp1 = 2.0 * (box[0] * box[1] + box[1] * box[2] + box[2] * box[0]);
			
 
				+	tmp2 = 0.0;
			
 
				+	tmp3 = npatch;
			
 
				+	loop[0][0] = 0;
			
 
				+	for (iface = 0 ; iface < 5 ; iface++) {
			
 
				+		tmp2 = tmp2 + box[iface] * box[iface + 1];
			
 
				+		loop[iface][1] = (int) (tmp3 * tmp2 / tmp1 + 0.5) - 1;
			
 
				+		loop[iface + 1][0] = loop[iface][1] + 1;
			
 
				+	}
			
 
				+	loop[5][1] = npatch - 1;
			
 
				+
			
 
				+	/* Subdivide each face into numpat patches. */
			
 
				+	for (iface = 0 ; iface < 6 ; iface++) {
			
 
				+		numpat = loop[iface][1] - loop[iface][0] + 1;
			
 
				+		tmp3 = 0.0;
			
 
				+		if (iface >= 3)
			
 
				+			tmp3 = box[iface-1];
			
 
				+		numcol = (int) (sqrt(numpat * box[iface] / box[iface + 1]) + 0.5);
			
 
				+		if (numcol > numpat)
			
 
				+			numcol = numpat;
			
 
				+		if (numcol == 0)
			
 
				+			numcol = 1;
			
 
				+		width = box[iface] / numcol;
			
 
				+		itmp1 = numcol - 1;
			
 
				+		tmp1 = 0.0;
			
 
				+		for (icol = 0 ; icol < numcol ; icol++) {
			
 
				+			itmp2 = itmp1 / numcol;
			
 
				+			numrow = (itmp1 + numpat) / numcol - itmp2;
			
 
				+			if (numrow == 0) {
			
 
				+				printf ("Eccentric box requires more patches.\n");
			
 
				+				return (FALSE);
			
 
				+			}
			
 
				+			height = box[iface + 1] / numrow;
			
 
				+			tmp2 = 0.0;
			
 
				+			tmp4 = width * height * (8.0 * M_PI);
			
 
				+			lead = loop[iface][0] + itmp2;
			
 
				+			last = lead + numrow;
			
 
				+			for (ipatch = lead ; ipatch < last ; ipatch++) { 
			
 
				+				size[0][ipatch] = width;
			
 
				+				size[1][ipatch] = height;
			
 
				+				place[0][ipatch] = tmp1;
			
 
				+				place[1][ipatch] = tmp2;
			
 
				+				place[2][ipatch] = tmp3;
			
 
				+				area[ipatch] = tmp4;
			
 
				+				tmp2 = tmp2 + height;
			
 
				+			}
			
 
				+			tmp1 = tmp1 + width;
			
 
				+			itmp1 = itmp1 + numpat;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return (TRUE);
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* This routine sets up the radiosity matrix for parallel patches.           */
			
 
				+/*****************************************************************************/
			
 
				+void
			
 
				+SetUp1 (npatch, loop, coeff, place, size)
			
 
				+int		npatch,			/* In: Problem size.                             */
			
 
				+		loop[][2];		/* In: Patch number ranges for faces.            */
			
 
				+double	coeff[][NMAX],	/* Out: The coefficients of the eqns to solve.   */
			
 
				+		place[][NMAX],		/* In: Width-height-depth positions of patches.  */
			
 
				+		size[][NMAX];		/* In: Width-height sizes of patches.            */
			
 
				+{
			
 
				+	int		i, j, k,	/* General loop counters.                            */
			
 
				+			m, n,		/* General loop counters.                            */
			
 
				+			iface,		/* Loop counter over the number of faces.            */
			
 
				+			ipatch,		/* Loop counter over the number of patches.          */
			
 
				+			jface,		/* Face coupled to iface when computing mat. elems.  */
			
 
				+			jpatch;		/* Patch coupled to ipatch when computing mat. elems.*/
			
 
				+	double	d[2][2][2],	/* Point-to-point couplings between patch corners.   */
			
 
				+			d2[2][2][2],/* Squares of d values, to save recomputation.       */
			
 
				+			tmp1, tmp2,	/* Double temporary variables.                       */
			
 
				+			tmp3, tmp4,	/* Double temporary variables.                       */
			
 
				+			tmp5, tmp6,	/* Double temporary variables.                       */
			
 
				+			tmp7, tmp8;	/* Double temporary variables.                       */
			
 
				+
			
 
				+	for (iface = 0 ; iface < 3 ; iface++) {
			
 
				+		jface = iface + 3;
			
 
				+		tmp1 = place[2][loop[jface][0]] * place[2][loop[jface][0]];
			
 
				+		tmp6 = tmp1 + tmp1;
			
 
				+		for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
			
 
				+			for (jpatch=loop[jface][0] ; jpatch <= loop[jface][1] ; jpatch++) {
			
 
				+				for (j = 0 ; j < 2 ; j++) {
			
 
				+					d [0][0][j] = place[j][jpatch] - place[j][ipatch];
			
 
				+					d [1][0][j] = d[0][0][j] + size[j][jpatch];
			
 
				+					d [0][1][j] = d[0][0][j] - size[j][ipatch];
			
 
				+					d [1][1][j] = d[1][0][j] - size[j][ipatch];
			
 
				+					d2[0][0][j] = d[0][0][j] * d[0][0][j];
			
 
				+					d2[1][0][j] = d[1][0][j] * d[1][0][j];
			
 
				+					d2[0][1][j] = d[0][1][j] * d[0][1][j];
			
 
				+					d2[1][1][j] = d[1][1][j] * d[1][1][j];
			
 
				+				}
			
 
				+				tmp2 = 0.0;
			
 
				+				for (m = 0 ; m < 2 ; m++) {
			
 
				+					for (i = 0 ; i < 2 ; i++) {
			
 
				+						tmp3 = d2[m][i][1] + tmp1;
			
 
				+						tmp4 = sqrt(tmp3);
			
 
				+						tmp5 = 1.0 / tmp4;
			
 
				+						tmp8 = 0.0;
			
 
				+						for (k = 0 ; k < 2 ; k++) {
			
 
				+							for (n = 0 ; n < 2 ; n++) {
			
 
				+								tmp7 = d[k][n][0];
			
 
				+								tmp8 = -tmp7 * atan(tmp7 * tmp5) - tmp8;
			
 
				+							}
			
 
				+							tmp8 = -tmp8;
			
 
				+						}
			
 
				+						tmp2 = -4.0 * tmp4 * tmp8 - tmp2 - tmp6 *
			
 
				+						  log(((d2[1][0][0] + tmp3) * (d2[0][1][0] + tmp3)) /
			
 
				+						      ((d2[0][0][0] + tmp3) * (d2[1][1][0] + tmp3)));
			
 
				+					}
			
 
				+					tmp2 = -tmp2;
			
 
				+				}
			
 
				+				for (m = 0 ; m < 2 ; m++) {
			
 
				+					for (i = 0 ; i < 2 ; i++) {
			
 
				+						tmp4 = sqrt(d2[m][i][0] + tmp1);
			
 
				+						tmp5 = 1.0 / tmp4;
			
 
				+						tmp8 = 0.0;
			
 
				+						for (k = 0 ; k < 2 ; k++) {
			
 
				+							for (n = 0 ; n < 2 ; n++) {
			
 
				+								tmp7 = d[k][n][1];
			
 
				+								tmp8 = -tmp7 * atan(tmp7 * tmp5) - tmp8;
			
 
				+							}
			
 
				+							tmp8 = -tmp8;
			
 
				+						}
			
 
				+						tmp2 = -4.0 * tmp4 * tmp8 - tmp2;
			
 
				+					}
			
 
				+					tmp2 = -tmp2;
			
 
				+				}
			
 
				+				coeff[ipatch][jpatch] = tmp2;
			
 
				+				coeff[jpatch][ipatch] = tmp2;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* This routine sets up the radiosity matrix for orthogonal patches.         */
			
 
				+/*****************************************************************************/
			
 
				+void
			
 
				+SetUp2 (npatch, loop, coeff, place, size)
			
 
				+int		npatch,			/* In: Problem size.                             */
			
 
				+		loop[][2];		/* In: Patch number ranges for faces.            */
			
 
				+double	coeff[][NMAX],	/* Out: The coefficients of the eqns to solve.   */
			
 
				+		place[][NMAX],	/* In: Width-height-depth positions of patches.  */
			
 
				+		size[][NMAX];	/* In: Width-height sizes of patches.            */
			
 
				+{
			
 
				+	int		m,			/* General loop counters.                            */
			
 
				+			iface,		/* Loop counter over the number of faces.            */
			
 
				+			ipatch,		/* Loop counter over the number of patches.          */
			
 
				+			jface,		/* Face coupled to iface when computing mat. elems.  */
			
 
				+			jpatch;		/* Patch coupled to ipatch when computing mat. elems.*/
			
 
				+
			
 
				+	double	tmpb, tmpa,
			
 
				+			c11d, c12d, c21d, c22d, c11s, c12s, c21s, c22s,
			
 
				+			d11d, d12d, d21d, d22d, d11s, d12s, d21s, d22s,
			
 
				+			d11i, d12i, d21i, d22i, a10s, a20s, b01s, b02s,
			
 
				+			e1111, e1211, e2111, e2211, e1112, e1212, e2112, e2212,
			
 
				+			e1121, e1221, e2121, e2221, e1122, e1222, e2122, e2222;
			
 
				+
			
 
				+	for (iface = 0 ; iface < 6 ; iface++) {
			
 
				+		for (m = 0 ; m < 2 ; m++) {
			
 
				+			jface = (iface + m + 1) % 6;
			
 
				+			for (ipatch=loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
			
 
				+				a10s = place[m][ipatch] - place[2][loop[jface][0]];
			
 
				+				a20s = a10s + size[m][ipatch];
			
 
				+				a10s = a10s * a10s;
			
 
				+				a20s = a20s * a20s;
			
 
				+				for (jpatch=loop[jface][0] ; jpatch<=loop[jface][1];jpatch++) {
			
 
				+					c11d = place[m][jpatch] - place[1-m][ipatch];
			
 
				+					c12d = c11d + size[m][jpatch];
			
 
				+					c21d = c11d - size[1-m][ipatch];
			
 
				+					c22d = c12d - size[1-m][ipatch];
			
 
				+					c11s = c11d * c11d;
			
 
				+					c12s = c12d * c12d;
			
 
				+					c21s = c21d * c21d;
			
 
				+					c22s = c22d * c22d;
			
 
				+					b01s = place[1 - m][jpatch] - place[2][ipatch];
			
 
				+					b02s = b01s + size[1 - m][jpatch];
			
 
				+
			
 
				+					/**/
			
 
				+					/* Bump the term by a small real to avoid
			
 
				+					/* singularities in coupling function:
			
 
				+					/**/
			
 
				+					b01s = b01s * b01s + 1e-35;
			
 
				+					b02s = b02s * b02s + 1e-35;
			
 
				+					d11s = a10s + b01s;
			
 
				+					d12s = a10s + b02s;
			
 
				+					d21s = a20s + b01s;
			
 
				+					d22s = a20s + b02s;
			
 
				+					d11d = sqrt(d11s);
			
 
				+					d12d = sqrt(d12s);
			
 
				+					d21d = sqrt(d21s);
			
 
				+					d22d = sqrt(d22s);
			
 
				+					d11i = 1.0 / d11d;
			
 
				+					d12i = 1.0 / d12d;
			
 
				+					d21i = 1.0 / d21d;
			
 
				+					d22i = 1.0 / d22d;
			
 
				+
			
 
				+					tmpa =	  d11d * ( c11d * atan (c11d * d11i)
			
 
				+									 - c12d * atan (c12d * d11i)
			
 
				+									 - c21d * atan (c21d * d11i)
			
 
				+									 + c22d * atan (c22d * d11i))
			
 
				+							+ d12d * (-c11d * atan (c11d * d12i)
			
 
				+									 + c12d * atan (c12d * d12i)
			
 
				+									 + c21d * atan (c21d * d12i)
			
 
				+									 - c22d * atan (c22d * d12i))
			
 
				+							+ d21d * (-c11d * atan (c11d * d21i)
			
 
				+									 + c12d * atan (c12d * d21i)
			
 
				+									 + c21d * atan (c21d * d21i)
			
 
				+									 - c22d * atan (c22d * d21i))
			
 
				+							+ d22d * ( c11d * atan (c11d * d22i)
			
 
				+									 - c12d * atan (c12d * d22i)
			
 
				+									 - c21d * atan (c21d * d22i)
			
 
				+									 + c22d * atan (c22d * d22i));
			
 
				+
			
 
				+					e1111 = c11s + d11s;
			
 
				+					e1211 = c12s + d11s;
			
 
				+					e2111 = c21s + d11s;
			
 
				+					e2211 = c22s + d11s;
			
 
				+					e1112 = c11s + d12s;
			
 
				+					e1212 = c12s + d12s;
			
 
				+					e2112 = c21s + d12s;
			
 
				+					e2212 = c22s + d12s;
			
 
				+					e1121 = c11s + d21s;
			
 
				+					e1221 = c12s + d21s;
			
 
				+					e2121 = c21s + d21s;
			
 
				+					e2221 = c22s + d21s;
			
 
				+					e1122 = c11s + d22s;
			
 
				+					e1222 = c12s + d22s;
			
 
				+					e2122 = c21s + d22s;
			
 
				+					e2222 = c22s + d22s;
			
 
				+
			
 
				+					tmpb =    c11s * log( e1111 * e1122 / (e1112 * e1121))
			
 
				+							- c12s * log( e1211 * e1222 / (e1212 * e1221))
			
 
				+							- c21s * log( e2111 * e2122 / (e2112 * e2121))
			
 
				+							+ c22s * log( e2211 * e2222 / (e2212 * e2221))
			
 
				+							- d11s * log( e1111 * e2211 / (e1211 * e2111))
			
 
				+							+ d12s * log( e1112 * e2212 / (e1212 * e2112))
			
 
				+							+ d21s * log( e1121 * e2221 / (e1221 * e2121))
			
 
				+							- d22s * log( e1122 * e2222 / (e1222 * e2122));
			
 
				+
			
 
				+					coeff[ipatch][jpatch] = fabs(4.0 * tmpa + tmpb);
			
 
				+					coeff[jpatch][ipatch] = coeff[ipatch][jpatch];
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* This routine sets up the radiosity matrix... normalizes row sums to 1,    */
			
 
				+/* and includes terms derived from reflectivites and emissivities of faces.  */
			
 
				+/*****************************************************************************/
			
 
				+SetUp3 (npatch, loop, area, rho, emiss, coeff, diag, rhs)
			
 
				+int		npatch,			/* In: Problem size.                                 */
			
 
				+		loop[][2];		/* In: Patch number ranges for faces.                */
			
 
				+double	area[],			/* In: 8 * pi * areas of the patches.                */
			
 
				+		rho[][3],		/* In: (RGB) Reflectivities of the face interiors.   */
			
 
				+		emiss[][3],		/* In: (RGB) Emissivities of the face interiors.     */
			
 
				+		coeff[][NMAX],	/* Out: The coefficients of the eqns to solve.       */
			
 
				+		diag[][NMAX],	/* Out: (RGB) Diagonal terms of the system.          */
			
 
				+		rhs[][NMAX];	/* Out: (RGB) Right-hand sides of system to solve.   */
			
 
				+{
			
 
				+
			
 
				+	/*
			
 
				+	 *  Local variables:
			
 
				+	 *    iface     Loop counter over the number of faces.
			
 
				+	 *    ipatch    Outer loop counter over the number of patches.
			
 
				+	 *    j         Loop counter over each color (R-G-B).
			
 
				+	 *    jpatch    Inner loop counter over the number of patches.
			
 
				+	 *    tmp1      double temporary variable.
			
 
				+	 *    vtmp1-2   double vector temporary variables.
			
 
				+	 */
			
 
				+	int		j,			/* (RGB) Loop counter over each color.               */
			
 
				+			iface,		/* Loop counter over the number of faces.            */
			
 
				+			ipatch,		/* Outer loop counter over the number of patches.    */
			
 
				+			jpatch;		/* Inner loop counter over the number of patches.    */
			
 
				+	double	tmp1,		/* Double temporary variable.                        */
			
 
				+			vtmp1[3],	/* Double vector temporary variables.                */
			
 
				+			vtmp2[3];	/* Double vector temporary variables.                */
			
 
				+
			
 
				+	/* Ensure that row sums to 1, and put in reflectivities (rho) and        */
			
 
				+	/* emissivities.                                                         */
			
 
				+	for (iface = 0 ; iface < 6 ; iface++) {
			
 
				+		for (j = 0 ; j < 3 ; j++) {
			
 
				+          vtmp1[j] = 1.0 / rho[iface][j];
			
 
				+          vtmp2[j] = emiss[iface][j] * vtmp1[j];
			
 
				+		}
			
 
				+		for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
			
 
				+			tmp1 = 0.0;
			
 
				+			for (jpatch = 0 ; jpatch < loop[iface][0] ; jpatch++) {
			
 
				+				tmp1 += coeff[ipatch][jpatch];
			
 
				+			}
			
 
				+			for (jpatch = loop[iface][1]+1 ; jpatch < npatch ; jpatch++) {
			
 
				+				tmp1 += coeff[ipatch][jpatch];
			
 
				+			}
			
 
				+			/* Make sure row sum (total form factor) is close to 1: */
			
 
				+			if (fabs(tmp1 - area[ipatch]) > (0.5e-9 * tmp1)) {
			
 
				+				printf ("Total form factor is too far from unity.\n");
			
 
				+				return (FALSE);
			
 
				+			}
			
 
				+			tmp1 = -tmp1;
			
 
				+			/* Set coplanar patch interactions to zero. */
			
 
				+			for (jpatch=loop[iface][0] ; jpatch <= loop[iface][1] ; jpatch++) {
			
 
				+				coeff[ipatch][jpatch] = 0.0;
			
 
				+			}
			
 
				+			/* Assign diagonal entries and right-hand sides. */
			
 
				+			for (j = 0 ; j < 3 ; j++) {
			
 
				+				diag[j][ipatch] = vtmp1[j] * tmp1;
			
 
				+				rhs[j][ipatch] = vtmp2[j] * tmp1;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return (TRUE);
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* This routine factors and backsolves a real, symmetric, near-dense matrix  */
			
 
				+/* by LDL factorization.  No pivoting; the matrix is diagonally dominant.    */
			
 
				+/*****************************************************************************/
			
 
				+void
			
 
				+Solver (npatch, non0, coeff, diag, rhs, result)
			
 
				+int		npatch,			/* In: Problem size.                                 */
			
 
				+		non0;			/* In: Index of first nonzero off-diagonal mat. elem.*/
			
 
				+double	coeff[][NMAX],	/* In/Out: The coefficients of the eqns to solve.    */
			
 
				+		diag[][NMAX],	/* Out: (RGB) Diagonal terms of the system.          */
			
 
				+		rhs[][NMAX],	/* In: (RGB) Right-hand sides of system to solve.    */
			
 
				+		result[][NMAX];	/* Out: (RGB) solution radiosities.                  */
			
 
				+{
			
 
				+	int		i, j,		/* General loop counters.     */
			
 
				+			k, m;		/* General loop counters.     */
			
 
				+	double	tmp1;		/* Double temporary variable. */
			
 
				+
			
 
				+	/* Load lower triangle of coefficients, diagonal, and solution vector. */
			
 
				+	for (m = 0 ; m < 3 ; m++) {
			
 
				+		for (i = non0 ; i < npatch ; i++) {
			
 
				+			coeff[i][i] = diag[m][i];
			
 
				+			result[m][i] = rhs[m][i];
			
 
				+			for (j = 0 ; j < i ; j++) {
			
 
				+				coeff[i][j] = coeff[j][i];
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		/* Factor matrix, writing factors on top of original matrix. */
			
 
				+		for (j = 0 ; j < non0 ; j++) {
			
 
				+			coeff[j][j] = 1.0 / diag[m][j];
			
 
				+			result[m][j] = rhs[m][j];
			
 
				+		}
			
 
				+
			
 
				+		for (j = non0 ; j < npatch ; j++) {
			
 
				+			for (k = non0 ; k < j ; k++) {
			
 
				+				coeff[j][k] -= Ddot (k, &coeff[k][0], 1, &coeff[j][0], 1);
			
 
				+			}
			
 
				+			for (k = 0 ; k < j ; k++) {
			
 
				+				tmp1 = coeff[j][k];
			
 
				+				coeff[j][k] = tmp1 * coeff[k][k];
			
 
				+				coeff[j][j] -= tmp1 * coeff[j][k];
			
 
				+			}
			
 
				+			coeff[j][j] = 1.0 / coeff[j][j];
			
 
				+		}
			
 
				+
			
 
				+		/* Backsolve, in three stages (for L, D, and L transpose). */
			
 
				+		for (k = non0 ; k < npatch ; k++) {
			
 
				+			result[m][k] -= Ddot (k, &result[m][0], 1, &coeff[k][0], 1);
			
 
				+		}
			
 
				+
			
 
				+		for (k = 0 ; k < npatch ; k++) {
			
 
				+			result[m][k] *= coeff[k][k];
			
 
				+		}
			
 
				+
			
 
				+		for (k = npatch - 2 ; k >= non0 ; k--) {
			
 
				+			result[m][k] -= Ddot (npatch-(k+1), &result[m][k+1], 1,
			
 
				+								&coeff[k+1][k], NMAX);
			
 
				+		}
			
 
				+
			
 
				+		for (k = non0 - 1 ; k >= 0 ; k--) {
			
 
				+			result[m][k] -= Ddot (npatch-non0, &result[m][non0], 1,
			
 
				+								&coeff[non0][k], NMAX);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* The following routine writes the answer to secondary storage.             */
			
 
				+/*****************************************************************************/
			
 
				+Storer (npatch, loop, place, size, result)
			
 
				+int		npatch,			/* In: Problem size.                                 */
			
 
				+		loop[][2];		/* In: Patch number ranges for faces.                */
			
 
				+double	result[][NMAX],	/* In: (RGB) Radiosity solutions.                    */
			
 
				+		place[][NMAX],	/* In: Width-height-depth positions of patches.      */
			
 
				+		size[][NMAX];	/* In: Width-height sizes of patches.                */
			
 
				+{
			
 
				+	int		i,			/* General loop counter.                             */
			
 
				+			iface,		/* Loop counter over number of faces.                */
			
 
				+			ipatch;		/* Loop counter of number of patches within a face.  */
			
 
				+	FILE	*outfile;	/* Output file pointer.                              */
			
 
				+
			
 
				+	/* Write patch geometry to 'answer' file. */
			
 
				+	if ((outfile = fopen("answer", "w")) == NULL) {
			
 
				+		printf ("Unable to open 'answer' file.\n");
			
 
				+		exit (1);
			
 
				+	}
			
 
				+	fprintf (outfile, "%d patches:\n", npatch);
			
 
				+	fprintf (outfile,
			
 
				+	  " Patch  Face       Position in w, h, d              Width     Height\n");
			
 
				+	for (iface = 0 ; iface < 6 ; iface++) {
			
 
				+		for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
			
 
				+			fprintf (outfile,
			
 
				+				"%5d   %4d%11.5lf%11.5lf%11.5lf  %11.5lf%11.5lf\n",
			
 
				+				ipatch+1, iface+1,
			
 
				+				place[0][ipatch],
			
 
				+				place[1][ipatch],
			
 
				+				place[2][ipatch],
			
 
				+				size[0][ipatch],
			
 
				+				size[1][ipatch]);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* Write patch radiosities to 'answer' file. */
			
 
				+	fprintf (outfile, "\n Patch  Face  Radiosities\n");
			
 
				+	for (iface = 0 ; iface < 6 ; iface++) {
			
 
				+		for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
			
 
				+			fprintf (outfile, "%5d   %4d%12.8lf%12.8lf%12.8lf\n",
			
 
				+				ipatch+1, iface+1,
			
 
				+				result[0][ipatch],
			
 
				+				result[1][ipatch],
			
 
				+				result[2][ipatch]);
			
 
				+		}
			
 
				+	}
			
 
				+	fclose(outfile);
			
 
				+}
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* This routine verifies that the computed radiosities satisfy the equations.*/
			
 
				+/*                                                                           */
			
 
				+/*  John Gustafson, Diane Rover, Michael Carter, and Stephen Elbert          */
			
 
				+/*  Ames Laboratory, 3/18/90                                                 */
			
 
				+/*****************************************************************************/
			
 
				+Verify (npatch, coeff, diag, rhs, result)
			
 
				+int		npatch;			/* In: Problem size.                                 */
			
 
				+double	coeff[][NMAX],	/* In: The coefficients of the eqns to solve.        */
			
 
				+		diag[][NMAX],	/* In: (RGB) Diagonal terms of the system.           */
			
 
				+		rhs[][NMAX],	/* In: (RGB) Right-hand sides of system to solve.    */
			
 
				+		result[][NMAX];	/* In: (RGB) Radiosity solutions.                    */
			
 
				+{
			
 
				+	double	tmp1, tmp2;	/* Double temporary variables. */
			
 
				+	double	anorm,		/* Norm accumulation variable. */
			
 
				+			xnorm;		/* Norm accumulation variable. */
			
 
				+	int		i, j, m;	/* General loop counters.      */
			
 
				+
			
 
				+	tmp1 = 0.0;
			
 
				+	for (m = 0 ; m < 3 ; m++) {
			
 
				+		/* Copy lower triangle of coefficients to upper triangle, */
			
 
				+		/* and load diagonal.                                     */
			
 
				+		for (i = 0 ; i < npatch ; i++) {
			
 
				+			coeff[i][i] = diag[m][i];
			
 
				+			for (j = 0 ; j < i ; j++) {
			
 
				+				coeff[i][j] = coeff[j][i];
			
 
				+			}
			
 
				+		}
			
 
				+		/* Multiply matrix by solution vector, and accum. norm of residual. */
			
 
				+		anorm = xnorm = 0.0;
			
 
				+		for (j = 0 ; j < npatch ; j++) {
			
 
				+			tmp2 = rhs[m][j];
			
 
				+			for (i = 0 ; i < npatch ; i++) {
			
 
				+				tmp2 -= (coeff[j][i] * result[m][i]);
			
 
				+				anorm = MAX(anorm, fabs(coeff[j][i]));
			
 
				+			}
			
 
				+			xnorm = MAX(xnorm, fabs(result[m][j]));
			
 
				+			tmp1 += fabs(tmp2);
			
 
				+		}
			
 
				+	}
			
 
				+	/* printf ("anorm = %g  xnorm = %g\n", anorm, xnorm); */
			
 
				+	tmp1 /= (anorm * xnorm);
			
 
				+	if (tmp1 > 3 * EPS) {
			
 
				+		printf ("Residual is too large: %lg\n", tmp1);
			
 
				+		return (FALSE);
			
 
				+	}
			
 
				+	return (TRUE);
			
 
				+}
			
 
				+
			
 
				+#ifdef		SUN4
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* Double precision dot product specifically written for Sun 4/370.          */
			
 
				+/* By Michael Carter and John Gustafson, May 30, 1990                        */
			
 
				+/* This code unrolls the dot product four ways since that's how many         */
			
 
				+/* registers are available on the SPARC.  Other RISC systems will require    */
			
 
				+/* something very similar.  Also, unit stride is taken advantage of in the   */
			
 
				+/* form of special cases.                                                    */
			
 
				+/*****************************************************************************/
			
 
				/*
				 * Ddot -- strided double-precision dot product, unrolled four ways.
				 *
				 * Computes the sum over k = 0 .. n-1 of a[k*ia] * b[k*ib].
				 *
				 * The first (n & 3) products are handled one at a time; the remaining
				 * n >> 2 groups of four are accumulated into four independent partial sums
				 * that are added together at the end.  Unit strides get dedicated loops.
				 *
				 * Parameters:
				 *   n   number of elements in each vector.
				 *   a   pointer to the first vector;  ia is its stride in elements.
				 *   b   pointer to the second vector; ib is its stride in elements.
				 *
				 * Returns the dot product.
				 */
				double
				Ddot (n, a, ia, b, ib)
				register
				int		n,		/* Number of elements in vectors.  */
						ia,		/* Stride of a vector in ELEMENTS. */
						ib;		/* Stride of b vector in ELEMENTS. */
				register
				double	*a,		/* Pointer to first vector.        */
						*b;		/* Pointer to second vector.       */
				{
					register double	sum0 = 0.0,
									sum1 = 0.0,
									sum2 = 0.0,
									sum3 = 0.0;
					register int	m = n & 3;
					int				t;		/* Scratch for swapping the strides.  */
					double			*tp;	/* Scratch for swapping the pointers. */

					/* The ragged cleanup part. */
					while (m--) {
						sum0 += *a * *b;
						a += ia;
						b += ib;
					}

					/* The fast pipelined part */
					n >>= 2;

					/* If only b has unit stride, exchange the roles of the two vectors  */
					/* so the "a has unit stride" loop below can be used.  Both the      */
					/* strides and the pointers must be exchanged; the pointers go       */
					/* through a pointer-typed temporary so no address is truncated.     */
					if (ib == 1 && ia != 1) {
						t = ia;
						ia = ib;
						ib = t;
						tp = a;
						a = b;
						b = tp;
					}

					/* We can optimize if one or more strides are equal to 1. */
					if (ia == 1) {
						/* This runs if both strides are 1. */
						if (ib == 1) {
							/* Advance by a whole group of four per iteration. */
							ia <<= 2;
							ib <<= 2;
							while (n--) {
								sum0 += a[0] * b[0];
								sum1 += a[1] * b[1];
								sum2 += a[2] * b[2];
								sum3 += a[3] * b[3];
								a += ia;
								b += ib;
							}
						}
						/* This runs if stride of a only is equal to 1. */
						else {
							ia <<= 2;
							while (n--) {
								sum0 += a[0] * *b;
								b += ib;
								sum1 += a[1] * *b;
								b += ib;
								sum2 += a[2] * *b;
								b += ib;
								sum3 += a[3] * *b;
								a += ia;
								b += ib;
							}
						}
					}
					/* This runs for the more general case.        */
					/* This is about .5 MFLOPS slower on Sun 4/370 */
					else {
						while (n--) {
							sum0 += *a * *b;
							a += ia;
							b += ib;
							sum1 += *a * *b;
							a += ia;
							b += ib;
							sum2 += *a * *b;
							a += ia;
							b += ib;
							sum3 += *a * *b;
							a += ia;
							b += ib;
						}
					}

					return (sum0 + sum1 + sum2 + sum3);
				}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+/*****************************************************************************/
			
 
				+/* Generic double-precision dot product.  Unrolling will help pipelined      */
			
 
				+/* computers.  Modify accordingly.                                           */
			
 
				+/*****************************************************************************/
			
 
				/*
				 * Ddot -- generic strided double-precision dot product.
				 *
				 * Returns the sum over k = 0 .. n-1 of a[k*ia] * b[k*ib], accumulated in
				 * index order.  ia and ib are strides measured in elements.
				 */
				double
				Ddot (n, a, ia, b, ib)
				register
				int		n,		/* Number of elements in vectors.  */
						ia,		/* Stride of a vector in ELEMENTS. */
						ib;		/* Stride of b vector in ELEMENTS. */
				register
				double	*a,		/* Pointer to first vector.        */
						*b;		/* Pointer to second vector.       */
				{
					register double acc = 0.0;

					/* One product per pass; both cursors step by their own stride. */
					for ( ; n != 0 ; n--, a += ia, b += ib) {
						acc += (*a) * (*b);
					}

					return acc;
				}
			
 
				+
			
 
				+#endif
			
--- a/benchmarks/slalom.input
+++ b/benchmarks/slalom.input
@@ -0,0 +1,4 @@
 
				+40
			
 
				+20
			
 
				+0
			
 
				+
			
--- a/benchmarks/whet.c
+++ b/benchmarks/whet.c
@@ -0,0 +1,213 @@
 
				+#include <math.h>
			
 
				+#include <time.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+
			
 
				/*
				 * Timer routine -- reports the processor time used by the program so far,
				 * in seconds, as measured by the standard clock() function.
				 *
				 * CLOCK_RATE is the number of clock() ticks per second.  It is taken from
				 * the standard macro CLOCKS_PER_SEC instead of a hard-coded 1000000.0 so the
				 * result is correct on platforms whose tick rate differs.  (Old MS-DOS
				 * Turbo C spelled the same constant CLK_TCK.)
				 */
				#define CLOCK_RATE ((double) CLOCKS_PER_SEC)

				float second(void);

				/*
				 * second -- processor time consumed so far, in seconds.
				 *
				 * The division is carried out in double precision and only the final
				 * quotient is narrowed to float, so large tick counts are not rounded
				 * before being scaled.
				 */
				float second(void)
				{
					return (float) ((double) clock() / CLOCK_RATE);
				}
			
 
				+
			
 
				+/* C-style global parameters */
			
 
				+
			
 
				+float T,T1,T2,E1[4];
			
 
				+int J,K,L;
			
 
				+
			
 
				/*
				 * POUT -- print one line of benchmark results.
				 *
				 * Writes a newline followed by the three integer values n, j, k, the four
				 * floating-point values x1..x4 in exponent notation, and the current value
				 * of second().  No trailing newline is written.
				 */
				void POUT(long n, long j, long k, float x1, float x2, float x3, float x4)
				{
					/* Sample the timer first, then emit the whole record in one call. */
					const float elapsed = second();

					printf("\n %7.1ld%7.1ld%7.1ld%12.4e%12.4e%12.4e%12.4e%8.2f",
					       n, j, k, x1, x2, x3, x4, elapsed);
				}
			
 
				+
			
 
				+void PA(E)
			
 
				+float *E;
			
 
				+{
			
 
				+	int j;
			
 
				+	j=0;
			
 
				+	do {
			
 
				+		E[0]=(E[0]+E[1]+E[2]-E[3])*T;
			
 
				+		E[1]=(E[0]+E[1]-E[2]+E[3])*T;
			
 
				+		E[2]=(E[0]-E[1]+E[2]+E[3])*T;
			
 
				+		E[3]=(-E[0]+E[1]+E[2]+E[3])/T2;
			
 
				+		j=j+1;
			
 
				+	}
			
 
				+	while(j<6);
			
 
				+}
			
 
				+
			
 
				+void P0()
			
 
				+{
			
 
				+	E1[J-1]=E1[K-1];
			
 
				+	E1[K-1]=E1[L-1];
			
 
				+	E1[L-1]=E1[J-1];
			
 
				+}
			
 
				+
			
 
				+void P3(X, Y, Z)
			
 
				+float *X, *Y, *Z;
			
 
				+{
			
 
				+	float X1, Y1;
			
 
				+
			
 
				+	X1=*X;
			
 
				+	Y1=*Y;
			
 
				+	X1=T*(X1+Y1);
			
 
				+	Y1=T*(X1+Y1);
			
 
				+	*Z=(X1+Y1)/T2;
			
 
				+}
			
 
				+
			
 
				+/* equivalent description of FORTRAN-style common block ( slow !) */
			
 
				+/*
			
 
				+struct _comm_blk_ {
			
 
				+	float _T, _T1, _T2, _E1[4];
			
 
				+	int _J,_K,_L;
			
 
				+} common;
			
 
				+#define T common._T
			
 
				+#define T1 common._T1
			
 
				+#define T2 common._T2
			
 
				+#define E1 common._E1
			
 
				+#define J common._J
			
 
				+#define K common._K
			
 
				+#define L common._L
			
 
				+*/
			
 
				+
			
 
				+int main()
			
 
				+{
			
 
				+float X1,X2,X3,X4,X,Y,Z;
			
 
				+long I,ISAVE,N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12;
			
 
				+
			
 
				+	printf("Start timing.");
			
 
				+
			
 
				+	I = 10;
			
 
				+	T1=0.50025000;
			
 
				+	T=0.499975000;
			
 
				+	T2=2.0000;
			
 
				+	ISAVE=I;
			
 
				+	N1=0;
			
 
				+	N2=12*I;
			
 
				+	N3=14*I;
			
 
				+	N4=348*I;
			
 
				+	N5=0;
			
 
				+	N6=210*I;
			
 
				+	N7=32*I;
			
 
				+	N8=899*I;
			
 
				+	N9=516*I;
			
 
				+	N10=0;
			
 
				+	N11=93*I;
			
 
				+	N12=0;
			
 
				+	X1=1.0;
			
 
				+	X2=-1.0;
			
 
				+	X3=-1.0;
			
 
				+	X4=-1.;
			
 
				+	for(I=0; I<N1; I++)
			
 
				+	{
			
 
				+		X1=(X1+X2+X3-X4)*T;
			
 
				+		X2=(X1+X2-X3+X4)*T;
			
 
				+		X4=(-X1+X2+X3+X4)*T;
			
 
				+		X3=(X1-X2+X3+X4)*T;
			
 
				+	}
			
 
				+	POUT(N1,N1,N1,X1,X2,X3,X4);
			
 
				+	E1[0]=1.0;
			
 
				+	E1[1]=-1.0;
			
 
				+	E1[2]=-1.0;
			
 
				+	E1[3]=-1.0;
			
 
				+	for(I=0; I<N2; I++)
			
 
				+	{
			
 
				+		E1[0]=(E1[0]+E1[1]+E1[2]-E1[3])*T;
			
 
				+		E1[1]=(E1[0]+E1[1]-E1[2]+E1[3])*T;
			
 
				+		E1[2]=(E1[0]-E1[1]+E1[2]+E1[3])*T;
			
 
				+		E1[3]=(-E1[0]+E1[1]+E1[2]+E1[3])*T;
			
 
				+	}
			
 
				+	POUT(N2,N3,N2,E1[0],E1[1],E1[2],E1[3]);
			
 
				+
			
 
				+	for(I=0; I<N3; I++) PA(E1);
			
 
				+	POUT(N3,N2,N2,E1[0],E1[1],E1[2],E1[3]);
			
 
				+	J=1;
			
 
				+
			
 
				+	for(I=0; I<N4; I++)
			
 
				+	{
			
 
				+		if(J==1) J=2;
			
 
				+		else J=3;
			
 
				+
			
 
				+		if(J<2) J=0;
			
 
				+		else J=1;
			
 
				+
			
 
				+		if(J<1) J=1;
			
 
				+		else J=0;
			
 
				+	}
			
 
				+	POUT(N4,J,J,X1,X2,X3,X4);
			
 
				+	J=1;
			
 
				+	K=2;
			
 
				+	L=3;
			
 
				+	for(I=0; I<N6; I++)
			
 
				+	{
			
 
				+		J=J*(K-J)*(L-K);
			
 
				+		K=L*K-(L-J)*K;
			
 
				+		L=(L-K)*(K+J);
			
 
				+		E1[L-2]=J+K+L;
			
 
				+		E1[K-2]=J*K*L;
			
 
				+	}
			
 
				+	POUT(N6,(long)J,(long)K,E1[0],E1[1],E1[2],E1[3]);
			
 
				+
			
 
				+	X=0.5;
			
 
				+	Y=0.5;
			
 
				+	{
			
 
				+	 register float x=X;
			
 
				+	 register float y=Y;
			
 
				+	 register float t2=T2;
			
 
				+	 register float t=T;
			
 
				+
			
 
				+	 for(I=0; I<N7; I++)
			
 
				+	 {
			
 
				+		x=t*atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
			
 
				+		y=t*atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
			
 
				+	 }
			
 
				+	 X=x; Y=y;
			
 
				+	}
			
 
				+	POUT(N7,(long)J,(long)K,X,X,Y,Y);
			
 
				+	X=1.0;
			
 
				+	Y=1.0;
			
 
				+	Z=1.0;
			
 
				+
			
 
				+	for(I=0; I<N8; I++) P3(&X,&Y,&Z);
			
 
				+	POUT(N8,(long)J,(long)K,X,Y,Z,Z);
			
 
				+	J=1;
			
 
				+	K=2;
			
 
				+	L=3;
			
 
				+	E1[0]=1.0;
			
 
				+	E1[1]=2.0;
			
 
				+	E1[2]=3.0;
			
 
				+	for(I=0; I<N9; I++) P0();
			
 
				+	POUT(N9,(long)J,(long)K,E1[0],E1[1],E1[2],E1[3]);
			
 
				+	J=2;
			
 
				+	K=3;
			
 
				+	for(I=0; I<N10; I++)
			
 
				+	{
			
 
				+		J+=K;
			
 
				+		K+=J;
			
 
				+		J-=K;
			
 
				+		K-=J+J;
			
 
				+	}
			
 
				+	POUT(N10,(long)J,(long)K,X1,X2,X3,X4);
			
 
				+	X=0.75;
			
 
				+	{
			
 
				+	 register float x=X;
			
 
				+	 register float t1=T1;
			
 
				+	 for(I=0; I<N11; I++) 	x=sqrt(exp(log(x)/t1));
			
 
				+	 X=x;
			
 
				+	}
			
 
				+	POUT(N11,(long)J,(long)K,X,X,X,X);
			
 
				+
			
 
				+	printf("\n %g whetstones per second\n", 1.0e+08/second());
			
 
				+}
			
 
				+