| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138 |
- /******************************************************************************
- S L A L O M
- Scalable Language-independent Ames Laboratory One-minute Measurement
- The following program is the first benchmark based on fixed time rather
- than fixed problem comparison. Not only is fixed time more representative
- of the way people use computers, it also greatly increases the scope and
- longevity of the benchmark. SLALOM is very scalable, and can be used to
- compare computers as slow as 126 floating-point operations per second
- (FLOPS) to computers running a trillion times faster. The scalability can
- be used to compare single processors to massively parallel collections
- of processors, and to study the space of problem size vs. ensemble size
- in fine detail. It resembles the LINPACK benchmark since it involves
- factoring and backsolving a (nearly) dense matrix, but incorporates a
- number of improvements to that benchmark that we hope will make SLALOM
- a better reflection of general system performance.
- The SLALOM benchmark solves a complete, real problem (optical radiosity
- on the interior of a box), not a contrived kernel or a synthetic mixture of
- sample operations. SLALOM is unusual since it times input, problem setup,
- solution, and output, not just the solution. For slower computers, the
- problem setup will take the majority of the time; it grows as the square of
- the problem size. The solver grows as the cube of the problem size, and
- dominates the time for large values of n.
- While the following is C, you are free to translate it into any
- language you like, including assembly language specific to one computer.
- You may use compiler directives, hand-tuned library calls, loop unrolling,
- and even change the algorithm, if you can provide a convincing argument
- that the program still works for the full range of possible inputs. For
- example, if you replace the direct solver with an iterative one, you must
- make sure your method is correct even when the geometry is quite eccentric
- and the box faces are highly reflective. (rho = .999)
- The main() driver should be used with the value of 60 seconds for the
- SLALOM benchmark. The work done for a particular problem size is figured
- after timing has ceased, so there is no overhead for work assessment. The
- residual check ||Ax - b|| is also done after timing has ceased. Two
- computers may be compared either by their problem size n, or by their MFLOPS
- rate, never by the ratio of execution times. Times will always be near one
- minute in SLALOM. We have used the following weights for floating-point
- operation counting, based on the weights used by Lawrence Livermore National
- Laboratory:
- OPERATION WEIGHT
- a=b, a=(constant) 0
- a<0, a<=0, a==0, a!=0, a>0, a>=0 0
- -a, fabs(a), fsgn(a, b) 0
- a+b, a-b, a*b, a^2 1
- a<b, a<=b, a==b, a!=b, a>b, a>=b 1
- (int) a, (double)b 1
- 1/a, -1/a 3
- a/b 4
- sqrt(a) 4
- Format to or from ASCII string 6
- sin(a), cos(a), tan(a), log(a), atan(a), exp(a) 8
- We invite you to share with us the results of any measurements that you
- make with SLALOM. We do NOT accept anonymous data; machine timings will be
- referenced and dated.
- The least you need to do to adapt SLALOM to your computer is:
- 1. In the "Measure" routine, set NMAX to a value large enough to keep
- the computer working for a minute. Vary it slightly if it helps
- (for reasons of cache size, interleaving, etc.)
- 2. Replace the timer call in "When" with the most accurate wall-clock
- timer at your disposal. If only CPU time is available, try to run
- the job standalone or at high priority, since we are ultimately
- interested in the top of the statistical range of performance.
- 3. Edit in the information specific to your test in the "What"
- routine, so that final output will be automatically annotated.
- 4. Compile, link, and run the program, interacting to select values
- of n that bracket a time of one minute. Once everything is
- running, run it as a batch job so as to record the session.
- Examples of ways you may optimize performance:
- 1. Unroll the loops in SetUp1 and SetUp2; it is possible to
- vectorize both SetUp1 and SetUp2 at the cost of some extra
- operations, program complexity, and storage.
- 2. Replace the innermost loops of Solver with calls to well-tuned
- libraries of linear algebra routines, such as DDOT from the
- Basic Linear Algebra Subroutines (level 1 BLAS). Better still,
- use a tuned library routine for all of Solver; the sparsity
- exploited in Solver is only a few percent, so you will usually
- gain more than you lose by applying a dense symmetric solver.
- 3. Parallelize the SetUp and Solver routines; all are highly
- parallel. Each element of the matrix can be constructed
- independently, once each processor knows the geometry and part of
- the partitioning into regions. A substantial body of literature
- now exists for performing the types of operations in Solver in
- parallel.
- 4. Overlap computation with output. Once the Region routine is done,
- the first part of the output file (patch geometry) can be written
- while the radiosities are being calculated.
- Examples of what you may NOT do:
- 1. The tuning must not be made specific to the particular input
- provided. For example, you may not eliminate IF tests simply
- because they always come out the same way for this input; you
- may not use precomputed answers or table look-up unless those
- answers and tables cover the full range of possible inputs; and
- you may not exploit symmetry for even values of the problem size.
- 2. You may not disable the self-consistency tests in SetUp3 and
- Verify, nor alter their tolerance constants.
- 3. You may not change the input or output files to unformatted
- binary or other format that would render them difficult to create
- or read for humans.
- 4. You may not eliminate the reading of the "geom" file by putting
- its data directly into the compiled program.
- 5. You may not change any of the work assessments in Meter. If you
- use more floating-point operations than indicated, you must still
- use the assessments provided. If you find a way to use fewer
- operations and still get the job done for arbitrary input
- parameters, please tell us!
- -John Gustafson, Diane Rover, Michael Carter,
- and Stephen Elbert
- Ames Laboratory, Ames, Iowa 50011
- ******************************************************************************/
- /*****************************************************************************/
- /* The following program finds a value n such that a problem of size n */
- /* takes just under "goal" seconds to execute. */
- /* */
- /* John Gustafson, Diane Rover, Michael Carter, and Stephen Elbert */
- /* Ames Laboratory, 3/18/90 */
- /* */
- /* Calls: Meter Measures execution time for some application. */
- /* What Prints work-timing statistics and system information. */
- /*****************************************************************************/
- #include <stdio.h>
- #include <math.h>
- #include <sys/time.h>
- /* NMAX = Largest npatch for your computer; adjust as needed. */
- #define NMAX 2048
- #define EPS (0.5e-8)
- #define FALSE (1==0)
- #define TRUE (!FALSE)
- #define MAX(a,b) (((a) > (b)) ? (a) : (b))
- /* Global variables and function return types: */
- double goal, /* User input, fixed-time benchmark goal, in seconds. */
- timing, /* Elapsed time returned by Meter routine, in seconds.*/
- work, /* In this case, number of FLOPs performed. */
- When(), /* Wall clock in seconds. */
- Ddot(); /* Double dot product. */
- int mean, /* Avg between upper and lower bounds for bisection */
- /* method. */
- n, /* The problem size. */
- nupper, /* Upper bound on problem size, used in iterating */
- /* toward goal. */
- Meter(), /* Driver for following benchmark functions. */
- Reader (), /* Reads problem description from 'geom' file. */
- Region (), /* Subdivides box faces into patches. */
- SetUp3 (), /* Set up matrix to solve. */
- Storer (), /* Write result to 'answer' file. */
- Verify (); /* Verify the radiosity solution from solver. */
- void SetUp1 (), /* Set up matrix to solve. */
- SetUp2 (), /* Set up matrix to solve. */
- Solver (); /* Solve the radiosity matrix. */
- main ()
- {
- int ok; /* Return code temporary storage. */
- /* Get desired number of seconds: */
- printf ("Enter the number of seconds that is the goal: ");
- scanf ("%lg", &goal);
- /* Get lower and upper bounds for n from the standard input device: */
- do {
- printf ("Enter a lower bound for n: ");
- scanf ("%d", &n);
- if (n <= 0)
- exit(0);
- ok = Meter (n, &timing, &work);
- if (timing >= goal)
- printf ("Must take less than %g seconds. Took %g.\n",
- goal, timing);
- } while (!ok || timing >= goal);
- do {
- printf ("Enter an upper bound for n: ");
- scanf ("%d", &nupper);
- if (nupper <= 0)
- exit(0);
- ok = Meter (nupper, &timing, &work);
- if (timing < goal) {
- printf ("Must take at least %g seconds. Took %g.\n",
- goal, timing);
- n = MAX(nupper, n);
- }
- } while (!ok || timing < goal);
-
- /*
- * While the [n, nupper] interval is larger than 1, bisect it and
- * pick a half:
- */
- while (nupper - n > 1) {
- mean = (n + nupper) / 2;
- ok = Meter (mean, &timing, &work);
- if (timing < goal)
- n = mean;
- else
- nupper = mean;
- printf ("New interval: [%d,%d]\n", n, nupper);
- }
-
- /* Ensure that most recent run was for n, not nupper. */
- ok = Meter (n, &timing, &work);
- /* Print out final statistics. */
- What (n, timing, work);
- }
- /*****************************************************************************/
- /* This routine should be edited to contain information for your system. */
- /*****************************************************************************/
- What (n, timing, work)
- int n;
- double timing, work;
- {
- int i;
- static char *info[] = {
- "Machine: SUN 4/370GX Processor: SPARC",
- "Memory: 32 MB # of procs: 1",
- "Cache: 128 KB # used: 1",
- "NMAX: 512 Clock: 25 MHz",
- "Disk: .3GB SCSI+.7GB SMD Node name: amssun2",
- "OS: SUNOS 4.0.3 Timer: Wall, gettimeofday()",
- "Language: C Alone: yes",
- "Compiler: cc Run by: M. Carter",
- "Options: -O Date: 23 May 1990",
- NULL
- };
- printf ("\n");
- for (i = 0 ; info[i] ; i++)
- puts (info[i]);
- printf ("M ops: %-13lg Time: %-.3lf seconds\n",
- work * 1e-6, timing);
- printf ("n: %-6d MFLOPS: %-.5lg\n",
- n, (work / timing) * 1e-6);
- printf ("Approximate data memory use: %d bytes.\n",
- 8 * n * n + 120 * n + 800);
- }
- /*****************************************************************************/
- /* This routine measures time required on a revised LINPACK-type benchmark, */
- /* including input, matrix generation, solution, and output. */
- /* */
- /* John Gustafson, Diane Rover, Michael Carter, and Stephen Elbert */
- /* Ames Laboratory, 3/18/90 */
- /* */
- /* Calls: Reader Reads the problem description from secondary storage. */
- /* Region Partitions box surface into rectangular regions (patches).*/
- /* SetUp1 Sets up equations from patch geometries-parallel faces. */
- /* SetUp2 Sets up equations from patch geometries-orthogonal faces. */
- /* SetUp3 Sets up equations-row normalization and radiant props. */
- /* Solver Solves the equations by LDL factorization. */
- /* Storer Stores solution (patch radiosities) on secondary storage. */
- /* When Returns wall-clock time, in seconds. */
- /*****************************************************************************/
- Meter (npatch, timing, work)
- int npatch; /* In, problem size, here the number of equations. */
- double *timing, /* Out, number of seconds measured. */
- *work; /* Out, work done, here the number of FLOPs. */
- {
- static
- double area[NMAX], /* Areas of patches * 8 * pi. */
- box[7], /* Dimensions of box in x, y, z directions. */
- coeff[NMAX][NMAX], /* The coefficients of the eqns to solve. */
- diag[3][NMAX], /* Diag terms of the eqns to solve. (RGB) */
- emiss[6][3], /* (RGB) emissivities of patches. */
- place[3][NMAX], /* Width-height-depth position of patches. */
- result[3][NMAX], /* Answer radiosities (RGB). */
- rho[6][3], /* (RGB) Reflectivities of patches. */
- rhs[3][NMAX], /* Right-hand sides of eqns to solve (RGB). */
- size[2][NMAX]; /* Width-height sizes of patches. */
- double ops[8], /* Floating-point operation counts. */
- p[6], /* Number of patches in faces. */
- sec[8], /* Times for routines, in seconds. */
- tmp1, tmp2; /* Double temporary variables. */
- int i, /* Loop counter. */
- itmp1, /* Integer temporary variable. */
- non0; /* Index of first nonzero off-diagonal elem. */
- static
- int loop[6][2]; /* Patch number ranges for faces. */
- static char *tasks[] = { /* Names of all the functions in benchmark. */
- "Reader", "Region",
- "SetUp1", "SetUp2",
- "SetUp3", "Solver",
- "Storer"
- };
- static char *format = /* Output line format. */
- "%6.6s%8.3f%17.0f%14.6f%10.1f %%\n";
- /* First check that npatch lies between 6 and NMAX: */
- if (npatch < 6) {
- printf ("Must be at least 6, the number of faces.\n");
- return (FALSE);
- }
- else if (npatch > NMAX) {
- printf ("Exceeds %d = maximum for this system.\n", NMAX);
- return (FALSE);
- }
- /* Ensure that previous 'answer' file is deleted: */
- unlink ("answer");
- /* Time the tasks, individually and collectively. */
- sec[0] = When();
- if (!Reader (box, rho, emiss))
- return (FALSE);
- sec[1] = When();
- if (!Region (npatch, loop, box, place, size, area))
- return (FALSE);
- sec[2] = When();
- SetUp1 (npatch, loop, coeff, place, size);
- sec[3] = When();
- SetUp2 (npatch, loop, coeff, place, size);
- sec[4] = When();
- if (!SetUp3 (npatch, loop, area, rho, emiss, coeff, diag, rhs))
- return (FALSE);
- sec[5] = When();
- non0 = loop[1][0];
- Solver (npatch, non0, coeff, diag, rhs, result);
- sec[6] = When();
- Storer (npatch, loop, place, size, result);
- sec[7] = When();
- *timing = sec[7] - sec[0];
- for (i = 0 ; i < 7 ; i++)
- sec[i] = sec[i+1] - sec[i];
-
- /* Assess floating-point work done by each routine called, and total: */
- /* Note the ops counts are talleyed into a double array, and there */
- /* some strange casts to double in some equations. This is to */
- /* prevent integer overflow. */
- itmp1 = 0;
- tmp1 = 0.0;
- for (i = 0 ; i < 6 ; i++) {
- p[i] = loop[i][1] - loop[i][0] + 1;
- tmp1 += p[i] * p[i];
- itmp1 += sqrt(p[i] * box[i] / box[i + 1]) + 0.5;
- }
- tmp2 = p[0] * p[3] + p[1] * p[4] + p[2] * p[5];
- ops[0] = 258;
- ops[1] = 154 + (double) 8 * itmp1 + npatch;
- ops[2] = 6 + 532 * tmp2;
- ops[3] = 8*npatch + 370 * ((double) npatch * npatch - tmp1 - 2*tmp2) / 2.0;
- ops[4] = 72 + (double) 9 * npatch + (double) npatch * npatch - tmp1;
- ops[5] = npatch * (npatch * ((double) npatch + 7.5) - 2.5) - 21
- + (non0+1) * ((non0+1) * (2 * ((double) non0+1) - 16.5) + 35.5)
- + (non0+1) * npatch * (9 - 3 * ((double) non0+1));
- ops[6] = 48 * npatch;
- *work = ops[0] + ops[1] + ops[2] + ops[3] + ops[4] + ops[5] + ops[6];
- /* Display timing-work-speed breakdown by routine. */
- printf ("%d patches:\n", npatch);
- printf (" Task Seconds Operations MFLOPS %% of Time\n");
- for (i = 0 ; i < 7 ; i++) {
- if (sec[i] == 0.0)
- sec[i] = 0.001;
- printf (format, tasks[i], sec[i], ops[i], (ops[i] / sec[i]) * 1e-6,
- 100.0 * sec[i] / *timing);
- }
- printf (format, "TOTALS", *timing, *work, (*work / *timing) * 1e-6, 100.0);
- Verify (npatch, coeff, diag, rhs, result);
- return (TRUE);
- }
- /*****************************************************************************/
- /* This function should return the actual, wall clock time (not CPU time) */
- /* in seconds as accurately as possible. Change it to your system timer. */
- /*****************************************************************************/
- double
- When()
- {
- struct timeval tp;
- struct timezone tzp;
- gettimeofday (&tp, &tzp);
- return ((double) tp.tv_sec + (double) tp.tv_usec * 1e-6);
- }
- /*****************************************************************************/
- /* The following routine reads in the problem description from secondary */
- /* storage, and checks that numbers are in reasonable ranges. */
- /*****************************************************************************/
- Reader (box, rho, emiss)
- double box[], /* Out: Dimensions of box in x, y, z directions. */
- rho[][3], /* Out: (RGB) Reflectivities of patches. */
- emiss[][3]; /* Out: (RGB) emissivities of patches. */
- {
- /*
- * Local variables:
- * infile Device number for input file.
- * i, j Loop counters.
- * tmp1 Maximum emissivity, to check that emissivities are not all 0.
- */
- int i, j, /* Loop counters. */
- n; /* Number of args fscanf()'ed from file. */
- double tmp1; /* Maximum emissivity. */
- FILE *infile; /* Input file pointer. */
- char buff[81]; /* Buffer used to eat a line of input. */
- /* Open the input file and read in the data. */
- if ((infile = fopen ("geom", "r")) == NULL) {
- printf ("slalom: 'geom' geometry file not found.\n");
- exit (1);
- }
- /* Read the box coordinates and error check. */
- n = 0;
- for (i = 0 ; i < 3 ; i++) {
- n += fscanf (infile, "%lg", &box[i]);
- }
- fgets (buff, 80, infile); /* Eat the rest of the line. */
- if (n != 3) {
- printf ("Must specify exactly 3 box coordinates.\n");
- exit(1);
- }
- /* Read the reflectivities and error check. */
- n = 0;
- for (j = 0 ; j < 3 ; j++) {
- for (i = 0 ; i < 6 ; i++) {
- n += fscanf (infile, "%lg", &rho[i][j]);
- }
- }
- fgets (buff, 80, infile); /* Eat the rest of the line. */
- if (n != 18) {
- printf ("Must specify exactly 18 box coordinates.\n");
- exit(1);
- }
- /* Read the emissivities and error check. */
- n = 0;
- for (j = 0 ; j < 3 ; j++) {
- for (i = 0 ; i < 6 ; i++) {
- n += fscanf (infile, "%lg", &emiss[i][j]);
- }
- }
- fgets (buff, 80, infile); /* Eat the rest of the line. */
- if (n != 18) {
- printf ("Must specify exactly 18 box coordinates.\n");
- exit(1);
- }
- fclose (infile);
- /* Now sanity-check the values that were just read. */
- for (j = 0 ; j < 3 ; j++) {
- if (box[j] < 1.0 || box[j] >= 100.0) {
- printf ("Box dimensions must be between 1 and 100.\n");
- return (FALSE);
- }
- box[j+3] = box[j];
- tmp1 = 0.0;
- for (i = 0 ; i < 6 ; i++) {
- if (rho[i][j] < 0.000 || rho[i][j] > 0.999) {
- printf ("Reflectivities must be between .000 and .999.\n");
- return (FALSE);
- }
- if (emiss[i][j] < 0.0) {
- printf ("Emissivity cannot be negative.\n");
- return (FALSE);
- }
- if (tmp1 < emiss[i][j])
- tmp1 = emiss[i][j];
- }
- if (tmp1 == 0.0) {
- printf ("Emissivities are zero. Problem is trivial.\n");
- return (FALSE);
- }
- }
- box[6] = box[3];
- return (TRUE);
- }
- /*****************************************************************************/
- /* The following routine decomposes the surface of a variable-sized box */
- /* into patches that are as nearly equal in size and square as possible. */
- /*****************************************************************************/
- Region (npatch, loop, box, place, size, area)
- int npatch, /* In: Problem size. */
- loop[][2]; /* Out: Patch number ranges for faces. */
- double area[], /* Out: 8pi * areas of the patches. */
- box[], /* In: Dimensions of box in x, y, z directions. */
- place[][NMAX], /* Out: Width-height-depth positions of patches. */
- size[][NMAX]; /* Out: Width-height sizes of patches. */
- {
- int icol, /* Loop counter over the number of columns. */
- ipatch, /* Loop counter over the number of patches. */
- iface, /* Loop counter over the number of faces. */
- itmp1, /* Integer temporary variables. */
- itmp2, /* Integer temporary variables. */
- last, /* Inner loop ending value. */
- lead, /* Inner loop starting value. */
- numcol, /* Number of columns on faces. */
- numpat, /* Number of patches on a face. */
- numrow; /* Number of rows of patches in a column. */
- double height, /* Height of a patch within a column. */
- tmp1, /* double temporary variables. */
- tmp2, /* double temporary variables. */
- tmp3, /* double temporary variables. */
- tmp4, /* double temporary variables. */
- width; /* Width of a column of patches. */
- /* Allocate patches to each face, proportionate to area of each face. */
- tmp1 = 2.0 * (box[0] * box[1] + box[1] * box[2] + box[2] * box[0]);
- tmp2 = 0.0;
- tmp3 = npatch;
- loop[0][0] = 0;
- for (iface = 0 ; iface < 5 ; iface++) {
- tmp2 = tmp2 + box[iface] * box[iface + 1];
- loop[iface][1] = (int) (tmp3 * tmp2 / tmp1 + 0.5) - 1;
- loop[iface + 1][0] = loop[iface][1] + 1;
- }
- loop[5][1] = npatch - 1;
- /* Subdivide each face into numpat patches. */
- for (iface = 0 ; iface < 6 ; iface++) {
- numpat = loop[iface][1] - loop[iface][0] + 1;
- tmp3 = 0.0;
- if (iface >= 3)
- tmp3 = box[iface-1];
- numcol = (int) (sqrt(numpat * box[iface] / box[iface + 1]) + 0.5);
- if (numcol > numpat)
- numcol = numpat;
- if (numcol == 0)
- numcol = 1;
- width = box[iface] / numcol;
- itmp1 = numcol - 1;
- tmp1 = 0.0;
- for (icol = 0 ; icol < numcol ; icol++) {
- itmp2 = itmp1 / numcol;
- numrow = (itmp1 + numpat) / numcol - itmp2;
- if (numrow == 0) {
- printf ("Eccentric box requires more patches.\n");
- return (FALSE);
- }
- height = box[iface + 1] / numrow;
- tmp2 = 0.0;
- tmp4 = width * height * (8.0 * M_PI);
- lead = loop[iface][0] + itmp2;
- last = lead + numrow;
- for (ipatch = lead ; ipatch < last ; ipatch++) {
- size[0][ipatch] = width;
- size[1][ipatch] = height;
- place[0][ipatch] = tmp1;
- place[1][ipatch] = tmp2;
- place[2][ipatch] = tmp3;
- area[ipatch] = tmp4;
- tmp2 = tmp2 + height;
- }
- tmp1 = tmp1 + width;
- itmp1 = itmp1 + numpat;
- }
- }
- return (TRUE);
- }
- /*****************************************************************************/
- /* This routine sets up the radiosity matrix for parallel patches. */
- /*****************************************************************************/
- void
- SetUp1 (npatch, loop, coeff, place, size)
- int npatch, /* In: Problem size. */
- loop[][2]; /* In: Patch number ranges for faces. */
- double coeff[][NMAX], /* Out: The coefficients of the eqns to solve. */
- place[][NMAX], /* In: Width-height-depth positions of patches. */
- size[][NMAX]; /* In: Width-height sizes of patches. */
- {
- int i, j, k, /* General loop counters. */
- m, n, /* General loop counters. */
- iface, /* Loop counter over the number of faces. */
- ipatch, /* Loop counter over the number of patches. */
- jface, /* Face coupled to iface when computing mat. elems. */
- jpatch; /* Patch coupled to ipatch when computing mat. elems.*/
- double d[2][2][2], /* Point-to-point couplings between patch corners. */
- d2[2][2][2],/* Squares of d values, to save recomputation. */
- tmp1, tmp2, /* Double temporary variables. */
- tmp3, tmp4, /* Double temporary variables. */
- tmp5, tmp6, /* Double temporary variables. */
- tmp7, tmp8; /* Double temporary variables. */
- for (iface = 0 ; iface < 3 ; iface++) {
- jface = iface + 3;
- tmp1 = place[2][loop[jface][0]] * place[2][loop[jface][0]];
- tmp6 = tmp1 + tmp1;
- for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
- for (jpatch=loop[jface][0] ; jpatch <= loop[jface][1] ; jpatch++) {
- for (j = 0 ; j < 2 ; j++) {
- d [0][0][j] = place[j][jpatch] - place[j][ipatch];
- d [1][0][j] = d[0][0][j] + size[j][jpatch];
- d [0][1][j] = d[0][0][j] - size[j][ipatch];
- d [1][1][j] = d[1][0][j] - size[j][ipatch];
- d2[0][0][j] = d[0][0][j] * d[0][0][j];
- d2[1][0][j] = d[1][0][j] * d[1][0][j];
- d2[0][1][j] = d[0][1][j] * d[0][1][j];
- d2[1][1][j] = d[1][1][j] * d[1][1][j];
- }
- tmp2 = 0.0;
- for (m = 0 ; m < 2 ; m++) {
- for (i = 0 ; i < 2 ; i++) {
- tmp3 = d2[m][i][1] + tmp1;
- tmp4 = sqrt(tmp3);
- tmp5 = 1.0 / tmp4;
- tmp8 = 0.0;
- for (k = 0 ; k < 2 ; k++) {
- for (n = 0 ; n < 2 ; n++) {
- tmp7 = d[k][n][0];
- tmp8 = -tmp7 * atan(tmp7 * tmp5) - tmp8;
- }
- tmp8 = -tmp8;
- }
- tmp2 = -4.0 * tmp4 * tmp8 - tmp2 - tmp6 *
- log(((d2[1][0][0] + tmp3) * (d2[0][1][0] + tmp3)) /
- ((d2[0][0][0] + tmp3) * (d2[1][1][0] + tmp3)));
- }
- tmp2 = -tmp2;
- }
- for (m = 0 ; m < 2 ; m++) {
- for (i = 0 ; i < 2 ; i++) {
- tmp4 = sqrt(d2[m][i][0] + tmp1);
- tmp5 = 1.0 / tmp4;
- tmp8 = 0.0;
- for (k = 0 ; k < 2 ; k++) {
- for (n = 0 ; n < 2 ; n++) {
- tmp7 = d[k][n][1];
- tmp8 = -tmp7 * atan(tmp7 * tmp5) - tmp8;
- }
- tmp8 = -tmp8;
- }
- tmp2 = -4.0 * tmp4 * tmp8 - tmp2;
- }
- tmp2 = -tmp2;
- }
- coeff[ipatch][jpatch] = tmp2;
- coeff[jpatch][ipatch] = tmp2;
- }
- }
- }
- }
- /*****************************************************************************/
- /* This routine sets up the radiosity matrix for orthogonal patches. */
- /*****************************************************************************/
- void
- SetUp2 (npatch, loop, coeff, place, size)
- int npatch, /* In: Problem size. */
- loop[][2]; /* In: Patch number ranges for faces. */
- double coeff[][NMAX], /* Out: The coefficients of the eqns to solve. */
- place[][NMAX], /* In: Width-height-depth positions of patches. */
- size[][NMAX]; /* In: Width-height sizes of patches. */
- {
- int m, /* General loop counters. */
- iface, /* Loop counter over the number of faces. */
- ipatch, /* Loop counter over the number of patches. */
- jface, /* Face coupled to iface when computing mat. elems. */
- jpatch; /* Patch coupled to ipatch when computing mat. elems.*/
- double tmpb, tmpa,
- c11d, c12d, c21d, c22d, c11s, c12s, c21s, c22s,
- d11d, d12d, d21d, d22d, d11s, d12s, d21s, d22s,
- d11i, d12i, d21i, d22i, a10s, a20s, b01s, b02s,
- e1111, e1211, e2111, e2211, e1112, e1212, e2112, e2212,
- e1121, e1221, e2121, e2221, e1122, e1222, e2122, e2222;
- for (iface = 0 ; iface < 6 ; iface++) {
- for (m = 0 ; m < 2 ; m++) {
- jface = (iface + m + 1) % 6;
- for (ipatch=loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
- a10s = place[m][ipatch] - place[2][loop[jface][0]];
- a20s = a10s + size[m][ipatch];
- a10s = a10s * a10s;
- a20s = a20s * a20s;
- for (jpatch=loop[jface][0] ; jpatch<=loop[jface][1];jpatch++) {
- c11d = place[m][jpatch] - place[1-m][ipatch];
- c12d = c11d + size[m][jpatch];
- c21d = c11d - size[1-m][ipatch];
- c22d = c12d - size[1-m][ipatch];
- c11s = c11d * c11d;
- c12s = c12d * c12d;
- c21s = c21d * c21d;
- c22s = c22d * c22d;
- b01s = place[1 - m][jpatch] - place[2][ipatch];
- b02s = b01s + size[1 - m][jpatch];
- /**/
- /* Bump the term by a small real to avoid
- /* singularities in coupling function:
- /**/
- b01s = b01s * b01s + 1e-35;
- b02s = b02s * b02s + 1e-35;
- d11s = a10s + b01s;
- d12s = a10s + b02s;
- d21s = a20s + b01s;
- d22s = a20s + b02s;
- d11d = sqrt(d11s);
- d12d = sqrt(d12s);
- d21d = sqrt(d21s);
- d22d = sqrt(d22s);
- d11i = 1.0 / d11d;
- d12i = 1.0 / d12d;
- d21i = 1.0 / d21d;
- d22i = 1.0 / d22d;
- tmpa = d11d * ( c11d * atan (c11d * d11i)
- - c12d * atan (c12d * d11i)
- - c21d * atan (c21d * d11i)
- + c22d * atan (c22d * d11i))
- + d12d * (-c11d * atan (c11d * d12i)
- + c12d * atan (c12d * d12i)
- + c21d * atan (c21d * d12i)
- - c22d * atan (c22d * d12i))
- + d21d * (-c11d * atan (c11d * d21i)
- + c12d * atan (c12d * d21i)
- + c21d * atan (c21d * d21i)
- - c22d * atan (c22d * d21i))
- + d22d * ( c11d * atan (c11d * d22i)
- - c12d * atan (c12d * d22i)
- - c21d * atan (c21d * d22i)
- + c22d * atan (c22d * d22i));
- e1111 = c11s + d11s;
- e1211 = c12s + d11s;
- e2111 = c21s + d11s;
- e2211 = c22s + d11s;
- e1112 = c11s + d12s;
- e1212 = c12s + d12s;
- e2112 = c21s + d12s;
- e2212 = c22s + d12s;
- e1121 = c11s + d21s;
- e1221 = c12s + d21s;
- e2121 = c21s + d21s;
- e2221 = c22s + d21s;
- e1122 = c11s + d22s;
- e1222 = c12s + d22s;
- e2122 = c21s + d22s;
- e2222 = c22s + d22s;
- tmpb = c11s * log( e1111 * e1122 / (e1112 * e1121))
- - c12s * log( e1211 * e1222 / (e1212 * e1221))
- - c21s * log( e2111 * e2122 / (e2112 * e2121))
- + c22s * log( e2211 * e2222 / (e2212 * e2221))
- - d11s * log( e1111 * e2211 / (e1211 * e2111))
- + d12s * log( e1112 * e2212 / (e1212 * e2112))
- + d21s * log( e1121 * e2221 / (e1221 * e2121))
- - d22s * log( e1122 * e2222 / (e1222 * e2122));
- coeff[ipatch][jpatch] = fabs(4.0 * tmpa + tmpb);
- coeff[jpatch][ipatch] = coeff[ipatch][jpatch];
- }
- }
- }
- }
- }
- /*****************************************************************************/
- /* This routine sets up the radiosity matrix... normalizes row sums to 1, */
- /* and includes terms derived from reflectivites and emissivities of faces. */
- /*****************************************************************************/
- SetUp3 (npatch, loop, area, rho, emiss, coeff, diag, rhs)
- int npatch, /* In: Problem size. */
- loop[][2]; /* In: Patch number ranges for faces. */
- double area[], /* In: 8 * pi * areas of the patches. */
- rho[][3], /* In: (RGB) Reflectivities of the face interiors. */
- emiss[][3], /* In: (RGB) Emissivities of the face interiors. */
- coeff[][NMAX], /* Out: The coefficients of the eqns to solve. */
- diag[][NMAX], /* Out: (RGB) Diagonal terms of the system. */
- rhs[][NMAX]; /* Out: (RGB) Right-hand sides of system to solve. */
- {
- /*
- * Local variables:
- * iface Loop counter over the number of faces.
- * ipatch Outer loop counter over the number of patches.
- * j Loop counter over each color (R-G-B).
- * jpatch Inner loop counter over the number of patches.
- * tmp1 double temporary variable.
- * vtmp1-2 double vector temporary variables.
- */
- int j, /* (RGB) Loop counter over each color. */
- iface, /* Loop counter over the number of faces. */
- ipatch, /* Outer loop counter over the number of patches. */
- jpatch; /* Inner loop counter over the number of patches. */
- double tmp1, /* Double temporary variable. */
- vtmp1[3], /* Double vector temporary variables. */
- vtmp2[3]; /* Double vector temporary variables. */
- /* Ensure that row sums to 1, and put in reflectivities (rho) and */
- /* emissivities. */
- for (iface = 0 ; iface < 6 ; iface++) {
- for (j = 0 ; j < 3 ; j++) {
- vtmp1[j] = 1.0 / rho[iface][j];
- vtmp2[j] = emiss[iface][j] * vtmp1[j];
- }
- for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
- tmp1 = 0.0;
- for (jpatch = 0 ; jpatch < loop[iface][0] ; jpatch++) {
- tmp1 += coeff[ipatch][jpatch];
- }
- for (jpatch = loop[iface][1]+1 ; jpatch < npatch ; jpatch++) {
- tmp1 += coeff[ipatch][jpatch];
- }
- /* Make sure row sum (total form factor) is close to 1: */
- if (fabs(tmp1 - area[ipatch]) > (0.5e-9 * tmp1)) {
- printf ("Total form factor is too far from unity.\n");
- return (FALSE);
- }
- tmp1 = -tmp1;
- /* Set coplanar patch interactions to zero. */
- for (jpatch=loop[iface][0] ; jpatch <= loop[iface][1] ; jpatch++) {
- coeff[ipatch][jpatch] = 0.0;
- }
- /* Assign diagonal entries and right-hand sides. */
- for (j = 0 ; j < 3 ; j++) {
- diag[j][ipatch] = vtmp1[j] * tmp1;
- rhs[j][ipatch] = vtmp2[j] * tmp1;
- }
- }
- }
- return (TRUE);
- }
- /*****************************************************************************/
- /* This routine factors and backsolves a real, symmetric, near-dense matrix */
- /* by LDL factorization. No pivoting; the matrix is diagonally dominant. */
- /*****************************************************************************/
- void
- Solver (npatch, non0, coeff, diag, rhs, result)
- int npatch, /* In: Problem size. */
- non0; /* In: Index of first nonzero off-diagonal mat. elem.*/
- double coeff[][NMAX], /* In/Out: The coefficients of the eqns to solve. */
- diag[][NMAX], /* Out: (RGB) Diagonal terms of the system. */
- rhs[][NMAX], /* In: (RGB) Right-hand sides of system to solve. */
- result[][NMAX]; /* Out: (RGB) solution radiosities. */
- {
- int i, j, /* General loop counters. */
- k, m; /* General loop counters. */
- double tmp1; /* Double temporary variable. */
- /* Load lower triangle of coefficients, diagonal, and solution vector. */
- for (m = 0 ; m < 3 ; m++) {
- for (i = non0 ; i < npatch ; i++) {
- coeff[i][i] = diag[m][i];
- result[m][i] = rhs[m][i];
- for (j = 0 ; j < i ; j++) {
- coeff[i][j] = coeff[j][i];
- }
- }
- /* Factor matrix, writing factors on top of original matrix. */
- for (j = 0 ; j < non0 ; j++) {
- coeff[j][j] = 1.0 / diag[m][j];
- result[m][j] = rhs[m][j];
- }
- for (j = non0 ; j < npatch ; j++) {
- for (k = non0 ; k < j ; k++) {
- coeff[j][k] -= Ddot (k, &coeff[k][0], 1, &coeff[j][0], 1);
- }
- for (k = 0 ; k < j ; k++) {
- tmp1 = coeff[j][k];
- coeff[j][k] = tmp1 * coeff[k][k];
- coeff[j][j] -= tmp1 * coeff[j][k];
- }
- coeff[j][j] = 1.0 / coeff[j][j];
- }
- /* Backsolve, in three stages (for L, D, and L transpose). */
- for (k = non0 ; k < npatch ; k++) {
- result[m][k] -= Ddot (k, &result[m][0], 1, &coeff[k][0], 1);
- }
- for (k = 0 ; k < npatch ; k++) {
- result[m][k] *= coeff[k][k];
- }
- for (k = npatch - 2 ; k >= non0 ; k--) {
- result[m][k] -= Ddot (npatch-(k+1), &result[m][k+1], 1,
- &coeff[k+1][k], NMAX);
- }
- for (k = non0 - 1 ; k >= 0 ; k--) {
- result[m][k] -= Ddot (npatch-non0, &result[m][non0], 1,
- &coeff[non0][k], NMAX);
- }
- }
- }
- /*****************************************************************************/
- /* The following routine writes the answer to secondary storage. */
- /*****************************************************************************/
- Storer (npatch, loop, place, size, result)
- int npatch, /* In: Problem size. */
- loop[][2]; /* In: Patch number ranges for faces. */
- double result[][NMAX], /* In: (RGB) Radiosity solutions. */
- place[][NMAX], /* In: Width-height-depth positions of patches. */
- size[][NMAX]; /* In: Width-height sizes of patches. */
- {
- int i, /* General loop counter. */
- iface, /* Loop counter over number of faces. */
- ipatch; /* Loop counter of number of patches within a face. */
- FILE *outfile; /* Output file pointer. */
- /* Write patch geometry to 'answer' file. */
- if ((outfile = fopen("answer", "w")) == NULL) {
- printf ("Unable to open 'answer' file.\n");
- exit (1);
- }
- fprintf (outfile, "%d patches:\n", npatch);
- fprintf (outfile,
- " Patch Face Position in w, h, d Width Height\n");
- for (iface = 0 ; iface < 6 ; iface++) {
- for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
- fprintf (outfile,
- "%5d %4d%11.5lf%11.5lf%11.5lf %11.5lf%11.5lf\n",
- ipatch+1, iface+1,
- place[0][ipatch],
- place[1][ipatch],
- place[2][ipatch],
- size[0][ipatch],
- size[1][ipatch]);
- }
- }
- /* Write patch radiosities to 'answer' file. */
- fprintf (outfile, "\n Patch Face Radiosities\n");
- for (iface = 0 ; iface < 6 ; iface++) {
- for (ipatch = loop[iface][0] ; ipatch <= loop[iface][1] ; ipatch++) {
- fprintf (outfile, "%5d %4d%12.8lf%12.8lf%12.8lf\n",
- ipatch+1, iface+1,
- result[0][ipatch],
- result[1][ipatch],
- result[2][ipatch]);
- }
- }
- fclose(outfile);
- }
- /*****************************************************************************/
- /* This routine verifies that the computed radiosities satisfy the equations.*/
- /* */
- /* John Gustafson, Diane Rover, Michael Carter, and Stephen Elbert */
- /* Ames Laboratory, 3/18/90 */
- /*****************************************************************************/
- Verify (npatch, coeff, diag, rhs, result)
- int npatch; /* In: Problem size. */
- double coeff[][NMAX], /* In: The coefficients of the eqns to solve. */
- diag[][NMAX], /* In: (RGB) Diagonal terms of the system. */
- rhs[][NMAX], /* In: (RGB) Right-hand sides of system to solve. */
- result[][NMAX]; /* In: (RGB) Radiosity solutions. */
- {
- double tmp1, tmp2; /* Double temporary variables. */
- double anorm, /* Norm accumulation variable. */
- xnorm; /* Norm accumulation variable. */
- int i, j, m; /* General loop counters. */
- tmp1 = 0.0;
- for (m = 0 ; m < 3 ; m++) {
- /* Copy lower triangle of coefficients to upper triangle, */
- /* and load diagonal. */
- for (i = 0 ; i < npatch ; i++) {
- coeff[i][i] = diag[m][i];
- for (j = 0 ; j < i ; j++) {
- coeff[i][j] = coeff[j][i];
- }
- }
- /* Multiply matrix by solution vector, and accum. norm of residual. */
- anorm = xnorm = 0.0;
- for (j = 0 ; j < npatch ; j++) {
- tmp2 = rhs[m][j];
- for (i = 0 ; i < npatch ; i++) {
- tmp2 -= (coeff[j][i] * result[m][i]);
- anorm = MAX(anorm, fabs(coeff[j][i]));
- }
- xnorm = MAX(xnorm, fabs(result[m][j]));
- tmp1 += fabs(tmp2);
- }
- }
- /* printf ("anorm = %g xnorm = %g\n", anorm, xnorm); */
- tmp1 /= (anorm * xnorm);
- if (tmp1 > 3 * EPS) {
- printf ("Residual is too large: %lg\n", tmp1);
- return (FALSE);
- }
- return (TRUE);
- }
- #ifdef SUN4
- /*****************************************************************************/
- /* Double precision dot product specifically written for Sun 4/370. */
- /* By Michael Carter and John Gustafson, May 30, 1990 */
- /* This code unrolls the dot product four ways since that's how many */
- /* registers are available on the SPARC. Other RISC system will require */
- /* something very similar. Also, unit stride is take advantage of in the */
- /* form of special cases. */
- /*****************************************************************************/
- double
- Ddot (n, a, ia, b, ib)
- register
- int n, /* Number of elements in vectors. */
- ia, /* Stride of a vector in ELEMENTS. */
- ib; /* Stride of b vector in ELEMENTS. */
- register
- double *a, /* Pointer to first vector. */
- *b; /* Pointer to second vector. */
- {
- register double sum0 = 0.0,
- sum1 = 0.0,
- sum2 = 0.0,
- sum3 = 0.0;
- register int m = n & 3;
- int t;
- /* The ragged cleanup part. */
- while (m--) {
- sum0 += *a * *b;
- a += ia;
- b += ib;
- }
- /* The fast pipelined part */
- n >>= 2;
- if (ib == 1 && ia != 1) {
- t = ia;
- ia = ib;
- ib = t;
- t = (int) a;
- b = a;
- a = (double *) t;
- }
- /* We can optimize if one or more strides are equal to 1. */
- if (ia == 1) {
- /* This runs if both strides are 1. */
- if (ib == 1) {
- ia <<= 2;
- ib <<= 2;
- while (n--) {
- sum0 += a[0] * b[0];
- sum1 += a[1] * b[1];
- sum2 += a[2] * b[2];
- sum3 += a[3] * b[3];
- a += ia;
- b += ib;
- }
- }
- /* This runs if stride of a only is equal to 1. */
- else {
- ia <<= 2;
- while (n--) {
- sum0 += a[0] * *b;
- b += ib;
- sum1 += a[1] * *b;
- b += ib;
- sum2 += a[2] * *b;
- b += ib;
- sum3 += a[3] * *b;
- a += ia;
- b += ib;
- }
- }
- }
- /* This runs for the more general case. */
- /* This is about .5 MFLOPS slower on Sun 4/370 */
- else {
- while (n--) {
- sum0 += *a * *b;
- a += ia;
- b += ib;
- sum1 += *a * *b;
- a += ia;
- b += ib;
- sum2 += *a * *b;
- a += ia;
- b += ib;
- sum3 += *a * *b;
- a += ia;
- b += ib;
- }
- }
- return (sum0 + sum1 + sum2 + sum3);
- }
- #else
- /*****************************************************************************/
- /* Generic double-precision dot product. Unrolling will help pipelined */
- /* computers. Modify accordingly. */
- /*****************************************************************************/
- double
- Ddot (n, a, ia, b, ib)
- register
- int n, /* Number of elements in vectors. */
- ia, /* Stride of a vector in ELEMENTS. */
- ib; /* Stride of b vector in ELEMENTS. */
- register
- double *a, /* Pointer to first vector. */
- *b; /* Pointer to second vector. */
- {
- register double sum = 0.0;
- while (n--) {
- sum += *a * *b;
- a += ia;
- b += ib;
- }
- return (sum);
- }
- #endif
|