clinpack.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229
  1. /*
  2. Translated to C by Bonnie Toy 5/88
  3. You MUST specify one of -DSP or -DDP to compile correctly.
  4. You MUST specify one of -DROLL or -DUNROLL to compile correctly.
  5. You MUST specify a timer option (see below) to compile correctly.
  6. To compile double precision version for Sun-4:
  7. cc -DUNIX -DDP -DROLL -O4 clinpack.c
  8. To compile single precision version for Sun-4:
  9. cc -DUNIX -DSP -DROLL -O4 -fsingle -fsingle2 clinpack.c
  10. To obtain rolled source BLAS, add -DROLL to the command lines.
  11. To obtain unrolled source BLAS, add -DUNROLL to the command lines.
  12. PLEASE NOTE: You can also just 'uncomment' one of the options below.
  13. */
  14. /* #define SP */
  15. #define DP
  16. /*#define ROLL */
  17. #define UNROLL
  18. /***************************************************************/
  19. /* Timer options. You MUST uncomment one of the options below */
  20. /* or compile, for example, with the '-DUNIX' option. */
  21. /***************************************************************/
  22. /* #define Amiga */
  23. #define UNIX
  24. /* #define UNIX_Old */
  25. /* #define VMS */
  26. /* #define BORLAND_C */
  27. /* #define MSC */
  28. /* #define MAC */
  29. /* #define IPSC */
  30. /* #define FORTRAN_SEC */
  31. /* #define GTODay */
  32. /* #define CTimer */
  33. /* #define UXPM */
  34. #include <stdio.h>
  35. #include <math.h>
  /* Working precision.  REAL is the element type of every matrix and vector,
     ZERO and ONE are the matching constants, and PREC is the word printed in
     the banner. */
  #if defined(SP)
  #define REAL float
  #define ZERO 0.0
  #define ONE 1.0
  #define PREC "Single "
  #endif
  #if defined(DP)
  #define REAL double
  #define ZERO 0.0e0
  #define ONE 1.0e0
  #define PREC "Double "
  #endif
  /* Banner text naming which BLAS loop form was compiled in. */
  #if defined(ROLL)
  #define ROLLING "Rolled "
  #endif
  #if defined(UNROLL)
  #define ROLLING "Unrolled "
  #endif
  /* Repetition count used by the averaged timing loops in main(). */
  #define NTIMES 1
  /* Timing table, indexed st[quantity][run].
       quantity 0..5 : dgefa time, dgesl time, total time, kflops, unit, ratio
       run      0..7 : the eight timing runs performed by main()
     Every access in this file uses st[0..5][0..7], so the array must be
     6 x 8.  The previous [8][6] shape made the second subscript overrun the
     inner bound, so that e.g. st[0][6] aliased st[1][0]. */
  static double st[6][8];
  56. main ()
  57. {
  58. static REAL aa[200][200],a[200][201],b[200],x[200];
  59. REAL cray,ops,total,norma,normx;
  60. REAL resid,residn,eps;
  61. REAL epslon(),kf;
  62. double t1,tm,tm2,dtime();
  63. static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
  64. lda = 201;
  65. ldaa = 200;
  66. cray = .056;
  67. n = 25;
  68. printf(ROLLING); printf(PREC);
  69. printf("Precision Linpack\n\n");
  70. ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
  71. matgen(a,lda,n,b,&norma);
  72. t1 = dtime();
  73. dgefa(a,lda,n,ipvt,&info);
  74. st[0][0] = dtime() - t1;
  75. t1 = dtime();
  76. dgesl(a,lda,n,ipvt,b,0);
  77. st[1][0] = dtime() - t1;
  78. total = st[0][0] + st[1][0];
  79. /* compute a residual to verify results. */
  80. for (i = 0; i < n; i++)
  81. {
  82. x[i] = b[i];
  83. }
  84. matgen(a,lda,n,b,&norma);
  85. for (i = 0; i < n; i++)
  86. {
  87. b[i] = -b[i];
  88. }
  89. dmxpy(n,b,n,lda,x,a);
  90. resid = 0.0;
  91. normx = 0.0;
  92. for (i = 0; i < n; i++)
  93. {
  94. resid = (resid > fabs((double)b[i]))
  95. ? resid : fabs((double)b[i]);
  96. normx = (normx > fabs((double)x[i]))
  97. ? normx : fabs((double)x[i]);
  98. }
  99. eps = epslon((REAL)ONE);
  100. residn = resid/( n*norma*normx*eps );
  101. printf(" norm. resid resid machep");
  102. printf(" x[0]-1 x[n-1]-1\n");
  103. printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n",
  104. (double)residn, (double)resid, (double)eps,
  105. (double)x[0]-1, (double)x[n-1]-1);
  106. printf(" times are reported for matrices of order %5d\n",n);
  107. printf(" dgefa dgesl total kflops unit");
  108. printf(" ratio\n");
  109. st[2][0] = total;
  110. st[3][0] = ops/(1.0e3*total);
  111. st[4][0] = 2.0e3/st[3][0];
  112. st[5][0] = total/cray;
  113. printf(" times for array with leading dimension of%5d\n",lda);
  114. print_time(0);
  115. matgen(a,lda,n,b,&norma);
  116. t1 = dtime();
  117. dgefa(a,lda,n,ipvt,&info);
  118. st[0][1] = dtime() - t1;
  119. t1 = dtime();
  120. dgesl(a,lda,n,ipvt,b,0);
  121. st[1][1] = dtime() - t1;
  122. total = st[0][1] + st[1][1];
  123. st[2][1] = total;
  124. st[3][1] = ops/(1.0e3*total);
  125. st[4][1] = 2.0e3/st[3][1];
  126. st[5][1] = total/cray;
  127. matgen(a,lda,n,b,&norma);
  128. t1 = dtime();
  129. dgefa(a,lda,n,ipvt,&info);
  130. st[0][2] = dtime() - t1;
  131. t1 = dtime();
  132. dgesl(a,lda,n,ipvt,b,0);
  133. st[1][2] = dtime() - t1;
  134. total = st[0][2] + st[1][2];
  135. st[2][2] = total;
  136. st[3][2] = ops/(1.0e3*total);
  137. st[4][2] = 2.0e3/st[3][2];
  138. st[5][2] = total/cray;
  139. ntimes = NTIMES;
  140. tm2 = 0.0;
  141. t1 = dtime();
  142. for (i = 0; i < ntimes; i++) {
  143. tm = dtime();
  144. matgen(a,lda,n,b,&norma);
  145. tm2 = tm2 + dtime() - tm;
  146. dgefa(a,lda,n,ipvt,&info);
  147. }
  148. st[0][3] = (dtime() - t1 - tm2)/ntimes;
  149. t1 = dtime();
  150. for (i = 0; i < ntimes; i++) {
  151. dgesl(a,lda,n,ipvt,b,0);
  152. }
  153. st[1][3] = (dtime() - t1)/ntimes;
  154. total = st[0][3] + st[1][3];
  155. st[2][3] = total;
  156. st[3][3] = ops/(1.0e3*total);
  157. st[4][3] = 2.0e3/st[3][3];
  158. st[5][3] = total/cray;
  159. print_time(1);
  160. print_time(2);
  161. print_time(3);
  162. matgen(aa,ldaa,n,b,&norma);
  163. t1 = dtime();
  164. dgefa(aa,ldaa,n,ipvt,&info);
  165. st[0][4] = dtime() - t1;
  166. t1 = dtime();
  167. dgesl(aa,ldaa,n,ipvt,b,0);
  168. st[1][4] = dtime() - t1;
  169. total = st[0][4] + st[1][4];
  170. st[2][4] = total;
  171. st[3][4] = ops/(1.0e3*total);
  172. st[4][4] = 2.0e3/st[3][4];
  173. st[5][4] = total/cray;
  174. matgen(aa,ldaa,n,b,&norma);
  175. t1 = dtime();
  176. dgefa(aa,ldaa,n,ipvt,&info);
  177. st[0][5] = dtime() - t1;
  178. t1 = dtime();
  179. dgesl(aa,ldaa,n,ipvt,b,0);
  180. st[1][5] = dtime() - t1;
  181. total = st[0][5] + st[1][5];
  182. st[2][5] = total;
  183. st[3][5] = ops/(1.0e3*total);
  184. st[4][5] = 2.0e3/st[3][5];
  185. st[5][5] = total/cray;
  186. matgen(aa,ldaa,n,b,&norma);
  187. t1 = dtime();
  188. dgefa(aa,ldaa,n,ipvt,&info);
  189. st[0][6] = dtime() - t1;
  190. t1 = dtime();
  191. dgesl(aa,ldaa,n,ipvt,b,0);
  192. st[1][6] = dtime() - t1;
  193. total = st[0][6] + st[1][6];
  194. st[2][6] = total;
  195. st[3][6] = ops/(1.0e3*total);
  196. st[4][6] = 2.0e3/st[3][6];
  197. st[5][6] = total/cray;
  198. ntimes = NTIMES;
  199. tm2 = 0;
  200. t1 = dtime();
  201. for (i = 0; i < ntimes; i++) {
  202. tm = dtime();
  203. matgen(aa,ldaa,n,b,&norma);
  204. tm2 = tm2 + dtime() - tm;
  205. dgefa(aa,ldaa,n,ipvt,&info);
  206. }
  207. st[0][7] = (dtime() - t1 - tm2)/ntimes;
  208. t1 = dtime();
  209. for (i = 0; i < ntimes; i++) {
  210. dgesl(aa,ldaa,n,ipvt,b,0);
  211. }
  212. st[1][7] = (dtime() - t1)/ntimes;
  213. total = st[0][7] + st[1][7];
  214. st[2][7] = total;
  215. st[3][7] = ops/(1.0e3*total);
  216. st[4][7] = 2.0e3/st[3][7];
  217. st[5][7] = total/cray;
  218. /* the following code sequence implements the semantics of
  219. the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */
  220. /*
  221. kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
  222. kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
  223. if (fabs((double)kf) < ONE)
  224. kflops = 0;
  225. else {
  226. kflops = floor(fabs((double)kf));
  227. if (kf < ZERO) kflops = -kflops;
  228. }
  229. */
  230. if ( st[3][3] < ZERO ) st[3][3] = ZERO;
  231. if ( st[3][7] < ZERO ) st[3][7] = ZERO;
  232. kf = st[3][3];
  233. if ( st[3][7] < st[3][3] ) kf = st[3][7];
  234. kflops = (int)(kf + 0.5);
  235. printf(" times for array with leading dimension of%4d\n",ldaa);
  236. print_time(4);
  237. print_time(5);
  238. print_time(6);
  239. print_time(7);
  240. printf(ROLLING); printf(PREC);
  241. printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
  242. }
  243. /*----------------------*/
  244. print_time (row)
  245. int row;
  246. {
  247. printf("%11.2f%11.2f%11.2f%11.0f%11.2f%11.2f\n",
  248. (double)st[0][row], (double)st[1][row], (double)st[2][row],
  249. (double)st[3][row], (double)st[4][row], (double)st[5][row]);
  250. }
  251. /*----------------------*/
  252. matgen(a,lda,n,b,norma)
  253. REAL a[],b[],*norma;
  254. int lda, n;
  255. /* We would like to declare a[][lda], but c does not allow it. In this
  256. function, references to a[i][j] are written a[lda*i+j]. */
  257. {
  258. int init, i, j;
  259. init = 1325;
  260. *norma = 0.0;
  261. for (j = 0; j < n; j++) {
  262. for (i = 0; i < n; i++) {
  263. init = 3125*init % 65536;
  264. a[lda*j+i] = (init - 32768.0)/16384.0;
  265. *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
  266. }
  267. }
  268. for (i = 0; i < n; i++) {
  269. b[i] = 0.0;
  270. }
  271. for (j = 0; j < n; j++) {
  272. for (i = 0; i < n; i++) {
  273. b[i] = b[i] + a[lda*j+i];
  274. }
  275. }
  276. }
  277. /*----------------------*/
  278. dgefa(a,lda,n,ipvt,info)
  279. REAL a[];
  280. int lda,n,ipvt[],*info;
  281. /* We would like to declare a[][lda], but c does not allow it. In this
  282. function, references to a[i][j] are written a[lda*i+j].
  283. */
  284. /*
  285. dgefa factors a double precision matrix by gaussian elimination.
  286. dgefa is usually called by dgeco, but it can be called
  287. directly with a saving in time if rcond is not needed.
  288. (time for dgeco) = (1 + 9/n)*(time for dgefa) .
  289. on entry
  290. a REAL precision[n][lda]
  291. the matrix to be factored.
  292. lda integer
  293. the leading dimension of the array a .
  294. n integer
  295. the order of the matrix a .
  296. on return
  297. a an upper triangular matrix and the multipliers
  298. which were used to obtain it.
  299. the factorization can be written a = l*u where
  300. l is a product of permutation and unit lower
  301. triangular matrices and u is upper triangular.
  302. ipvt integer[n]
  303. an integer vector of pivot indices.
  304. info integer
  305. = 0 normal value.
  306. = k if u[k][k] .eq. 0.0 . this is not an error
  307. condition for this subroutine, but it does
  308. indicate that dgesl or dgedi will divide by zero
  309. if called. use rcond in dgeco for a reliable
  310. indication of singularity.
  311. linpack. this version dated 08/14/78 .
  312. cleve moler, university of new mexico, argonne national lab.
  313. functions
  314. blas daxpy,dscal,idamax
  315. */
  316. {
  317. /* internal variables */
  318. REAL t;
  319. int idamax(),j,k,kp1,l,nm1;
  320. /* gaussian elimination with partial pivoting */
  321. *info = 0;
  322. nm1 = n - 1;
  323. if (nm1 >= 0) {
  324. for (k = 0; k < nm1; k++) {
  325. kp1 = k + 1;
  326. /* find l = pivot index */
  327. l = idamax(n-k,&a[lda*k+k],1) + k;
  328. ipvt[k] = l;
  329. /* zero pivot implies this column already
  330. triangularized */
  331. if (a[lda*k+l] != ZERO) {
  332. /* interchange if necessary */
  333. if (l != k) {
  334. t = a[lda*k+l];
  335. a[lda*k+l] = a[lda*k+k];
  336. a[lda*k+k] = t;
  337. }
  338. /* compute multipliers */
  339. t = -ONE/a[lda*k+k];
  340. dscal(n-(k+1),t,&a[lda*k+k+1],1);
  341. /* row elimination with column indexing */
  342. for (j = kp1; j < n; j++) {
  343. t = a[lda*j+l];
  344. if (l != k) {
  345. a[lda*j+l] = a[lda*j+k];
  346. a[lda*j+k] = t;
  347. }
  348. daxpy(n-(k+1),t,&a[lda*k+k+1],1,
  349. &a[lda*j+k+1],1);
  350. }
  351. }
  352. else {
  353. *info = k;
  354. }
  355. }
  356. }
  357. ipvt[n-1] = n-1;
  358. if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1;
  359. }
  360. /*----------------------*/
  361. dgesl(a,lda,n,ipvt,b,job)
  362. int lda,n,ipvt[],job;
  363. REAL a[],b[];
  364. /* We would like to declare a[][lda], but c does not allow it. In this
  365. function, references to a[i][j] are written a[lda*i+j]. */
  366. /*
  367. dgesl solves the double precision system
  368. a * x = b or trans(a) * x = b
  369. using the factors computed by dgeco or dgefa.
  370. on entry
  371. a double precision[n][lda]
  372. the output from dgeco or dgefa.
  373. lda integer
  374. the leading dimension of the array a .
  375. n integer
  376. the order of the matrix a .
  377. ipvt integer[n]
  378. the pivot vector from dgeco or dgefa.
  379. b double precision[n]
  380. the right hand side vector.
  381. job integer
  382. = 0 to solve a*x = b ,
  383. = nonzero to solve trans(a)*x = b where
  384. trans(a) is the transpose.
  385. on return
  386. b the solution vector x .
  387. error condition
  388. a division by zero will occur if the input factor contains a
  389. zero on the diagonal. technically this indicates singularity
  390. but it is often caused by improper arguments or improper
  391. setting of lda . it will not occur if the subroutines are
  392. called correctly and if dgeco has set rcond .gt. 0.0
  393. or dgefa has set info .eq. 0 .
  394. to compute inverse(a) * c where c is a matrix
  395. with p columns
  396. dgeco(a,lda,n,ipvt,rcond,z)
  397. if (!rcond is too small){
  398. for (j=0,j<p,j++)
  399. dgesl(a,lda,n,ipvt,c[j][0],0);
  400. }
  401. linpack. this version dated 08/14/78 .
  402. cleve moler, university of new mexico, argonne national lab.
  403. functions
  404. blas daxpy,ddot
  405. */
  406. {
  407. /* internal variables */
  408. REAL ddot(),t;
  409. int k,kb,l,nm1;
  410. nm1 = n - 1;
  411. if (job == 0) {
  412. /* job = 0 , solve a * x = b
  413. first solve l*y = b */
  414. if (nm1 >= 1) {
  415. for (k = 0; k < nm1; k++) {
  416. l = ipvt[k];
  417. t = b[l];
  418. if (l != k){
  419. b[l] = b[k];
  420. b[k] = t;
  421. }
  422. daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
  423. }
  424. }
  425. /* now solve u*x = y */
  426. for (kb = 0; kb < n; kb++) {
  427. k = n - (kb + 1);
  428. b[k] = b[k]/a[lda*k+k];
  429. t = -b[k];
  430. daxpy(k,t,&a[lda*k+0],1,&b[0],1);
  431. }
  432. }
  433. else {
  434. /* job = nonzero, solve trans(a) * x = b
  435. first solve trans(u)*y = b */
  436. for (k = 0; k < n; k++) {
  437. t = ddot(k,&a[lda*k+0],1,&b[0],1);
  438. b[k] = (b[k] - t)/a[lda*k+k];
  439. }
  440. /* now solve trans(l)*x = y */
  441. if (nm1 >= 1) {
  442. for (kb = 1; kb < nm1; kb++) {
  443. k = n - (kb+1);
  444. b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
  445. l = ipvt[k];
  446. if (l != k) {
  447. t = b[l];
  448. b[l] = b[k];
  449. b[k] = t;
  450. }
  451. }
  452. }
  453. }
  454. }
  455. /*----------------------*/
  456. daxpy(n,da,dx,incx,dy,incy)
  457. /*
  458. constant times a vector plus a vector.
  459. jack dongarra, linpack, 3/11/78.
  460. */
  461. REAL dx[],dy[],da;
  462. int incx,incy,n;
  463. {
  464. int i,ix,iy,m,mp1;
  465. if(n <= 0) return;
  466. if (da == ZERO) return;
  467. if(incx != 1 || incy != 1) {
  468. /* code for unequal increments or equal increments
  469. not equal to 1 */
  470. ix = 1;
  471. iy = 1;
  472. if(incx < 0) ix = (-n+1)*incx + 1;
  473. if(incy < 0) iy = (-n+1)*incy + 1;
  474. for (i = 0;i < n; i++) {
  475. dy[iy] = dy[iy] + da*dx[ix];
  476. ix = ix + incx;
  477. iy = iy + incy;
  478. }
  479. return;
  480. }
  481. /* code for both increments equal to 1 */
  482. #ifdef ROLL
  483. for (i = 0;i < n; i++) {
  484. dy[i] = dy[i] + da*dx[i];
  485. }
  486. #endif
  487. #ifdef UNROLL
  488. m = n % 4;
  489. if ( m != 0) {
  490. for (i = 0; i < m; i++)
  491. dy[i] = dy[i] + da*dx[i];
  492. if (n < 4) return;
  493. }
  494. for (i = m; i < n; i = i + 4) {
  495. dy[i] = dy[i] + da*dx[i];
  496. dy[i+1] = dy[i+1] + da*dx[i+1];
  497. dy[i+2] = dy[i+2] + da*dx[i+2];
  498. dy[i+3] = dy[i+3] + da*dx[i+3];
  499. }
  500. #endif
  501. }
  502. /*----------------------*/
  503. REAL ddot(n,dx,incx,dy,incy)
  504. /*
  505. forms the dot product of two vectors.
  506. jack dongarra, linpack, 3/11/78.
  507. */
  508. REAL dx[],dy[];
  509. int incx,incy,n;
  510. {
  511. REAL dtemp;
  512. int i,ix,iy,m,mp1;
  513. dtemp = ZERO;
  514. if(n <= 0) return(ZERO);
  515. if(incx != 1 || incy != 1) {
  516. /* code for unequal increments or equal increments
  517. not equal to 1 */
  518. ix = 0;
  519. iy = 0;
  520. if (incx < 0) ix = (-n+1)*incx;
  521. if (incy < 0) iy = (-n+1)*incy;
  522. for (i = 0;i < n; i++) {
  523. dtemp = dtemp + dx[ix]*dy[iy];
  524. ix = ix + incx;
  525. iy = iy + incy;
  526. }
  527. return(dtemp);
  528. }
  529. /* code for both increments equal to 1 */
  530. #ifdef ROLL
  531. for (i=0;i < n; i++)
  532. dtemp = dtemp + dx[i]*dy[i];
  533. return(dtemp);
  534. #endif
  535. #ifdef UNROLL
  536. m = n % 5;
  537. if (m != 0) {
  538. for (i = 0; i < m; i++)
  539. dtemp = dtemp + dx[i]*dy[i];
  540. if (n < 5) return(dtemp);
  541. }
  542. for (i = m; i < n; i = i + 5) {
  543. dtemp = dtemp + dx[i]*dy[i] +
  544. dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
  545. dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
  546. }
  547. return(dtemp);
  548. #endif
  549. }
  550. /*----------------------*/
  551. dscal(n,da,dx,incx)
  552. /* scales a vector by a constant.
  553. jack dongarra, linpack, 3/11/78.
  554. */
  555. REAL da,dx[];
  556. int n, incx;
  557. {
  558. int i,m,mp1,nincx;
  559. if(n <= 0)return;
  560. if(incx != 1) {
  561. /* code for increment not equal to 1 */
  562. nincx = n*incx;
  563. for (i = 0; i < nincx; i = i + incx)
  564. dx[i] = da*dx[i];
  565. return;
  566. }
  567. /* code for increment equal to 1 */
  568. #ifdef ROLL
  569. for (i = 0; i < n; i++)
  570. dx[i] = da*dx[i];
  571. #endif
  572. #ifdef UNROLL
  573. m = n % 5;
  574. if (m != 0) {
  575. for (i = 0; i < m; i++)
  576. dx[i] = da*dx[i];
  577. if (n < 5) return;
  578. }
  579. for (i = m; i < n; i = i + 5){
  580. dx[i] = da*dx[i];
  581. dx[i+1] = da*dx[i+1];
  582. dx[i+2] = da*dx[i+2];
  583. dx[i+3] = da*dx[i+3];
  584. dx[i+4] = da*dx[i+4];
  585. }
  586. #endif
  587. }
  588. /*----------------------*/
  589. int idamax(n,dx,incx)
  590. /*
  591. finds the index of element having max. absolute value.
  592. jack dongarra, linpack, 3/11/78.
  593. */
  594. REAL dx[];
  595. int incx,n;
  596. {
  597. REAL dmax;
  598. int i, ix, itemp;
  599. if( n < 1 ) return(-1);
  600. if(n ==1 ) return(0);
  601. if(incx != 1) {
  602. /* code for increment not equal to 1 */
  603. ix = 1;
  604. dmax = fabs((double)dx[0]);
  605. ix = ix + incx;
  606. for (i = 1; i < n; i++) {
  607. if(fabs((double)dx[ix]) > dmax) {
  608. itemp = i;
  609. dmax = fabs((double)dx[ix]);
  610. }
  611. ix = ix + incx;
  612. }
  613. }
  614. else {
  615. /* code for increment equal to 1 */
  616. itemp = 0;
  617. dmax = fabs((double)dx[0]);
  618. for (i = 1; i < n; i++) {
  619. if(fabs((double)dx[i]) > dmax) {
  620. itemp = i;
  621. dmax = fabs((double)dx[i]);
  622. }
  623. }
  624. }
  625. return (itemp);
  626. }
  627. /*----------------------*/
  628. REAL epslon (x)
  629. REAL x;
  630. /*
  631. estimate unit roundoff in quantities of size x.
  632. */
  633. {
  634. REAL a,b,c,eps;
  635. /*
  636. this program should function properly on all systems
  637. satisfying the following two assumptions,
  638. 1. the base used in representing dfloating point
  639. numbers is not a power of three.
  640. 2. the quantity a in statement 10 is represented to
  641. the accuracy used in dfloating point variables
  642. that are stored in memory.
  643. the statement number 10 and the go to 10 are intended to
  644. force optimizing compilers to generate code satisfying
  645. assumption 2.
  646. under these assumptions, it should be true that,
  647. a is not exactly equal to four-thirds,
  648. b has a zero for its last bit or digit,
  649. c is not exactly equal to one,
  650. eps measures the separation of 1.0 from
  651. the next larger dfloating point number.
  652. the developers of eispack would appreciate being informed
  653. about any systems where these assumptions do not hold.
  654. *****************************************************************
  655. this routine is one of the auxiliary routines used by eispack iii
  656. to avoid machine dependencies.
  657. *****************************************************************
  658. this version dated 4/6/83.
  659. */
  660. a = 4.0e0/3.0e0;
  661. eps = ZERO;
  662. while (eps == ZERO) {
  663. b = a - ONE;
  664. c = b + b + b;
  665. eps = fabs((double)(c-ONE));
  666. }
  667. return(eps*fabs((double)x));
  668. }
  669. /*----------------------*/
  670. dmxpy (n1, y, n2, ldm, x, m)
  671. REAL y[], x[], m[];
  672. int n1, n2, ldm;
  673. /* We would like to declare m[][ldm], but c does not allow it. In this
  674. function, references to m[i][j] are written m[ldm*i+j]. */
  675. /*
  676. purpose:
  677. multiply matrix m times vector x and add the result to vector y.
  678. parameters:
  679. n1 integer, number of elements in vector y, and number of rows in
  680. matrix m
  681. y double [n1], vector of length n1 to which is added
  682. the product m*x
  683. n2 integer, number of elements in vector x, and number of columns
  684. in matrix m
  685. ldm integer, leading dimension of array m
  686. x double [n2], vector of length n2
  687. m double [ldm][n2], matrix of n1 rows and n2 columns
  688. ----------------------------------------------------------------------
  689. */
  690. {
  691. int j,i,jmin;
  692. /* cleanup odd vector */
  693. j = n2 % 2;
  694. if (j >= 1) {
  695. j = j - 1;
  696. for (i = 0; i < n1; i++)
  697. y[i] = (y[i]) + x[j]*m[ldm*j+i];
  698. }
  699. /* cleanup odd group of two vectors */
  700. j = n2 % 4;
  701. if (j >= 2) {
  702. j = j - 1;
  703. for (i = 0; i < n1; i++)
  704. y[i] = ( (y[i])
  705. + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
  706. }
  707. /* cleanup odd group of four vectors */
  708. j = n2 % 8;
  709. if (j >= 4) {
  710. j = j - 1;
  711. for (i = 0; i < n1; i++)
  712. y[i] = ((( (y[i])
  713. + x[j-3]*m[ldm*(j-3)+i])
  714. + x[j-2]*m[ldm*(j-2)+i])
  715. + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
  716. }
  717. /* cleanup odd group of eight vectors */
  718. j = n2 % 16;
  719. if (j >= 8) {
  720. j = j - 1;
  721. for (i = 0; i < n1; i++)
  722. y[i] = ((((((( (y[i])
  723. + x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i])
  724. + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i])
  725. + x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i])
  726. + x[j-1]*m[ldm*(j-1)+i]) + x[j] *m[ldm*j+i];
  727. }
  728. /* main loop - groups of sixteen vectors */
  729. jmin = (n2%16)+16;
  730. for (j = jmin-1; j < n2; j = j + 16) {
  731. for (i = 0; i < n1; i++)
  732. y[i] = ((((((((((((((( (y[i])
  733. + x[j-15]*m[ldm*(j-15)+i])
  734. + x[j-14]*m[ldm*(j-14)+i])
  735. + x[j-13]*m[ldm*(j-13)+i])
  736. + x[j-12]*m[ldm*(j-12)+i])
  737. + x[j-11]*m[ldm*(j-11)+i])
  738. + x[j-10]*m[ldm*(j-10)+i])
  739. + x[j- 9]*m[ldm*(j- 9)+i])
  740. + x[j- 8]*m[ldm*(j- 8)+i])
  741. + x[j- 7]*m[ldm*(j- 7)+i])
  742. + x[j- 6]*m[ldm*(j- 6)+i])
  743. + x[j- 5]*m[ldm*(j- 5)+i])
  744. + x[j- 4]*m[ldm*(j- 4)+i])
  745. + x[j- 3]*m[ldm*(j- 3)+i])
  746. + x[j- 2]*m[ldm*(j- 2)+i])
  747. + x[j- 1]*m[ldm*(j- 1)+i])
  748. + x[j] *m[ldm*j+i];
  749. }
  750. }
  751. /*****************************************************/
  752. /* Various timer routines. */
  753. /* Al Aburto, aburto@marlin.nosc.mil, 26 Sep 1992 */
  754. /* */
  755. /* t = dtime() outputs the current time in seconds. */
  756. /* Use CAUTION as some of these routines will mess */
  757. /* up when timing across the hour mark!!! */
  758. /* */
  759. /* For timing I use the 'user' time whenever */
  760. /* possible. Using 'user+sys' time is a separate */
  761. /* issue. */
  762. /* */
  763. /*****************************************************/
  764. /*********************************/
  765. /* Timer code. */
  766. /*********************************/
  767. /*******************/
  768. /* Amiga dtime() */
  769. /*******************/
  #ifdef Amiga
  #include <ctype.h>
  #define HZ 50
  /* Time in seconds built from the minutes and ticks fields filled in by
     DateStamp(); the days field is not used. */
  double dtime()
  {
      struct tt {
          long days;
          long minutes;
          long ticks;
      } tt;

      DateStamp(&tt);
      return ((double)(tt.ticks + (tt.minutes * 60L * 50L))) / (double)HZ;
  }
  #endif
  786. /*****************************************************/
  787. /* UNIX dtime(). This is the preferred UNIX timer. */
  788. /* Provided by: Markku Kolkka, mk59200@cc.tut.fi */
  789. /* HP-UX Addition by: Bo Thide', bt@irfu.se */
  790. /*****************************************************/
  #ifdef UNIX
  #include <sys/time.h>
  #include <sys/resource.h>
  #ifdef __hpux
  #include <sys/syscall.h>
  #define getrusage(a,b) syscall(SYS_getrusage,a,b)
  #endif
  struct rusage rusage;
  /* User CPU time of this process in seconds, read from the ru_utime field
     filled in by getrusage(RUSAGE_SELF). */
  double dtime()
  {
      double seconds;

      getrusage(RUSAGE_SELF,&rusage);
      seconds = (double)(rusage.ru_utime.tv_sec);
      seconds += (double)(rusage.ru_utime.tv_usec) * 1.0e-06;
      return seconds;
  }
  #endif
  808. /***************************************************/
  809. /* UNIX_Old dtime(). This is the old UNIX timer. */
  810. /* Use only if absolutely necessary as HZ may be */
  811. /* ill defined on your system. */
  812. /***************************************************/
  #ifdef UNIX_Old
  #include <sys/types.h>
  #include <sys/times.h>
  #include <sys/param.h>
  #ifndef HZ
  #define HZ 60
  #endif
  struct tms tms;
  /* User time in seconds: the tms_utime tick count from times() divided by
     HZ (60 is assumed when the system headers do not define HZ). */
  double dtime()
  {
      times(&tms);
      return (double)(tms.tms_utime) / (double)HZ;
  }
  #endif
  829. /*********************************************************/
  830. /* VMS dtime() for VMS systems. */
  831. /* Provided by: RAMO@uvphys.phys.UVic.CA */
  832. /* Some people have run into problems with this timer. */
  833. /*********************************************************/
  #ifdef VMS
  #include time
  #ifndef HZ
  #define HZ 100
  #endif
  /* Buffer filled in by times() */
  struct tbuffer_t
  {
      int proc_user_time;
      int proc_system_time;
      int child_user_time;
      int child_system_time;
  };
  struct tbuffer_t tms;
  /* Process user time in seconds: proc_user_time divided by HZ. */
  double dtime()
  {
      times(&tms);
      return (double)(tms.proc_user_time) / (double)HZ;
  }
  #endif
  855. /******************************/
  856. /* BORLAND C dtime() for DOS */
  857. /******************************/
  #ifdef BORLAND_C
  #include <ctype.h>
  #include <dos.h>
  #include <time.h>
  #define HZ 100
  struct time tnow;
  /* Seconds within the current hour, from the minute, second and hundredths
     fields filled in by gettime().  The hour field is not used, so the
     value wraps at the hour mark. */
  double dtime()
  {
      gettime(&tnow);
      return 60.0 * (double)(tnow.ti_min)
           + (double)(tnow.ti_sec)
           + (double)(tnow.ti_hund)/(double)HZ;
  }
  #endif
  874. /**************************************/
  875. /* Microsoft C (MSC) dtime() for DOS */
  876. /**************************************/
  #ifdef MSC
  #include <time.h>
  #include <ctype.h>
  #define HZ CLK_TCK
  clock_t tnow;
  /* clock() reading converted to seconds by dividing by CLK_TCK. */
  double dtime()
  {
      tnow = clock();
      return (double)tnow / (double)HZ;
  }
  #endif
  890. /*************************************/
  891. /* Macintosh (MAC) Think C dtime() */
  892. /*************************************/
  #ifdef MAC
  #include <time.h>
  #define HZ 60
  /* clock() reading divided by HZ (60), giving seconds. */
  double dtime()
  {
      return (double)clock() / (double)HZ;
  }
  #endif
  903. /************************************************************/
  904. /* iPSC/860 (IPSC) dtime() for i860. */
  905. /* Provided by: Dan Yergeau, yergeau@gloworm.Stanford.EDU */
  906. /************************************************************/
  #ifdef IPSC
  extern double dclock();
  /* Thin wrapper: returns whatever dclock() reports. */
  double dtime()
  {
      return dclock();
  }
  #endif
  916. /**************************************************/
  917. /* FORTRAN dtime() for Cray type systems. */
  918. /* This is the preferred timer for Cray systems. */
  919. /**************************************************/
  #ifdef FORTRAN_SEC
  fortran double second();
  /* Returns the value that the Fortran routine second() stores through its
     argument. */
  double dtime()
  {
      double seconds;

      second(&seconds);
      return seconds;
  }
  #endif
  929. /***********************************************************/
  930. /* UNICOS C dtime() for Cray UNICOS systems. Don't use */
  931. /* unless absolutely necessary as returned time includes */
  932. /* 'user+system' time. Provided by: R. Mike Dority, */
  933. /* dority@craysea.cray.com */
  934. /***********************************************************/
  #ifdef CTimer
  #include <time.h>
  /* clock() reading converted to seconds with CLOCKS_PER_SEC. */
  double dtime()
  {
      clock_t ticks = clock();

      return (double)ticks / (double)CLOCKS_PER_SEC;
  }
  #endif
  946. /********************************************/
  947. /* Another UNIX timer using gettimeofday(). */
  948. /* However, getrusage() is preferred. */
  949. /********************************************/
  #ifdef GTODay
  #include <sys/time.h>
  struct timeval tnow;
  /* Time of day from gettimeofday(), as seconds plus microseconds scaled
     to seconds. */
  double dtime()
  {
      gettimeofday(&tnow,NULL);
      return (double)tnow.tv_sec + (double)tnow.tv_usec * 1.0e-6;
  }
  #endif
  961. /*****************************************************/
  962. /* Fujitsu UXP/M timer. */
  963. /* Provided by: Mathew Lim, ANUSF, M.Lim@anu.edu.au */
  964. /*****************************************************/
  #ifdef UXPM
  #include <sys/types.h>
  #include <sys/timesu.h>
  struct tmsu rusage;
  /* tms_utime as filled in by timesu(), scaled by 1.0e-06 to seconds. */
  double dtime()
  {
      timesu(&rusage);
      return (double)(rusage.tms_utime) * 1.0e-06;
  }
  #endif