]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/multiprecision/performance/linpack-benchmark.cpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / libs / multiprecision / performance / linpack-benchmark.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright 2011 John Maddock. Distributed under the Boost
3 // Software License, Version 1.0. (See accompanying file
4 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 /* 1000d.f -- translated by f2c (version 20050501).
7 You must link the resulting object file with libf2c:
8 on Microsoft Windows system, link with libf2c.lib;
9 on Linux or Unix systems, link with .../path/to/libf2c.a -lm
10 or, if you install libf2c.a in a standard place, with -lf2c -lm
11 -- in that order, at the end of the command line, as in
12 cc *.o -lf2c -lm
13 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
14
15 http://www.netlib.org/f2c/libf2c.zip
16 */
17 #include <iostream>
18 #include <iomanip>
19 #include <cmath>
20
21 #if defined(TEST_GMPXX)
22 #include <gmpxx.h>
23 typedef mpf_class real_type;
24 #elif defined(TEST_MPFRXX)
25 #include <gmpfrxx.h>
26 typedef mpfr_class real_type;
27 #elif defined(TEST_CPP_DEC_FLOAT)
28 #include <boost/multiprecision/cpp_dec_float.hpp>
29 typedef boost::multiprecision::cpp_dec_float_50 real_type;
30 #elif defined(TEST_MPFR_50)
31 #include <boost/multiprecision/mpfr.hpp>
32 typedef boost::multiprecision::mpfr_float_50 real_type;
33 #elif defined(TEST_MPF_50)
34 #include <boost/multiprecision/gmp.hpp>
35 typedef boost::multiprecision::mpf_float_50 real_type;
36 #elif defined(NATIVE_FLOAT128)
37 #include <boost/multiprecision/float128.hpp>
38 typedef __float128 real_type;
39
40 std::ostream& operator<<(std::ostream& os, const __float128& f)
41 {
42 return os << boost::multiprecision::float128(f);
43 }
44
45 #include <boost/type_traits/has_left_shift.hpp>
46
47 namespace boost {
48
49 template <>
50 struct has_left_shift<std::basic_ostream<char>, __float128> : public mpl::true_
51 {};
52
53 template <>
54 double lexical_cast<double, __float128>(const __float128& f)
55 {
56 return f;
57 }
58
59 } // namespace boost
60
61 #elif defined(TEST_FLOAT128)
62 #include <boost/multiprecision/float128.hpp>
63 typedef boost::multiprecision::float128 real_type;
64 #elif defined(TEST_CPP_BIN_FLOAT_QUAD)
65 #include <boost/multiprecision/cpp_bin_float.hpp>
66 typedef boost::multiprecision::cpp_bin_float_quad real_type;
67 #elif defined(TEST_CPP_BIN_FLOAT_OCT)
68 #include <boost/multiprecision/cpp_bin_float.hpp>
69 typedef boost::multiprecision::cpp_bin_float_oct real_type;
70 #else
71 typedef double real_type;
72 #endif
73
74 #include <boost/lexical_cast.hpp>
75
76 #ifndef CAST_TO_RT
77 #define CAST_TO_RT(x) x
78 #endif
79
80 extern "C" {
81 #include "f2c.h"
82 integer s_wsfe(cilist*), e_wsfe(void), do_fio(integer*, char*, ftnlen),
83 s_wsle(cilist*), do_lio(integer*, integer*, char*, ftnlen),
84 e_wsle(void);
85 /* Subroutine */ int s_stop(char*, ftnlen);
86
87 #undef abs
88 #undef dabs
89 #define dabs abs
90 #undef dmin
91 #undef dmax
92 #define dmin min
93 #define dmax max
94 }
95 #include <time.h>
96
97 using std::max;
98 using std::min;
99
100 /* Table of constant values */
101
102 static integer c__0 = 0;
103 static real_type c_b7 = CAST_TO_RT(1);
104 static integer c__1 = 1;
105 static integer c__9 = 9;
106
// Returns the value of clock() converted to seconds.
inline double second_(void)
{
   const clock_t ticks = clock();
   return static_cast<double>(ticks) / CLOCKS_PER_SEC;
}
111
112 int dgefa_(real_type*, integer*, integer*, integer*, integer*), dgesl_(real_type*, integer*, integer*, integer*, real_type*, integer*);
113 int dmxpy_(integer*, real_type*, integer*, integer*, real_type*, real_type*);
114 int matgen_(real_type*, integer*, integer*, real_type*, real_type*);
115 real_type epslon_(real_type*);
116 real_type ran_(integer*);
117 int dscal_(integer*, real_type*, real_type*, integer*);
118 int daxpy_(integer*, real_type*, real_type*, integer*, real_type*, integer*);
119 integer idamax_(integer*, real_type*, integer*);
120 real_type ddot_(integer*, real_type*, integer*, real_type*, integer*);
121 int daxpy_(integer*, real_type*, real_type*, integer*, real_type*, integer*);
122 int dmxpy_(integer*, real_type*, integer*, integer*, real_type*, real_type*);
123
124 extern "C" int MAIN__()
125 {
126 #ifdef TEST_MPF_50
127 std::cout << "Testing number<mpf_float<50> >" << std::endl;
128 #elif defined(TEST_MPFR_50)
129 std::cout << "Testing number<mpf_float<50> >" << std::endl;
130 #elif defined(TEST_GMPXX)
131 std::cout << "Testing mpf_class at 50 decimal degits" << std::endl;
132 mpf_set_default_prec(((50 + 1) * 1000L) / 301L);
133 #elif defined(TEST_MPFRXX)
134 std::cout << "Testing mpfr_class at 50 decimal degits" << std::endl;
135 mpfr_set_default_prec(((50 + 1) * 1000L) / 301L);
136 #elif defined(TEST_CPP_DEC_FLOAT)
137 std::cout << "Testing number<cpp_dec_float<50> >" << std::endl;
138 #elif defined(NATIVE_FLOAT128)
139 std::cout << "Testing __float128" << std::endl;
140 #elif defined(TEST_FLOAT128)
141 std::cout << "Testing number<float128_backend, et_off>" << std::endl;
142 #else
143 std::cout << "Testing double" << std::endl;
144 #endif
145
146 /* Format strings */
147 static char fmt_1[] = "(\002 Please send the results of this run to:\002"
148 "//\002 Jack J. Dongarra\002/\002 Computer Science Department\002/"
149 "\002 University of Tennessee\002/\002 Knoxville, Tennessee 37996"
150 "-1300\002//\002 Fax: 615-974-8296\002//\002 Internet: dongarra@c"
151 "s.utk.edu\002/)";
152 static char fmt_40[] = "(\002 norm. resid resid mac"
153 "hep\002,\002 x(1) x(n)\002)";
154 static char fmt_50[] = "(1p5e16.8)";
155 static char fmt_60[] = "(//\002 times are reported for matrices of or"
156 "der \002,i5)";
157 static char fmt_70[] = "(6x,\002factor\002,5x,\002solve\002,6x,\002tota"
158 "l\002,5x,\002mflops\002,7x,\002unit\002,6x,\002ratio\002)";
159 static char fmt_80[] = "(\002 times for array with leading dimension o"
160 "f\002,i4)";
161 static char fmt_110[] = "(6(1pe11.3))";
162
163 /* System generated locals */
164 integer i__1;
165 real_type d__1, d__2, d__3;
166
167 /* Builtin functions */
168
169 /* Local variables */
170 static real_type a[1001000] /* was [1001][1000] */, b[1000];
171 static integer i__, n;
172 static real_type x[1000];
173 static double t1;
174 static integer lda;
175 static double ops;
176 static real_type eps;
177 static integer info;
178 static double time[6], cray, total;
179 static integer ipvt[1000];
180 static real_type resid, norma;
181 static real_type normx;
182 static real_type residn;
183
184 /* Fortran I/O blocks */
185 static cilist io___4 = {0, 6, 0, fmt_1, 0};
186 static cilist io___20 = {0, 6, 0, fmt_40, 0};
187 static cilist io___21 = {0, 6, 0, fmt_50, 0};
188 static cilist io___22 = {0, 6, 0, fmt_60, 0};
189 static cilist io___23 = {0, 6, 0, fmt_70, 0};
190 static cilist io___24 = {0, 6, 0, fmt_80, 0};
191 static cilist io___25 = {0, 6, 0, fmt_110, 0};
192 static cilist io___26 = {0, 6, 0, 0, 0};
193
194 lda = 1001;
195
196 /* this program was updated on 10/12/92 to correct a */
197 /* problem with the random number generator. The previous */
198 /* random number generator had a short period and produced */
199 /* singular matrices occasionally. */
200
201 n = 1000;
202 cray = .056f;
203 s_wsfe(&io___4);
204 e_wsfe();
205 /* Computing 3rd power */
206 d__1 = (real_type)n;
207 /* Computing 2nd power */
208 d__2 = (real_type)n;
209 ops = boost::lexical_cast<double>(real_type(d__1 * (d__1 * d__1) * 2. / 3. + d__2 * d__2 * 2.));
210
211 matgen_(a, &lda, &n, b, &norma);
212
213 /* ****************************************************************** */
214 /* ****************************************************************** */
215 /* you should replace the call to dgefa and dgesl */
216 /* by calls to your linear equation solver. */
217 /* ****************************************************************** */
218 /* ****************************************************************** */
219
220 t1 = second_();
221 dgefa_(a, &lda, &n, ipvt, &info);
222 time[0] = second_() - t1;
223 t1 = second_();
224 dgesl_(a, &lda, &n, ipvt, b, &c__0);
225 time[1] = second_() - t1;
226 total = time[0] + time[1];
227 /* ****************************************************************** */
228 /* ****************************************************************** */
229
230 /* compute a residual to verify results. */
231
232 i__1 = n;
233 for (i__ = 1; i__ <= i__1; ++i__)
234 {
235 x[i__ - 1] = b[i__ - 1];
236 /* L10: */
237 }
238 matgen_(a, &lda, &n, b, &norma);
239 i__1 = n;
240 for (i__ = 1; i__ <= i__1; ++i__)
241 {
242 b[i__ - 1] = -b[i__ - 1];
243 /* L20: */
244 }
245 dmxpy_(&n, b, &n, &lda, x, a);
246 resid = CAST_TO_RT(0);
247 normx = CAST_TO_RT(0);
248 i__1 = n;
249 for (i__ = 1; i__ <= i__1; ++i__)
250 {
251 /* Computing MAX */
252 d__2 = resid, d__3 = (d__1 = b[i__ - 1], abs(d__1));
253 resid = (max)(d__2, d__3);
254 /* Computing MAX */
255 d__2 = normx, d__3 = (d__1 = x[i__ - 1], abs(d__1));
256 normx = (max)(d__2, d__3);
257 /* L30: */
258 }
259 eps = epslon_(&c_b7);
260 residn = resid / (n * norma * normx * eps);
261 s_wsfe(&io___20);
262 e_wsfe();
263 s_wsfe(&io___21);
264 /*
265 do_fio(&c__1, (char *)&residn, (ftnlen)sizeof(real_type));
266 do_fio(&c__1, (char *)&resid, (ftnlen)sizeof(real_type));
267 do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real_type));
268 do_fio(&c__1, (char *)&x[0], (ftnlen)sizeof(real_type));
269 do_fio(&c__1, (char *)&x[n - 1], (ftnlen)sizeof(real_type));
270 */
271 std::cout << std::setw(12) << std::setprecision(5) << residn << " " << resid << " " << eps << " " << x[0] << " " << x[n - 1] << std::endl;
272 e_wsfe();
273
274 s_wsfe(&io___22);
275 do_fio(&c__1, (char*)&n, (ftnlen)sizeof(integer));
276 e_wsfe();
277 s_wsfe(&io___23);
278 e_wsfe();
279
280 time[2] = total;
281 time[3] = ops / (total * 1e6);
282 time[4] = 2. / time[3];
283 time[5] = total / cray;
284 s_wsfe(&io___24);
285 do_fio(&c__1, (char*)&lda, (ftnlen)sizeof(integer));
286 e_wsfe();
287 s_wsfe(&io___25);
288 for (i__ = 1; i__ <= 6; ++i__)
289 {
290 // do_fio(&c__1, (char *)&time[i__ - 1], (ftnlen)sizeof(real_type));
291 std::cout << std::setw(12) << std::setprecision(5) << time[i__ - 1];
292 }
293 e_wsfe();
294 s_wsle(&io___26);
295 do_lio(&c__9, &c__1, " end of tests -- this version dated 10/12/92", (ftnlen)44);
296 e_wsle();
297
298 s_stop("", (ftnlen)0);
299 return 0;
300 } /* MAIN__ */
301
302 /* Subroutine */ int matgen_(real_type* a, integer* lda, integer* n,
303 real_type* b, real_type* norma)
304 {
305 /* System generated locals */
306 integer a_dim1, a_offset, i__1, i__2;
307 real_type d__1, d__2;
308
309 /* Local variables */
310 static integer i__, j;
311 static integer init[4];
312
313 /* Parameter adjustments */
314 a_dim1 = *lda;
315 a_offset = 1 + a_dim1;
316 a -= a_offset;
317 --b;
318
319 /* Function Body */
320 init[0] = 1;
321 init[1] = 2;
322 init[2] = 3;
323 init[3] = 1325;
324 *norma = CAST_TO_RT(0);
325 i__1 = *n;
326 for (j = 1; j <= i__1; ++j)
327 {
328 i__2 = *n;
329 for (i__ = 1; i__ <= i__2; ++i__)
330 {
331 a[i__ + j * a_dim1] = ran_(init) - .5f;
332 /* Computing MAX */
333 d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
334 *norma = (max)(d__2, *norma);
335 /* L20: */
336 }
337 /* L30: */
338 }
339 i__1 = *n;
340 for (i__ = 1; i__ <= i__1; ++i__)
341 {
342 b[i__] = CAST_TO_RT(0);
343 /* L35: */
344 }
345 i__1 = *n;
346 for (j = 1; j <= i__1; ++j)
347 {
348 i__2 = *n;
349 for (i__ = 1; i__ <= i__2; ++i__)
350 {
351 b[i__] += a[i__ + j * a_dim1];
352 /* L40: */
353 }
354 /* L50: */
355 }
356 return 0;
357 } /* matgen_ */
358
359 /* Subroutine */ int dgefa_(real_type* a, integer* lda, integer* n, integer* ipvt, integer* info)
360 {
361 /* System generated locals */
362 integer a_dim1, a_offset, i__1, i__2, i__3;
363
364 /* Local variables */
365 static integer j, k, l;
366 static real_type t;
367 static integer kp1, nm1;
368
369 /* dgefa factors a double precision matrix by gaussian elimination. */
370
371 /* dgefa is usually called by dgeco, but it can be called */
372 /* directly with a saving in time if rcond is not needed. */
373 /* (time for dgeco) = (1 + 9/n)*(time for dgefa) . */
374
375 /* on entry */
376
377 /* a double precision(lda, n) */
378 /* the matrix to be factored. */
379
380 /* lda integer */
381 /* the leading dimension of the array a . */
382
383 /* n integer */
384 /* the order of the matrix a . */
385
386 /* on return */
387
388 /* a an upper triangular matrix and the multipliers */
389 /* which were used to obtain it. */
390 /* the factorization can be written a = l*u where */
391 /* l is a product of permutation and unit lower */
392 /* triangular matrices and u is upper triangular. */
393
394 /* ipvt integer(n) */
395 /* an integer vector of pivot indices. */
396
397 /* info integer */
398 /* = 0 normal value. */
399 /* = k if u(k,k) .eq. 0.0 . this is not an error */
400 /* condition for this subroutine, but it does */
401 /* indicate that dgesl or dgedi will divide by zero */
402 /* if called. use rcond in dgeco for a reliable */
403 /* indication of singularity. */
404
405 /* linpack. this version dated 08/14/78 . */
406 /* cleve moler, university of new mexico, argonne national lab. */
407
408 /* subroutines and functions */
409
410 /* blas daxpy,dscal,idamax */
411
412 /* internal variables */
413
414 /* gaussian elimination with partial pivoting */
415
416 /* Parameter adjustments */
417 a_dim1 = *lda;
418 a_offset = 1 + a_dim1;
419 a -= a_offset;
420 --ipvt;
421
422 /* Function Body */
423 *info = 0;
424 nm1 = *n - 1;
425 if (nm1 < 1)
426 {
427 goto L70;
428 }
429 i__1 = nm1;
430 for (k = 1; k <= i__1; ++k)
431 {
432 kp1 = k + 1;
433
434 /* find l = pivot index */
435
436 i__2 = *n - k + 1;
437 l = idamax_(&i__2, &a[k + k * a_dim1], &c__1) + k - 1;
438 ipvt[k] = l;
439
440 /* zero pivot implies this column already triangularized */
441
442 if (a[l + k * a_dim1] == 0.)
443 {
444 goto L40;
445 }
446
447 /* interchange if necessary */
448
449 if (l == k)
450 {
451 goto L10;
452 }
453 t = a[l + k * a_dim1];
454 a[l + k * a_dim1] = a[k + k * a_dim1];
455 a[k + k * a_dim1] = t;
456 L10:
457
458 /* compute multipliers */
459
460 t = -1. / a[k + k * a_dim1];
461 i__2 = *n - k;
462 dscal_(&i__2, &t, &a[k + 1 + k * a_dim1], &c__1);
463
464 /* row elimination with column indexing */
465
466 i__2 = *n;
467 for (j = kp1; j <= i__2; ++j)
468 {
469 t = a[l + j * a_dim1];
470 if (l == k)
471 {
472 goto L20;
473 }
474 a[l + j * a_dim1] = a[k + j * a_dim1];
475 a[k + j * a_dim1] = t;
476 L20:
477 i__3 = *n - k;
478 daxpy_(&i__3, &t, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + j * a_dim1], &c__1);
479 /* L30: */
480 }
481 goto L50;
482 L40:
483 *info = k;
484 L50:
485 /* L60: */
486 ;
487 }
488 L70:
489 ipvt[*n] = *n;
490 if (a[*n + *n * a_dim1] == 0.)
491 {
492 *info = *n;
493 }
494 return 0;
495 } /* dgefa_ */
496
497 /* Subroutine */ int dgesl_(real_type* a, integer* lda, integer* n, integer* ipvt, real_type* b, integer* job)
498 {
499 /* System generated locals */
500 integer a_dim1, a_offset, i__1, i__2;
501
502 /* Local variables */
503 static integer k, l;
504 static real_type t;
505 static integer kb, nm1;
506
507 /* dgesl solves the double precision system */
508 /* a * x = b or trans(a) * x = b */
509 /* using the factors computed by dgeco or dgefa. */
510
511 /* on entry */
512
513 /* a double precision(lda, n) */
514 /* the output from dgeco or dgefa. */
515
516 /* lda integer */
517 /* the leading dimension of the array a . */
518
519 /* n integer */
520 /* the order of the matrix a . */
521
522 /* ipvt integer(n) */
523 /* the pivot vector from dgeco or dgefa. */
524
525 /* b double precision(n) */
526 /* the right hand side vector. */
527
528 /* job integer */
529 /* = 0 to solve a*x = b , */
530 /* = nonzero to solve trans(a)*x = b where */
531 /* trans(a) is the transpose. */
532
533 /* on return */
534
535 /* b the solution vector x . */
536
537 /* error condition */
538
539 /* a division by zero will occur if the input factor contains a */
540 /* zero on the diagonal. technically this indicates singularity */
541 /* but it is often caused by improper arguments or improper */
542 /* setting of lda . it will not occur if the subroutines are */
543 /* called correctly and if dgeco has set rcond .gt. 0.0 */
544 /* or dgefa has set info .eq. 0 . */
545
546 /* to compute inverse(a) * c where c is a matrix */
547 /* with p columns */
548 /* call dgeco(a,lda,n,ipvt,rcond,z) */
549 /* if (rcond is too small) go to ... */
550 /* do 10 j = 1, p */
551 /* call dgesl(a,lda,n,ipvt,c(1,j),0) */
552 /* 10 continue */
553
554 /* linpack. this version dated 08/14/78 . */
555 /* cleve moler, university of new mexico, argonne national lab. */
556
557 /* subroutines and functions */
558
559 /* blas daxpy,ddot */
560
561 /* internal variables */
562
563 /* Parameter adjustments */
564 a_dim1 = *lda;
565 a_offset = 1 + a_dim1;
566 a -= a_offset;
567 --ipvt;
568 --b;
569
570 /* Function Body */
571 nm1 = *n - 1;
572 if (*job != 0)
573 {
574 goto L50;
575 }
576
577 /* job = 0 , solve a * x = b */
578 /* first solve l*y = b */
579
580 if (nm1 < 1)
581 {
582 goto L30;
583 }
584 i__1 = nm1;
585 for (k = 1; k <= i__1; ++k)
586 {
587 l = ipvt[k];
588 t = b[l];
589 if (l == k)
590 {
591 goto L10;
592 }
593 b[l] = b[k];
594 b[k] = t;
595 L10:
596 i__2 = *n - k;
597 daxpy_(&i__2, &t, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1], &c__1);
598 /* L20: */
599 }
600 L30:
601
602 /* now solve u*x = y */
603
604 i__1 = *n;
605 for (kb = 1; kb <= i__1; ++kb)
606 {
607 k = *n + 1 - kb;
608 b[k] /= a[k + k * a_dim1];
609 t = -b[k];
610 i__2 = k - 1;
611 daxpy_(&i__2, &t, &a[k * a_dim1 + 1], &c__1, &b[1], &c__1);
612 /* L40: */
613 }
614 goto L100;
615 L50:
616
617 /* job = nonzero, solve trans(a) * x = b */
618 /* first solve trans(u)*y = b */
619
620 i__1 = *n;
621 for (k = 1; k <= i__1; ++k)
622 {
623 i__2 = k - 1;
624 t = ddot_(&i__2, &a[k * a_dim1 + 1], &c__1, &b[1], &c__1);
625 b[k] = (b[k] - t) / a[k + k * a_dim1];
626 /* L60: */
627 }
628
629 /* now solve trans(l)*x = y */
630
631 if (nm1 < 1)
632 {
633 goto L90;
634 }
635 i__1 = nm1;
636 for (kb = 1; kb <= i__1; ++kb)
637 {
638 k = *n - kb;
639 i__2 = *n - k;
640 b[k] += ddot_(&i__2, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1], &c__1);
641 l = ipvt[k];
642 if (l == k)
643 {
644 goto L70;
645 }
646 t = b[l];
647 b[l] = b[k];
648 b[k] = t;
649 L70:
650 /* L80: */
651 ;
652 }
653 L90:
654 L100:
655 return 0;
656 } /* dgesl_ */
657
658 /* Subroutine */ int daxpy_(integer* n, real_type* da, real_type* dx,
659 integer* incx, real_type* dy, integer* incy)
660 {
661 /* System generated locals */
662 integer i__1;
663
664 /* Local variables */
665 static integer i__, m, ix, iy, mp1;
666
667 /* constant times a vector plus a vector. */
668 /* uses unrolled loops for increments equal to one. */
669 /* jack dongarra, linpack, 3/11/78. */
670
671 /* Parameter adjustments */
672 --dy;
673 --dx;
674
675 /* Function Body */
676 if (*n <= 0)
677 {
678 return 0;
679 }
680 if (*da == 0.)
681 {
682 return 0;
683 }
684 if (*incx == 1 && *incy == 1)
685 {
686 goto L20;
687 }
688
689 /* code for unequal increments or equal increments */
690 /* not equal to 1 */
691
692 ix = 1;
693 iy = 1;
694 if (*incx < 0)
695 {
696 ix = (-(*n) + 1) * *incx + 1;
697 }
698 if (*incy < 0)
699 {
700 iy = (-(*n) + 1) * *incy + 1;
701 }
702 i__1 = *n;
703 for (i__ = 1; i__ <= i__1; ++i__)
704 {
705 dy[iy] += *da * dx[ix];
706 ix += *incx;
707 iy += *incy;
708 /* L10: */
709 }
710 return 0;
711
712 /* code for both increments equal to 1 */
713
714 /* clean-up loop */
715
716 L20:
717 m = *n % 4;
718 if (m == 0)
719 {
720 goto L40;
721 }
722 i__1 = m;
723 for (i__ = 1; i__ <= i__1; ++i__)
724 {
725 dy[i__] += *da * dx[i__];
726 /* L30: */
727 }
728 if (*n < 4)
729 {
730 return 0;
731 }
732 L40:
733 mp1 = m + 1;
734 i__1 = *n;
735 for (i__ = mp1; i__ <= i__1; i__ += 4)
736 {
737 dy[i__] += *da * dx[i__];
738 dy[i__ + 1] += *da * dx[i__ + 1];
739 dy[i__ + 2] += *da * dx[i__ + 2];
740 dy[i__ + 3] += *da * dx[i__ + 3];
741 /* L50: */
742 }
743 return 0;
744 } /* daxpy_ */
745
746 real_type ddot_(integer* n, real_type* dx, integer* incx, real_type* dy,
747 integer* incy)
748 {
749 /* System generated locals */
750 integer i__1;
751 real_type ret_val;
752
753 /* Local variables */
754 static integer i__, m, ix, iy, mp1;
755 static real_type dtemp;
756
757 /* forms the dot product of two vectors. */
758 /* uses unrolled loops for increments equal to one. */
759 /* jack dongarra, linpack, 3/11/78. */
760
761 /* Parameter adjustments */
762 --dy;
763 --dx;
764
765 /* Function Body */
766 ret_val = CAST_TO_RT(0);
767 dtemp = CAST_TO_RT(0);
768 if (*n <= 0)
769 {
770 return ret_val;
771 }
772 if (*incx == 1 && *incy == 1)
773 {
774 goto L20;
775 }
776
777 /* code for unequal increments or equal increments */
778 /* not equal to 1 */
779
780 ix = 1;
781 iy = 1;
782 if (*incx < 0)
783 {
784 ix = (-(*n) + 1) * *incx + 1;
785 }
786 if (*incy < 0)
787 {
788 iy = (-(*n) + 1) * *incy + 1;
789 }
790 i__1 = *n;
791 for (i__ = 1; i__ <= i__1; ++i__)
792 {
793 dtemp += dx[ix] * dy[iy];
794 ix += *incx;
795 iy += *incy;
796 /* L10: */
797 }
798 ret_val = dtemp;
799 return ret_val;
800
801 /* code for both increments equal to 1 */
802
803 /* clean-up loop */
804
805 L20:
806 m = *n % 5;
807 if (m == 0)
808 {
809 goto L40;
810 }
811 i__1 = m;
812 for (i__ = 1; i__ <= i__1; ++i__)
813 {
814 dtemp += dx[i__] * dy[i__];
815 /* L30: */
816 }
817 if (*n < 5)
818 {
819 goto L60;
820 }
821 L40:
822 mp1 = m + 1;
823 i__1 = *n;
824 for (i__ = mp1; i__ <= i__1; i__ += 5)
825 {
826 dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 4] * dy[i__ + 4];
827 /* L50: */
828 }
829 L60:
830 ret_val = dtemp;
831 return ret_val;
832 } /* ddot_ */
833
834 /* Subroutine */ int dscal_(integer* n, real_type* da, real_type* dx,
835 integer* incx)
836 {
837 /* System generated locals */
838 integer i__1, i__2;
839
840 /* Local variables */
841 static integer i__, m, mp1, nincx;
842
843 /* scales a vector by a constant. */
844 /* uses unrolled loops for increment equal to one. */
845 /* jack dongarra, linpack, 3/11/78. */
846
847 /* Parameter adjustments */
848 --dx;
849
850 /* Function Body */
851 if (*n <= 0)
852 {
853 return 0;
854 }
855 if (*incx == 1)
856 {
857 goto L20;
858 }
859
860 /* code for increment not equal to 1 */
861
862 nincx = *n * *incx;
863 i__1 = nincx;
864 i__2 = *incx;
865 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2)
866 {
867 dx[i__] = *da * dx[i__];
868 /* L10: */
869 }
870 return 0;
871
872 /* code for increment equal to 1 */
873
874 /* clean-up loop */
875
876 L20:
877 m = *n % 5;
878 if (m == 0)
879 {
880 goto L40;
881 }
882 i__2 = m;
883 for (i__ = 1; i__ <= i__2; ++i__)
884 {
885 dx[i__] = *da * dx[i__];
886 /* L30: */
887 }
888 if (*n < 5)
889 {
890 return 0;
891 }
892 L40:
893 mp1 = m + 1;
894 i__2 = *n;
895 for (i__ = mp1; i__ <= i__2; i__ += 5)
896 {
897 dx[i__] = *da * dx[i__];
898 dx[i__ + 1] = *da * dx[i__ + 1];
899 dx[i__ + 2] = *da * dx[i__ + 2];
900 dx[i__ + 3] = *da * dx[i__ + 3];
901 dx[i__ + 4] = *da * dx[i__ + 4];
902 /* L50: */
903 }
904 return 0;
905 } /* dscal_ */
906
907 integer idamax_(integer* n, real_type* dx, integer* incx)
908 {
909 /* System generated locals */
910 integer ret_val, i__1;
911 real_type d__1;
912
913 /* Local variables */
914 static integer i__, ix;
915 static real_type dmax__;
916
917 /* finds the index of element having max. dabsolute value. */
918 /* jack dongarra, linpack, 3/11/78. */
919
920 /* Parameter adjustments */
921 --dx;
922
923 /* Function Body */
924 ret_val = 0;
925 if (*n < 1)
926 {
927 return ret_val;
928 }
929 ret_val = 1;
930 if (*n == 1)
931 {
932 return ret_val;
933 }
934 if (*incx == 1)
935 {
936 goto L20;
937 }
938
939 /* code for increment not equal to 1 */
940
941 ix = 1;
942 dmax__ = abs(dx[1]);
943 ix += *incx;
944 i__1 = *n;
945 for (i__ = 2; i__ <= i__1; ++i__)
946 {
947 if ((d__1 = dx[ix], abs(d__1)) <= dmax__)
948 {
949 goto L5;
950 }
951 ret_val = i__;
952 dmax__ = (d__1 = dx[ix], abs(d__1));
953 L5:
954 ix += *incx;
955 /* L10: */
956 }
957 return ret_val;
958
959 /* code for increment equal to 1 */
960
961 L20:
962 dmax__ = abs(dx[1]);
963 i__1 = *n;
964 for (i__ = 2; i__ <= i__1; ++i__)
965 {
966 if ((d__1 = dx[i__], abs(d__1)) <= dmax__)
967 {
968 goto L30;
969 }
970 ret_val = i__;
971 dmax__ = (d__1 = dx[i__], abs(d__1));
972 L30:;
973 }
974 return ret_val;
975 } /* idamax_ */
976
977 real_type epslon_(real_type* x)
978 {
979 #if defined(TEST_MPF_100) || defined(TEST_MPFR_100) || defined(TEST_GMPXX) || defined(TEST_MPFRXX)
980 return std::ldexp(1.0, 1 - ((100 + 1) * 1000L) / 301L);
981 #elif defined(TEST_CPP_DEC_FLOAT_BN)
982 return std::pow(10.0, 1 - std::numeric_limits<efx::cpp_dec_float_50>::digits10);
983 #elif defined(NATIVE_FLOAT128)
984 return FLT128_EPSILON;
985 #else
986 return CAST_TO_RT(std::numeric_limits<real_type>::epsilon());
987 #endif
988 } /* epslon_ */
989
990 /* Subroutine */ int mm_(real_type* a, integer* lda, integer* n1, integer* n3, real_type* b, integer* ldb, integer* n2, real_type* c__,
991 integer* ldc)
992 {
993 /* System generated locals */
994 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2;
995
996 /* Local variables */
997 static integer i__, j;
998
999 /* purpose: */
1000 /* multiply matrix b times matrix c and store the result in matrix a. */
1001
1002 /* parameters: */
1003
1004 /* a double precision(lda,n3), matrix of n1 rows and n3 columns */
1005
1006 /* lda integer, leading dimension of array a */
1007
1008 /* n1 integer, number of rows in matrices a and b */
1009
1010 /* n3 integer, number of columns in matrices a and c */
1011
1012 /* b double precision(ldb,n2), matrix of n1 rows and n2 columns */
1013
1014 /* ldb integer, leading dimension of array b */
1015
1016 /* n2 integer, number of columns in matrix b, and number of rows in */
1017 /* matrix c */
1018
1019 /* c double precision(ldc,n3), matrix of n2 rows and n3 columns */
1020
1021 /* ldc integer, leading dimension of array c */
1022
1023 /* ---------------------------------------------------------------------- */
1024
1025 /* Parameter adjustments */
1026 a_dim1 = *lda;
1027 a_offset = 1 + a_dim1;
1028 a -= a_offset;
1029 b_dim1 = *ldb;
1030 b_offset = 1 + b_dim1;
1031 b -= b_offset;
1032 c_dim1 = *ldc;
1033 c_offset = 1 + c_dim1;
1034 c__ -= c_offset;
1035
1036 /* Function Body */
1037 i__1 = *n3;
1038 for (j = 1; j <= i__1; ++j)
1039 {
1040 i__2 = *n1;
1041 for (i__ = 1; i__ <= i__2; ++i__)
1042 {
1043 a[i__ + j * a_dim1] = CAST_TO_RT(0);
1044 /* L10: */
1045 }
1046 dmxpy_(n2, &a[j * a_dim1 + 1], n1, ldb, &c__[j * c_dim1 + 1], &b[b_offset]);
1047 /* L20: */
1048 }
1049
1050 return 0;
1051 } /* mm_ */
1052
1053 /* Subroutine */ int dmxpy_(integer* n1, real_type* y, integer* n2, integer* ldm, real_type* x, real_type* m)
1054 {
1055 /* System generated locals */
1056 integer m_dim1, m_offset, i__1, i__2;
1057
1058 /* Local variables */
1059 static integer i__, j, jmin;
1060
1061 /* purpose: */
1062 /* multiply matrix m times vector x and add the result to vector y. */
1063
1064 /* parameters: */
1065
1066 /* n1 integer, number of elements in vector y, and number of rows in */
1067 /* matrix m */
1068
1069 /* y double precision(n1), vector of length n1 to which is added */
1070 /* the product m*x */
1071
1072 /* n2 integer, number of elements in vector x, and number of columns */
1073 /* in matrix m */
1074
1075 /* ldm integer, leading dimension of array m */
1076
1077 /* x double precision(n2), vector of length n2 */
1078
1079 /* m double precision(ldm,n2), matrix of n1 rows and n2 columns */
1080
1081 /* ---------------------------------------------------------------------- */
1082
1083 /* cleanup odd vector */
1084
1085 /* Parameter adjustments */
1086 --y;
1087 m_dim1 = *ldm;
1088 m_offset = 1 + m_dim1;
1089 m -= m_offset;
1090 --x;
1091
1092 /* Function Body */
1093 j = *n2 % 2;
1094 if (j >= 1)
1095 {
1096 i__1 = *n1;
1097 for (i__ = 1; i__ <= i__1; ++i__)
1098 {
1099 y[i__] += x[j] * m[i__ + j * m_dim1];
1100 /* L10: */
1101 }
1102 }
1103
1104 /* cleanup odd group of two vectors */
1105
1106 j = *n2 % 4;
1107 if (j >= 2)
1108 {
1109 i__1 = *n1;
1110 for (i__ = 1; i__ <= i__1; ++i__)
1111 {
1112 y[i__] = y[i__] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1113 /* L20: */
1114 }
1115 }
1116
1117 /* cleanup odd group of four vectors */
1118
1119 j = *n2 % 8;
1120 if (j >= 4)
1121 {
1122 i__1 = *n1;
1123 for (i__ = 1; i__ <= i__1; ++i__)
1124 {
1125 y[i__] = y[i__] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1126 /* L30: */
1127 }
1128 }
1129
1130 /* cleanup odd group of eight vectors */
1131
1132 j = *n2 % 16;
1133 if (j >= 8)
1134 {
1135 i__1 = *n1;
1136 for (i__ = 1; i__ <= i__1; ++i__)
1137 {
1138 y[i__] = y[i__] + x[j - 7] * m[i__ + (j - 7) * m_dim1] + x[j - 6] * m[i__ + (j - 6) * m_dim1] + x[j - 5] * m[i__ + (j - 5) * m_dim1] + x[j - 4] * m[i__ + (j - 4) * m_dim1] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1139 /* L40: */
1140 }
1141 }
1142
1143 /* main loop - groups of sixteen vectors */
1144
1145 jmin = j + 16;
1146 i__1 = *n2;
1147 for (j = jmin; j <= i__1; j += 16)
1148 {
1149 i__2 = *n1;
1150 for (i__ = 1; i__ <= i__2; ++i__)
1151 {
1152 y[i__] = y[i__] + x[j - 15] * m[i__ + (j - 15) * m_dim1] + x[j - 14] * m[i__ + (j - 14) * m_dim1] + x[j - 13] * m[i__ + (j - 13) * m_dim1] + x[j - 12] * m[i__ + (j - 12) * m_dim1] + x[j - 11] * m[i__ + (j - 11) * m_dim1] + x[j - 10] * m[i__ + (j - 10) * m_dim1] + x[j - 9] * m[i__ + (j - 9) * m_dim1] + x[j - 8] * m[i__ + (j - 8) * m_dim1] + x[j - 7] * m[i__ + (j - 7) * m_dim1] + x[j - 6] * m[i__ + (j - 6) * m_dim1] + x[j - 5] * m[i__ + (j - 5) * m_dim1] + x[j - 4] * m[i__ + (j - 4) * m_dim1] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1153 /* L50: */
1154 }
1155 /* L60: */
1156 }
1157 return 0;
1158 } /* dmxpy_ */
1159
1160 real_type ran_(integer* iseed)
1161 {
1162 /* System generated locals */
1163 real_type ret_val;
1164
1165 /* Local variables */
1166 static integer it1, it2, it3, it4;
1167
1168 /* modified from the LAPACK auxiliary routine 10/12/92 JD */
1169 /* -- LAPACK auxiliary routine (version 1.0) -- */
1170 /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
1171 /* Courant Institute, Argonne National Lab, and Rice University */
1172 /* February 29, 1992 */
1173
1174 /* .. Array Arguments .. */
1175 /* .. */
1176
1177 /* Purpose */
1178 /* ======= */
1179
1180 /* DLARAN returns a random double number from a uniform (0,1) */
1181 /* distribution. */
1182
1183 /* Arguments */
1184 /* ========= */
1185
1186 /* ISEED (input/output) INTEGER array, dimension (4) */
1187 /* On entry, the seed of the random number generator; the array */
1188 /* elements must be between 0 and 4095, and ISEED(4) must be */
1189 /* odd. */
1190 /* On exit, the seed is updated. */
1191
1192 /* Further Details */
1193 /* =============== */
1194
1195 /* This routine uses a multiplicative congruential method with modulus */
1196 /* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */
1197 /* 'Multiplicative congruential random number generators with modulus */
1198 /* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */
1199 /* b = 48', Math. Comp. 189, pp 331-344, 1990). */
1200
1201 /* 48-bit integers are stored in 4 integer array elements with 12 bits */
1202 /* per element. Hence the routine is portable across machines with */
1203 /* integers of 32 bits or more. */
1204
1205 /* .. Parameters .. */
1206 /* .. */
1207 /* .. Local Scalars .. */
1208 /* .. */
1209 /* .. Intrinsic Functions .. */
1210 /* .. */
1211 /* .. Executable Statements .. */
1212
1213 /* multiply the seed by the multiplier modulo 2**48 */
1214
1215 /* Parameter adjustments */
1216 --iseed;
1217
1218 /* Function Body */
1219 it4 = iseed[4] * 2549;
1220 it3 = it4 / 4096;
1221 it4 -= it3 << 12;
1222 it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508;
1223 it2 = it3 / 4096;
1224 it3 -= it2 << 12;
1225 it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322;
1226 it1 = it2 / 4096;
1227 it2 -= it1 << 12;
1228 it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] * 494;
1229 it1 %= 4096;
1230
1231 /* return updated seed */
1232
1233 iseed[1] = it1;
1234 iseed[2] = it2;
1235 iseed[3] = it3;
1236 iseed[4] = it4;
1237
1238 /* convert 48-bit integer to a double number in the interval (0,1) */
1239
1240 ret_val = ((real_type)it1 + ((real_type)it2 + ((real_type)it3 + (real_type)it4 * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4;
1241 return ret_val;
1242
1243 /* End of RAN */
1244
1245 } /* ran_ */
1246
1247 /*
1248
1249 Double results:
1250 ~~~~~~~~~~~~~~
1251
1252 norm. resid resid machep x(1) x(n)
1253 6.4915 7.207e-013 2.2204e-016 1 1
1254
1255
1256
1257 times are reported for matrices of order 1000
1258 factor solve total mflops unit ratio
1259 times for array with leading dimension of1001
1260 1.443 0.003 1.446 462.43 0.004325 25.821
1261
1262
1263 mpf_class results:
1264 ~~~~~~~~~~~~~~~~~~
1265
1266 norm. resid resid machep x(1) x(n)
1267 3.6575e-05 5.2257e-103 2.8575e-101 1 1
1268
1269
1270
1271 times are reported for matrices of order 1000
1272 factor solve total mflops unit ratio
1273 times for array with leading dimension of1001
1274 266.45 0.798 267.24 2.5021 0.79933 4772.2
1275
1276
1277 number<gmp_float<100> >:
1278 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1279
1280 norm. resid resid machep x(1) x(n)
1281 0.36575e-4 0.52257e-102 0.28575e-100 0.1e1 0.1e1
1282
1283
1284
1285 times are reported for matrices of order 1000
1286 factor solve total mflops unit ratio
1287 times for array with leading dimension of1001
1288 279.96 0.84 280.8 2.3813 0.83988 5014.3
1289
1290 boost::multiprecision::ef::cpp_dec_float_50:
1291 ~~~~~~~~~~~~~~~~~~~~~~~~~
1292
1293 norm. resid resid machep x(1) x(n)
1294 2.551330735e-16 1.275665107e-112 1e-99 1 1
1295
1296
1297
1298 times are reported for matrices of order 1000
1299 factor solve total mflops unit ratio
1300 times for array with leading dimension of1001
1301 363.89 1.074 364.97 1.8321 1.0916 6517.3
1302 */