ceph/src/boost/libs/multiprecision/performance/linpack-benchmark.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //  Copyright 2011 John Maddock. Distributed under the Boost
   3 //  Software License, Version 1.0. (See accompanying file
   4 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   5
   6 /* 1000d.f -- translated by f2c (version 20050501).
   7 You must link the resulting object file with libf2c:
   8 on Microsoft Windows system, link with libf2c.lib;
   9 on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  10 or, if you install libf2c.a in a standard place, with -lf2c -lm
  11 -- in that order, at the end of the command line, as in
  12 cc *.o -lf2c -lm
  13 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  14
  15 http://www.netlib.org/f2c/libf2c.zip
  16 */
  17 #include <iostream>
  18 #include <iomanip>
  19 #include <cmath>
  20
  21 #if defined(TEST_GMPXX)
  22 #include <gmpxx.h>
  23 typedef mpf_class real_type;
  24 #elif defined(TEST_MPFRXX)
  25 #include <gmpfrxx.h>
  26 typedef mpfr_class real_type;
  27 #elif defined(TEST_CPP_DEC_FLOAT)
  28 #include <boost/multiprecision/cpp_dec_float.hpp>
  29 typedef boost::multiprecision::cpp_dec_float_50 real_type;
  30 #elif defined(TEST_MPFR_50)
  31 #include <boost/multiprecision/mpfr.hpp>
  32 typedef boost::multiprecision::mpfr_float_50 real_type;
  33 #elif defined(TEST_MPF_50)
  34 #include <boost/multiprecision/gmp.hpp>
  35 typedef boost::multiprecision::mpf_float_50 real_type;
  36 #elif defined(NATIVE_FLOAT128)
  37 #include <boost/multiprecision/float128.hpp>
  38 typedef __float128 real_type;
  39
  40 std::ostream& operator<<(std::ostream& os, const __float128& f)
  41 {
  42    return os << boost::multiprecision::float128(f);
  43 }
  44
  45 #include <boost/type_traits/has_left_shift.hpp>
  46
  47 namespace boost {
  48
  49 template <>
  50 struct has_left_shift<std::basic_ostream<char>, __float128> : public mpl::true_
  51 {};
  52
  53 template <>
  54 double lexical_cast<double, __float128>(const __float128& f)
  55 {
  56    return f;
  57 }
  58
  59 } // namespace boost
  60
  61 #elif defined(TEST_FLOAT128)
  62 #include <boost/multiprecision/float128.hpp>
  63 typedef boost::multiprecision::float128 real_type;
  64 #elif defined(TEST_CPP_BIN_FLOAT_QUAD)
  65 #include <boost/multiprecision/cpp_bin_float.hpp>
  66 typedef boost::multiprecision::cpp_bin_float_quad real_type;
  67 #elif defined(TEST_CPP_BIN_FLOAT_OCT)
  68 #include <boost/multiprecision/cpp_bin_float.hpp>
  69 typedef boost::multiprecision::cpp_bin_float_oct real_type;
  70 #else
  71 typedef double real_type;
  72 #endif
  73
  74 #include <boost/lexical_cast.hpp>
  75
  76 #ifndef CAST_TO_RT
  77 #define CAST_TO_RT(x) x
  78 #endif
  79
  80 extern "C" {
  81 #include "f2c.h"
  82 integer s_wsfe(cilist*), e_wsfe(void), do_fio(integer*, char*, ftnlen),
  83     s_wsle(cilist*), do_lio(integer*, integer*, char*, ftnlen),
  84     e_wsle(void);
  85 /* Subroutine */ int s_stop(char*, ftnlen);
  86
  87 #undef abs
  88 #undef dabs
  89 #define dabs abs
  90 #undef dmin
  91 #undef dmax
  92 #define dmin min
  93 #define dmax max
  94 }
  95 #include <time.h>
  96
  97 using std::max;
  98 using std::min;
  99
 100 /* Table of constant values */
 101
 102 static integer   c__0 = 0;
 103 static real_type c_b7 = CAST_TO_RT(1);
 104 static integer   c__1 = 1;
 105 static integer   c__9 = 9;
 106
 107 inline double second_(void)
 108 {
 109    return ((double)(clock())) / CLOCKS_PER_SEC;
 110 }
 111
 112 int       dgefa_(real_type*, integer*, integer*, integer*, integer*), dgesl_(real_type*, integer*, integer*, integer*, real_type*, integer*);
 113 int       dmxpy_(integer*, real_type*, integer*, integer*, real_type*, real_type*);
 114 int       matgen_(real_type*, integer*, integer*, real_type*, real_type*);
 115 real_type epslon_(real_type*);
 116 real_type ran_(integer*);
 117 int       dscal_(integer*, real_type*, real_type*, integer*);
 118 int       daxpy_(integer*, real_type*, real_type*, integer*, real_type*, integer*);
 119 integer   idamax_(integer*, real_type*, integer*);
 120 real_type ddot_(integer*, real_type*, integer*, real_type*, integer*);
 121 int       daxpy_(integer*, real_type*, real_type*, integer*, real_type*, integer*);
 122 int       dmxpy_(integer*, real_type*, integer*, integer*, real_type*, real_type*);
 123
 124 extern "C" int MAIN__()
 125 {
 126 #ifdef TEST_MPF_50
 127    std::cout << "Testing number<mpf_float<50> >" << std::endl;
 128 #elif defined(TEST_MPFR_50)
 129    std::cout << "Testing number<mpf_float<50> >" << std::endl;
 130 #elif defined(TEST_GMPXX)
 131    std::cout << "Testing mpf_class at 50 decimal degits" << std::endl;
 132    mpf_set_default_prec(((50 + 1) * 1000L) / 301L);
 133 #elif defined(TEST_MPFRXX)
 134    std::cout << "Testing mpfr_class at 50 decimal degits" << std::endl;
 135    mpfr_set_default_prec(((50 + 1) * 1000L) / 301L);
 136 #elif defined(TEST_CPP_DEC_FLOAT)
 137    std::cout << "Testing number<cpp_dec_float<50> >" << std::endl;
 138 #elif defined(NATIVE_FLOAT128)
 139    std::cout << "Testing __float128" << std::endl;
 140 #elif defined(TEST_FLOAT128)
 141    std::cout << "Testing number<float128_backend, et_off>" << std::endl;
 142 #else
 143    std::cout << "Testing double" << std::endl;
 144 #endif
 145
 146    /* Format strings */
 147    static char fmt_1[] = "(\002 Please send the results of this run to:\002"
 148                          "//\002 Jack J. Dongarra\002/\002 Computer Science Department\002/"
 149                          "\002 University of Tennessee\002/\002 Knoxville, Tennessee 37996"
 150                          "-1300\002//\002 Fax: 615-974-8296\002//\002 Internet: dongarra@c"
 151                          "s.utk.edu\002/)";
 152    static char fmt_40[] = "(\002     norm. resid      resid           mac"
 153                           "hep\002,\002         x(1)          x(n)\002)";
 154    static char fmt_50[] = "(1p5e16.8)";
 155    static char fmt_60[] = "(//\002    times are reported for matrices of or"
 156                           "der \002,i5)";
 157    static char fmt_70[] = "(6x,\002factor\002,5x,\002solve\002,6x,\002tota"
 158                           "l\002,5x,\002mflops\002,7x,\002unit\002,6x,\002ratio\002)";
 159    static char fmt_80[] = "(\002 times for array with leading dimension o"
 160                           "f\002,i4)";
 161    static char fmt_110[] = "(6(1pe11.3))";
 162
 163    /* System generated locals */
 164    integer   i__1;
 165    real_type d__1, d__2, d__3;
 166
 167    /* Builtin functions */
 168
 169    /* Local variables */
 170    static real_type a[1001000] /* was [1001][1000] */, b[1000];
 171    static integer   i__, n;
 172    static real_type x[1000];
 173    static double    t1;
 174    static integer   lda;
 175    static double    ops;
 176    static real_type eps;
 177    static integer   info;
 178    static double    time[6], cray, total;
 179    static integer   ipvt[1000];
 180    static real_type resid, norma;
 181    static real_type normx;
 182    static real_type residn;
 183
 184    /* Fortran I/O blocks */
 185    static cilist io___4  = {0, 6, 0, fmt_1, 0};
 186    static cilist io___20 = {0, 6, 0, fmt_40, 0};
 187    static cilist io___21 = {0, 6, 0, fmt_50, 0};
 188    static cilist io___22 = {0, 6, 0, fmt_60, 0};
 189    static cilist io___23 = {0, 6, 0, fmt_70, 0};
 190    static cilist io___24 = {0, 6, 0, fmt_80, 0};
 191    static cilist io___25 = {0, 6, 0, fmt_110, 0};
 192    static cilist io___26 = {0, 6, 0, 0, 0};
 193
 194    lda = 1001;
 195
 196    /*     this program was updated on 10/12/92 to correct a */
 197    /*     problem with the random number generator. The previous */
 198    /*     random number generator had a short period and produced */
 199    /*     singular matrices occasionally. */
 200
 201    n    = 1000;
 202    cray = .056f;
 203    s_wsfe(&io___4);
 204    e_wsfe();
 205    /* Computing 3rd power */
 206    d__1 = (real_type)n;
 207    /* Computing 2nd power */
 208    d__2 = (real_type)n;
 209    ops  = boost::lexical_cast<double>(real_type(d__1 * (d__1 * d__1) * 2. / 3. + d__2 * d__2 * 2.));
 210
 211    matgen_(a, &lda, &n, b, &norma);
 212
 213    /* ****************************************************************** */
 214    /* ****************************************************************** */
 215    /*        you should replace the call to dgefa and dgesl */
 216    /*        by calls to your linear equation solver. */
 217    /* ****************************************************************** */
 218    /* ****************************************************************** */
 219
 220    t1 = second_();
 221    dgefa_(a, &lda, &n, ipvt, &info);
 222    time[0] = second_() - t1;
 223    t1      = second_();
 224    dgesl_(a, &lda, &n, ipvt, b, &c__0);
 225    time[1] = second_() - t1;
 226    total   = time[0] + time[1];
 227    /* ****************************************************************** */
 228    /* ****************************************************************** */
 229
 230    /*     compute a residual to verify results. */
 231
 232    i__1 = n;
 233    for (i__ = 1; i__ <= i__1; ++i__)
 234    {
 235       x[i__ - 1] = b[i__ - 1];
 236       /* L10: */
 237    }
 238    matgen_(a, &lda, &n, b, &norma);
 239    i__1 = n;
 240    for (i__ = 1; i__ <= i__1; ++i__)
 241    {
 242       b[i__ - 1] = -b[i__ - 1];
 243       /* L20: */
 244    }
 245    dmxpy_(&n, b, &n, &lda, x, a);
 246    resid = CAST_TO_RT(0);
 247    normx = CAST_TO_RT(0);
 248    i__1  = n;
 249    for (i__ = 1; i__ <= i__1; ++i__)
 250    {
 251       /* Computing MAX */
 252       d__2 = resid, d__3 = (d__1 = b[i__ - 1], abs(d__1));
 253       resid = (max)(d__2, d__3);
 254       /* Computing MAX */
 255       d__2 = normx, d__3 = (d__1 = x[i__ - 1], abs(d__1));
 256       normx = (max)(d__2, d__3);
 257       /* L30: */
 258    }
 259    eps    = epslon_(&c_b7);
 260    residn = resid / (n * norma * normx * eps);
 261    s_wsfe(&io___20);
 262    e_wsfe();
 263    s_wsfe(&io___21);
 264    /*
 265    do_fio(&c__1, (char *)&residn, (ftnlen)sizeof(real_type));
 266    do_fio(&c__1, (char *)&resid, (ftnlen)sizeof(real_type));
 267    do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real_type));
 268    do_fio(&c__1, (char *)&x[0], (ftnlen)sizeof(real_type));
 269    do_fio(&c__1, (char *)&x[n - 1], (ftnlen)sizeof(real_type));
 270    */
 271    std::cout << std::setw(12) << std::setprecision(5) << residn << " " << resid << " " << eps << " " << x[0] << " " << x[n - 1] << std::endl;
 272    e_wsfe();
 273
 274    s_wsfe(&io___22);
 275    do_fio(&c__1, (char*)&n, (ftnlen)sizeof(integer));
 276    e_wsfe();
 277    s_wsfe(&io___23);
 278    e_wsfe();
 279
 280    time[2] = total;
 281    time[3] = ops / (total * 1e6);
 282    time[4] = 2. / time[3];
 283    time[5] = total / cray;
 284    s_wsfe(&io___24);
 285    do_fio(&c__1, (char*)&lda, (ftnlen)sizeof(integer));
 286    e_wsfe();
 287    s_wsfe(&io___25);
 288    for (i__ = 1; i__ <= 6; ++i__)
 289    {
 290       // do_fio(&c__1, (char *)&time[i__ - 1], (ftnlen)sizeof(real_type));
 291       std::cout << std::setw(12) << std::setprecision(5) << time[i__ - 1];
 292    }
 293    e_wsfe();
 294    s_wsle(&io___26);
 295    do_lio(&c__9, &c__1, " end of tests -- this version dated 10/12/92", (ftnlen)44);
 296    e_wsle();
 297
 298    s_stop("", (ftnlen)0);
 299    return 0;
 300 } /* MAIN__ */
 301
 302 /* Subroutine */ int matgen_(real_type* a, integer* lda, integer* n,
 303                              real_type* b, real_type* norma)
 304 {
 305    /* System generated locals */
 306    integer   a_dim1, a_offset, i__1, i__2;
 307    real_type d__1, d__2;
 308
 309    /* Local variables */
 310    static integer i__, j;
 311    static integer init[4];
 312
 313    /* Parameter adjustments */
 314    a_dim1   = *lda;
 315    a_offset = 1 + a_dim1;
 316    a -= a_offset;
 317    --b;
 318
 319    /* Function Body */
 320    init[0] = 1;
 321    init[1] = 2;
 322    init[2] = 3;
 323    init[3] = 1325;
 324    *norma  = CAST_TO_RT(0);
 325    i__1    = *n;
 326    for (j = 1; j <= i__1; ++j)
 327    {
 328       i__2 = *n;
 329       for (i__ = 1; i__ <= i__2; ++i__)
 330       {
 331          a[i__ + j * a_dim1] = ran_(init) - .5f;
 332          /* Computing MAX */
 333          d__2   = (d__1 = a[i__ + j * a_dim1], abs(d__1));
 334          *norma = (max)(d__2, *norma);
 335          /* L20: */
 336       }
 337       /* L30: */
 338    }
 339    i__1 = *n;
 340    for (i__ = 1; i__ <= i__1; ++i__)
 341    {
 342       b[i__] = CAST_TO_RT(0);
 343       /* L35: */
 344    }
 345    i__1 = *n;
 346    for (j = 1; j <= i__1; ++j)
 347    {
 348       i__2 = *n;
 349       for (i__ = 1; i__ <= i__2; ++i__)
 350       {
 351          b[i__] += a[i__ + j * a_dim1];
 352          /* L40: */
 353       }
 354       /* L50: */
 355    }
 356    return 0;
 357 } /* matgen_ */
 358
 359 /* Subroutine */ int dgefa_(real_type* a, integer* lda, integer* n, integer* ipvt, integer* info)
 360 {
 361    /* System generated locals */
 362    integer a_dim1, a_offset, i__1, i__2, i__3;
 363
 364    /* Local variables */
 365    static integer   j, k, l;
 366    static real_type t;
 367    static integer   kp1, nm1;
 368
 369    /*     dgefa factors a double precision matrix by gaussian elimination. */
 370
 371    /*     dgefa is usually called by dgeco, but it can be called */
 372    /*     directly with a saving in time if  rcond  is not needed. */
 373    /*     (time for dgeco) = (1 + 9/n)*(time for dgefa) . */
 374
 375    /*     on entry */
 376
 377    /*        a       double precision(lda, n) */
 378    /*                the matrix to be factored. */
 379
 380    /*        lda     integer */
 381    /*                the leading dimension of the array  a . */
 382
 383    /*        n       integer */
 384    /*                the order of the matrix  a . */
 385
 386    /*     on return */
 387
 388    /*        a       an upper triangular matrix and the multipliers */
 389    /*                which were used to obtain it. */
 390    /*                the factorization can be written  a = l*u  where */
 391    /*                l  is a product of permutation and unit lower */
 392    /*                triangular matrices and  u  is upper triangular. */
 393
 394    /*        ipvt    integer(n) */
 395    /*                an integer vector of pivot indices. */
 396
 397    /*        info    integer */
 398    /*                = 0  normal value. */
 399    /*                = k  if  u(k,k) .eq. 0.0 .  this is not an error */
 400    /*                     condition for this subroutine, but it does */
 401    /*                     indicate that dgesl or dgedi will divide by zero */
 402    /*                     if called.  use  rcond  in dgeco for a reliable */
 403    /*                     indication of singularity. */
 404
 405    /*     linpack. this version dated 08/14/78 . */
 406    /*     cleve moler, university of new mexico, argonne national lab. */
 407
 408    /*     subroutines and functions */
 409
 410    /*     blas daxpy,dscal,idamax */
 411
 412    /*     internal variables */
 413
 414    /*     gaussian elimination with partial pivoting */
 415
 416    /* Parameter adjustments */
 417    a_dim1   = *lda;
 418    a_offset = 1 + a_dim1;
 419    a -= a_offset;
 420    --ipvt;
 421
 422    /* Function Body */
 423    *info = 0;
 424    nm1   = *n - 1;
 425    if (nm1 < 1)
 426    {
 427       goto L70;
 428    }
 429    i__1 = nm1;
 430    for (k = 1; k <= i__1; ++k)
 431    {
 432       kp1 = k + 1;
 433
 434       /*        find l = pivot index */
 435
 436       i__2    = *n - k + 1;
 437       l       = idamax_(&i__2, &a[k + k * a_dim1], &c__1) + k - 1;
 438       ipvt[k] = l;
 439
 440       /*        zero pivot implies this column already triangularized */
 441
 442       if (a[l + k * a_dim1] == 0.)
 443       {
 444          goto L40;
 445       }
 446
 447       /*           interchange if necessary */
 448
 449       if (l == k)
 450       {
 451          goto L10;
 452       }
 453       t                 = a[l + k * a_dim1];
 454       a[l + k * a_dim1] = a[k + k * a_dim1];
 455       a[k + k * a_dim1] = t;
 456    L10:
 457
 458       /*           compute multipliers */
 459
 460       t    = -1. / a[k + k * a_dim1];
 461       i__2 = *n - k;
 462       dscal_(&i__2, &t, &a[k + 1 + k * a_dim1], &c__1);
 463
 464       /*           row elimination with column indexing */
 465
 466       i__2 = *n;
 467       for (j = kp1; j <= i__2; ++j)
 468       {
 469          t = a[l + j * a_dim1];
 470          if (l == k)
 471          {
 472             goto L20;
 473          }
 474          a[l + j * a_dim1] = a[k + j * a_dim1];
 475          a[k + j * a_dim1] = t;
 476       L20:
 477          i__3 = *n - k;
 478          daxpy_(&i__3, &t, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + j * a_dim1], &c__1);
 479          /* L30: */
 480       }
 481       goto L50;
 482    L40:
 483       *info = k;
 484    L50:
 485        /* L60: */
 486        ;
 487    }
 488 L70:
 489    ipvt[*n] = *n;
 490    if (a[*n + *n * a_dim1] == 0.)
 491    {
 492       *info = *n;
 493    }
 494    return 0;
 495 } /* dgefa_ */
 496
 497 /* Subroutine */ int dgesl_(real_type* a, integer* lda, integer* n, integer* ipvt, real_type* b, integer* job)
 498 {
 499    /* System generated locals */
 500    integer a_dim1, a_offset, i__1, i__2;
 501
 502    /* Local variables */
 503    static integer   k, l;
 504    static real_type t;
 505    static integer   kb, nm1;
 506
 507    /*     dgesl solves the double precision system */
 508    /*     a * x = b  or  trans(a) * x = b */
 509    /*     using the factors computed by dgeco or dgefa. */
 510
 511    /*     on entry */
 512
 513    /*        a       double precision(lda, n) */
 514    /*                the output from dgeco or dgefa. */
 515
 516    /*        lda     integer */
 517    /*                the leading dimension of the array  a . */
 518
 519    /*        n       integer */
 520    /*                the order of the matrix  a . */
 521
 522    /*        ipvt    integer(n) */
 523    /*                the pivot vector from dgeco or dgefa. */
 524
 525    /*        b       double precision(n) */
 526    /*                the right hand side vector. */
 527
 528    /*        job     integer */
 529    /*                = 0         to solve  a*x = b , */
 530    /*                = nonzero   to solve  trans(a)*x = b  where */
 531    /*                            trans(a)  is the transpose. */
 532
 533    /*     on return */
 534
 535    /*        b       the solution vector  x . */
 536
 537    /*     error condition */
 538
 539    /*        a division by zero will occur if the input factor contains a */
 540    /*        zero on the diagonal.  technically this indicates singularity */
 541    /*        but it is often caused by improper arguments or improper */
 542    /*        setting of lda .  it will not occur if the subroutines are */
 543    /*        called correctly and if dgeco has set rcond .gt. 0.0 */
 544    /*        or dgefa has set info .eq. 0 . */
 545
 546    /*     to compute  inverse(a) * c  where  c  is a matrix */
 547    /*     with  p  columns */
 548    /*           call dgeco(a,lda,n,ipvt,rcond,z) */
 549    /*           if (rcond is too small) go to ... */
 550    /*           do 10 j = 1, p */
 551    /*              call dgesl(a,lda,n,ipvt,c(1,j),0) */
 552    /*        10 continue */
 553
 554    /*     linpack. this version dated 08/14/78 . */
 555    /*     cleve moler, university of new mexico, argonne national lab. */
 556
 557    /*     subroutines and functions */
 558
 559    /*     blas daxpy,ddot */
 560
 561    /*     internal variables */
 562
 563    /* Parameter adjustments */
 564    a_dim1   = *lda;
 565    a_offset = 1 + a_dim1;
 566    a -= a_offset;
 567    --ipvt;
 568    --b;
 569
 570    /* Function Body */
 571    nm1 = *n - 1;
 572    if (*job != 0)
 573    {
 574       goto L50;
 575    }
 576
 577    /*        job = 0 , solve  a * x = b */
 578    /*        first solve  l*y = b */
 579
 580    if (nm1 < 1)
 581    {
 582       goto L30;
 583    }
 584    i__1 = nm1;
 585    for (k = 1; k <= i__1; ++k)
 586    {
 587       l = ipvt[k];
 588       t = b[l];
 589       if (l == k)
 590       {
 591          goto L10;
 592       }
 593       b[l] = b[k];
 594       b[k] = t;
 595    L10:
 596       i__2 = *n - k;
 597       daxpy_(&i__2, &t, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1], &c__1);
 598       /* L20: */
 599    }
 600 L30:
 601
 602    /*        now solve  u*x = y */
 603
 604    i__1 = *n;
 605    for (kb = 1; kb <= i__1; ++kb)
 606    {
 607       k = *n + 1 - kb;
 608       b[k] /= a[k + k * a_dim1];
 609       t    = -b[k];
 610       i__2 = k - 1;
 611       daxpy_(&i__2, &t, &a[k * a_dim1 + 1], &c__1, &b[1], &c__1);
 612       /* L40: */
 613    }
 614    goto L100;
 615 L50:
 616
 617    /*        job = nonzero, solve  trans(a) * x = b */
 618    /*        first solve  trans(u)*y = b */
 619
 620    i__1 = *n;
 621    for (k = 1; k <= i__1; ++k)
 622    {
 623       i__2 = k - 1;
 624       t    = ddot_(&i__2, &a[k * a_dim1 + 1], &c__1, &b[1], &c__1);
 625       b[k] = (b[k] - t) / a[k + k * a_dim1];
 626       /* L60: */
 627    }
 628
 629    /*        now solve trans(l)*x = y */
 630
 631    if (nm1 < 1)
 632    {
 633       goto L90;
 634    }
 635    i__1 = nm1;
 636    for (kb = 1; kb <= i__1; ++kb)
 637    {
 638       k    = *n - kb;
 639       i__2 = *n - k;
 640       b[k] += ddot_(&i__2, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1], &c__1);
 641       l = ipvt[k];
 642       if (l == k)
 643       {
 644          goto L70;
 645       }
 646       t    = b[l];
 647       b[l] = b[k];
 648       b[k] = t;
 649    L70:
 650        /* L80: */
 651        ;
 652    }
 653 L90:
 654 L100:
 655    return 0;
 656 } /* dgesl_ */
 657
 658 /* Subroutine */ int daxpy_(integer* n, real_type* da, real_type* dx,
 659                             integer* incx, real_type* dy, integer* incy)
 660 {
 661    /* System generated locals */
 662    integer i__1;
 663
 664    /* Local variables */
 665    static integer i__, m, ix, iy, mp1;
 666
 667    /*     constant times a vector plus a vector. */
 668    /*     uses unrolled loops for increments equal to one. */
 669    /*     jack dongarra, linpack, 3/11/78. */
 670
 671    /* Parameter adjustments */
 672    --dy;
 673    --dx;
 674
 675    /* Function Body */
 676    if (*n <= 0)
 677    {
 678       return 0;
 679    }
 680    if (*da == 0.)
 681    {
 682       return 0;
 683    }
 684    if (*incx == 1 && *incy == 1)
 685    {
 686       goto L20;
 687    }
 688
 689    /*        code for unequal increments or equal increments */
 690    /*          not equal to 1 */
 691
 692    ix = 1;
 693    iy = 1;
 694    if (*incx < 0)
 695    {
 696       ix = (-(*n) + 1) * *incx + 1;
 697    }
 698    if (*incy < 0)
 699    {
 700       iy = (-(*n) + 1) * *incy + 1;
 701    }
 702    i__1 = *n;
 703    for (i__ = 1; i__ <= i__1; ++i__)
 704    {
 705       dy[iy] += *da * dx[ix];
 706       ix += *incx;
 707       iy += *incy;
 708       /* L10: */
 709    }
 710    return 0;
 711
 712    /*        code for both increments equal to 1 */
 713
 714    /*        clean-up loop */
 715
 716 L20:
 717    m = *n % 4;
 718    if (m == 0)
 719    {
 720       goto L40;
 721    }
 722    i__1 = m;
 723    for (i__ = 1; i__ <= i__1; ++i__)
 724    {
 725       dy[i__] += *da * dx[i__];
 726       /* L30: */
 727    }
 728    if (*n < 4)
 729    {
 730       return 0;
 731    }
 732 L40:
 733    mp1  = m + 1;
 734    i__1 = *n;
 735    for (i__ = mp1; i__ <= i__1; i__ += 4)
 736    {
 737       dy[i__] += *da * dx[i__];
 738       dy[i__ + 1] += *da * dx[i__ + 1];
 739       dy[i__ + 2] += *da * dx[i__ + 2];
 740       dy[i__ + 3] += *da * dx[i__ + 3];
 741       /* L50: */
 742    }
 743    return 0;
 744 } /* daxpy_ */
 745
 746 real_type ddot_(integer* n, real_type* dx, integer* incx, real_type* dy,
 747                 integer* incy)
 748 {
 749    /* System generated locals */
 750    integer   i__1;
 751    real_type ret_val;
 752
 753    /* Local variables */
 754    static integer   i__, m, ix, iy, mp1;
 755    static real_type dtemp;
 756
 757    /*     forms the dot product of two vectors. */
 758    /*     uses unrolled loops for increments equal to one. */
 759    /*     jack dongarra, linpack, 3/11/78. */
 760
 761    /* Parameter adjustments */
 762    --dy;
 763    --dx;
 764
 765    /* Function Body */
 766    ret_val = CAST_TO_RT(0);
 767    dtemp   = CAST_TO_RT(0);
 768    if (*n <= 0)
 769    {
 770       return ret_val;
 771    }
 772    if (*incx == 1 && *incy == 1)
 773    {
 774       goto L20;
 775    }
 776
 777    /*        code for unequal increments or equal increments */
 778    /*          not equal to 1 */
 779
 780    ix = 1;
 781    iy = 1;
 782    if (*incx < 0)
 783    {
 784       ix = (-(*n) + 1) * *incx + 1;
 785    }
 786    if (*incy < 0)
 787    {
 788       iy = (-(*n) + 1) * *incy + 1;
 789    }
 790    i__1 = *n;
 791    for (i__ = 1; i__ <= i__1; ++i__)
 792    {
 793       dtemp += dx[ix] * dy[iy];
 794       ix += *incx;
 795       iy += *incy;
 796       /* L10: */
 797    }
 798    ret_val = dtemp;
 799    return ret_val;
 800
 801    /*        code for both increments equal to 1 */
 802
 803    /*        clean-up loop */
 804
 805 L20:
 806    m = *n % 5;
 807    if (m == 0)
 808    {
 809       goto L40;
 810    }
 811    i__1 = m;
 812    for (i__ = 1; i__ <= i__1; ++i__)
 813    {
 814       dtemp += dx[i__] * dy[i__];
 815       /* L30: */
 816    }
 817    if (*n < 5)
 818    {
 819       goto L60;
 820    }
 821 L40:
 822    mp1  = m + 1;
 823    i__1 = *n;
 824    for (i__ = mp1; i__ <= i__1; i__ += 5)
 825    {
 826       dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 4] * dy[i__ + 4];
 827       /* L50: */
 828    }
 829 L60:
 830    ret_val = dtemp;
 831    return ret_val;
 832 } /* ddot_ */
 833
 834 /* Subroutine */ int dscal_(integer* n, real_type* da, real_type* dx,
 835                             integer* incx)
 836 {
 837    /* System generated locals */
 838    integer i__1, i__2;
 839
 840    /* Local variables */
 841    static integer i__, m, mp1, nincx;
 842
 843    /*     scales a vector by a constant. */
 844    /*     uses unrolled loops for increment equal to one. */
 845    /*     jack dongarra, linpack, 3/11/78. */
 846
 847    /* Parameter adjustments */
 848    --dx;
 849
 850    /* Function Body */
 851    if (*n <= 0)
 852    {
 853       return 0;
 854    }
 855    if (*incx == 1)
 856    {
 857       goto L20;
 858    }
 859
 860    /*        code for increment not equal to 1 */
 861
 862    nincx = *n * *incx;
 863    i__1  = nincx;
 864    i__2  = *incx;
 865    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2)
 866    {
 867       dx[i__] = *da * dx[i__];
 868       /* L10: */
 869    }
 870    return 0;
 871
 872    /*        code for increment equal to 1 */
 873
 874    /*        clean-up loop */
 875
 876 L20:
 877    m = *n % 5;
 878    if (m == 0)
 879    {
 880       goto L40;
 881    }
 882    i__2 = m;
 883    for (i__ = 1; i__ <= i__2; ++i__)
 884    {
 885       dx[i__] = *da * dx[i__];
 886       /* L30: */
 887    }
 888    if (*n < 5)
 889    {
 890       return 0;
 891    }
 892 L40:
 893    mp1  = m + 1;
 894    i__2 = *n;
 895    for (i__ = mp1; i__ <= i__2; i__ += 5)
 896    {
 897       dx[i__]     = *da * dx[i__];
 898       dx[i__ + 1] = *da * dx[i__ + 1];
 899       dx[i__ + 2] = *da * dx[i__ + 2];
 900       dx[i__ + 3] = *da * dx[i__ + 3];
 901       dx[i__ + 4] = *da * dx[i__ + 4];
 902       /* L50: */
 903    }
 904    return 0;
 905 } /* dscal_ */
 906
 907 integer idamax_(integer* n, real_type* dx, integer* incx)
 908 {
 909    /* System generated locals */
 910    integer   ret_val, i__1;
 911    real_type d__1;
 912
 913    /* Local variables */
 914    static integer   i__, ix;
 915    static real_type dmax__;
 916
 917    /*     finds the index of element having max. dabsolute value. */
 918    /*     jack dongarra, linpack, 3/11/78. */
 919
 920    /* Parameter adjustments */
 921    --dx;
 922
 923    /* Function Body */
 924    ret_val = 0;
 925    if (*n < 1)
 926    {
 927       return ret_val;
 928    }
 929    ret_val = 1;
 930    if (*n == 1)
 931    {
 932       return ret_val;
 933    }
 934    if (*incx == 1)
 935    {
 936       goto L20;
 937    }
 938
 939    /*        code for increment not equal to 1 */
 940
 941    ix     = 1;
 942    dmax__ = abs(dx[1]);
 943    ix += *incx;
 944    i__1 = *n;
 945    for (i__ = 2; i__ <= i__1; ++i__)
 946    {
 947       if ((d__1 = dx[ix], abs(d__1)) <= dmax__)
 948       {
 949          goto L5;
 950       }
 951       ret_val = i__;
 952       dmax__  = (d__1 = dx[ix], abs(d__1));
 953    L5:
 954       ix += *incx;
 955       /* L10: */
 956    }
 957    return ret_val;
 958
 959    /*        code for increment equal to 1 */
 960
 961 L20:
 962    dmax__ = abs(dx[1]);
 963    i__1   = *n;
 964    for (i__ = 2; i__ <= i__1; ++i__)
 965    {
 966       if ((d__1 = dx[i__], abs(d__1)) <= dmax__)
 967       {
 968          goto L30;
 969       }
 970       ret_val = i__;
 971       dmax__  = (d__1 = dx[i__], abs(d__1));
 972    L30:;
 973    }
 974    return ret_val;
 975 } /* idamax_ */
 976
 977 real_type epslon_(real_type* x)
 978 {
 979 #if defined(TEST_MPF_100) || defined(TEST_MPFR_100) || defined(TEST_GMPXX) || defined(TEST_MPFRXX)
 980    return std::ldexp(1.0, 1 - ((100 + 1) * 1000L) / 301L);
 981 #elif defined(TEST_CPP_DEC_FLOAT_BN)
 982    return std::pow(10.0, 1 - std::numeric_limits<efx::cpp_dec_float_50>::digits10);
 983 #elif defined(NATIVE_FLOAT128)
 984    return FLT128_EPSILON;
 985 #else
 986    return CAST_TO_RT(std::numeric_limits<real_type>::epsilon());
 987 #endif
 988 } /* epslon_ */
 989
 990 /* Subroutine */ int mm_(real_type* a, integer* lda, integer* n1, integer* n3, real_type* b, integer* ldb, integer* n2, real_type* c__,
 991                          integer* ldc)
 992 {
 993    /* System generated locals */
 994    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2;
 995
 996    /* Local variables */
 997    static integer i__, j;
 998
 999    /*   purpose: */
1000    /*     multiply matrix b times matrix c and store the result in matrix a. */
1001
1002    /*   parameters: */
1003
1004    /*     a double precision(lda,n3), matrix of n1 rows and n3 columns */
1005
1006    /*     lda integer, leading dimension of array a */
1007
1008    /*     n1 integer, number of rows in matrices a and b */
1009
1010    /*     n3 integer, number of columns in matrices a and c */
1011
1012    /*     b double precision(ldb,n2), matrix of n1 rows and n2 columns */
1013
1014    /*     ldb integer, leading dimension of array b */
1015
1016    /*     n2 integer, number of columns in matrix b, and number of rows in */
1017    /*         matrix c */
1018
1019    /*     c double precision(ldc,n3), matrix of n2 rows and n3 columns */
1020
1021    /*     ldc integer, leading dimension of array c */
1022
1023    /* ---------------------------------------------------------------------- */
1024
1025    /* Parameter adjustments */
1026    a_dim1   = *lda;
1027    a_offset = 1 + a_dim1;
1028    a -= a_offset;
1029    b_dim1   = *ldb;
1030    b_offset = 1 + b_dim1;
1031    b -= b_offset;
1032    c_dim1   = *ldc;
1033    c_offset = 1 + c_dim1;
1034    c__ -= c_offset;
1035
1036    /* Function Body */
1037    i__1 = *n3;
1038    for (j = 1; j <= i__1; ++j)
1039    {
1040       i__2 = *n1;
1041       for (i__ = 1; i__ <= i__2; ++i__)
1042       {
1043          a[i__ + j * a_dim1] = CAST_TO_RT(0);
1044          /* L10: */
1045       }
1046       dmxpy_(n2, &a[j * a_dim1 + 1], n1, ldb, &c__[j * c_dim1 + 1], &b[b_offset]);
1047       /* L20: */
1048    }
1049
1050    return 0;
1051 } /* mm_ */
1052
1053 /* Subroutine */ int dmxpy_(integer* n1, real_type* y, integer* n2, integer* ldm, real_type* x, real_type* m)
1054 {
1055    /* System generated locals */
1056    integer m_dim1, m_offset, i__1, i__2;
1057
1058    /* Local variables */
1059    static integer i__, j, jmin;
1060
1061    /*   purpose: */
1062    /*     multiply matrix m times vector x and add the result to vector y. */
1063
1064    /*   parameters: */
1065
1066    /*     n1 integer, number of elements in vector y, and number of rows in */
1067    /*         matrix m */
1068
1069    /*     y double precision(n1), vector of length n1 to which is added */
1070    /*         the product m*x */
1071
1072    /*     n2 integer, number of elements in vector x, and number of columns */
1073    /*         in matrix m */
1074
1075    /*     ldm integer, leading dimension of array m */
1076
1077    /*     x double precision(n2), vector of length n2 */
1078
1079    /*     m double precision(ldm,n2), matrix of n1 rows and n2 columns */
1080
1081    /* ---------------------------------------------------------------------- */
1082
1083    /*   cleanup odd vector */
1084
1085    /* Parameter adjustments */
1086    --y;
1087    m_dim1   = *ldm;
1088    m_offset = 1 + m_dim1;
1089    m -= m_offset;
1090    --x;
1091
1092    /* Function Body */
1093    j = *n2 % 2;
1094    if (j >= 1)
1095    {
1096       i__1 = *n1;
1097       for (i__ = 1; i__ <= i__1; ++i__)
1098       {
1099          y[i__] += x[j] * m[i__ + j * m_dim1];
1100          /* L10: */
1101       }
1102    }
1103
1104    /*   cleanup odd group of two vectors */
1105
1106    j = *n2 % 4;
1107    if (j >= 2)
1108    {
1109       i__1 = *n1;
1110       for (i__ = 1; i__ <= i__1; ++i__)
1111       {
1112          y[i__] = y[i__] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1113          /* L20: */
1114       }
1115    }
1116
1117    /*   cleanup odd group of four vectors */
1118
1119    j = *n2 % 8;
1120    if (j >= 4)
1121    {
1122       i__1 = *n1;
1123       for (i__ = 1; i__ <= i__1; ++i__)
1124       {
1125          y[i__] = y[i__] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1126          /* L30: */
1127       }
1128    }
1129
1130    /*   cleanup odd group of eight vectors */
1131
1132    j = *n2 % 16;
1133    if (j >= 8)
1134    {
1135       i__1 = *n1;
1136       for (i__ = 1; i__ <= i__1; ++i__)
1137       {
1138          y[i__] = y[i__] + x[j - 7] * m[i__ + (j - 7) * m_dim1] + x[j - 6] * m[i__ + (j - 6) * m_dim1] + x[j - 5] * m[i__ + (j - 5) * m_dim1] + x[j - 4] * m[i__ + (j - 4) * m_dim1] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1139          /* L40: */
1140       }
1141    }
1142
1143    /*   main loop - groups of sixteen vectors */
1144
1145    jmin = j + 16;
1146    i__1 = *n2;
1147    for (j = jmin; j <= i__1; j += 16)
1148    {
1149       i__2 = *n1;
1150       for (i__ = 1; i__ <= i__2; ++i__)
1151       {
1152          y[i__] = y[i__] + x[j - 15] * m[i__ + (j - 15) * m_dim1] + x[j - 14] * m[i__ + (j - 14) * m_dim1] + x[j - 13] * m[i__ + (j - 13) * m_dim1] + x[j - 12] * m[i__ + (j - 12) * m_dim1] + x[j - 11] * m[i__ + (j - 11) * m_dim1] + x[j - 10] * m[i__ + (j - 10) * m_dim1] + x[j - 9] * m[i__ + (j - 9) * m_dim1] + x[j - 8] * m[i__ + (j - 8) * m_dim1] + x[j - 7] * m[i__ + (j - 7) * m_dim1] + x[j - 6] * m[i__ + (j - 6) * m_dim1] + x[j - 5] * m[i__ + (j - 5) * m_dim1] + x[j - 4] * m[i__ + (j - 4) * m_dim1] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1];
1153          /* L50: */
1154       }
1155       /* L60: */
1156    }
1157    return 0;
1158 } /* dmxpy_ */
1159
1160 real_type ran_(integer* iseed)
1161 {
1162    /* System generated locals */
1163    real_type ret_val;
1164
1165    /* Local variables */
1166    static integer it1, it2, it3, it4;
1167
1168    /*     modified from the LAPACK auxiliary routine 10/12/92 JD */
1169    /*  -- LAPACK auxiliary routine (version 1.0) -- */
1170    /*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
1171    /*     Courant Institute, Argonne National Lab, and Rice University */
1172    /*     February 29, 1992 */
1173
1174    /*     .. Array Arguments .. */
1175    /*     .. */
1176
1177    /*  Purpose */
1178    /*  ======= */
1179
1180    /*  DLARAN returns a random double number from a uniform (0,1) */
1181    /*  distribution. */
1182
1183    /*  Arguments */
1184    /*  ========= */
1185
1186    /*  ISEED   (input/output) INTEGER array, dimension (4) */
1187    /*          On entry, the seed of the random number generator; the array */
1188    /*          elements must be between 0 and 4095, and ISEED(4) must be */
1189    /*          odd. */
1190    /*          On exit, the seed is updated. */
1191
1192    /*  Further Details */
1193    /*  =============== */
1194
1195    /*  This routine uses a multiplicative congruential method with modulus */
1196    /*  2**48 and multiplier 33952834046453 (see G.S.Fishman, */
1197    /*  'Multiplicative congruential random number generators with modulus */
1198    /*  2**b: an exhaustive analysis for b = 32 and a partial analysis for */
1199    /*  b = 48', Math. Comp. 189, pp 331-344, 1990). */
1200
1201    /*  48-bit integers are stored in 4 integer array elements with 12 bits */
1202    /*  per element. Hence the routine is portable across machines with */
1203    /*  integers of 32 bits or more. */
1204
1205    /*     .. Parameters .. */
1206    /*     .. */
1207    /*     .. Local Scalars .. */
1208    /*     .. */
1209    /*     .. Intrinsic Functions .. */
1210    /*     .. */
1211    /*     .. Executable Statements .. */
1212
1213    /*     multiply the seed by the multiplier modulo 2**48 */
1214
1215    /* Parameter adjustments */
1216    --iseed;
1217
1218    /* Function Body */
1219    it4 = iseed[4] * 2549;
1220    it3 = it4 / 4096;
1221    it4 -= it3 << 12;
1222    it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508;
1223    it2 = it3 / 4096;
1224    it3 -= it2 << 12;
1225    it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322;
1226    it1 = it2 / 4096;
1227    it2 -= it1 << 12;
1228    it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] * 494;
1229    it1 %= 4096;
1230
1231    /*     return updated seed */
1232
1233    iseed[1] = it1;
1234    iseed[2] = it2;
1235    iseed[3] = it3;
1236    iseed[4] = it4;
1237
1238    /*     convert 48-bit integer to a double number in the interval (0,1) */
1239
1240    ret_val = ((real_type)it1 + ((real_type)it2 + ((real_type)it3 + (real_type)it4 * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4;
1241    return ret_val;
1242
1243    /*     End of RAN */
1244
1245 } /* ran_ */
1246
1247 /*
1248
1249 Double results:
1250 ~~~~~~~~~~~~~~
1251
1252 norm. resid      resid           machep         x(1)          x(n)
1253 6.4915           7.207e-013      2.2204e-016    1             1
1254
1255
1256
1257 times are reported for matrices of order  1000
1258 factor     solve      total     mflops       unit      ratio
1259 times for array with leading dimension of1001
1260 1.443     0.003      1.446     462.43       0.004325  25.821
1261
1262
1263 mpf_class results:
1264 ~~~~~~~~~~~~~~~~~~
1265
1266 norm. resid      resid           machep         x(1)          x(n)
1267 3.6575e-05       5.2257e-103     2.8575e-101    1             1
1268
1269
1270
1271 times are reported for matrices of order  1000
1272 factor     solve      total     mflops       unit      ratio
1273 times for array with leading dimension of1001
1274 266.45     0.798      267.24    2.5021       0.79933   4772.2
1275
1276
1277 number<gmp_float<100> >:
1278 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1279
1280      norm. resid      resid           machep         x(1)          x(n)
1281   0.36575e-4          0.52257e-102   0.28575e-100    0.1e1         0.1e1
1282
1283
1284
1285     times are reported for matrices of order  1000
1286       factor     solve      total     mflops       unit      ratio
1287  times for array with leading dimension of1001
1288       279.96        0.84       280.8      2.3813     0.83988      5014.3
1289
1290 boost::multiprecision::ef::cpp_dec_float_50:
1291 ~~~~~~~~~~~~~~~~~~~~~~~~~
1292
1293      norm. resid      resid           machep         x(1)          x(n)
1294      2.551330735e-16  1.275665107e-112 1e-99         1             1
1295
1296
1297
1298     times are reported for matrices of order  1000
1299       factor     solve      total     mflops       unit      ratio
1300  times for array with leading dimension of1001
1301       363.89      1.074     364.97    1.8321       1.0916    6517.3
1302 */