]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Copyright 2011 John Maddock. Distributed under the Boost | |
3 | // Software License, Version 1.0. (See accompanying file | |
4 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | /* 1000d.f -- translated by f2c (version 20050501). | |
7 | You must link the resulting object file with libf2c: | |
8 | on Microsoft Windows system, link with libf2c.lib; | |
9 | on Linux or Unix systems, link with .../path/to/libf2c.a -lm | |
10 | or, if you install libf2c.a in a standard place, with -lf2c -lm | |
11 | -- in that order, at the end of the command line, as in | |
12 | cc *.o -lf2c -lm | |
13 | Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., | |
14 | ||
15 | http://www.netlib.org/f2c/libf2c.zip | |
16 | */ | |
17 | #include <iostream> | |
18 | #include <iomanip> | |
19 | #include <cmath> | |
20 | ||
21 | #if defined(TEST_GMPXX) | |
22 | #include <gmpxx.h> | |
23 | typedef mpf_class real_type; | |
24 | #elif defined(TEST_MPFRXX) | |
25 | #include <gmpfrxx.h> | |
26 | typedef mpfr_class real_type; | |
27 | #elif defined(TEST_CPP_DEC_FLOAT) | |
28 | #include <boost/multiprecision/cpp_dec_float.hpp> | |
29 | typedef boost::multiprecision::cpp_dec_float_50 real_type; | |
30 | #elif defined(TEST_MPFR_50) | |
31 | #include <boost/multiprecision/mpfr.hpp> | |
32 | typedef boost::multiprecision::mpfr_float_50 real_type; | |
33 | #elif defined(TEST_MPF_50) | |
34 | #include <boost/multiprecision/gmp.hpp> | |
35 | typedef boost::multiprecision::mpf_float_50 real_type; | |
36 | #elif defined(NATIVE_FLOAT128) | |
37 | #include <boost/multiprecision/float128.hpp> | |
38 | typedef __float128 real_type; | |
39 | ||
40 | std::ostream& operator<<(std::ostream& os, const __float128& f) | |
41 | { | |
42 | return os << boost::multiprecision::float128(f); | |
43 | } | |
44 | ||
45 | #include <boost/type_traits/has_left_shift.hpp> | |
46 | ||
92f5a8d4 | 47 | namespace boost { |
7c673cae | 48 | |
92f5a8d4 TL |
49 | template <> |
50 | struct has_left_shift<std::basic_ostream<char>, __float128> : public mpl::true_ | |
51 | {}; | |
7c673cae | 52 | |
92f5a8d4 | 53 | template <> |
7c673cae | 54 | double lexical_cast<double, __float128>(const __float128& f) |
92f5a8d4 TL |
55 | { |
56 | return f; | |
7c673cae FG |
57 | } |
58 | ||
92f5a8d4 TL |
59 | } // namespace boost |
60 | ||
7c673cae FG |
61 | #elif defined(TEST_FLOAT128) |
62 | #include <boost/multiprecision/float128.hpp> | |
63 | typedef boost::multiprecision::float128 real_type; | |
64 | #elif defined(TEST_CPP_BIN_FLOAT_QUAD) | |
65 | #include <boost/multiprecision/cpp_bin_float.hpp> | |
66 | typedef boost::multiprecision::cpp_bin_float_quad real_type; | |
92f5a8d4 TL |
67 | #elif defined(TEST_CPP_BIN_FLOAT_OCT) |
68 | #include <boost/multiprecision/cpp_bin_float.hpp> | |
69 | typedef boost::multiprecision::cpp_bin_float_oct real_type; | |
7c673cae FG |
70 | #else |
71 | typedef double real_type; | |
72 | #endif | |
73 | ||
74 | #include <boost/lexical_cast.hpp> | |
75 | ||
76 | #ifndef CAST_TO_RT | |
92f5a8d4 | 77 | #define CAST_TO_RT(x) x |
7c673cae FG |
78 | #endif |
79 | ||
80 | extern "C" { | |
81 | #include "f2c.h" | |
92f5a8d4 TL |
82 | integer s_wsfe(cilist*), e_wsfe(void), do_fio(integer*, char*, ftnlen), |
83 | s_wsle(cilist*), do_lio(integer*, integer*, char*, ftnlen), | |
84 | e_wsle(void); | |
85 | /* Subroutine */ int s_stop(char*, ftnlen); | |
7c673cae FG |
86 | |
87 | #undef abs | |
88 | #undef dabs | |
89 | #define dabs abs | |
90 | #undef dmin | |
91 | #undef dmax | |
92 | #define dmin min | |
93 | #define dmax max | |
7c673cae FG |
94 | } |
95 | #include <time.h> | |
96 | ||
7c673cae | 97 | using std::max; |
92f5a8d4 | 98 | using std::min; |
7c673cae FG |
99 | |
100 | /* Table of constant values */ | |
101 | ||
92f5a8d4 | 102 | static integer c__0 = 0; |
7c673cae | 103 | static real_type c_b7 = CAST_TO_RT(1); |
92f5a8d4 TL |
104 | static integer c__1 = 1; |
105 | static integer c__9 = 9; | |
7c673cae FG |
106 | |
107 | inline double second_(void) | |
108 | { | |
109 | return ((double)(clock())) / CLOCKS_PER_SEC; | |
110 | } | |
111 | ||
92f5a8d4 TL |
112 | int dgefa_(real_type*, integer*, integer*, integer*, integer*), dgesl_(real_type*, integer*, integer*, integer*, real_type*, integer*); |
113 | int dmxpy_(integer*, real_type*, integer*, integer*, real_type*, real_type*); | |
114 | int matgen_(real_type*, integer*, integer*, real_type*, real_type*); | |
115 | real_type epslon_(real_type*); | |
116 | real_type ran_(integer*); | |
117 | int dscal_(integer*, real_type*, real_type*, integer*); | |
118 | int daxpy_(integer*, real_type*, real_type*, integer*, real_type*, integer*); | |
119 | integer idamax_(integer*, real_type*, integer*); | |
120 | real_type ddot_(integer*, real_type*, integer*, real_type*, integer*); | |
121 | int daxpy_(integer*, real_type*, real_type*, integer*, real_type*, integer*); | |
122 | int dmxpy_(integer*, real_type*, integer*, integer*, real_type*, real_type*); | |
7c673cae FG |
123 | |
124 | extern "C" int MAIN__() | |
125 | { | |
126 | #ifdef TEST_MPF_50 | |
127 | std::cout << "Testing number<mpf_float<50> >" << std::endl; | |
128 | #elif defined(TEST_MPFR_50) | |
129 | std::cout << "Testing number<mpf_float<50> >" << std::endl; | |
130 | #elif defined(TEST_GMPXX) | |
131 | std::cout << "Testing mpf_class at 50 decimal degits" << std::endl; | |
132 | mpf_set_default_prec(((50 + 1) * 1000L) / 301L); | |
133 | #elif defined(TEST_MPFRXX) | |
134 | std::cout << "Testing mpfr_class at 50 decimal degits" << std::endl; | |
135 | mpfr_set_default_prec(((50 + 1) * 1000L) / 301L); | |
136 | #elif defined(TEST_CPP_DEC_FLOAT) | |
137 | std::cout << "Testing number<cpp_dec_float<50> >" << std::endl; | |
138 | #elif defined(NATIVE_FLOAT128) | |
139 | std::cout << "Testing __float128" << std::endl; | |
140 | #elif defined(TEST_FLOAT128) | |
141 | std::cout << "Testing number<float128_backend, et_off>" << std::endl; | |
142 | #else | |
143 | std::cout << "Testing double" << std::endl; | |
144 | #endif | |
145 | ||
7c673cae FG |
146 | /* Format strings */ |
147 | static char fmt_1[] = "(\002 Please send the results of this run to:\002" | |
92f5a8d4 TL |
148 | "//\002 Jack J. Dongarra\002/\002 Computer Science Department\002/" |
149 | "\002 University of Tennessee\002/\002 Knoxville, Tennessee 37996" | |
150 | "-1300\002//\002 Fax: 615-974-8296\002//\002 Internet: dongarra@c" | |
151 | "s.utk.edu\002/)"; | |
7c673cae | 152 | static char fmt_40[] = "(\002 norm. resid resid mac" |
92f5a8d4 | 153 | "hep\002,\002 x(1) x(n)\002)"; |
7c673cae FG |
154 | static char fmt_50[] = "(1p5e16.8)"; |
155 | static char fmt_60[] = "(//\002 times are reported for matrices of or" | |
92f5a8d4 | 156 | "der \002,i5)"; |
7c673cae | 157 | static char fmt_70[] = "(6x,\002factor\002,5x,\002solve\002,6x,\002tota" |
92f5a8d4 | 158 | "l\002,5x,\002mflops\002,7x,\002unit\002,6x,\002ratio\002)"; |
7c673cae | 159 | static char fmt_80[] = "(\002 times for array with leading dimension o" |
92f5a8d4 | 160 | "f\002,i4)"; |
7c673cae FG |
161 | static char fmt_110[] = "(6(1pe11.3))"; |
162 | ||
163 | /* System generated locals */ | |
92f5a8d4 | 164 | integer i__1; |
7c673cae FG |
165 | real_type d__1, d__2, d__3; |
166 | ||
167 | /* Builtin functions */ | |
168 | ||
169 | /* Local variables */ | |
170 | static real_type a[1001000] /* was [1001][1000] */, b[1000]; | |
92f5a8d4 | 171 | static integer i__, n; |
7c673cae | 172 | static real_type x[1000]; |
92f5a8d4 TL |
173 | static double t1; |
174 | static integer lda; | |
175 | static double ops; | |
7c673cae | 176 | static real_type eps; |
92f5a8d4 TL |
177 | static integer info; |
178 | static double time[6], cray, total; | |
179 | static integer ipvt[1000]; | |
7c673cae FG |
180 | static real_type resid, norma; |
181 | static real_type normx; | |
182 | static real_type residn; | |
183 | ||
184 | /* Fortran I/O blocks */ | |
92f5a8d4 TL |
185 | static cilist io___4 = {0, 6, 0, fmt_1, 0}; |
186 | static cilist io___20 = {0, 6, 0, fmt_40, 0}; | |
187 | static cilist io___21 = {0, 6, 0, fmt_50, 0}; | |
188 | static cilist io___22 = {0, 6, 0, fmt_60, 0}; | |
189 | static cilist io___23 = {0, 6, 0, fmt_70, 0}; | |
190 | static cilist io___24 = {0, 6, 0, fmt_80, 0}; | |
191 | static cilist io___25 = {0, 6, 0, fmt_110, 0}; | |
192 | static cilist io___26 = {0, 6, 0, 0, 0}; | |
7c673cae FG |
193 | |
194 | lda = 1001; | |
195 | ||
196 | /* this program was updated on 10/12/92 to correct a */ | |
197 | /* problem with the random number generator. The previous */ | |
198 | /* random number generator had a short period and produced */ | |
199 | /* singular matrices occasionally. */ | |
200 | ||
92f5a8d4 | 201 | n = 1000; |
7c673cae FG |
202 | cray = .056f; |
203 | s_wsfe(&io___4); | |
204 | e_wsfe(); | |
205 | /* Computing 3rd power */ | |
92f5a8d4 | 206 | d__1 = (real_type)n; |
7c673cae | 207 | /* Computing 2nd power */ |
92f5a8d4 TL |
208 | d__2 = (real_type)n; |
209 | ops = boost::lexical_cast<double>(real_type(d__1 * (d__1 * d__1) * 2. / 3. + d__2 * d__2 * 2.)); | |
7c673cae FG |
210 | |
211 | matgen_(a, &lda, &n, b, &norma); | |
212 | ||
213 | /* ****************************************************************** */ | |
214 | /* ****************************************************************** */ | |
215 | /* you should replace the call to dgefa and dgesl */ | |
216 | /* by calls to your linear equation solver. */ | |
217 | /* ****************************************************************** */ | |
218 | /* ****************************************************************** */ | |
219 | ||
220 | t1 = second_(); | |
221 | dgefa_(a, &lda, &n, ipvt, &info); | |
222 | time[0] = second_() - t1; | |
92f5a8d4 | 223 | t1 = second_(); |
7c673cae FG |
224 | dgesl_(a, &lda, &n, ipvt, b, &c__0); |
225 | time[1] = second_() - t1; | |
92f5a8d4 | 226 | total = time[0] + time[1]; |
7c673cae FG |
227 | /* ****************************************************************** */ |
228 | /* ****************************************************************** */ | |
229 | ||
230 | /* compute a residual to verify results. */ | |
231 | ||
232 | i__1 = n; | |
92f5a8d4 TL |
233 | for (i__ = 1; i__ <= i__1; ++i__) |
234 | { | |
7c673cae FG |
235 | x[i__ - 1] = b[i__ - 1]; |
236 | /* L10: */ | |
237 | } | |
238 | matgen_(a, &lda, &n, b, &norma); | |
239 | i__1 = n; | |
92f5a8d4 TL |
240 | for (i__ = 1; i__ <= i__1; ++i__) |
241 | { | |
7c673cae FG |
242 | b[i__ - 1] = -b[i__ - 1]; |
243 | /* L20: */ | |
244 | } | |
245 | dmxpy_(&n, b, &n, &lda, x, a); | |
246 | resid = CAST_TO_RT(0); | |
247 | normx = CAST_TO_RT(0); | |
92f5a8d4 TL |
248 | i__1 = n; |
249 | for (i__ = 1; i__ <= i__1; ++i__) | |
250 | { | |
7c673cae FG |
251 | /* Computing MAX */ |
252 | d__2 = resid, d__3 = (d__1 = b[i__ - 1], abs(d__1)); | |
92f5a8d4 | 253 | resid = (max)(d__2, d__3); |
7c673cae FG |
254 | /* Computing MAX */ |
255 | d__2 = normx, d__3 = (d__1 = x[i__ - 1], abs(d__1)); | |
92f5a8d4 | 256 | normx = (max)(d__2, d__3); |
7c673cae FG |
257 | /* L30: */ |
258 | } | |
92f5a8d4 | 259 | eps = epslon_(&c_b7); |
7c673cae FG |
260 | residn = resid / (n * norma * normx * eps); |
261 | s_wsfe(&io___20); | |
262 | e_wsfe(); | |
263 | s_wsfe(&io___21); | |
264 | /* | |
265 | do_fio(&c__1, (char *)&residn, (ftnlen)sizeof(real_type)); | |
266 | do_fio(&c__1, (char *)&resid, (ftnlen)sizeof(real_type)); | |
267 | do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real_type)); | |
268 | do_fio(&c__1, (char *)&x[0], (ftnlen)sizeof(real_type)); | |
269 | do_fio(&c__1, (char *)&x[n - 1], (ftnlen)sizeof(real_type)); | |
270 | */ | |
92f5a8d4 | 271 | std::cout << std::setw(12) << std::setprecision(5) << residn << " " << resid << " " << eps << " " << x[0] << " " << x[n - 1] << std::endl; |
7c673cae FG |
272 | e_wsfe(); |
273 | ||
274 | s_wsfe(&io___22); | |
92f5a8d4 | 275 | do_fio(&c__1, (char*)&n, (ftnlen)sizeof(integer)); |
7c673cae FG |
276 | e_wsfe(); |
277 | s_wsfe(&io___23); | |
278 | e_wsfe(); | |
279 | ||
280 | time[2] = total; | |
281 | time[3] = ops / (total * 1e6); | |
282 | time[4] = 2. / time[3]; | |
283 | time[5] = total / cray; | |
284 | s_wsfe(&io___24); | |
92f5a8d4 | 285 | do_fio(&c__1, (char*)&lda, (ftnlen)sizeof(integer)); |
7c673cae FG |
286 | e_wsfe(); |
287 | s_wsfe(&io___25); | |
92f5a8d4 TL |
288 | for (i__ = 1; i__ <= 6; ++i__) |
289 | { | |
7c673cae FG |
290 | // do_fio(&c__1, (char *)&time[i__ - 1], (ftnlen)sizeof(real_type)); |
291 | std::cout << std::setw(12) << std::setprecision(5) << time[i__ - 1]; | |
292 | } | |
293 | e_wsfe(); | |
294 | s_wsle(&io___26); | |
92f5a8d4 | 295 | do_lio(&c__9, &c__1, " end of tests -- this version dated 10/12/92", (ftnlen)44); |
7c673cae FG |
296 | e_wsle(); |
297 | ||
298 | s_stop("", (ftnlen)0); | |
299 | return 0; | |
300 | } /* MAIN__ */ | |
301 | ||
92f5a8d4 TL |
302 | /* Subroutine */ int matgen_(real_type* a, integer* lda, integer* n, |
303 | real_type* b, real_type* norma) | |
7c673cae FG |
304 | { |
305 | /* System generated locals */ | |
92f5a8d4 | 306 | integer a_dim1, a_offset, i__1, i__2; |
7c673cae FG |
307 | real_type d__1, d__2; |
308 | ||
309 | /* Local variables */ | |
310 | static integer i__, j; | |
311 | static integer init[4]; | |
312 | ||
7c673cae | 313 | /* Parameter adjustments */ |
92f5a8d4 | 314 | a_dim1 = *lda; |
7c673cae FG |
315 | a_offset = 1 + a_dim1; |
316 | a -= a_offset; | |
317 | --b; | |
318 | ||
319 | /* Function Body */ | |
320 | init[0] = 1; | |
321 | init[1] = 2; | |
322 | init[2] = 3; | |
323 | init[3] = 1325; | |
92f5a8d4 TL |
324 | *norma = CAST_TO_RT(0); |
325 | i__1 = *n; | |
326 | for (j = 1; j <= i__1; ++j) | |
327 | { | |
7c673cae | 328 | i__2 = *n; |
92f5a8d4 TL |
329 | for (i__ = 1; i__ <= i__2; ++i__) |
330 | { | |
7c673cae FG |
331 | a[i__ + j * a_dim1] = ran_(init) - .5f; |
332 | /* Computing MAX */ | |
92f5a8d4 TL |
333 | d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); |
334 | *norma = (max)(d__2, *norma); | |
7c673cae FG |
335 | /* L20: */ |
336 | } | |
337 | /* L30: */ | |
338 | } | |
339 | i__1 = *n; | |
92f5a8d4 TL |
340 | for (i__ = 1; i__ <= i__1; ++i__) |
341 | { | |
7c673cae FG |
342 | b[i__] = CAST_TO_RT(0); |
343 | /* L35: */ | |
344 | } | |
345 | i__1 = *n; | |
92f5a8d4 TL |
346 | for (j = 1; j <= i__1; ++j) |
347 | { | |
7c673cae | 348 | i__2 = *n; |
92f5a8d4 TL |
349 | for (i__ = 1; i__ <= i__2; ++i__) |
350 | { | |
7c673cae FG |
351 | b[i__] += a[i__ + j * a_dim1]; |
352 | /* L40: */ | |
353 | } | |
354 | /* L50: */ | |
355 | } | |
356 | return 0; | |
357 | } /* matgen_ */ | |
358 | ||
92f5a8d4 | 359 | /* Subroutine */ int dgefa_(real_type* a, integer* lda, integer* n, integer* ipvt, integer* info) |
7c673cae FG |
360 | { |
361 | /* System generated locals */ | |
362 | integer a_dim1, a_offset, i__1, i__2, i__3; | |
363 | ||
364 | /* Local variables */ | |
92f5a8d4 | 365 | static integer j, k, l; |
7c673cae | 366 | static real_type t; |
92f5a8d4 | 367 | static integer kp1, nm1; |
7c673cae FG |
368 | |
369 | /* dgefa factors a double precision matrix by gaussian elimination. */ | |
370 | ||
371 | /* dgefa is usually called by dgeco, but it can be called */ | |
372 | /* directly with a saving in time if rcond is not needed. */ | |
373 | /* (time for dgeco) = (1 + 9/n)*(time for dgefa) . */ | |
374 | ||
375 | /* on entry */ | |
376 | ||
377 | /* a double precision(lda, n) */ | |
378 | /* the matrix to be factored. */ | |
379 | ||
380 | /* lda integer */ | |
381 | /* the leading dimension of the array a . */ | |
382 | ||
383 | /* n integer */ | |
384 | /* the order of the matrix a . */ | |
385 | ||
386 | /* on return */ | |
387 | ||
388 | /* a an upper triangular matrix and the multipliers */ | |
389 | /* which were used to obtain it. */ | |
390 | /* the factorization can be written a = l*u where */ | |
391 | /* l is a product of permutation and unit lower */ | |
392 | /* triangular matrices and u is upper triangular. */ | |
393 | ||
394 | /* ipvt integer(n) */ | |
395 | /* an integer vector of pivot indices. */ | |
396 | ||
397 | /* info integer */ | |
398 | /* = 0 normal value. */ | |
399 | /* = k if u(k,k) .eq. 0.0 . this is not an error */ | |
400 | /* condition for this subroutine, but it does */ | |
401 | /* indicate that dgesl or dgedi will divide by zero */ | |
402 | /* if called. use rcond in dgeco for a reliable */ | |
403 | /* indication of singularity. */ | |
404 | ||
405 | /* linpack. this version dated 08/14/78 . */ | |
406 | /* cleve moler, university of new mexico, argonne national lab. */ | |
407 | ||
408 | /* subroutines and functions */ | |
409 | ||
410 | /* blas daxpy,dscal,idamax */ | |
411 | ||
412 | /* internal variables */ | |
413 | ||
7c673cae FG |
414 | /* gaussian elimination with partial pivoting */ |
415 | ||
416 | /* Parameter adjustments */ | |
92f5a8d4 | 417 | a_dim1 = *lda; |
7c673cae FG |
418 | a_offset = 1 + a_dim1; |
419 | a -= a_offset; | |
420 | --ipvt; | |
421 | ||
422 | /* Function Body */ | |
423 | *info = 0; | |
92f5a8d4 TL |
424 | nm1 = *n - 1; |
425 | if (nm1 < 1) | |
426 | { | |
7c673cae FG |
427 | goto L70; |
428 | } | |
429 | i__1 = nm1; | |
92f5a8d4 TL |
430 | for (k = 1; k <= i__1; ++k) |
431 | { | |
7c673cae FG |
432 | kp1 = k + 1; |
433 | ||
434 | /* find l = pivot index */ | |
435 | ||
92f5a8d4 TL |
436 | i__2 = *n - k + 1; |
437 | l = idamax_(&i__2, &a[k + k * a_dim1], &c__1) + k - 1; | |
7c673cae FG |
438 | ipvt[k] = l; |
439 | ||
440 | /* zero pivot implies this column already triangularized */ | |
441 | ||
92f5a8d4 TL |
442 | if (a[l + k * a_dim1] == 0.) |
443 | { | |
7c673cae FG |
444 | goto L40; |
445 | } | |
446 | ||
447 | /* interchange if necessary */ | |
448 | ||
92f5a8d4 TL |
449 | if (l == k) |
450 | { | |
7c673cae FG |
451 | goto L10; |
452 | } | |
92f5a8d4 | 453 | t = a[l + k * a_dim1]; |
7c673cae FG |
454 | a[l + k * a_dim1] = a[k + k * a_dim1]; |
455 | a[k + k * a_dim1] = t; | |
92f5a8d4 | 456 | L10: |
7c673cae FG |
457 | |
458 | /* compute multipliers */ | |
459 | ||
92f5a8d4 | 460 | t = -1. / a[k + k * a_dim1]; |
7c673cae FG |
461 | i__2 = *n - k; |
462 | dscal_(&i__2, &t, &a[k + 1 + k * a_dim1], &c__1); | |
463 | ||
464 | /* row elimination with column indexing */ | |
465 | ||
466 | i__2 = *n; | |
92f5a8d4 TL |
467 | for (j = kp1; j <= i__2; ++j) |
468 | { | |
7c673cae | 469 | t = a[l + j * a_dim1]; |
92f5a8d4 TL |
470 | if (l == k) |
471 | { | |
7c673cae FG |
472 | goto L20; |
473 | } | |
474 | a[l + j * a_dim1] = a[k + j * a_dim1]; | |
475 | a[k + j * a_dim1] = t; | |
92f5a8d4 | 476 | L20: |
7c673cae | 477 | i__3 = *n - k; |
92f5a8d4 | 478 | daxpy_(&i__3, &t, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + j * a_dim1], &c__1); |
7c673cae FG |
479 | /* L30: */ |
480 | } | |
481 | goto L50; | |
92f5a8d4 | 482 | L40: |
7c673cae | 483 | *info = k; |
92f5a8d4 TL |
484 | L50: |
485 | /* L60: */ | |
486 | ; | |
7c673cae FG |
487 | } |
488 | L70: | |
489 | ipvt[*n] = *n; | |
92f5a8d4 TL |
490 | if (a[*n + *n * a_dim1] == 0.) |
491 | { | |
7c673cae FG |
492 | *info = *n; |
493 | } | |
494 | return 0; | |
495 | } /* dgefa_ */ | |
496 | ||
92f5a8d4 | 497 | /* Subroutine */ int dgesl_(real_type* a, integer* lda, integer* n, integer* ipvt, real_type* b, integer* job) |
7c673cae FG |
498 | { |
499 | /* System generated locals */ | |
500 | integer a_dim1, a_offset, i__1, i__2; | |
501 | ||
502 | /* Local variables */ | |
92f5a8d4 | 503 | static integer k, l; |
7c673cae | 504 | static real_type t; |
92f5a8d4 | 505 | static integer kb, nm1; |
7c673cae FG |
506 | |
507 | /* dgesl solves the double precision system */ | |
508 | /* a * x = b or trans(a) * x = b */ | |
509 | /* using the factors computed by dgeco or dgefa. */ | |
510 | ||
511 | /* on entry */ | |
512 | ||
513 | /* a double precision(lda, n) */ | |
514 | /* the output from dgeco or dgefa. */ | |
515 | ||
516 | /* lda integer */ | |
517 | /* the leading dimension of the array a . */ | |
518 | ||
519 | /* n integer */ | |
520 | /* the order of the matrix a . */ | |
521 | ||
522 | /* ipvt integer(n) */ | |
523 | /* the pivot vector from dgeco or dgefa. */ | |
524 | ||
525 | /* b double precision(n) */ | |
526 | /* the right hand side vector. */ | |
527 | ||
528 | /* job integer */ | |
529 | /* = 0 to solve a*x = b , */ | |
530 | /* = nonzero to solve trans(a)*x = b where */ | |
531 | /* trans(a) is the transpose. */ | |
532 | ||
533 | /* on return */ | |
534 | ||
535 | /* b the solution vector x . */ | |
536 | ||
537 | /* error condition */ | |
538 | ||
539 | /* a division by zero will occur if the input factor contains a */ | |
540 | /* zero on the diagonal. technically this indicates singularity */ | |
541 | /* but it is often caused by improper arguments or improper */ | |
542 | /* setting of lda . it will not occur if the subroutines are */ | |
543 | /* called correctly and if dgeco has set rcond .gt. 0.0 */ | |
544 | /* or dgefa has set info .eq. 0 . */ | |
545 | ||
546 | /* to compute inverse(a) * c where c is a matrix */ | |
547 | /* with p columns */ | |
548 | /* call dgeco(a,lda,n,ipvt,rcond,z) */ | |
549 | /* if (rcond is too small) go to ... */ | |
550 | /* do 10 j = 1, p */ | |
551 | /* call dgesl(a,lda,n,ipvt,c(1,j),0) */ | |
552 | /* 10 continue */ | |
553 | ||
554 | /* linpack. this version dated 08/14/78 . */ | |
555 | /* cleve moler, university of new mexico, argonne national lab. */ | |
556 | ||
557 | /* subroutines and functions */ | |
558 | ||
559 | /* blas daxpy,ddot */ | |
560 | ||
561 | /* internal variables */ | |
562 | ||
7c673cae | 563 | /* Parameter adjustments */ |
92f5a8d4 | 564 | a_dim1 = *lda; |
7c673cae FG |
565 | a_offset = 1 + a_dim1; |
566 | a -= a_offset; | |
567 | --ipvt; | |
568 | --b; | |
569 | ||
570 | /* Function Body */ | |
571 | nm1 = *n - 1; | |
92f5a8d4 TL |
572 | if (*job != 0) |
573 | { | |
7c673cae FG |
574 | goto L50; |
575 | } | |
576 | ||
577 | /* job = 0 , solve a * x = b */ | |
578 | /* first solve l*y = b */ | |
579 | ||
92f5a8d4 TL |
580 | if (nm1 < 1) |
581 | { | |
7c673cae FG |
582 | goto L30; |
583 | } | |
584 | i__1 = nm1; | |
92f5a8d4 TL |
585 | for (k = 1; k <= i__1; ++k) |
586 | { | |
7c673cae FG |
587 | l = ipvt[k]; |
588 | t = b[l]; | |
92f5a8d4 TL |
589 | if (l == k) |
590 | { | |
7c673cae FG |
591 | goto L10; |
592 | } | |
593 | b[l] = b[k]; | |
594 | b[k] = t; | |
92f5a8d4 | 595 | L10: |
7c673cae FG |
596 | i__2 = *n - k; |
597 | daxpy_(&i__2, &t, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1], &c__1); | |
598 | /* L20: */ | |
599 | } | |
600 | L30: | |
601 | ||
602 | /* now solve u*x = y */ | |
603 | ||
604 | i__1 = *n; | |
92f5a8d4 TL |
605 | for (kb = 1; kb <= i__1; ++kb) |
606 | { | |
7c673cae FG |
607 | k = *n + 1 - kb; |
608 | b[k] /= a[k + k * a_dim1]; | |
92f5a8d4 | 609 | t = -b[k]; |
7c673cae FG |
610 | i__2 = k - 1; |
611 | daxpy_(&i__2, &t, &a[k * a_dim1 + 1], &c__1, &b[1], &c__1); | |
612 | /* L40: */ | |
613 | } | |
614 | goto L100; | |
615 | L50: | |
616 | ||
617 | /* job = nonzero, solve trans(a) * x = b */ | |
618 | /* first solve trans(u)*y = b */ | |
619 | ||
620 | i__1 = *n; | |
92f5a8d4 TL |
621 | for (k = 1; k <= i__1; ++k) |
622 | { | |
7c673cae | 623 | i__2 = k - 1; |
92f5a8d4 | 624 | t = ddot_(&i__2, &a[k * a_dim1 + 1], &c__1, &b[1], &c__1); |
7c673cae FG |
625 | b[k] = (b[k] - t) / a[k + k * a_dim1]; |
626 | /* L60: */ | |
627 | } | |
628 | ||
629 | /* now solve trans(l)*x = y */ | |
630 | ||
92f5a8d4 TL |
631 | if (nm1 < 1) |
632 | { | |
7c673cae FG |
633 | goto L90; |
634 | } | |
635 | i__1 = nm1; | |
92f5a8d4 TL |
636 | for (kb = 1; kb <= i__1; ++kb) |
637 | { | |
638 | k = *n - kb; | |
7c673cae FG |
639 | i__2 = *n - k; |
640 | b[k] += ddot_(&i__2, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1], &c__1); | |
641 | l = ipvt[k]; | |
92f5a8d4 TL |
642 | if (l == k) |
643 | { | |
7c673cae FG |
644 | goto L70; |
645 | } | |
92f5a8d4 | 646 | t = b[l]; |
7c673cae FG |
647 | b[l] = b[k]; |
648 | b[k] = t; | |
92f5a8d4 TL |
649 | L70: |
650 | /* L80: */ | |
651 | ; | |
7c673cae FG |
652 | } |
653 | L90: | |
654 | L100: | |
655 | return 0; | |
656 | } /* dgesl_ */ | |
657 | ||
92f5a8d4 TL |
658 | /* Subroutine */ int daxpy_(integer* n, real_type* da, real_type* dx, |
659 | integer* incx, real_type* dy, integer* incy) | |
7c673cae FG |
660 | { |
661 | /* System generated locals */ | |
662 | integer i__1; | |
663 | ||
664 | /* Local variables */ | |
665 | static integer i__, m, ix, iy, mp1; | |
666 | ||
7c673cae FG |
667 | /* constant times a vector plus a vector. */ |
668 | /* uses unrolled loops for increments equal to one. */ | |
669 | /* jack dongarra, linpack, 3/11/78. */ | |
670 | ||
7c673cae FG |
671 | /* Parameter adjustments */ |
672 | --dy; | |
673 | --dx; | |
674 | ||
675 | /* Function Body */ | |
92f5a8d4 TL |
676 | if (*n <= 0) |
677 | { | |
7c673cae FG |
678 | return 0; |
679 | } | |
92f5a8d4 TL |
680 | if (*da == 0.) |
681 | { | |
7c673cae FG |
682 | return 0; |
683 | } | |
92f5a8d4 TL |
684 | if (*incx == 1 && *incy == 1) |
685 | { | |
7c673cae FG |
686 | goto L20; |
687 | } | |
688 | ||
689 | /* code for unequal increments or equal increments */ | |
690 | /* not equal to 1 */ | |
691 | ||
692 | ix = 1; | |
693 | iy = 1; | |
92f5a8d4 TL |
694 | if (*incx < 0) |
695 | { | |
7c673cae FG |
696 | ix = (-(*n) + 1) * *incx + 1; |
697 | } | |
92f5a8d4 TL |
698 | if (*incy < 0) |
699 | { | |
7c673cae FG |
700 | iy = (-(*n) + 1) * *incy + 1; |
701 | } | |
702 | i__1 = *n; | |
92f5a8d4 TL |
703 | for (i__ = 1; i__ <= i__1; ++i__) |
704 | { | |
7c673cae FG |
705 | dy[iy] += *da * dx[ix]; |
706 | ix += *incx; | |
707 | iy += *incy; | |
708 | /* L10: */ | |
709 | } | |
710 | return 0; | |
711 | ||
712 | /* code for both increments equal to 1 */ | |
713 | ||
7c673cae FG |
714 | /* clean-up loop */ |
715 | ||
716 | L20: | |
717 | m = *n % 4; | |
92f5a8d4 TL |
718 | if (m == 0) |
719 | { | |
7c673cae FG |
720 | goto L40; |
721 | } | |
722 | i__1 = m; | |
92f5a8d4 TL |
723 | for (i__ = 1; i__ <= i__1; ++i__) |
724 | { | |
7c673cae FG |
725 | dy[i__] += *da * dx[i__]; |
726 | /* L30: */ | |
727 | } | |
92f5a8d4 TL |
728 | if (*n < 4) |
729 | { | |
7c673cae FG |
730 | return 0; |
731 | } | |
732 | L40: | |
92f5a8d4 | 733 | mp1 = m + 1; |
7c673cae | 734 | i__1 = *n; |
92f5a8d4 TL |
735 | for (i__ = mp1; i__ <= i__1; i__ += 4) |
736 | { | |
7c673cae FG |
737 | dy[i__] += *da * dx[i__]; |
738 | dy[i__ + 1] += *da * dx[i__ + 1]; | |
739 | dy[i__ + 2] += *da * dx[i__ + 2]; | |
740 | dy[i__ + 3] += *da * dx[i__ + 3]; | |
741 | /* L50: */ | |
742 | } | |
743 | return 0; | |
744 | } /* daxpy_ */ | |
745 | ||
92f5a8d4 TL |
746 | real_type ddot_(integer* n, real_type* dx, integer* incx, real_type* dy, |
747 | integer* incy) | |
7c673cae FG |
748 | { |
749 | /* System generated locals */ | |
92f5a8d4 | 750 | integer i__1; |
7c673cae FG |
751 | real_type ret_val; |
752 | ||
753 | /* Local variables */ | |
92f5a8d4 | 754 | static integer i__, m, ix, iy, mp1; |
7c673cae FG |
755 | static real_type dtemp; |
756 | ||
7c673cae FG |
757 | /* forms the dot product of two vectors. */ |
758 | /* uses unrolled loops for increments equal to one. */ | |
759 | /* jack dongarra, linpack, 3/11/78. */ | |
760 | ||
7c673cae FG |
761 | /* Parameter adjustments */ |
762 | --dy; | |
763 | --dx; | |
764 | ||
765 | /* Function Body */ | |
766 | ret_val = CAST_TO_RT(0); | |
92f5a8d4 TL |
767 | dtemp = CAST_TO_RT(0); |
768 | if (*n <= 0) | |
769 | { | |
7c673cae FG |
770 | return ret_val; |
771 | } | |
92f5a8d4 TL |
772 | if (*incx == 1 && *incy == 1) |
773 | { | |
7c673cae FG |
774 | goto L20; |
775 | } | |
776 | ||
777 | /* code for unequal increments or equal increments */ | |
778 | /* not equal to 1 */ | |
779 | ||
780 | ix = 1; | |
781 | iy = 1; | |
92f5a8d4 TL |
782 | if (*incx < 0) |
783 | { | |
7c673cae FG |
784 | ix = (-(*n) + 1) * *incx + 1; |
785 | } | |
92f5a8d4 TL |
786 | if (*incy < 0) |
787 | { | |
7c673cae FG |
788 | iy = (-(*n) + 1) * *incy + 1; |
789 | } | |
790 | i__1 = *n; | |
92f5a8d4 TL |
791 | for (i__ = 1; i__ <= i__1; ++i__) |
792 | { | |
7c673cae FG |
793 | dtemp += dx[ix] * dy[iy]; |
794 | ix += *incx; | |
795 | iy += *incy; | |
796 | /* L10: */ | |
797 | } | |
798 | ret_val = dtemp; | |
799 | return ret_val; | |
800 | ||
801 | /* code for both increments equal to 1 */ | |
802 | ||
7c673cae FG |
803 | /* clean-up loop */ |
804 | ||
805 | L20: | |
806 | m = *n % 5; | |
92f5a8d4 TL |
807 | if (m == 0) |
808 | { | |
7c673cae FG |
809 | goto L40; |
810 | } | |
811 | i__1 = m; | |
92f5a8d4 TL |
812 | for (i__ = 1; i__ <= i__1; ++i__) |
813 | { | |
7c673cae FG |
814 | dtemp += dx[i__] * dy[i__]; |
815 | /* L30: */ | |
816 | } | |
92f5a8d4 TL |
817 | if (*n < 5) |
818 | { | |
7c673cae FG |
819 | goto L60; |
820 | } | |
821 | L40: | |
92f5a8d4 | 822 | mp1 = m + 1; |
7c673cae | 823 | i__1 = *n; |
92f5a8d4 TL |
824 | for (i__ = mp1; i__ <= i__1; i__ += 5) |
825 | { | |
826 | dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 4] * dy[i__ + 4]; | |
827 | /* L50: */ | |
7c673cae FG |
828 | } |
829 | L60: | |
830 | ret_val = dtemp; | |
831 | return ret_val; | |
832 | } /* ddot_ */ | |
833 | ||
92f5a8d4 TL |
834 | /* Subroutine */ int dscal_(integer* n, real_type* da, real_type* dx, |
835 | integer* incx) | |
7c673cae FG |
836 | { |
837 | /* System generated locals */ | |
838 | integer i__1, i__2; | |
839 | ||
840 | /* Local variables */ | |
841 | static integer i__, m, mp1, nincx; | |
842 | ||
7c673cae FG |
843 | /* scales a vector by a constant. */ |
844 | /* uses unrolled loops for increment equal to one. */ | |
845 | /* jack dongarra, linpack, 3/11/78. */ | |
846 | ||
7c673cae FG |
847 | /* Parameter adjustments */ |
848 | --dx; | |
849 | ||
850 | /* Function Body */ | |
92f5a8d4 TL |
851 | if (*n <= 0) |
852 | { | |
7c673cae FG |
853 | return 0; |
854 | } | |
92f5a8d4 TL |
855 | if (*incx == 1) |
856 | { | |
7c673cae FG |
857 | goto L20; |
858 | } | |
859 | ||
860 | /* code for increment not equal to 1 */ | |
861 | ||
862 | nincx = *n * *incx; | |
92f5a8d4 TL |
863 | i__1 = nincx; |
864 | i__2 = *incx; | |
865 | for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) | |
866 | { | |
7c673cae FG |
867 | dx[i__] = *da * dx[i__]; |
868 | /* L10: */ | |
869 | } | |
870 | return 0; | |
871 | ||
872 | /* code for increment equal to 1 */ | |
873 | ||
7c673cae FG |
874 | /* clean-up loop */ |
875 | ||
876 | L20: | |
877 | m = *n % 5; | |
92f5a8d4 TL |
878 | if (m == 0) |
879 | { | |
7c673cae FG |
880 | goto L40; |
881 | } | |
882 | i__2 = m; | |
92f5a8d4 TL |
883 | for (i__ = 1; i__ <= i__2; ++i__) |
884 | { | |
7c673cae FG |
885 | dx[i__] = *da * dx[i__]; |
886 | /* L30: */ | |
887 | } | |
92f5a8d4 TL |
888 | if (*n < 5) |
889 | { | |
7c673cae FG |
890 | return 0; |
891 | } | |
892 | L40: | |
92f5a8d4 | 893 | mp1 = m + 1; |
7c673cae | 894 | i__2 = *n; |
92f5a8d4 TL |
895 | for (i__ = mp1; i__ <= i__2; i__ += 5) |
896 | { | |
897 | dx[i__] = *da * dx[i__]; | |
7c673cae FG |
898 | dx[i__ + 1] = *da * dx[i__ + 1]; |
899 | dx[i__ + 2] = *da * dx[i__ + 2]; | |
900 | dx[i__ + 3] = *da * dx[i__ + 3]; | |
901 | dx[i__ + 4] = *da * dx[i__ + 4]; | |
902 | /* L50: */ | |
903 | } | |
904 | return 0; | |
905 | } /* dscal_ */ | |
906 | ||
92f5a8d4 | 907 | integer idamax_(integer* n, real_type* dx, integer* incx) |
7c673cae FG |
908 | { |
909 | /* System generated locals */ | |
92f5a8d4 | 910 | integer ret_val, i__1; |
7c673cae FG |
911 | real_type d__1; |
912 | ||
913 | /* Local variables */ | |
92f5a8d4 | 914 | static integer i__, ix; |
7c673cae FG |
915 | static real_type dmax__; |
916 | ||
7c673cae FG |
917 | /* finds the index of element having max. dabsolute value. */ |
918 | /* jack dongarra, linpack, 3/11/78. */ | |
919 | ||
7c673cae FG |
920 | /* Parameter adjustments */ |
921 | --dx; | |
922 | ||
923 | /* Function Body */ | |
924 | ret_val = 0; | |
92f5a8d4 TL |
925 | if (*n < 1) |
926 | { | |
7c673cae FG |
927 | return ret_val; |
928 | } | |
929 | ret_val = 1; | |
92f5a8d4 TL |
930 | if (*n == 1) |
931 | { | |
7c673cae FG |
932 | return ret_val; |
933 | } | |
92f5a8d4 TL |
934 | if (*incx == 1) |
935 | { | |
7c673cae FG |
936 | goto L20; |
937 | } | |
938 | ||
939 | /* code for increment not equal to 1 */ | |
940 | ||
92f5a8d4 | 941 | ix = 1; |
7c673cae FG |
942 | dmax__ = abs(dx[1]); |
943 | ix += *incx; | |
944 | i__1 = *n; | |
92f5a8d4 TL |
945 | for (i__ = 2; i__ <= i__1; ++i__) |
946 | { | |
947 | if ((d__1 = dx[ix], abs(d__1)) <= dmax__) | |
948 | { | |
7c673cae FG |
949 | goto L5; |
950 | } | |
951 | ret_val = i__; | |
92f5a8d4 TL |
952 | dmax__ = (d__1 = dx[ix], abs(d__1)); |
953 | L5: | |
7c673cae FG |
954 | ix += *incx; |
955 | /* L10: */ | |
956 | } | |
957 | return ret_val; | |
958 | ||
959 | /* code for increment equal to 1 */ | |
960 | ||
961 | L20: | |
962 | dmax__ = abs(dx[1]); | |
92f5a8d4 TL |
963 | i__1 = *n; |
964 | for (i__ = 2; i__ <= i__1; ++i__) | |
965 | { | |
966 | if ((d__1 = dx[i__], abs(d__1)) <= dmax__) | |
967 | { | |
7c673cae FG |
968 | goto L30; |
969 | } | |
970 | ret_val = i__; | |
92f5a8d4 TL |
971 | dmax__ = (d__1 = dx[i__], abs(d__1)); |
972 | L30:; | |
7c673cae FG |
973 | } |
974 | return ret_val; | |
975 | } /* idamax_ */ | |
976 | ||
92f5a8d4 | 977 | real_type epslon_(real_type* x) |
7c673cae FG |
978 | { |
979 | #if defined(TEST_MPF_100) || defined(TEST_MPFR_100) || defined(TEST_GMPXX) || defined(TEST_MPFRXX) | |
980 | return std::ldexp(1.0, 1 - ((100 + 1) * 1000L) / 301L); | |
981 | #elif defined(TEST_CPP_DEC_FLOAT_BN) | |
92f5a8d4 | 982 | return std::pow(10.0, 1 - std::numeric_limits<efx::cpp_dec_float_50>::digits10); |
7c673cae FG |
983 | #elif defined(NATIVE_FLOAT128) |
984 | return FLT128_EPSILON; | |
985 | #else | |
986 | return CAST_TO_RT(std::numeric_limits<real_type>::epsilon()); | |
987 | #endif | |
988 | } /* epslon_ */ | |
989 | ||
92f5a8d4 TL |
990 | /* Subroutine */ int mm_(real_type* a, integer* lda, integer* n1, integer* n3, real_type* b, integer* ldb, integer* n2, real_type* c__, |
991 | integer* ldc) | |
7c673cae FG |
992 | { |
993 | /* System generated locals */ | |
994 | integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2; | |
995 | ||
996 | /* Local variables */ | |
997 | static integer i__, j; | |
998 | ||
7c673cae FG |
999 | /* purpose: */ |
1000 | /* multiply matrix b times matrix c and store the result in matrix a. */ | |
1001 | ||
1002 | /* parameters: */ | |
1003 | ||
1004 | /* a double precision(lda,n3), matrix of n1 rows and n3 columns */ | |
1005 | ||
1006 | /* lda integer, leading dimension of array a */ | |
1007 | ||
1008 | /* n1 integer, number of rows in matrices a and b */ | |
1009 | ||
1010 | /* n3 integer, number of columns in matrices a and c */ | |
1011 | ||
1012 | /* b double precision(ldb,n2), matrix of n1 rows and n2 columns */ | |
1013 | ||
1014 | /* ldb integer, leading dimension of array b */ | |
1015 | ||
1016 | /* n2 integer, number of columns in matrix b, and number of rows in */ | |
1017 | /* matrix c */ | |
1018 | ||
1019 | /* c double precision(ldc,n3), matrix of n2 rows and n3 columns */ | |
1020 | ||
1021 | /* ldc integer, leading dimension of array c */ | |
1022 | ||
1023 | /* ---------------------------------------------------------------------- */ | |
1024 | ||
1025 | /* Parameter adjustments */ | |
92f5a8d4 | 1026 | a_dim1 = *lda; |
7c673cae FG |
1027 | a_offset = 1 + a_dim1; |
1028 | a -= a_offset; | |
92f5a8d4 | 1029 | b_dim1 = *ldb; |
7c673cae FG |
1030 | b_offset = 1 + b_dim1; |
1031 | b -= b_offset; | |
92f5a8d4 | 1032 | c_dim1 = *ldc; |
7c673cae FG |
1033 | c_offset = 1 + c_dim1; |
1034 | c__ -= c_offset; | |
1035 | ||
1036 | /* Function Body */ | |
1037 | i__1 = *n3; | |
92f5a8d4 TL |
1038 | for (j = 1; j <= i__1; ++j) |
1039 | { | |
7c673cae | 1040 | i__2 = *n1; |
92f5a8d4 TL |
1041 | for (i__ = 1; i__ <= i__2; ++i__) |
1042 | { | |
7c673cae FG |
1043 | a[i__ + j * a_dim1] = CAST_TO_RT(0); |
1044 | /* L10: */ | |
1045 | } | |
92f5a8d4 TL |
1046 | dmxpy_(n2, &a[j * a_dim1 + 1], n1, ldb, &c__[j * c_dim1 + 1], &b[b_offset]); |
1047 | /* L20: */ | |
7c673cae FG |
1048 | } |
1049 | ||
1050 | return 0; | |
1051 | } /* mm_ */ | |
1052 | ||
92f5a8d4 | 1053 | /* Subroutine */ int dmxpy_(integer* n1, real_type* y, integer* n2, integer* ldm, real_type* x, real_type* m) |
7c673cae FG |
1054 | { |
1055 | /* System generated locals */ | |
1056 | integer m_dim1, m_offset, i__1, i__2; | |
1057 | ||
1058 | /* Local variables */ | |
1059 | static integer i__, j, jmin; | |
1060 | ||
7c673cae FG |
1061 | /* purpose: */ |
1062 | /* multiply matrix m times vector x and add the result to vector y. */ | |
1063 | ||
1064 | /* parameters: */ | |
1065 | ||
1066 | /* n1 integer, number of elements in vector y, and number of rows in */ | |
1067 | /* matrix m */ | |
1068 | ||
1069 | /* y double precision(n1), vector of length n1 to which is added */ | |
1070 | /* the product m*x */ | |
1071 | ||
1072 | /* n2 integer, number of elements in vector x, and number of columns */ | |
1073 | /* in matrix m */ | |
1074 | ||
1075 | /* ldm integer, leading dimension of array m */ | |
1076 | ||
1077 | /* x double precision(n2), vector of length n2 */ | |
1078 | ||
1079 | /* m double precision(ldm,n2), matrix of n1 rows and n2 columns */ | |
1080 | ||
1081 | /* ---------------------------------------------------------------------- */ | |
1082 | ||
1083 | /* cleanup odd vector */ | |
1084 | ||
1085 | /* Parameter adjustments */ | |
1086 | --y; | |
92f5a8d4 | 1087 | m_dim1 = *ldm; |
7c673cae FG |
1088 | m_offset = 1 + m_dim1; |
1089 | m -= m_offset; | |
1090 | --x; | |
1091 | ||
1092 | /* Function Body */ | |
1093 | j = *n2 % 2; | |
92f5a8d4 TL |
1094 | if (j >= 1) |
1095 | { | |
7c673cae | 1096 | i__1 = *n1; |
92f5a8d4 TL |
1097 | for (i__ = 1; i__ <= i__1; ++i__) |
1098 | { | |
7c673cae FG |
1099 | y[i__] += x[j] * m[i__ + j * m_dim1]; |
1100 | /* L10: */ | |
1101 | } | |
1102 | } | |
1103 | ||
1104 | /* cleanup odd group of two vectors */ | |
1105 | ||
1106 | j = *n2 % 4; | |
92f5a8d4 TL |
1107 | if (j >= 2) |
1108 | { | |
7c673cae | 1109 | i__1 = *n1; |
92f5a8d4 TL |
1110 | for (i__ = 1; i__ <= i__1; ++i__) |
1111 | { | |
1112 | y[i__] = y[i__] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1]; | |
1113 | /* L20: */ | |
7c673cae FG |
1114 | } |
1115 | } | |
1116 | ||
1117 | /* cleanup odd group of four vectors */ | |
1118 | ||
1119 | j = *n2 % 8; | |
92f5a8d4 TL |
1120 | if (j >= 4) |
1121 | { | |
7c673cae | 1122 | i__1 = *n1; |
92f5a8d4 TL |
1123 | for (i__ = 1; i__ <= i__1; ++i__) |
1124 | { | |
1125 | y[i__] = y[i__] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1]; | |
7c673cae FG |
1126 | /* L30: */ |
1127 | } | |
1128 | } | |
1129 | ||
1130 | /* cleanup odd group of eight vectors */ | |
1131 | ||
1132 | j = *n2 % 16; | |
92f5a8d4 TL |
1133 | if (j >= 8) |
1134 | { | |
7c673cae | 1135 | i__1 = *n1; |
92f5a8d4 TL |
1136 | for (i__ = 1; i__ <= i__1; ++i__) |
1137 | { | |
1138 | y[i__] = y[i__] + x[j - 7] * m[i__ + (j - 7) * m_dim1] + x[j - 6] * m[i__ + (j - 6) * m_dim1] + x[j - 5] * m[i__ + (j - 5) * m_dim1] + x[j - 4] * m[i__ + (j - 4) * m_dim1] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1]; | |
7c673cae FG |
1139 | /* L40: */ |
1140 | } | |
1141 | } | |
1142 | ||
1143 | /* main loop - groups of sixteen vectors */ | |
1144 | ||
1145 | jmin = j + 16; | |
1146 | i__1 = *n2; | |
92f5a8d4 TL |
1147 | for (j = jmin; j <= i__1; j += 16) |
1148 | { | |
7c673cae | 1149 | i__2 = *n1; |
92f5a8d4 TL |
1150 | for (i__ = 1; i__ <= i__2; ++i__) |
1151 | { | |
1152 | y[i__] = y[i__] + x[j - 15] * m[i__ + (j - 15) * m_dim1] + x[j - 14] * m[i__ + (j - 14) * m_dim1] + x[j - 13] * m[i__ + (j - 13) * m_dim1] + x[j - 12] * m[i__ + (j - 12) * m_dim1] + x[j - 11] * m[i__ + (j - 11) * m_dim1] + x[j - 10] * m[i__ + (j - 10) * m_dim1] + x[j - 9] * m[i__ + (j - 9) * m_dim1] + x[j - 8] * m[i__ + (j - 8) * m_dim1] + x[j - 7] * m[i__ + (j - 7) * m_dim1] + x[j - 6] * m[i__ + (j - 6) * m_dim1] + x[j - 5] * m[i__ + (j - 5) * m_dim1] + x[j - 4] * m[i__ + (j - 4) * m_dim1] + x[j - 3] * m[i__ + (j - 3) * m_dim1] + x[j - 2] * m[i__ + (j - 2) * m_dim1] + x[j - 1] * m[i__ + (j - 1) * m_dim1] + x[j] * m[i__ + j * m_dim1]; | |
1153 | /* L50: */ | |
7c673cae FG |
1154 | } |
1155 | /* L60: */ | |
1156 | } | |
1157 | return 0; | |
1158 | } /* dmxpy_ */ | |
1159 | ||
92f5a8d4 | 1160 | real_type ran_(integer* iseed) |
7c673cae FG |
1161 | { |
1162 | /* System generated locals */ | |
1163 | real_type ret_val; | |
1164 | ||
1165 | /* Local variables */ | |
1166 | static integer it1, it2, it3, it4; | |
1167 | ||
7c673cae FG |
1168 | /* modified from the LAPACK auxiliary routine 10/12/92 JD */ |
1169 | /* -- LAPACK auxiliary routine (version 1.0) -- */ | |
1170 | /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ | |
1171 | /* Courant Institute, Argonne National Lab, and Rice University */ | |
1172 | /* February 29, 1992 */ | |
1173 | ||
1174 | /* .. Array Arguments .. */ | |
1175 | /* .. */ | |
1176 | ||
1177 | /* Purpose */ | |
1178 | /* ======= */ | |
1179 | ||
1180 | /* DLARAN returns a random double number from a uniform (0,1) */ | |
1181 | /* distribution. */ | |
1182 | ||
1183 | /* Arguments */ | |
1184 | /* ========= */ | |
1185 | ||
1186 | /* ISEED (input/output) INTEGER array, dimension (4) */ | |
1187 | /* On entry, the seed of the random number generator; the array */ | |
1188 | /* elements must be between 0 and 4095, and ISEED(4) must be */ | |
1189 | /* odd. */ | |
1190 | /* On exit, the seed is updated. */ | |
1191 | ||
1192 | /* Further Details */ | |
1193 | /* =============== */ | |
1194 | ||
1195 | /* This routine uses a multiplicative congruential method with modulus */ | |
1196 | /* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */ | |
1197 | /* 'Multiplicative congruential random number generators with modulus */ | |
1198 | /* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */ | |
1199 | /* b = 48', Math. Comp. 189, pp 331-344, 1990). */ | |
1200 | ||
1201 | /* 48-bit integers are stored in 4 integer array elements with 12 bits */ | |
1202 | /* per element. Hence the routine is portable across machines with */ | |
1203 | /* integers of 32 bits or more. */ | |
1204 | ||
1205 | /* .. Parameters .. */ | |
1206 | /* .. */ | |
1207 | /* .. Local Scalars .. */ | |
1208 | /* .. */ | |
1209 | /* .. Intrinsic Functions .. */ | |
1210 | /* .. */ | |
1211 | /* .. Executable Statements .. */ | |
1212 | ||
1213 | /* multiply the seed by the multiplier modulo 2**48 */ | |
1214 | ||
1215 | /* Parameter adjustments */ | |
1216 | --iseed; | |
1217 | ||
1218 | /* Function Body */ | |
1219 | it4 = iseed[4] * 2549; | |
1220 | it3 = it4 / 4096; | |
1221 | it4 -= it3 << 12; | |
1222 | it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508; | |
1223 | it2 = it3 / 4096; | |
1224 | it3 -= it2 << 12; | |
1225 | it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322; | |
1226 | it1 = it2 / 4096; | |
1227 | it2 -= it1 << 12; | |
92f5a8d4 | 1228 | it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] * 494; |
7c673cae FG |
1229 | it1 %= 4096; |
1230 | ||
1231 | /* return updated seed */ | |
1232 | ||
1233 | iseed[1] = it1; | |
1234 | iseed[2] = it2; | |
1235 | iseed[3] = it3; | |
1236 | iseed[4] = it4; | |
1237 | ||
1238 | /* convert 48-bit integer to a double number in the interval (0,1) */ | |
1239 | ||
92f5a8d4 | 1240 | ret_val = ((real_type)it1 + ((real_type)it2 + ((real_type)it3 + (real_type)it4 * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4; |
7c673cae FG |
1241 | return ret_val; |
1242 | ||
1243 | /* End of RAN */ | |
1244 | ||
1245 | } /* ran_ */ | |
1246 | ||
1247 | /* | |
1248 | ||
1249 | Double results: | |
1250 | ~~~~~~~~~~~~~~ | |
1251 | ||
1252 | norm. resid resid machep x(1) x(n) | |
1253 | 6.4915 7.207e-013 2.2204e-016 1 1 | |
1254 | ||
1255 | ||
1256 | ||
1257 | times are reported for matrices of order 1000 | |
1258 | factor solve total mflops unit ratio | |
1259 | times for array with leading dimension of1001 | |
1260 | 1.443 0.003 1.446 462.43 0.004325 25.821 | |
1261 | ||
1262 | ||
1263 | mpf_class results: | |
1264 | ~~~~~~~~~~~~~~~~~~ | |
1265 | ||
1266 | norm. resid resid machep x(1) x(n) | |
1267 | 3.6575e-05 5.2257e-103 2.8575e-101 1 1 | |
1268 | ||
1269 | ||
1270 | ||
1271 | times are reported for matrices of order 1000 | |
1272 | factor solve total mflops unit ratio | |
1273 | times for array with leading dimension of1001 | |
1274 | 266.45 0.798 267.24 2.5021 0.79933 4772.2 | |
1275 | ||
1276 | ||
1277 | number<gmp_float<100> >: | |
1278 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
1279 | ||
1280 | norm. resid resid machep x(1) x(n) | |
1281 | 0.36575e-4 0.52257e-102 0.28575e-100 0.1e1 0.1e1 | |
1282 | ||
1283 | ||
1284 | ||
1285 | times are reported for matrices of order 1000 | |
1286 | factor solve total mflops unit ratio | |
1287 | times for array with leading dimension of1001 | |
1288 | 279.96 0.84 280.8 2.3813 0.83988 5014.3 | |
1289 | ||
1290 | boost::multiprecision::ef::cpp_dec_float_50: | |
1291 | ~~~~~~~~~~~~~~~~~~~~~~~~~ | |
1292 | ||
1293 | norm. resid resid machep x(1) x(n) | |
1294 | 2.551330735e-16 1.275665107e-112 1e-99 1 1 | |
1295 | ||
1296 | ||
1297 | ||
1298 | times are reported for matrices of order 1000 | |
1299 | factor solve total mflops unit ratio | |
1300 | times for array with leading dimension of1001 | |
1301 | 363.89 1.074 364.97 1.8321 1.0916 6517.3 | |
1302 | */ |