subroutine nitgm (n, xcur, fcur, fcnrm, step, eta, f, jacv, rpar, $ ipar, ijacv, irpre, iksmax, iresup, ifdord, nfe, njve, $ nrpre, nli, kdmax, kdmaxp1, vv, rr, svbig, svsml, w, rwork, $ rsnrm, dinpr, dnorm, itrmks) implicit none integer n, ipar(*), ijacv, irpre, iksmax, iresup, ifdord, nfe, $ njve, nrpre, nli, kdmax, kdmaxp1, itrmks double precision xcur(n), fcur(n), fcnrm, step(n), eta, rpar(*), $ vv(n,kdmaxp1), rr(kdmax,kdmax), svbig(kdmax), svsml(kdmax), $ w(kdmax), rwork(n), rsnrm, dinpr, dnorm external f, jacv, dinpr, dnorm c ------------------------------------------------------------------------ c c This is nitgm v0.3, the GMRES routine for determining (trial) inexact c Newton steps. This implementation is the "simpler" Gram-Schmidt GMRES c implementation from L. Zhou and H. F. Walker, "A simpler GMRES," c J. Numerical Lin. Alg. Appl., 1 (1994), pp. 571-581. c c ------------------------------------------------------------------------ c c Explanation: c c n = dimension of the problem. c c xcur = vector of length n, current approximate solution. c c fcur = vector of length n, value of f at xcur. c c fcnrm = norm of fcur. c c step = vector of length n, (trial) step. c c eta = relative residual reduction factor. c c f = name of user-supplied subroutine for evaluating the function c the zero of which is sought; this routine has the form c c subroutine f(n, xcur, fcur, rpar, ipar, itrmf) c c where xcur is the array containing the current x value, fcur c is f(xcur) on output, rpar and ipar are, respectively, real c and integer parameter/work arrays for use by the subroutine, c and itrmf is an integer termination flag. The meaning of c itrmf is as follows: c 0 => normal termination; desired function value calculated. c 1 => failure to produce f(xcur). c c jacv = name of user-supplied subroutine for evaluating J*v or c P(inverse)*v, where J is the Jacobian of f and P is a c right preconditioning operator. If neither analytic J*v c evaluations nor right preconditioning is used, this can c be a dummy subroutine; if right preconditioning is used but c not analytic J*v evaluations, this need only evaluate c P(inverse)*v. The form is c c subroutine jacv(n, xcur, fcur, ijob, v, z, rpar, ipar, itrmjv) c c where xcur and fcur are vectors of length n containing the c current x and f values, ijob is an integer flag indicating c which product is desired, v is a vector of length n to be c multiplied, z is a vector of length n containing the desired c product on output, rpar and ipar are, respectively, real c and integer parameter/work arrays for use by the subroutine, c and itrmjv is an integer termination c flag. The meaning of ijob is as follows: c 0 => z = J*v c 1 => z = P(inverse)*v c The meaning of itrmjv is as follows: c 0 => normal termination; desired product evaluated. c 1 => failure to produce J*v. c 2 => failure to produce P(inverse)*v. c This subroutine is called only from nitjv, and is always c called with v .ne. z. c c rpar = real parameter/work array passed to the f and jacv routines. c c ipar = integer parameter/work array passed to the f and jacv routines. c c ijacv = flag for determining method of J*v evaluation. c 0 => finite-difference evaluation (default). c 1 => analytic evaluation. c c irpre = flag for right preconditioning. c 0 => no right preconditioning c 1 => right preconditioning c c iksmax = maximum allowable number of GMRES iterations. c c iresup = residual update flag; on GMRES restarts, the residual is c updated as follows: c 0 => linear combination (default) c 1 => direct evaluation c The first is cheap (one n-vector saxpy) but may lose c accuracy with extreme residual reduction; the second c retains accuracy better but costs one J*v product c c ifdord = order of the finite-difference formula (sometimes) used on c GMRES restarts when J*v products are evaluated using finite- c differences. When ijacv = 0 on input to nitsol, ifdord is set c to 1, 2, or 4 in nitsol; otherwise, it is irrelevant. When c ijacv = 0 on input to this subroutine, the precise meaning is c as follows: c c With GMRES, ifdord matters only if iresup = 1, in which case c it determines the order of the finite-difference formula used c in evaluating the initial residual at each GMRES restart c (default 2). If iresup = 1 and ijacv = 0 on input to this c subroutine, then ijacv is temporarily reset to -1 at each c restart below to force a finite-difference evaluation of order c ifdord. NOTE: This only affects initial residuals at restarts; c first-order differences are always used within each GMRES c cycle. Using higher-order differences at restarts only should c give the same accuracy as if higher-order differences were c used throughout; see K. Turner and H. F. Walker, "Efficient c high accuracy solutions with GMRES(m)," SIAM J. Sci. Stat. c Comput., 13 (1992), pp. 815--825. c c nfe = number of function evaluations. c c njve = number of J*v evaluations. c c nrpre = number of P(inverse)*v evaluations. c c nli = number of linear iterations. c c kdmax = maximum Krylov subspace dimension; default 10. c c kdmaxp1 = kdmax + 1. c c vv = n x (kdmax+1) matrix for storage of Krylov basis in GMRES; c on return, the residual vector is contained in the first c column. c c rr = kdmax x kdmax matrix for storage of triangular matrix in GMRES. c c svbig = vector of length kdmax for storage of estimate of singular c vector of rr with largest singular value. c c svsml = vector of length kdmax for storage of estimate of singular c vector of rr with smallest singular value. c c w = vector of length kdmax, contains right-hand side of c triangular system and least-squares residual norm in GMRES. c c rwork = vector of length n, work array. c c rsnrm = GMRES residual norm on return. c c dinpr = inner-product routine, either user-supplied or blas ddot. c c dnorm = norm routine, either user-supplied or blas dnrm2. c c itrmks = termination flag; values have the following meanings: c 0 => normal termination: acceptable step found. c 1 => J*v failure in nitjv. c 2 => P(inverse)*v failure in nitjv. c 3 => acceptable step not found in iksmax GMRES iterations. c 4 => insufficient residual norm reduction over a cycle c of kdmax steps (stagnation) before an acceptable step c has been found. c 5 => dangerous ill-conditioning detected before an acceptable c step has been found. c c Note: On return, nitsol terminates if itrmks is 1 or 2. If c itrmks is 3, 4, or 5, nitsol may terminate or continue. In c this event, a meaningful inexact Newton step is returned, c even though the desired inexact Newton condition may not c hold, and a decision on termination/continuation is made c in nitdrv according to whether there is sufficient residual c norm reduction. c c ------------------------------------------------------------------------- c c Subroutines required by this and all called routines: c c user supplied: f, jacv c c nitsol routines: nitjv, nitfd c c lapack routine: dlaic1, dlamch c c blas routines: daxpy, dcopy, dscal c c user supplied or blas: dinpr, dnorm c c explanation: In nitsol, dinpr and dnorm are set to either the blas c ddot and dnrm2 routines or the user-supplied usrnpr and usrnrm c routines. c c This subroutine called by: nitdrv c c Subroutines called by this subroutine: daxpy, dcopy, dscal, dlaic1, c dlamch, dinpr, dnorm, nitjv c c Common block: c include 'nitprint.h' c c If diagnostic information is desired, include this common block in the c main program and set iplvl and ipunit according to the following: c c iplvl = 0 => no printout c = 1 => iteration numbers and F-norms c = 2 => ... + some stats, step norms, and linear model norms c = 3 => ... + some Krylov solver and backtrack information c = 4 => ... + more Krylov solver and backtrack information c c ipunit = printout unit number. c c ------------------------------------------------------------------------ c double precision abstol, big, cs, cndmax, epsmach, rsnrm0, $ sestpr, small, sn, temp integer i, igm, ijob, itask, itrmjv, kd, kdp1, ncall double precision dlamch external dlamch c ------------------------------------------------------------------------ c ------------------------------------------------------------------------ data ncall / 0 / save ncall, cndmax, epsmach c ------------------------------------------------------------------------ c Initialize. c ------------------------------------------------------------------------ if (ncall .eq. 0) then epsmach = 2.0d0*dlamch( 'e' ) ncall = 1 cndmax = 1.d0/(100.d0*epsmach) endif do 20 i = 1, n step(i) = 0.d0 20 continue igm = 0 c ------------------------------------------------------------------------ c For printing: if (iplvl .ge. 3) then write(ipunit,*) write(ipunit,800) eta endif 800 format('nitgm: eta =', 1pd10.3) if (iplvl .ge. 4) then write(ipunit,810) write(ipunit,*) write(ipunit,820) igm, fcnrm endif 810 format('nitgm: GMRES iteration no., linear residual norm, ', $ 'condition no. estimate') 820 format(5x,i4,2(5x,1pd10.3)) c ------------------------------------------------------------------------ c c ------------------------------------------------------------------------ c Set the stopping tolerance, etc. c ------------------------------------------------------------------------ rsnrm0 = fcnrm abstol = eta*rsnrm0 c ------------------------------------------------------------------------ c Place the normalized initial residual in the first column of the vv array. c ------------------------------------------------------------------------ call dcopy(n, fcur, 1, vv(1,1), 1) temp = -1.d0/fcnrm call dscal(n, temp, vv(1,1), 1) c ------------------------------------------------------------------------ c Top of the outer GMRES loop. c ------------------------------------------------------------------------ 100 continue kd = 0 rsnrm = 1.d0 c ------------------------------------------------------------------------ c Top of the inner GMRES loop. c ------------------------------------------------------------------------ 200 continue kd = kd + 1 kdp1 = kd + 1 nli = nli + 1 igm = igm + 1 c ------------------------------------------------------------------------ c Evaluate J*(kd-th Krylov subspace basis vector) in vv(.,kdp1). c Note: rwork can be used for both work arrays in this call because c the second is not referenced within nitjv. c ------------------------------------------------------------------------ if (irpre .eq. 0) then itask = 0 else itask = 1 endif call nitjv(n, xcur, fcur, f, jacv, rpar, ipar, ijacv, $ ifdord, itask, nfe, njve, nrpre, vv(1,kd), vv(1,kdp1), $ rwork, rwork, dnorm, itrmjv) if (itrmjv .gt. 0) then if (itrmjv .eq. 1) itrmks = 1 if (itrmjv .eq. 2) itrmks = 2 go to 900 endif c ------------------------------------------------------------------------ c Do modified Gram-Schmidt. c ------------------------------------------------------------------------ do 210 i = 2, kd rr(i-1,kd) = dinpr(n, vv(1,i), 1, vv(1,kdp1), 1) call daxpy(n, -rr(i-1,kd), vv(1,i), 1, vv(1,kdp1), 1) 210 continue rr(kd,kd) = dnorm(n, vv(1,kdp1), 1) c ------------------------------------------------------------------------ c Update the estimates of the largest and smallest singular values. c ------------------------------------------------------------------------ if (kd .eq. 1) then big = rr(1,1) small = big svbig(1) = 1.d0 svsml(1) = 1.d0 else ijob = 1 call dlaic1(ijob, kd-1, svbig, big, rr(1,kd), rr(kd,kd), $ sestpr, sn, cs) big = sestpr call dscal(kd-1, sn, svbig, 1) svbig(kd) = cs ijob = 2 call dlaic1(ijob, kd-1, svsml, small, rr(1,kd), rr(kd,kd), $ sestpr, sn, cs) small = sestpr call dscal(kd-1, sn, svsml, 1) svsml(kd) = cs endif c ------------------------------------------------------------------------ c Terminate if the estimated condition number is too great. c ------------------------------------------------------------------------ if (big .ge. small*cndmax) then if (kd .eq. 1) then itrmks = 5 go to 900 else kdp1 = kd kd = kd - 1 call daxpy(n, w(kd), vv(1,kdp1), 1, vv(1,1), 1) go to 300 endif endif c ------------------------------------------------------------------------ c Normalize vv(.,kdp1). c ------------------------------------------------------------------------ temp = 1.d0/rr(kd,kd) call dscal(n, temp, vv(1,kdp1), 1) c ------------------------------------------------------------------------ c Update w and the residual norm by rsnrm <- rsnrm*dsin(dacos(w(kd)/rsnrm). c ------------------------------------------------------------------------ w(kd) = dinpr(n, vv(1,1), 1, vv(1,kdp1), 1) temp = max(min(w(kd)/rsnrm,1.0D0),-1.0d0) rsnrm = rsnrm*dsin(dacos(temp)) c ------------------------------------------------------------------------ c For printing: if (iplvl .ge. 4) then write(ipunit,820) igm, rsnrm*rsnrm0, big/small c if (kd .eq. kdmax) write(ipunit,*) endif c ------------------------------------------------------------------------ c c ------------------------------------------------------------------------ c Test for termination of the inner loop. c ------------------------------------------------------------------------ if ( (rsnrm0*rsnrm .le. abstol) .or. (kd .eq. kdmax) .or. $ (igm .ge. iksmax) ) go to 300 c ------------------------------------------------------------------------ c If not terminating the inner loop, update the residual vector c and go to the top of the inner loop. c ------------------------------------------------------------------------ call daxpy(n, -w(kd), vv(1,kdp1), 1, vv(1,1), 1) go to 200 c ------------------------------------------------------------------------ c Bottom of inner loop. c ------------------------------------------------------------------------ 300 continue c ------------------------------------------------------------------------ c For printing: if (iplvl .ge. 4) then write(ipunit,*) endif c ------------------------------------------------------------------------ c c ------------------------------------------------------------------------ c Compute the solution: c ------------------------------------------------------------------------ c c ------------------------------------------------------------------------ c Use svbig for storage of the original components of w. c ------------------------------------------------------------------------ call dcopy(kd, w, 1, svbig, 1) c ------------------------------------------------------------------------ c Overwrite w with the solution of the upper triangular system. c ------------------------------------------------------------------------ do 310 i = kd, 1, -1 w(i) = w(i)/rr(i,i) if (i .gt. 1) call daxpy(i-1, -w(i), rr(1,i), 1, w, 1) 310 continue c ------------------------------------------------------------------------ c Now form the linear combination to accumulate the correction in c the work vector. c ------------------------------------------------------------------------ call dcopy(n, vv(1,1), 1, rwork, 1) call dscal(n, w(1), rwork, 1) if (kd .gt. 1) then call daxpy(kd-1, w(1), svbig, 1, w(2), 1) do 320 i = 2, kd call daxpy(n, w(i), vv(1,i), 1, rwork, 1) 320 continue endif c ------------------------------------------------------------------------ c If iresup .eq. 0, then update the residual vector by linear c combination. This frees vv(.,kdp1) for use as a work array. c ------------------------------------------------------------------------ if (iresup .eq. 0) then call daxpy(n, -svbig(kd), vv(1,kdp1), 1, vv(1,1), 1) endif c ------------------------------------------------------------------------ c If right preconditioning is used, overwrite c correction <-- P(inverse)*correction, using vv(.,kdp1) as a work array. c Note: vv(.,kdp1) can be used for both work arrays in this call because c the second is not referenced within nitjv. c ------------------------------------------------------------------------ if (irpre .gt. 0) then itask = 2 call nitjv(n, xcur, fcur, f, jacv, rpar, ipar, ijacv, $ ifdord, itask, nfe, njve, nrpre, rwork, rwork, $ vv(1,kdp1), vv(1,kdp1), dnorm, itrmjv) if (itrmjv .gt. 0) then itrmks = 2 go to 900 endif endif c ------------------------------------------------------------------------ c Update the step. This frees rwork for use as a work array. c ------------------------------------------------------------------------ call daxpy(n, rsnrm0, rwork, 1, step, 1) c ------------------------------------------------------------------------ c If iresup .eq. 1, then update the residual vector by direct evaluation, c using rwork and vv(.,kdp1) as work arrays. Note: Two distinct work c arrays are needed in this call because both are referenced within nitjv c if the J*step product is evaluated with a finite-difference of order c two or higher. If finite-differences are used (ijacv= 0), then ijacv c is temporarily set to -1 to signal to nitjv that the order of the c finite-difference formula is to be determined by ifdord. c ------------------------------------------------------------------------ if (iresup .eq. 1) then itask = 0 if (ijacv .eq. 0) ijacv = -1 call nitjv(n, xcur, fcur, f, jacv, rpar, ipar, ijacv, $ ifdord, itask, nfe, njve, nrpre, step, vv(1,1), $ rwork, vv(1,kdp1), dnorm, itrmjv) if (ijacv .eq. -1) ijacv = 0 if (itrmjv .gt. 0) then itrmks = 1 go to 900 endif do 330 i = 1, n vv(i,1) = -fcur(i) - vv(i,1) 330 continue endif c ------------------------------------------------------------------------ c Test for termination. c ------------------------------------------------------------------------ if (rsnrm0*rsnrm .le. abstol) then itrmks = 0 go to 900 endif if (igm .ge. iksmax) then itrmks = 3 go to 900 endif if (big .ge. small*cndmax) then itrmks = 5 go to 900 endif temp = dfloat(kd)*dlog(abstol/(rsnrm0*rsnrm))/ $ dlog(rsnrm/(1.d0 + 10.d0*epsmach)) if (temp .ge. 1000.d0*dfloat(iksmax - igm)) then itrmks = 4 go to 900 endif c ------------------------------------------------------------------------ c If not terminating, then normalize the initial residual, etc., and c return to the top of the outer loop. c ------------------------------------------------------------------------ if (iresup .eq. 0) then rsnrm0 = rsnrm0*rsnrm temp = 1.d0/rsnrm else rsnrm0 = dnorm(n, vv(1,1), 1) temp = 1.d0/rsnrm0 endif call dscal(n, temp, vv(1,1), 1) go to 100 c ------------------------------------------------------------------------ c All returns made here. c ------------------------------------------------------------------------ 900 continue if (itrmks .ne. 1 .and. itrmks .ne. 2) then if (iresup .eq. 0) then call dscal(n, rsnrm0, vv(1,1), 1) rsnrm = rsnrm0*rsnrm else rsnrm = dnorm(n, vv(1,1), 1) endif endif c ------------------------------------------------------------------------ c For printing: if (iplvl .ge. 3) then if (itrmks .ne. 1 .and. itrmks .ne. 2) then write(ipunit,830) itrmks, rsnrm 830 format('nitgm: itrmks =', i2, ' final lin. res. norm =', $ 1pd10.3) else write(ipunit,840) itrmks 840 format('nitgm: itrmks:', i4) endif endif c ------------------------------------------------------------------------ return end