module solver use const use var use comm use io implicit real*8 (a-h,o-z) contains subroutine init_solver ! ------------------------- ! evp elliptic solver data ! used for surface pressure ! ------------------------- ! open(99,file='prepglo/data/evp'//grd,form='unformatted') if(lsolver.eq.1) then if(myid.eq.0) write(*,*)'evp solver' call rdnc_evp1 ! read(99) al,ar,ab,at,ac,rinv,rinv1,ie ! close(99) elseif(lsolver.eq.2) then if(myid.eq.0) write(*,*)'bi-cgstab solver' call rdnc_evp2 ! read(99) al,ar,ab,at,ac,cl,cb,cc,cr,ct ! close(99) else if(myid.eq.0)write(*,*)'lsolver=',lsolver,' (incorrect solver)' call mpi_finalize(ierr) stop endif end subroutine ! ***************** ! elliptic solver * ! ***************** ! ---------------------------------------------------------------------- subroutine p_bicgstab_le(ab,al,ac,ar,at,b,x,cb,cl,cc,cr,ct,r,rh,p,v,s,t,ph,sh) !$OMP DECLARE SIMD(p_bicgstab_le) UNIFORM(r,rh,p,v,s,t,ph,sh) parameter(i0j0=i0*j0) real*8 x,r,rh,p,v,s,t,ph,sh real*8 rho,rhn,alpha,beta,w,res,tmp,tp,tol dimension ab(i2,j2),al(i2,j2),ac(i2,j2),ar(i2,j2),at(i2,0:j2), & b(i2,j2),x(i0,j0),r(i0,j0),rh(i0,j0),p(i0,j0),v(i0,j0), & s(i0,j0),t(i0,j0),ph(i0,j0),sh(i0,j0), & cb(i2,j2),cl(i2,j2),cc(i2,j2),cr(i2,j2),ct(i2,j2) iter=0 10 rho=1.d0 alpha=1.d0 w=1.d0 call p_sqprod(ab,al,ac,ar,at,x,r) do 50 j=2,j1 do 50 i=2,i1 50 r(i,j)=b(i-1,j-1)-r(i,j) call dcopy(i0j0,r,1,rh,1) call p_inprod(r,r,tp) res=sqrt(tp) if (res .lt. 1e-7) then call mpi_sendrecv(x(2,1),1,m_v8lon,m_w,1,x(i0,1),1,m_v8lon,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,1),1,m_v8lon,m_e,1,x(1,1),1,m_v8lon,m_w,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,2),1,m_v8lat,m_s,1,x(1,j0),1,m_v8lat,m_n,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,j1),1,m_v8lat,m_n,1,x(1,1),1,m_v8lat,m_s,1,m_cart,istat,ierr) return endif 100 iter=iter+1 call p_inprod(r,rh,rhn) beta=(rhn/rho)*(alpha/w) ! print*,iter,'beta=',beta call daxpy(i0j0,-1.d0*w,v,1,p,1) call dscal(i0j0,beta,p,1) call daxpy(i0j0,1.d0,r,1,p,1) ! do 200 j=2,j1 ! do 200 i=2,i1 ! 200 ph(i,j)=p(i,j) ! call p_sqelim(cb,cl,cc,cr,ct,p,ph) ! call p_lusqelim(cb,cl,cc,cr,ct,p,ph) call p_sipsqelim(cb,cl,cc,cr,ct,p,ph) call p_sqprod(ab,al,ac,ar,at,ph,v) call p_inprod(rh,v,tmp) alpha=rhn/tmp ! print*,iter,'alpha=', alpha call dcopy(i0j0,r,1,s,1) call daxpy(i0j0,-1.d0*alpha,v,1,s,1) ! call dcopy(i0j0,s,1,sh,1) ! do 300 j=2,j1 ! do 300 i=2,i1 ! 300 sh(i,j)=s(i,j) call p_sipsqelim(cb,cl,cc,cr,ct,s,sh) ! call p_lusqelim(cb,cl,cc,cr,ct,s,sh) ! call p_sqelim(cb,cl,cc,cr,ct,s,sh) call p_sqprod(ab,al,ac,ar,at,sh,t) call p_inprod(t,t,tmp) call p_inprod(t,s,w) w=w/tmp ! print*,iter,'wn=',w tmp=0.d0 call daxpy(i0j0,alpha,ph,1,x,1) call daxpy(i0j0,w,sh,1,x,1) call dcopy(i0j0,s,1,r,1) call daxpy(i0j0,-1.d0*w,t,1,r,1) call p_inprod(r,r,tp) rho=rhn ! if (myid .eq. 0) print*,iter,sqrt(tp) ! if (sqrt(tp)/res .gt. 1e-3 .and. mod(iter,100) .ne. 0) goto 100 ! if (myid .eq. 0) print*, iter,sqrt(tp) !if (sqrt(tp) .gt. 1e-7 .and. iter .lt. 10000) goto 10 !TS if (sqrt(tp).gt.1.e-9.or.sqrt(tp)/res .gt. 1e-3.and. mod(iter,1000) .ne. 0) goto 100 if (sqrt(tp).gt.1.e-12.or.sqrt(tp)/res .gt. 1e-6.and. mod(iter,1000) .ne. 0) goto 100 call mpi_sendrecv(x(2,1),1,m_v8lon,m_w,1,x(i0,1),1,m_v8lon,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,1),1,m_v8lon,m_e,1,x(1,1),1,m_v8lon,m_w,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,2),1,m_v8lat,m_s,1,x(1,j0),1,m_v8lat,m_n,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,j1),1,m_v8lat,m_n,1,x(1,1),1,m_v8lat,m_s,1,m_cart,istat,ierr) continue end subroutine subroutine p_bicgstab_le_algo11(ab,al,ac,ar,at,b,x,cb,cl,cc,cr,ct)!,r,rh,p,v,s,t,ph,sh) !$OMP DECLARE SIMD(p_bicgstab_le_algo11) parameter(i0j0=i0*j0) real*8 x,r,b,rh,w,wh,t,ph,z,s,sh,q,qh,y,zh,v,tmp_rx real*8 rr,rw,beta,alpha,omega,qy,yy,r0,r0r,r0w,r0s,r0z,pre_r0r,qy_local,yy_local,tmp_r integer max_iter, ireq_qy, ireq_r dimension ab(i2,j2),al(i2,j2),ac(i2,j2),ar(i2,j2),at(i2,0:j2), & cb(i2,j2),cl(i2,j2),cc(i2,j2),cr(i2,j2),ct(i2,j2), & x(i0,j0),r(i0,j0),b(i2,j2),rh(i0,j0),w(i0,j0),wh(i0,j0), & t(i0,j0),ph(i0,j0),s(i0,j0),sh(i0,j0), & z(i0,j0),q(i0,j0),qh(i0,j0),y(i0,j0),zh(i0,j0),v(i0,j0), & r0(i0,j0),qdoty(i0,j0),ydoty(i0,j0),tmp_rx(i0,j0), & ireq_qy(2),ireq_r(4) call p_sqprod(ab,al,ac,ar,at,x,r) ! r0 = Ax0 r(2:i1, 2:j1) = b(1:i1-1, 1:j1-1) - r(2:i1, 2:j1) ! r0 = b - Ax0 call dcopy(i0j0,r,1,r0,1) ! r0 := r0 call p_sipsqelim(cb,cl,cc,cr,ct,r,rh) ! rhat(0) = M * r0 call p_sqprod(ab,al,ac,ar,at,rh,w) ! w(0) = A * rhat(0) call p_sipsqelim(cb,cl,cc,cr,ct,w,wh) ! what(0) = M * w0 call p_sqprod(ab,al,ac,ar,at,wh,t) ! t(0) = A * what(0) call p_inprod(r0,r0,rr) ! rr = r0 dot r0 call p_inprod(r0,w,rw) ! rw = r0 dot rw alpha = rr / rw beta = 0.d0 pre_r0r = rr res=sqrt(rr) if (res .lt. 1e-7) then call mpi_sendrecv(x(2,1),1,m_v8lon,m_w,1,x(i0,1),1,m_v8lon,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,1),1,m_v8lon,m_e,1,x(1,1),1,m_v8lon,m_w,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,2),1,m_v8lat,m_s,1,x(1,j0),1,m_v8lat,m_n,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,j1),1,m_v8lat,m_n,1,x(1,1),1,m_v8lat,m_s,1,m_cart,istat,ierr) return endif call dcopy(i0j0,rh,1,ph,1) ! i = 0, phat = rhat call dcopy(i0j0,w,1,s,1) ! i = 0, s = w call dcopy(i0j0,wh,1,sh,1) ! i = 0, shat = what call dcopy(i0j0,t,1,z,1) ! i = 0, z = t max_iter = 1000 do i = 0, max_iter if (i .gt. 0) then call daxpy(i0j0,-1.d0*omega,sh,1,ph,1) ! phat(i) = phat(i-1) - omega*shat(i-1) call dscal(i0j0,beta,ph,1) ! phat(i) = beta*(phat(i-1) - omega*shat(i-1)) call daxpy(i0j0,1.d0,rh,1,ph,1) ! phat(i) = rhat(i) + beta*(phat(i-1) - omega*shat(i-1)) call daxpy(i0j0,-1.d0*omega,z,1,s,1) ! s(i) = s(i-1) - omega*z(i-1) call dscal(i0j0,beta,s,1) ! s(i) = beta*(s(i-1) - omega*z(i-1)) call daxpy(i0j0,1.d0,w,1,s,1) ! s(i) = w(i) + beta*(s(i-1) - omega*z(i-1)) call daxpy(i0j0,-1.d0*omega,zh,1,sh,1) ! shat(i) = shat(i-1) - omega*zhat(i-1) call dscal(i0j0,beta,sh,1) ! shat(i) = beta*(shat(i-1) - omega*zhat(i-1)) call daxpy(i0j0,1.d0,wh,1,sh,1) ! shat(i) = what(i) + beta*(shat(i-1) - omega*zhat(i-1)) call daxpy(i0j0,-1.d0*omega,v,1,z,1) ! z(i) = z(i-1) - omega*v(i-1) call dscal(i0j0,beta,z,1) ! z(i) = beta * (z(i-1) - omega*v(i-1)) call daxpy(i0j0,1.d0,t,1,z,1) ! z(i) = t(i) + (beta * (z(i-1) - omega*v(i-1))) endif call dcopy(i0j0,r,1,q,1) ! q(i) = r(i) call daxpy(i0j0,-1.d0*alpha,s,1,q,1) ! q(i) = r(i) - alpha * s(i) call dcopy(i0j0,rh,1,qh,1) ! qhat(i) = rhat(i) call daxpy(i0j0,-1.d0*alpha,sh,1,qh,1) ! qhat(i) = rhat(i) - alpha*shat(i) call dcopy(i0j0,w,1,y,1) ! y(i) = w(i) call daxpy(i0j0,-1.d0*alpha,z,1,y,1) ! y(i) = w(i) - alpha * z !call p_inprod(q, y, qy) ! qy = q dot y !call p_inprod(y, y, yy) ! yy = y dot y qdoty(2:i1,2:j1) = q(2:i1,2:j1) * y(2:i1,2:j1) qy_local = sum(qdoty(2:i1,2:j1)) call mpi_iallreduce(qy_local,qy,1,mpi_real8,mpi_sum,m_cart,ireq_qy(1),ierr) ydoty(2:i1,2:j1) = y(2:i1,2:j1) * y(2:i1,2:j1) yy_local = sum(ydoty(2:i1,2:j1)) call mpi_iallreduce(yy_local,yy,1,mpi_real8,mpi_sum,m_cart,ireq_qy(2),ierr) call p_sipsqelim(cb,cl,cc,cr,ct,z,zh) ! zhat(i) = M * z(i) call p_sqprod(ab,al,ac,ar,at,zh,v) ! v(i) = A * zhat(i) call mpi_waitall(2, ireq_qy(1:2), MPI_STATUSES_IGNORE, ierr) omega = qy / yy ! omega = qy / yy call daxpy(i0j0,alpha,ph,1,x,1) ! x(i+1) = x(i) + alpha * phat(i) call daxpy(i0j0,omega,qh,1,x,1) ! x(i+1) = x(i) + alpha * phat(i) + omega * qhat(i) call dcopy(i0j0,q,1,r,1) ! r(i+1) = q(i+1) call daxpy(i0j0,-1.d0*omega,y,1,r,1) ! r(i+1) = q(i+1) - omega * y(i) call dcopy(i0j0,wh,1,rh,1) ! rhat(i+1) = what(i) call daxpy(i0j0,-1.d0*alpha,zh,1,rh,1) ! rhat(i+1) = what(i) - alpha * zhat(i) call dscal(i0j0,-1.d0*omega,rh,1) ! rhat(i+1) = -omega * (what(i) - alpha * zhat(i)) call daxpy(i0j0,1.d0,qh,1,rh,1) ! rhat(i+1) = qhat(i) -omega * (what(i) - alpha * zhat(i)) call dcopy(i0j0,t,1,w,1) ! w(i+1) = t(i) call daxpy(i0j0,-1.d0*alpha,v,1,w,1) ! w(i+1) = t(i) - alpha * v(i) call dscal(i0j0,-1.d0*omega,w,1) ! w(i+1) = -omega * (t(i) - alpha * v(i)) call daxpy(i0j0,1.d0,y,1,w,1) ! w(i+1) = y(i) - omega * (t(i) - alpha * v(i)) tmp_rx(2:i1,2:j1) = r0(2:i1,2:j1) * r(2:i1,2:j1) tmp_r = sum(tmp_rx(2:i1,2:j1)) call mpi_iallreduce(tmp_r,r0r,1,mpi_real8,mpi_sum,m_cart,ireq_r(1),ierr) tmp_rx(2:i1,2:j1) = r0(2:i1,2:j1) * w(2:i1,2:j1) tmp_r = sum(tmp_rx(2:i1,2:j1)) call mpi_iallreduce(tmp_r,r0w,1,mpi_real8,mpi_sum,m_cart,ireq_r(2),ierr) tmp_rx(2:i1,2:j1) = r0(2:i1,2:j1) * s(2:i1,2:j1) tmp_r = sum(tmp_rx(2:i1,2:j1)) call mpi_iallreduce(tmp_r,r0s,1,mpi_real8,mpi_sum,m_cart,ireq_r(3),ierr) tmp_rx(2:i1,2:j1) = r0(2:i1,2:j1) * z(2:i1,2:j1) tmp_r = sum(tmp_rx(2:i1,2:j1)) call mpi_iallreduce(tmp_r,r0z,1,mpi_real8,mpi_sum,m_cart,ireq_r(4),ierr) !call p_inprod(r0, r, r0r) ! r0r = r(0) dot r(i+1) !call p_inprod(r0, w, r0w) ! r0w = r(0) dot w(i+1) !call p_inprod(r0, s, r0s) ! r0s = r(0) dot s(i) !call p_inprod(r0, z, r0z) ! r0s = r(0) dot z(i) call p_sipsqelim(cb,cl,cc,cr,ct,w,wh) ! what(i+1) = M * w(i+1) call p_sqprod(ab,al,ac,ar,at,wh,t) ! t(i+1) = A * what(i+1) call mpi_waitall(4, ireq_r(1:4), MPI_STATUSES_IGNORE, ierr) beta = ((alpha / omega) * r0r) / pre_r0r alpha = r0r / (r0w + beta*r0s - beta*omega*r0z) pre_r0r = r0r call p_inprod(r,r,rr) if ( (sqrt(rr).lt.1.e-12).and.(sqrt(rr)/res.lt.1e-3) ) then call mpi_sendrecv(x(2,1),1,m_v8lon,m_w,1,x(i0,1),1,m_v8lon,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,1),1,m_v8lon,m_e,1,x(1,1),1,m_v8lon,m_w,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,2),1,m_v8lat,m_s,1,x(1,j0),1,m_v8lat,m_n,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,j1),1,m_v8lat,m_n,1,x(1,1),1,m_v8lat,m_s,1,m_cart,istat,ierr) return endif enddo call mpi_sendrecv(x(2,1),1,m_v8lon,m_w,1,x(i0,1),1,m_v8lon,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,1),1,m_v8lon,m_e,1,x(1,1),1,m_v8lon,m_w,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,2),1,m_v8lat,m_s,1,x(1,j0),1,m_v8lat,m_n,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,j1),1,m_v8lat,m_n,1,x(1,1),1,m_v8lat,m_s,1,m_cart,istat,ierr) ! 100 iter=iter+1 ! !call p_inprod(r,rh,rhn) ! rhn = r(0) dot r(i) ! !beta=(rhn/rho)*(alpha/w) ! ! !call daxpy(i0j0,-1.d0*w,v,1,p,1) ! !call dscal(i0j0,beta,p,1) ! !call daxpy(i0j0,1.d0,r,1,p,1) ! ! call p_sipsqelim(cb,cl,cc,cr,ct,p,ph) ! p_hat(i) = M * p(i) ! call p_sqprod(ab,al,ac,ar,at,ph,v) ! v(i) = A * p_hat(i) ! call p_inprod(rh,v,tmp) ! tmp = r0 * v ! alpha=rho/tmp ! alpha(i) = rho / tmp ! ! call dcopy(i0j0,r,1,s,1) ! call daxpy(i0j0,-1.d0*alpha,v,1,s,1) ! q(i) = r(i) - alpha(i) * s(i) ! call p_sipsqelim(cb,cl,cc,cr,ct,s,sh) ! q_hat(i) = M * q(i) ! call p_sqprod(ab,al,ac,ar,at,sh,t) ! y(i) = A * q_hat(i) ! !call p_inprod(t,t,tmp) ! !call p_inprod(t,s,w) ! tt = t * t ! sum_tt = sum(tt(1:i1, 1:j1)) ! call mpi_iallreduce(sum_tt,tmp,1,mpi_real8,mpi_sum,m_cart, allreduce_ireq(1),ierr) ! ts = t * s ! sum_ts = sum(ts(1:i1, 1:j1)) ! call mpi_iallreduce(sum_ts,w,1,mpi_real8,mpi_sum,m_cart, allreduce_ireq(2),ierr) ! call mpi_waitall(2, allreduce_ireq(1), MPI_STATUSES_IGNORE, ierr) ! begin reduction(q(i),y(i)), (y(i),y(i)) ! ! ! w=w/tmp ! w = w / tmp ! ! tmp=0.d0 ! ! call daxpy(i0j0,alpha,ph,1,x,1) ! x(i+1) = x(i) + alpha * p_hat(i) ! call daxpy(i0j0,w,sh,1,x,1) ! x(i+1) = x(i) + alpha * p_hat(i) + w(i) * q_hat(i) ! call dcopy(i0j0,s,1,r,1) ! call daxpy(i0j0,-1.d0*w,t,1,r,1) ! r(i+1) = q(i) - w(i) * y(i) ! ! ! call p_inprod(r, rh, rhn) ! rhn = r(0) * r(i+1) ! beta = ((alpha / w) * rhn) / rho ! beta = alpha(i) / w(i) * rhn / rho ! ! call daxpy(i0j0,-1.d0*w,v,1,p,1) ! p(i) = p(i) - w(i) * s(i) ! call dscal(i0j0,beta,p,1) ! beta * p(i) = beta(p(i) - w(i) * s(i)) ! call daxpy(i0j0,1.d0,r,1,p,1) ! p(i+1) = r(i) + beta(i) * (p(i) - w(i) * s(i)) ! ! call p_inprod(r,r,tp) ! ! rho=rhn end subroutine ! ---------------------------------------------------------------------- subroutine p_sqprod(ab,al,ac,ar,at,x,b) ! real*8 ab,al,ac,ar,at,x,b real*8 x,b dimension ab(i2,j2),al(i2,j2),ac(i2,j2),ar(i2,j2),at(i2,0:j2),x(i0,j0),b(i0,j0) call mpi_sendrecv(x(2,1),1,m_v8lon,m_w,1,x(i0,1),1,m_v8lon,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,1),1,m_v8lon,m_e,1,x(1,1),1,m_v8lon,m_w,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,2),1,m_v8lat,m_s,1,x(1,j0),1,m_v8lat,m_n,1,m_cart,istat,ierr) call mpi_sendrecv(x(1,j1),1,m_v8lat,m_n,1,x(1,1),1,m_v8lat,m_s,1,m_cart,istat,ierr) do 100 j=2,j1 do 100 i=2,i1 it=i-1 jt=j-1 100 b(i,j)=ab(it,jt)*x(i,jt)+al(it,jt)*x(it,j)+ac(it,jt)*x(i,j)+ar(it,jt)*x(i+1,j)+at(it,jt)*x(i,j+1) end subroutine ! ---------------------------------------------------------------------- !subroutine p_inprod(a1,a2,pd) ! real*8 a1,a2,tmp,pd ! dimension a1(i0,j0),a2(i0,j0) ! ! tmp=0.d0 ! do j=2,j1 ! do i=2,i1 ! tmp=tmp+a1(i,j)*a2(i,j) ! enddo ! enddo ! call mpi_allreduce(tmp,pd,1,mpi_real8,mpi_sum,m_cart,ierr) ! ! return !end subroutine subroutine p_inprod(a1,a2,pd) !$OMP DECLARE SIMD(p_inprod) UNIFORM(a1,a2) real*8 a1,a2,tmp,pd dimension a1(i0,j0),a2(i0,j0), tmpa(i0, j0) ! tmpa(2:i1, 2:j1) = a1(2:i1, 2:j1) * a2(2:i1, 2:j1) do j=2,j1 !$omp do simd do i=2,i1 tmpa(i,j) = a1(i,j) * a2(i,j) enddo enddo tmp = sum(tmpa(2:i1, 2:j1)) call mpi_allreduce(tmp,pd,1,mpi_real8,mpi_sum,m_cart,ierr) return end subroutine ! -------------------------- subroutine p_sqelim(cb,cl,cc,cr,ct,b,bh) parameter(i3=i2-1,j3=j2-1,i0j0=i0*j0) ! real*8 cb,cl,cc,cr,ct, real*8 b,bh dimension cb(i2,j2),cl(i2,j2),cc(i2,j2),cr(i2,j2),ct(i2,j2),b(i0,j0),bh(i0,j0) call dcopy(i0j0,b,1,bh,1) do 100 j=2,j2 jt=j-1 do 150 i=2,i2 it=i-1 bh(i,j)=bh(i,j)*cc(it,jt) bh(i+1,j)=bh(i+1,j)-cr(it,jt)*bh(i,j) 150 bh(i,j+1)=bh(i,j+1)-ct(it,jt)*bh(i,j) bh(i1,j)=bh(i1,j)*cc(i2,jt) 100 bh(i1,j+1)=bh(i1,j+1)-ct(i2,j)*bh(i1,j) do 200 i=2,i2 it=i-1 bh(i,j1)=bh(i,j1)*cc(it,j2) 200 bh(i+1,j1)=bh(i+1,j1)-cr(it,j2)*bh(i,j1) bh(i1,j1)=bh(i1,j1)*cc(i2,j2) do 300 j=j1,3,-1 jt=j-1 do 350 i=i1,3,-1 it=i-1 bh(i,j)=bh(i,j)*cc(it,jt) bh(i-1,j)=bh(i-1,j)-cr(it,jt)*bh(i,j) 350 bh(i,j-1)=bh(i,j-1)-ct(it,jt)*bh(i,j) bh(2,j)=bh(2,j)*cc(1,jt) 300 bh(2,j-1)=bh(2,j-1)-ct(1,jt)*bh(2,j) do 400 i=i1,3,-1 it=i-1 bh(i,2)=bh(i,2)*cc(it,1) 400 bh(i-1,2)=bh(i-1,2)-cr(it,1)*bh(i,2) bh(2,2)=bh(2,2)*cc(1,1) end subroutine ! --------------------------------------------------------------------- subroutine p_lusqelim(cb,cl,cc,cr,ct,b,bh) parameter(i3=i2-1,j3=j2-1,i0j0=i0*j0) ! real*8 cb,cl,cc,cr,ct,b,bh real*8 b,bh dimension cb(i2,j2),cl(i2,j2),cc(i2,j2),cr(i2,j2),ct(i2,j2),b(i0,j0),bh(i0,j0) call dcopy(i0j0,b,1,bh,1) do 100 j=2,j2 jt=j-1 do 150 i=2,i2 it=i-1 bh(i+1,j)=bh(i+1,j)-cl(it,jt)*bh(i,j) 150 bh(i,j+1)=bh(i,j+1)-cb(it,jt)*bh(i,j) 100 bh(i1,j+1)=bh(i1,j+1)-cb(i2,j)*bh(i1,j) do 200 i=2,i2 it=i-1 200 bh(i+1,j1)=bh(i+1,j1)-cl(it,j2)*bh(i,j1) bh(i1,j1)=bh(i1,j1) do 300 j=j1,3,-1 jt=j-1 do 350 i=i1,3,-1 it=i-1 bh(i,j)=bh(i,j)*cc(it,jt) bh(i-1,j)=bh(i-1,j)-cr(it,jt)*bh(i,j) 350 bh(i,j-1)=bh(i,j-1)-ct(it,jt)*bh(i,j) bh(2,j)=bh(2,j)*cc(1,jt) 300 bh(2,j-1)=bh(2,j-1)-ct(1,jt)*bh(2,j) do 400 i=i1,3,-1 it=i-1 bh(i,2)=bh(i,2)*cc(it,1) 400 bh(i-1,2)=bh(i-1,2)-cr(it,1)*bh(i,2) bh(2,2)=bh(2,2)*cc(1,1) end subroutine ! --------------------------------------------------------------------- subroutine p_sipsqelim(cb,cl,cc,cr,ct,b,bh) parameter(i3=i2-1,j3=j2-1,i0j0=i0*j0) ! real*8 cb,cl,cc,cr,ct,b,bh real*8 b,bh dimension cb(i2,j2),cl(i2,j2),cc(i2,j2),cr(i2,j2),ct(i2,j2),b(i0,j0),bh(i0,j0) call dcopy(i0j0,b,1,bh,1) do 100 j=2,j2 jt=j-1 do 150 i=2,i2 it=i-1 bh(i,j)=bh(i,j)*cc(it,jt) bh(i+1,j)=bh(i+1,j)-cl(it,jt)*bh(i,j) 150 bh(i,j+1)=bh(i,j+1)-cb(it,jt)*bh(i,j) bh(i1,j)=bh(i1,j)*cc(i2,jt) 100 bh(i1,j+1)=bh(i1,j+1)-cb(i2,jt)*bh(i1,j) do 200 i=2,i2 it=i-1 bh(i,j1)=bh(i,j1)*cc(it,j2) 200 bh(i+1,j1)=bh(i+1,j1)-cl(it,j2)*bh(i,j1) bh(i1,j1)=bh(i1,j1)*cc(i2,j2) do 300 j=j1,3,-1 jt=j-1 do 350 i=i1,3,-1 it=i-1 bh(i-1,j)=bh(i-1,j)-cr(it,jt)*bh(i,j) 350 bh(i,j-1)=bh(i,j-1)-ct(it,jt)*bh(i,j) 300 bh(2,j-1)=bh(2,j-1)-ct(1,jt)*bh(2,j) do 400 i=i1,3,-1 400 bh(i-1,2)=bh(i-1,2)-cr(i-1,1)*bh(i,2) end subroutine ! ---------------------------------------------------------------------- subroutine rep2(ax,ay,bb,cx,cy,rinv,rinv1,dum0,dum1,dum2,dumg,f,x,ie) ! ---------------------------------------------------------------------- !ibm real*8 rinv,rinv1,dum0,dum1,dum2,x,h,row parameter(nbg0=nb0*ngy0,nbg1=nbg0-1,ibir=i2t/ngy0) real*8 x,rinv,rinv1,dum1,dum2,dum0,dumg,dd dimension ax(i2,j2),ay(i2,j2),bb(i2,j2),cx(i2,j2),cy(i2,0:j2),f(i2,j2),rinv(ibir,i0,nbg0),rinv1(ibir,i0,nbg1),ie(nb0),dum0(i0,nb0),dum1(ibir),dum2(i0),x(i0,j0),dumg(i2t) do 160 ng=0,ngy0-1 jsp=1 do 150 nb=1,nb0 nbg=ng*nb0+nb if (mylat .eq. ng) then jfp=ie(nb)-2 do 106 j=jsp,jfp do 105 i=1,i2 105 x(i+1,j+2)=(f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1))/cy(i,j) call mpi_sendrecv(x(2,j+2),1,mpi_real8,m_w,1,x(i0,j+2),1,mpi_real8,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,j+2),1,mpi_real8,m_e,1,x(1,j+2),1,mpi_real8,m_w,1,m_cart,istat,ierr) 106 continue if (nbg.eq.nbg0) go to 160 j=ie(nb)-1 do 115 i=1,i2 115 dum2(i)=f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1)-cy(i,j)*x(i+1,j+2) j=ie(nb) call mpi_allgather(dum2,i2,mpi_real8,dumg,i2,mpi_real8,m_comm_lon,ierr) do 116 n=1,i0 dum2(n)=x(n,j) 116 dum0(n,nb)=x(n,j) dd=1.d0 else if (nbg .eq. nbg0) go to 160 dd=0.d0 endif call mpi_scatter(dumg,ibir,mpi_real8,dum1,ibir,mpi_real8,ng,m_comm_lat,ierr) call dgemv('t',ibir,i0,-1.d0,rinv1(1,1,nbg),ibir,dum1,1,dd,dum2,1) if (nb .eq. nb0) go to 150 call mpi_reduce(dum2,x(1,ie(nb)),i0,mpi_real8,mpi_sum,ng,m_comm_lat,ierr) jsp=ie(nb) 150 continue call mpi_reduce(dum2,x(1,1),i0,mpi_real8,mpi_sum,ng+1,m_comm_lat,ierr) 160 continue ! wenien ! open(160,file='opt/rep160'//grd,form='unformatted') ! write(160) x,f ! close(160) do 300 ng=ngy0-1,0,-1 do 250 nbs=1,nb0 nb=nb0-nbs+1 nbg=nb0*ng+nb jsp=1 if (nb.ne.1) jsp=ie(nb-1) if (mylat .eq. ng) then if (nbg.eq.nbg0) go to 201 j=ie(nb) do 200 n=1,i0 200 x(n,j)=dum0(n,nb) 201 j=ie(nb)-1 do 210 i=1,i2 210 dum2(i)=f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1)-cy(i,j)*x(i+1,j+2) call mpi_allgather(dum2,i2,mpi_real8,dumg,i2,mpi_real8,m_comm_lon,ierr) endif call mpi_scatter(dumg,ibir,mpi_real8,dum1,ibir,mpi_real8,ng,m_comm_lat,ierr) call dgemv('t',ibir,i0,1.d0,rinv(1,1,nbg),ibir,dum1,1,0.d0,dum2,1) call mpi_reduce(dum2,dum0(1,nb),i0,mpi_real8,mpi_sum,ng,m_comm_lat,ierr) if (mylat .eq. ng) then do 220 n=1,i0 220 x(n,jsp+1)=x(n,jsp+1)+dum0(n,nb) endif if (nbg.eq.1) go to 300 if (mylat .eq. ng) then do 230 m=1,i2 230 dum2(m)=dum0(m+1,nb)*cy(m,jsp-1) call mpi_allgather(dum2,i2,mpi_real8,dumg,i2,mpi_real8,m_comm_lon,ierr) endif call mpi_scatter(dumg,ibir,mpi_real8,dum1,ibir,mpi_real8,ng,m_comm_lat,ierr) call dgemv('t',ibir,i0,1.d0,rinv1(1,1,nbg-1),ibir,dum1,1,0.d0,dum2,1) call mpi_reduce(dum2,dum0(1,nb),i0,mpi_real8,mpi_sum,ng,m_comm_lat,ierr) if (mylat .eq. ng) then do 245 n=1,i0 245 dum0(n,nb)=x(n,jsp)+dum0(n,nb) endif 250 continue if (mylat .eq. ng) call mpi_send(x(1,2),i0,mpi_real8,m_s,1,m_cart,ierr) if (mylat .eq. ng-1) call mpi_recv(x(1,j0),i0,mpi_real8,m_n,1,m_cart,istat,ierr) 300 continue if (mylat .eq. 0) then do 350 i=1,i0 350 dum0(i,1)=0.d0 endif jsp=1 do 400 nb=1,nb0 jfp=ie(nb)-2 if (nb.eq.nb0) jfp=ie(nb)-2 do 410 i=1,i0 410 x(i,jsp)=dum0(i,nb) do 420 j=jsp,jfp do 430 i=1,i2 430 x(i+1,j+2)=(f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1))/cy(i,j) call mpi_sendrecv(x(2,j+2),1,mpi_real8,m_w,1,x(i0,j+2),1,mpi_real8,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,j+2),1,mpi_real8,m_e,1,x(1,j+2),1,mpi_real8,m_w,1,m_cart,istat,ierr) 420 continue 400 jsp=ie(nb) end subroutine ! ---------------------------------------------------------------------- subroutine rep(ax,ay,bb,cx,cy,rinv,rinv1,dum0,dum1,dum2,dumg,f,x,ie) parameter(nbg0=nb0*ngy0,nbg1=nbg0-1,ig0=i2*ngx0+2,ig2=ig0-2,ib=ig2/ngy0) real*8 x,rinv,rinv1,dum1,dum2,dum0,dumg,dd ! real*8 ax,ay,bb,cx,cy,f dimension ax(i2,j2),ay(i2,j2),bb(i2,j2),cx(i2,j2),cy(i2,0:j2),f(i2,j2),rinv(ib,i0,nbg0),rinv1(ib,i0,nbg1),ie(nb0),dum0(i0,nb0),dum1(ib),dum2(i0),x(i0,j0),dumg(i2t) !real*8 timer !common/timer/timer(10) timer(7)=mpi_wtime() do 160 ng=0,ngy0-1 jsp=1 do 150 nb=1,nb0 nbg=ng*nb0+nb if (mylat .eq. ng) then jfp=ie(nb)-2 do 106 j=jsp,jfp do 105 i=1,i2 105 x(i+1,j+2)=(f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1))/cy(i,j) timer(8)=mpi_wtime() call mpi_sendrecv(x(2,j+2),1,mpi_real8,m_w,1,x(i0,j+2),1,mpi_real8,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,j+2),1,mpi_real8,m_e,1,x(1,j+2),1,mpi_real8,m_w,1,m_cart,istat,ierr) 106 timer(5)=timer(5)+mpi_wtime()-timer(8) if (nbg.eq.nbg0) go to 160 j=ie(nb)-1 do 115 i=1,i2 115 dum2(i)=f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1)-cy(i,j)*x(i+1,j+2) j=ie(nb) timer(8)=mpi_wtime() call mpi_allgather(dum2,i2,mpi_real8,dumg,i2,mpi_real8,m_comm_lon,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) do 116 n=1,i0 dum2(n)=x(n,j) 116 dum0(n,nb)=x(n,j) dd=1.d0 else if (nbg .eq. nbg0) go to 160 dd=0.d0 endif timer(8)=mpi_wtime() call mpi_scatter(dumg,ib,mpi_real8,dum1,ib,mpi_real8,ng,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) call dgemv('t',ib,i0,-1.d0,rinv1(1,1,nbg),ib,dum1,1,dd,dum2,1) if (nb .eq. nb0) go to 150 timer(8)=mpi_wtime() call mpi_reduce(dum2,x(1,ie(nb)),i0,mpi_real8,mpi_sum,ng,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) jsp=ie(nb) 150 continue timer(8)=mpi_wtime() call mpi_reduce(dum2,x(1,1),i0,mpi_real8,mpi_sum,ng+1,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) 160 continue do 300 ng=ngy0-1,0,-1 do 250 nbs=1,nb0 nb=nb0-nbs+1 nbg=nb0*ng+nb jsp=1 if (nb.ne.1) jsp=ie(nb-1) if (mylat .eq. ng) then if (nbg.eq.nbg0) go to 201 j=ie(nb) do 200 n=1,i0 200 x(n,j)=dum0(n,nb) 201 j=ie(nb)-1 do 210 i=1,i2 210 dum2(i)=f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1)-cy(i,j)*x(i+1,j+2) timer(8)=mpi_wtime() call mpi_allgather(dum2,i2,mpi_real8,dumg,i2,mpi_real8,m_comm_lon,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) endif timer(8)=mpi_wtime() call mpi_scatter(dumg,ib,mpi_real8,dum1,ib,mpi_real8,ng,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) call dgemv('t',ib,i0,1.d0,rinv(1,1,nbg),ib,dum1,1,0.d0,dum2,1) timer(8)=mpi_wtime() call mpi_reduce(dum2,dum0(1,nb),i0,mpi_real8,mpi_sum,ng,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) if (mylat .eq. ng) then do 220 n=1,i0 220 x(n,jsp+1)=x(n,jsp+1)+dum0(n,nb) endif if (nbg.eq.1) go to 300 if (mylat .eq. ng) then do 230 m=1,i2 230 dum2(m)=dum0(m+1,nb)*cy(m,jsp-1) timer(8)=mpi_wtime() call mpi_allgather(dum2,i2,mpi_real8,dumg,i2,mpi_real8,m_comm_lon,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) endif timer(8)=mpi_wtime() call mpi_scatter(dumg,ib,mpi_real8,dum1,ib,mpi_real8,ng,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) call dgemv('t',ib,i0,1.d0,rinv1(1,1,nbg-1),ib,dum1,1,0.d0,dum2,1) timer(8)=mpi_wtime() call mpi_reduce(dum2,dum0(1,nb),i0,mpi_real8,mpi_sum,ng,m_comm_lat,ierr) timer(5)=timer(5)+mpi_wtime()-timer(8) if (mylat .eq. ng) then do 245 n=1,i0 245 dum0(n,nb)=x(n,jsp)+dum0(n,nb) endif 250 continue if (mylat .eq. ng) call mpi_send(x(1,2),i0,mpi_real8,m_s,1,m_cart,ierr) if (mylat .eq. ng-1) call mpi_recv(x(1,j0),i0,mpi_real8,m_n,1,m_cart,istat,ierr) 300 continue if (mylat .eq. 0) then do 350 i=1,i0 350 dum0(i,1)=0.d0 endif jsp=1 do 400 nb=1,nb0 jfp=ie(nb)-2 if (nb.eq.nb0) jfp=ie(nb)-2 do 410 i=1,i0 410 x(i,jsp)=dum0(i,nb) do 420 j=jsp,jfp do 430 i=1,i2 430 x(i+1,j+2)=(f(i,j)-ax(i,j)*x(i,j+1)-ay(i,j)*x(i+1,j)-bb(i,j)*x(i+1,j+1)-cx(i,j)*x(i+2,j+1))/cy(i,j) timer(8)=mpi_wtime() call mpi_sendrecv(x(2,j+2),1,mpi_real8,m_w,1,x(i0,j+2),1,mpi_real8,m_e,1,m_cart,istat,ierr) call mpi_sendrecv(x(i1,j+2),1,mpi_real8,m_e,1,x(1,j+2),1,mpi_real8,m_w,1,m_cart,istat,ierr) 420 timer(5)=timer(5)+mpi_wtime()-timer(8) 400 jsp=ie(nb) end subroutine end module