Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Remove warning about uninitialized variable
[simgrid.git] / examples / smpi / NAS / SP / y_solve.f
1
2 c---------------------------------------------------------------------
3 c---------------------------------------------------------------------
4
5        subroutine y_solve
6
7 c---------------------------------------------------------------------
8 c---------------------------------------------------------------------
9
10 c---------------------------------------------------------------------
11 c this function performs the solution of the approximate factorization
12 c step in the y-direction for all five matrix components
13 c simultaneously. The Thomas algorithm is employed to solve the
14 c systems for the y-lines. Boundary conditions are non-periodic
15 c---------------------------------------------------------------------
16
17        include 'header.h'
18        include 'mpinpb.h'
19
20        integer i, j, k, stage, ip, kp, n, isize, jend, ksize, j1, j2,
21      >         buffer_size, c, m, p, jstart, error,
22      >         requests(2), statuses(MPI_STATUS_SIZE, 2)
23        double precision  r1, r2, d, e, s(5), sm1, sm2,
24      >                   fac1, fac2
25
26
27 c---------------------------------------------------------------------
28 c now do a sweep on a layer-by-layer basis, i.e. sweeping through cells
29 c on this node in the direction of increasing i for the forward sweep,
30 c and after that reversing the direction for the backsubstitution  
31 c---------------------------------------------------------------------
32
33 c---------------------------------------------------------------------
34 c                          FORWARD ELIMINATION  
35 c---------------------------------------------------------------------
36        do    stage = 1, ncells
37           c      = slice(2,stage)
38
39           jstart = 0
40           jend   = cell_size(2,c)-1
41
42           isize     = cell_size(1,c)
43           ksize     = cell_size(3,c)
44           ip        = cell_coord(1,c)-1
45           kp        = cell_coord(3,c)-1
46
47           buffer_size = (isize-start(1,c)-end(1,c)) * 
48      >                  (ksize-start(3,c)-end(3,c))
49
50           if ( stage .ne. 1) then
51
52 c---------------------------------------------------------------------
53 c            if this is not the first processor in this row of cells, 
54 c            receive data from predecessor containing the right hand
55 c            sides and the upper diagonal elements of the previous two rows
56 c---------------------------------------------------------------------
57
58              call mpi_irecv(in_buffer, 22*buffer_size, 
59      >                      dp_type, predecessor(2), 
60      >                      DEFAULT_TAG, comm_solve, 
61      >                      requests(1), error)
62
63 c---------------------------------------------------------------------
64 c            communication has already been started. 
65 c            compute the left hand side while waiting for the msg
66 c---------------------------------------------------------------------
67              call lhsy(c)
68
69 c---------------------------------------------------------------------
70 c            wait for pending communication to complete
71 c            This waits on the current receive and on the send
72 c            from the previous stage. They always come in pairs. 
73 c---------------------------------------------------------------------
74              call mpi_waitall(2, requests, statuses, error)
75
76 c---------------------------------------------------------------------
77 c            unpack the buffer                                 
78 c---------------------------------------------------------------------
79              j  = jstart
80              j1 = jstart + 1
81              n = 0
82 c---------------------------------------------------------------------
83 c            create a running pointer
84 c---------------------------------------------------------------------
85              p = 0
86              do    k = start(3,c), ksize-end(3,c)-1
87                 do    i = start(1,c), isize-end(1,c)-1
88                    lhs(i,j,k,n+2,c) = lhs(i,j,k,n+2,c) -
89      >                       in_buffer(p+1) * lhs(i,j,k,n+1,c)
90                    lhs(i,j,k,n+3,c) = lhs(i,j,k,n+3,c) -
91      >                       in_buffer(p+2) * lhs(i,j,k,n+1,c)
92                    do    m = 1, 3
93                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) -
94      >                       in_buffer(p+2+m) * lhs(i,j,k,n+1,c)
95                    end do
96                    d            = in_buffer(p+6)
97                    e            = in_buffer(p+7)
98                    do    m = 1, 3
99                       s(m) = in_buffer(p+7+m)
100                    end do
101                    r1 = lhs(i,j,k,n+2,c)
102                    lhs(i,j,k,n+3,c) = lhs(i,j,k,n+3,c) - d * r1
103                    lhs(i,j,k,n+4,c) = lhs(i,j,k,n+4,c) - e * r1
104                    do    m = 1, 3
105                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) - s(m) * r1
106                    end do
107                    r2 = lhs(i,j1,k,n+1,c)
108                    lhs(i,j1,k,n+2,c) = lhs(i,j1,k,n+2,c) - d * r2
109                    lhs(i,j1,k,n+3,c) = lhs(i,j1,k,n+3,c) - e * r2
110                    do    m = 1, 3
111                       rhs(i,j1,k,m,c) = rhs(i,j1,k,m,c) - s(m) * r2
112                    end do
113                    p = p + 10
114                 end do
115              end do
116
117              do    m = 4, 5
118                 n = (m-3)*5
119                 do    k = start(3,c), ksize-end(3,c)-1
120                    do    i = start(1,c), isize-end(1,c)-1
121                       lhs(i,j,k,n+2,c) = lhs(i,j,k,n+2,c) -
122      >                          in_buffer(p+1) * lhs(i,j,k,n+1,c)
123                       lhs(i,j,k,n+3,c) = lhs(i,j,k,n+3,c) -
124      >                          in_buffer(p+2) * lhs(i,j,k,n+1,c)
125                       rhs(i,j,k,m,c)   = rhs(i,j,k,m,c) -
126      >                          in_buffer(p+3) * lhs(i,j,k,n+1,c)
127                       d                = in_buffer(p+4)
128                       e                = in_buffer(p+5)
129                       s(m)             = in_buffer(p+6)
130                       r1 = lhs(i,j,k,n+2,c)
131                       lhs(i,j,k,n+3,c) = lhs(i,j,k,n+3,c) - d * r1
132                       lhs(i,j,k,n+4,c) = lhs(i,j,k,n+4,c) - e * r1
133                       rhs(i,j,k,m,c)   = rhs(i,j,k,m,c) - s(m) * r1
134                       r2 = lhs(i,j1,k,n+1,c)
135                       lhs(i,j1,k,n+2,c) = lhs(i,j1,k,n+2,c) - d * r2
136                       lhs(i,j1,k,n+3,c) = lhs(i,j1,k,n+3,c) - e * r2
137                       rhs(i,j1,k,m,c)   = rhs(i,j1,k,m,c) - s(m) * r2
138                       p = p + 6
139                    end do
140                 end do
141              end do
142
143           else            
144
145 c---------------------------------------------------------------------
146 c            if this IS the first cell, we still compute the lhs
147 c---------------------------------------------------------------------
148              call lhsy(c)
149           endif
150
151 c---------------------------------------------------------------------
152 c         perform the Thomas algorithm; first, FORWARD ELIMINATION     
153 c---------------------------------------------------------------------
154           n = 0
155
156           do    k = start(3,c), ksize-end(3,c)-1
157              do    j = jstart, jend-2
158                 do    i = start(1,c), isize-end(1,c)-1
159                    j1 = j  + 1
160                    j2 = j  + 2
161                    fac1               = 1.d0/lhs(i,j,k,n+3,c)
162                    lhs(i,j,k,n+4,c)   = fac1*lhs(i,j,k,n+4,c)
163                    lhs(i,j,k,n+5,c)   = fac1*lhs(i,j,k,n+5,c)
164                    do    m = 1, 3
165                       rhs(i,j,k,m,c) = fac1*rhs(i,j,k,m,c)
166                    end do
167                    lhs(i,j1,k,n+3,c) = lhs(i,j1,k,n+3,c) -
168      >                         lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+4,c)
169                    lhs(i,j1,k,n+4,c) = lhs(i,j1,k,n+4,c) -
170      >                         lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+5,c)
171                    do    m = 1, 3
172                       rhs(i,j1,k,m,c) = rhs(i,j1,k,m,c) -
173      >                         lhs(i,j1,k,n+2,c)*rhs(i,j,k,m,c)
174                    end do
175                    lhs(i,j2,k,n+2,c) = lhs(i,j2,k,n+2,c) -
176      >                         lhs(i,j2,k,n+1,c)*lhs(i,j,k,n+4,c)
177                    lhs(i,j2,k,n+3,c) = lhs(i,j2,k,n+3,c) -
178      >                         lhs(i,j2,k,n+1,c)*lhs(i,j,k,n+5,c)
179                    do    m = 1, 3
180                       rhs(i,j2,k,m,c) = rhs(i,j2,k,m,c) -
181      >                         lhs(i,j2,k,n+1,c)*rhs(i,j,k,m,c)
182                    end do
183                 end do
184              end do
185           end do
186
187 c---------------------------------------------------------------------
188 c         The last two rows in this grid block are a bit different, 
189 c         since they do not have two more rows available for the
190 c         elimination of off-diagonal entries
191 c---------------------------------------------------------------------
192
193           j  = jend - 1
194           j1 = jend
195           do    k = start(3,c), ksize-end(3,c)-1
196              do    i = start(1,c), isize-end(1,c)-1
197                 fac1               = 1.d0/lhs(i,j,k,n+3,c)
198                 lhs(i,j,k,n+4,c)   = fac1*lhs(i,j,k,n+4,c)
199                 lhs(i,j,k,n+5,c)   = fac1*lhs(i,j,k,n+5,c)
200                 do    m = 1, 3
201                    rhs(i,j,k,m,c) = fac1*rhs(i,j,k,m,c)
202                 end do
203                 lhs(i,j1,k,n+3,c) = lhs(i,j1,k,n+3,c) -
204      >                      lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+4,c)
205                 lhs(i,j1,k,n+4,c) = lhs(i,j1,k,n+4,c) -
206      >                      lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+5,c)
207                 do    m = 1, 3
208                    rhs(i,j1,k,m,c) = rhs(i,j1,k,m,c) -
209      >                      lhs(i,j1,k,n+2,c)*rhs(i,j,k,m,c)
210                 end do
211 c---------------------------------------------------------------------
212 c               scale the last row immediately (some of this is
213 c               overkill in case this is the last cell)
214 c---------------------------------------------------------------------
215                 fac2               = 1.d0/lhs(i,j1,k,n+3,c)
216                 lhs(i,j1,k,n+4,c) = fac2*lhs(i,j1,k,n+4,c)
217                 lhs(i,j1,k,n+5,c) = fac2*lhs(i,j1,k,n+5,c)  
218                 do    m = 1, 3
219                    rhs(i,j1,k,m,c) = fac2*rhs(i,j1,k,m,c)
220                 end do
221              end do
222           end do
223
224 c---------------------------------------------------------------------
225 c         do the u+c and the u-c factors                 
226 c---------------------------------------------------------------------
227           do    m = 4, 5
228              n = (m-3)*5
229              do    k = start(3,c), ksize-end(3,c)-1
230                 do    j = jstart, jend-2
231                    do    i = start(1,c), isize-end(1,c)-1
232                    j1 = j  + 1
233                    j2 = j  + 2
234                    fac1               = 1.d0/lhs(i,j,k,n+3,c)
235                    lhs(i,j,k,n+4,c)   = fac1*lhs(i,j,k,n+4,c)
236                    lhs(i,j,k,n+5,c)   = fac1*lhs(i,j,k,n+5,c)
237                    rhs(i,j,k,m,c) = fac1*rhs(i,j,k,m,c)
238                    lhs(i,j1,k,n+3,c) = lhs(i,j1,k,n+3,c) -
239      >                         lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+4,c)
240                    lhs(i,j1,k,n+4,c) = lhs(i,j1,k,n+4,c) -
241      >                         lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+5,c)
242                    rhs(i,j1,k,m,c) = rhs(i,j1,k,m,c) -
243      >                         lhs(i,j1,k,n+2,c)*rhs(i,j,k,m,c)
244                    lhs(i,j2,k,n+2,c) = lhs(i,j2,k,n+2,c) -
245      >                         lhs(i,j2,k,n+1,c)*lhs(i,j,k,n+4,c)
246                    lhs(i,j2,k,n+3,c) = lhs(i,j2,k,n+3,c) -
247      >                         lhs(i,j2,k,n+1,c)*lhs(i,j,k,n+5,c)
248                    rhs(i,j2,k,m,c) = rhs(i,j2,k,m,c) -
249      >                         lhs(i,j2,k,n+1,c)*rhs(i,j,k,m,c)
250                 end do
251              end do
252           end do
253
254 c---------------------------------------------------------------------
255 c            And again the last two rows separately
256 c---------------------------------------------------------------------
257              j  = jend - 1
258              j1 = jend
259              do    k = start(3,c), ksize-end(3,c)-1
260                 do    i = start(1,c), isize-end(1,c)-1
261                 fac1               = 1.d0/lhs(i,j,k,n+3,c)
262                 lhs(i,j,k,n+4,c)   = fac1*lhs(i,j,k,n+4,c)
263                 lhs(i,j,k,n+5,c)   = fac1*lhs(i,j,k,n+5,c)
264                 rhs(i,j,k,m,c)     = fac1*rhs(i,j,k,m,c)
265                 lhs(i,j1,k,n+3,c) = lhs(i,j1,k,n+3,c) -
266      >                      lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+4,c)
267                 lhs(i,j1,k,n+4,c) = lhs(i,j1,k,n+4,c) -
268      >                      lhs(i,j1,k,n+2,c)*lhs(i,j,k,n+5,c)
269                 rhs(i,j1,k,m,c)   = rhs(i,j1,k,m,c) -
270      >                      lhs(i,j1,k,n+2,c)*rhs(i,j,k,m,c)
271 c---------------------------------------------------------------------
272 c               Scale the last row immediately (some of this is overkill
273 c               if this is the last cell)
274 c---------------------------------------------------------------------
275                 fac2               = 1.d0/lhs(i,j1,k,n+3,c)
276                 lhs(i,j1,k,n+4,c) = fac2*lhs(i,j1,k,n+4,c)
277                 lhs(i,j1,k,n+5,c) = fac2*lhs(i,j1,k,n+5,c)
278                 rhs(i,j1,k,m,c)   = fac2*rhs(i,j1,k,m,c)
279
280              end do
281           end do
282        end do
283
284 c---------------------------------------------------------------------
285 c         send information to the next processor, except when this
286 c         is the last grid block;
287 c---------------------------------------------------------------------
288
289           if (stage .ne. ncells) then
290
291 c---------------------------------------------------------------------
292 c            create a running pointer for the send buffer  
293 c---------------------------------------------------------------------
294              p = 0
295              n = 0
296              do    k = start(3,c), ksize-end(3,c)-1
297                 do    i = start(1,c), isize-end(1,c)-1
298                    do    j = jend-1, jend
299                       out_buffer(p+1) = lhs(i,j,k,n+4,c)
300                       out_buffer(p+2) = lhs(i,j,k,n+5,c)
301                       do    m = 1, 3
302                          out_buffer(p+2+m) = rhs(i,j,k,m,c)
303                       end do
304                       p = p+5
305                    end do
306                 end do
307              end do
308
309              do    m = 4, 5
310                 n = (m-3)*5
311                 do    k = start(3,c), ksize-end(3,c)-1
312                    do    i = start(1,c), isize-end(1,c)-1
313                       do    j = jend-1, jend
314                          out_buffer(p+1) = lhs(i,j,k,n+4,c)
315                          out_buffer(p+2) = lhs(i,j,k,n+5,c)
316                          out_buffer(p+3) = rhs(i,j,k,m,c)
317                          p = p + 3
318                       end do
319                    end do
320                 end do
321              end do
322
323 c---------------------------------------------------------------------
324 c            pack and send the buffer
325 c---------------------------------------------------------------------
326              call mpi_isend(out_buffer, 22*buffer_size, 
327      >                     dp_type, successor(2), 
328      >                     DEFAULT_TAG, comm_solve, 
329      >                     requests(2), error)
330
331           endif
332        end do
333
334 c---------------------------------------------------------------------
335 c      now go in the reverse direction                      
336 c---------------------------------------------------------------------
337
338 c---------------------------------------------------------------------
339 c                         BACKSUBSTITUTION 
340 c---------------------------------------------------------------------
341        do    stage = ncells, 1, -1
342           c = slice(2,stage)
343
344           jstart = 0
345           jend   = cell_size(2,c)-1
346
347           isize = cell_size(1,c)
348           ksize = cell_size(3,c)
349           ip    = cell_coord(1,c)-1
350           kp    = cell_coord(3,c)-1
351
352           buffer_size = (isize-start(1,c)-end(1,c)) * 
353      >                  (ksize-start(3,c)-end(3,c))
354
355           if (stage .ne. ncells) then
356
357 c---------------------------------------------------------------------
358 c            if this is not the starting cell in this row of cells, 
359 c            wait for a message to be received, containing the 
360 c            solution of the previous two stations     
361 c---------------------------------------------------------------------
362
363              call mpi_irecv(in_buffer, 10*buffer_size, 
364      >                      dp_type, successor(2), 
365      >                      DEFAULT_TAG, comm_solve, 
366      >                      requests(1), error)
367
368
369 c---------------------------------------------------------------------
370 c            communication has already been started
371 c            while waiting, do the block-diagonal inversion for the 
372 c            cell that was just finished                
373 c---------------------------------------------------------------------
374
375              call pinvr(slice(2,stage+1))
376
377 c---------------------------------------------------------------------
378 c            wait for pending communication to complete
379 c---------------------------------------------------------------------
380              call mpi_waitall(2, requests, statuses, error)
381
382 c---------------------------------------------------------------------
383 c            unpack the buffer for the first three factors         
384 c---------------------------------------------------------------------
385              n = 0
386              p = 0
387              j  = jend
388              j1 = j - 1
389              do    m = 1, 3
390                 do   k = start(3,c), ksize-end(3,c)-1
391                    do   i = start(1,c), isize-end(1,c)-1
392                       sm1 = in_buffer(p+1)
393                       sm2 = in_buffer(p+2)
394                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) - 
395      >                        lhs(i,j,k,n+4,c)*sm1 -
396      >                        lhs(i,j,k,n+5,c)*sm2
397                       rhs(i,j1,k,m,c) = rhs(i,j1,k,m,c) -
398      >                        lhs(i,j1,k,n+4,c) * rhs(i,j,k,m,c) - 
399      >                        lhs(i,j1,k,n+5,c) * sm1
400                       p = p + 2
401                    end do
402                 end do
403              end do
404
405 c---------------------------------------------------------------------
406 c            now unpack the buffer for the remaining two factors
407 c---------------------------------------------------------------------
408              do    m = 4, 5
409                 n = (m-3)*5
410                 do   k = start(3,c), ksize-end(3,c)-1
411                    do   i = start(1,c), isize-end(1,c)-1
412                       sm1 = in_buffer(p+1)
413                       sm2 = in_buffer(p+2)
414                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) - 
415      >                        lhs(i,j,k,n+4,c)*sm1 -
416      >                        lhs(i,j,k,n+5,c)*sm2
417                       rhs(i,j1,k,m,c) = rhs(i,j1,k,m,c) -
418      >                        lhs(i,j1,k,n+4,c) * rhs(i,j,k,m,c) - 
419      >                        lhs(i,j1,k,n+5,c) * sm1
420                       p = p + 2
421                    end do
422                 end do
423              end do
424
425           else
426 c---------------------------------------------------------------------
427 c            now we know this is the first grid block on the back sweep,
428 c            so we don't need a message to start the substitution. 
429 c---------------------------------------------------------------------
430
431              j  = jend - 1
432              j1 = jend
433              n = 0
434              do   m = 1, 3
435                 do   k = start(3,c), ksize-end(3,c)-1
436                    do   i = start(1,c), isize-end(1,c)-1
437                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) -
438      >                             lhs(i,j,k,n+4,c)*rhs(i,j1,k,m,c)
439                    end do
440                 end do
441              end do
442
443              do    m = 4, 5
444                 n = (m-3)*5
445                 do   k = start(3,c), ksize-end(3,c)-1
446                    do   i = start(1,c), isize-end(1,c)-1
447                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) -
448      >                             lhs(i,j,k,n+4,c)*rhs(i,j1,k,m,c)
449                    end do
450                 end do
451              end do
452           endif
453
454 c---------------------------------------------------------------------
455 c         Whether or not this is the last processor, we always have
456 c         to complete the back-substitution 
457 c---------------------------------------------------------------------
458
459 c---------------------------------------------------------------------
460 c         The first three factors
461 c---------------------------------------------------------------------
462           n = 0
463           do   m = 1, 3
464              do   k = start(3,c), ksize-end(3,c)-1
465                 do   j = jend-2, jstart, -1
466                    do    i = start(1,c), isize-end(1,c)-1
467                       j1 = j  + 1
468                       j2 = j  + 2
469                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) - 
470      >                          lhs(i,j,k,n+4,c)*rhs(i,j1,k,m,c) -
471      >                          lhs(i,j,k,n+5,c)*rhs(i,j2,k,m,c)
472                    end do
473                 end do
474              end do
475           end do
476
477 c---------------------------------------------------------------------
478 c         And the remaining two
479 c---------------------------------------------------------------------
480           do    m = 4, 5
481              n = (m-3)*5
482              do   k = start(3,c), ksize-end(3,c)-1
483                 do   j = jend-2, jstart, -1
484                    do    i = start(1,c), isize-end(1,c)-1
485                       j1 = j  + 1
486                       j2 = j1 + 1
487                       rhs(i,j,k,m,c) = rhs(i,j,k,m,c) - 
488      >                          lhs(i,j,k,n+4,c)*rhs(i,j1,k,m,c) -
489      >                          lhs(i,j,k,n+5,c)*rhs(i,j2,k,m,c)
490                    end do
491                 end do
492              end do
493           end do
494
495 c---------------------------------------------------------------------
496 c         send on information to the previous processor, if needed
497 c---------------------------------------------------------------------
498           if (stage .ne.  1) then
499              j  = jstart
500              j1 = jstart + 1
501              p = 0
502              do    m = 1, 5
503                 do    k = start(3,c), ksize-end(3,c)-1
504                    do    i = start(1,c), isize-end(1,c)-1
505                       out_buffer(p+1) = rhs(i,j,k,m,c)
506                       out_buffer(p+2) = rhs(i,j1,k,m,c)
507                       p = p + 2
508                    end do
509                 end do
510              end do
511
512 c---------------------------------------------------------------------
513 c            pack and send the buffer
514 c---------------------------------------------------------------------
515
516              call mpi_isend(out_buffer, 10*buffer_size, 
517      >                     dp_type, predecessor(2), 
518      >                     DEFAULT_TAG, comm_solve, 
519      >                     requests(2), error)
520
521           endif
522
523 c---------------------------------------------------------------------
524 c         If this was the last stage, do the block-diagonal inversion          
525 c---------------------------------------------------------------------
526           if (stage .eq. 1) call pinvr(c)
527
528        end do
529
530        return
531        end
532     
533
534
535
536
537
538