-
Notifications
You must be signed in to change notification settings - Fork 9
/
host_alloc_module.fypp
413 lines (314 loc) · 11.1 KB
/
host_alloc_module.fypp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
#! (C) Copyright 2022- ECMWF.
#! (C) Copyright 2022- Meteo-France.
#!
#! This software is licensed under the terms of the Apache Licence Version 2.0
#! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#! In applying this licence, ECMWF does not waive the privileges and immunities
#! granted to it by virtue of its status as an intergovernmental organisation
#! nor does it submit to any jurisdiction.
MODULE HOST_ALLOC_MODULE
#:set fieldTypeList = fieldType.getFieldTypeList ()
${fieldType.useParkind1 ()}$
USE ISO_C_BINDING
#:if defined('CUDA')
USE CUDAFOR
#:endif
USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY : INT64
USE FIELD_DEFAULTS_MODULE
USE FIELD_STATISTICS_MODULE
USE FIELD_ABORT_MODULE
IMPLICIT NONE
PRIVATE
INTERFACE HOST_ALLOC
#:for ft in fieldTypeList
MODULE PROCEDURE ${ft.name}$_HOST_ALLOC
#:endfor
END INTERFACE HOST_ALLOC
INTERFACE HOST_ALLOC_POOLED
#:for ft in fieldTypeList
MODULE PROCEDURE ${ft.name}$_HOST_ALLOC_POOLED
#:endfor
END INTERFACE HOST_ALLOC_POOLED
INTERFACE HOST_DEALLOC
#:for ft in fieldTypeList
MODULE PROCEDURE ${ft.name}$_HOST_DEALLOC
#:endfor
END INTERFACE HOST_DEALLOC
INTERFACE HOST_DEALLOC_POOLED
#:for ft in fieldTypeList
MODULE PROCEDURE ${ft.name}$_HOST_DEALLOC_POOLED
#:endfor
END INTERFACE HOST_DEALLOC_POOLED
INTERFACE
SUBROUTINE C_MALLOC (SIZ, PTR) BIND (C, NAME='c_malloc')
IMPORT :: C_PTR, C_SIZE_T
INTEGER (C_SIZE_T), VALUE, INTENT(IN) :: SIZ
TYPE (C_PTR), INTENT(OUT) :: PTR
END SUBROUTINE C_MALLOC
SUBROUTINE C_FREE (PTR) BIND (C, NAME='c_free')
IMPORT :: C_PTR
TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
END SUBROUTINE C_FREE
SUBROUTINE C_PTR_INCR (SIZ, PTR, DATA) BIND (C, NAME='c_ptr_incr')
IMPORT :: C_PTR, C_SIZE_T
INTEGER (C_SIZE_T), VALUE, INTENT(IN) :: SIZ
TYPE (C_PTR), INTENT(IN) :: PTR
TYPE (C_PTR), INTENT(OUT) :: DATA
END SUBROUTINE C_PTR_INCR
END INTERFACE
TYPE :: MEM_BLOCK
TYPE(C_PTR) :: DATA
INTEGER(KIND=INT64) :: POS = 0
INTEGER(KIND=INT64) :: SIZE = 0
INTEGER :: NUMFLDS = 0
TYPE(MEM_BLOCK), POINTER :: NEXT => NULL()
CONTAINS
PROCEDURE :: INIT => MEM_BLOCK_INIT
PROCEDURE :: FINAL => MEM_BLOCK_FINAL
PROCEDURE :: ALLOC => MEM_BLOCK_ALLOC
PROCEDURE :: DEALLOC => MEM_BLOCK_DEALLOC
END TYPE MEM_BLOCK
TYPE :: MEM_POOL
TYPE(MEM_BLOCK), POINTER :: START_BLK => NULL()
CONTAINS
PROCEDURE :: ALLOC => MEM_POOL_ALLOC
PROCEDURE :: DEALLOC => MEM_POOL_DEALLOC
PROCEDURE :: REQUEST_MEM => MEM_POOL_REQUEST_MEM
PROCEDURE :: REQUEST_FREE => MEM_POOL_REQUEST_FREE
PROCEDURE :: FINAL => MEM_POOL_FINAL
PROCEDURE, PRIVATE :: MEM_FREE => MEM_POOL_MEM_FREE
END TYPE MEM_POOL
TYPE(MEM_POOL) :: HOST_POOL
PUBLIC :: HOST_ALLOC
PUBLIC :: HOST_ALLOC_POOLED
PUBLIC :: HOST_DEALLOC
PUBLIC :: HOST_DEALLOC_POOLED
PUBLIC :: HOST_POOL
CONTAINS
SUBROUTINE MEM_BLOCK_INIT( SELF )
CLASS(MEM_BLOCK) :: SELF
INTEGER :: ISTAT
CALL C_MALLOC(SELF%SIZE, SELF%DATA)
#:if defined('CUDA')
IF(INIT_PINNED_VALUE)THEN
CALL PIN_ALLOCATION(SELF%DATA, SELF%SIZE, ISTAT)
IF (ISTAT /= 0) THEN
CALL FIELD_ABORT ("MEM_POOL: FAILED TO REGISTER IN PAGE-LOCKED MEMORY")
ENDIF
ENDIF
#:endif
END SUBROUTINE MEM_BLOCK_INIT
SUBROUTINE MEM_BLOCK_ALLOC( SELF, ALLOC_SIZE, DATA )
CLASS(MEM_BLOCK) :: SELF
INTEGER(C_SIZE_T), INTENT(IN) :: ALLOC_SIZE
TYPE(C_PTR), INTENT(OUT) :: DATA
CALL C_PTR_INCR(SELF%POS, SELF%DATA, DATA)
SELF%POS = SELF%POS + ALLOC_SIZE
SELF%NUMFLDS = SELF%NUMFLDS + 1
END SUBROUTINE MEM_BLOCK_ALLOC
SUBROUTINE MEM_BLOCK_DEALLOC( SELF )
CLASS(MEM_BLOCK) :: SELF
SELF%NUMFLDS = SELF%NUMFLDS - 1
IF( SELF%NUMFLDS == 0 ) SELF%POS = 0
END SUBROUTINE MEM_BLOCK_DEALLOC
SUBROUTINE MEM_BLOCK_FINAL( SELF )
CLASS(MEM_BLOCK) :: SELF
INTEGER :: ISTAT
IF( .NOT. SELF%NUMFLDS == 0 )THEN
PRINT *, "FIELD_API DETECTED UNFINALISED FIELDS, POTENTIAL DEVICE MEMORY LEAK"
ENDIF
#:if defined('CUDA')
IF (INIT_PINNED_VALUE) THEN
CALL UNPIN_ALLOCATION(SELF%DATA, ISTAT)
IF (ISTAT /= 0) THEN
CALL FIELD_ABORT ("MEM_POOL: FAILED TO UNREGISTER PAGE-LOCKED MEMORY")
ENDIF
ENDIF
#:endif
CALL C_FREE(SELF%DATA)
SELF%SIZE = 0
SELF%POS = 0
SELF%NUMFLDS = 0
END SUBROUTINE MEM_BLOCK_FINAL
SUBROUTINE MEM_POOL_FINAL( SELF )
CLASS(MEM_POOL) :: SELF
IF( ASSOCIATED(SELF%START_BLK) ) CALL SELF%MEM_FREE(SELF%START_BLK)
END SUBROUTINE MEM_POOL_FINAL
SUBROUTINE MEM_POOL_MEM_FREE(SELF, BLK)
CLASS(MEM_POOL) :: SELF
TYPE(MEM_BLOCK), POINTER, INTENT(INOUT) :: BLK
IF( ASSOCIATED(BLK%NEXT) ) CALL SELF%MEM_FREE(BLK%NEXT)
CALL BLK%FINAL()
DEALLOCATE(BLK)
NULLIFY(BLK)
END SUBROUTINE MEM_POOL_MEM_FREE
SUBROUTINE MEM_POOL_ALLOC( SELF, ARR_SIZE, BLKID, DATA )
CLASS(MEM_POOL) :: SELF
INTEGER(C_SIZE_T), INTENT(IN) :: ARR_SIZE
INTEGER(KIND=JPIM), INTENT(INOUT) :: BLKID
TYPE(C_PTR), INTENT(OUT) :: DATA
INTEGER(C_SIZE_T) :: ALLOC_SIZE
ALLOC_SIZE = ARR_SIZE + MOD(ARR_SIZE, POOL_ALLOC_PADDING_FACTOR)
IF( .NOT. ASSOCIATED(SELF%START_BLK) ) ALLOCATE(SELF%START_BLK)
CALL SELF%REQUEST_MEM( ALLOC_SIZE, SELF%START_BLK, DATA, BLKID )
END SUBROUTINE MEM_POOL_ALLOC
SUBROUTINE MEM_POOL_DEALLOC( SELF, FIELD_BLKID )
CLASS(MEM_POOL) :: SELF
INTEGER(KIND=JPIM), INTENT(IN) :: FIELD_BLKID
INTEGER(KIND=JPIM) :: BLKID
BLKID = 1
CALL SELF%REQUEST_FREE( FIELD_BLKID, SELF%START_BLK, BLKID )
END SUBROUTINE MEM_POOL_DEALLOC
SUBROUTINE MEM_POOL_REQUEST_FREE( SELF, FIELD_BLKID, BLK, BLKID )
CLASS(MEM_POOL) :: SELF
INTEGER(KIND=JPIM), INTENT(IN) :: FIELD_BLKID
TYPE(MEM_BLOCK), POINTER, INTENT(INOUT) :: BLK
INTEGER(KIND=JPIM), INTENT(INOUT) :: BLKID
IF( FIELD_BLKID == BLKID )THEN
CALL BLK%DEALLOC()
ELSE
BLKID = BLKID + 1
CALL SELF%REQUEST_FREE(FIELD_BLKID, BLK%NEXT, BLKID)
ENDIF
END SUBROUTINE MEM_POOL_REQUEST_FREE
SUBROUTINE MEM_POOL_REQUEST_MEM( SELF, ALLOC_SIZE, BLK, DATA, BLKID )
CLASS(MEM_POOL) :: SELF
INTEGER(C_SIZE_T), INTENT(IN) :: ALLOC_SIZE
TYPE(MEM_BLOCK), POINTER, INTENT(INOUT) :: BLK
TYPE(C_PTR), INTENT(OUT) :: DATA
INTEGER(KIND=JPIM), INTENT(INOUT) :: BLKID
BLKID = BLKID + 1
IF( C_ASSOCIATED(BLK%DATA) )THEN
IF( BLK%POS + ALLOC_SIZE <= BLK%SIZE ) THEN
!... Allocation can fit within existing block
CALL BLK%ALLOC(ALLOC_SIZE, DATA)
ELSE
!... Proceed to next block
IF( .NOT. ASSOCIATED(BLK%NEXT) ) ALLOCATE(BLK%NEXT)
CALL SELF%REQUEST_MEM(ALLOC_SIZE, BLK%NEXT, DATA, BLKID)
ENDIF
ELSE
!... Create new block
BLK%SIZE = POOL_BLOCK_SIZE
DO WHILE ( BLK%SIZE < ALLOC_SIZE )
BLK%SIZE = BLK%SIZE*2
ENDDO
CALL BLK%INIT()
CALL BLK%ALLOC(ALLOC_SIZE, DATA)
ENDIF
END SUBROUTINE MEM_POOL_REQUEST_MEM
#:if defined('CUDA')
SUBROUTINE PIN_ALLOCATION(DATA, ARR_SIZE, ISTAT)
TYPE(C_PTR), INTENT(INOUT) :: DATA
INTEGER, INTENT(OUT) :: ISTAT
INTEGER(C_SIZE_T), INTENT(IN) :: ARR_SIZE
ISTAT = CUDAHOSTREGISTER (DATA, ARR_SIZE, CUDAHOSTREGISTERMAPPED)
END SUBROUTINE PIN_ALLOCATION
SUBROUTINE UNPIN_ALLOCATION(DATA, ISTAT)
TYPE(C_PTR), INTENT(INOUT) :: DATA
INTEGER, INTENT(OUT) :: ISTAT
ISTAT = CUDAHOSTUNREGISTER (DATA)
END SUBROUTINE UNPIN_ALLOCATION
#:endif
#:for ft in fieldTypeList
SUBROUTINE ${ft.name}$_HOST_ALLOC_POOLED (HST, LBOUNDS, UBOUNDS, BLKID)
${ft.type}$, POINTER, INTENT(OUT) :: HST(${ft.shape}$)
INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(${ft.rank}$)
INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(${ft.rank}$)
INTEGER(KIND=JPIM), INTENT(INOUT) :: BLKID
${ft.type}$, POINTER :: PTR(${ft.shape}$)
TYPE(C_PTR) :: DATA
INTEGER(C_SIZE_T) :: ARR_SIZE
INTEGER :: ISHAPE(${ft.rank}$), ISTAT
ARR_SIZE = KIND(HST)
#:for r in range(ft.rank)
ISHAPE(${r+1}$) = UBOUNDS(${r+1}$) - LBOUNDS(${r+1}$) + 1
ARR_SIZE = ARR_SIZE * ISHAPE(${r+1}$)
#:endfor
IF(ARR_SIZE > 0)THEN
CALL HOST_POOL%ALLOC(ARR_SIZE, BLKID, DATA)
CALL C_F_POINTER(DATA, PTR, SHAPE=ISHAPE)
HST(${', '.join(map(lambda r: 'LBOUNDS('+str(r+1)+'):', range(0, ft.rank)))}$) => PTR
ELSE
ALLOCATE(HST(${','.join([f'LBOUNDS({r+1}):UBOUNDS({r+1})' for r in range(ft.rank)])}$))
ENDIF
IF (FIELD_STATISTICS_ENABLE) CALL FIELD_STATISTICS_HOST_ALLOCATE (SIZE (HST, KIND=JPIB) * INT (KIND (HST), KIND=JPIB))
END SUBROUTINE ${ft.name}$_HOST_ALLOC_POOLED
SUBROUTINE ${ft.name}$_HOST_ALLOC (HST, LBOUNDS, UBOUNDS, PINNED)
${ft.type}$, POINTER, INTENT(OUT) :: HST(${ft.shape}$)
INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(${ft.rank}$)
INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(${ft.rank}$)
LOGICAL, INTENT(IN) :: PINNED
${ft.type}$, POINTER :: PTR(${ft.shape}$)
TYPE(C_PTR) :: DATA
INTEGER(C_SIZE_T) :: ARR_SIZE
INTEGER :: ISHAPE(${ft.rank}$), ISTAT
ARR_SIZE = KIND(HST)
#:for r in range(ft.rank)
ISHAPE(${r+1}$) = UBOUNDS(${r+1}$) - LBOUNDS(${r+1}$) + 1
ARR_SIZE = ARR_SIZE * ISHAPE(${r+1}$)
#:endfor
IF(ARR_SIZE > 0)THEN
CALL C_MALLOC(ARR_SIZE, DATA)
#:if defined('CUDA')
IF(PINNED)THEN
CALL PIN_ALLOCATION(DATA, ARR_SIZE, ISTAT)
IF (ISTAT /= 0) THEN
CALL FIELD_ABORT ("${ft.name}$_OWNER: FAILED TO REGISTER IN PAGE-LOCKED MEMORY")
ENDIF
ENDIF
#:endif
CALL C_F_POINTER(DATA, PTR, SHAPE=ISHAPE)
HST(${', '.join(map(lambda r: 'LBOUNDS('+str(r+1)+'):', range(0, ft.rank)))}$) => PTR
ELSE
ALLOCATE(HST(${','.join([f'LBOUNDS({r+1}):UBOUNDS({r+1})' for r in range(ft.rank)])}$))
ENDIF
IF (FIELD_STATISTICS_ENABLE) CALL FIELD_STATISTICS_HOST_ALLOCATE (SIZE (HST, KIND=JPIB) * INT (KIND (HST), KIND=JPIB))
END SUBROUTINE ${ft.name}$_HOST_ALLOC
SUBROUTINE ${ft.name}$_HOST_DEALLOC(HST, PINNED)
${ft.type}$, POINTER, INTENT(INOUT) :: HST(${ft.shape}$)
LOGICAL, INTENT (IN) :: PINNED
TYPE(C_PTR) :: DATA
INTEGER :: ISTAT
IF (FIELD_STATISTICS_ENABLE) CALL FIELD_STATISTICS_HOST_DEALLOCATE (SIZE (HST, KIND=JPIB) * INT (KIND (HST), KIND=JPIB))
IF(SIZE(HST) > 0)THEN
DATA = C_LOC (HST (${ ', '.join (map (lambda i: 'LBOUND (HST, ' + str (i) + ')', range (1, ft.rank+1))) }$))
#:if defined('CUDA')
IF (PINNED) THEN
CALL UNPIN_ALLOCATION(DATA, ISTAT)
IF (ISTAT /= 0) THEN
CALL FIELD_ABORT ("${ft.name}$_OWNER: FAILED TO UNREGISTER PAGE-LOCKED MEMORY")
ENDIF
ENDIF
#:endif
CALL C_FREE(DATA)
ELSE
DEALLOCATE(HST)
ENDIF
NULLIFY(HST)
END SUBROUTINE ${ft.name}$_HOST_DEALLOC
SUBROUTINE ${ft.name}$_HOST_DEALLOC_POOLED(HST, BLKID)
${ft.type}$, POINTER, INTENT(INOUT) :: HST(${ft.shape}$)
INTEGER(KIND=JPIM), INTENT(IN) :: BLKID
TYPE(C_PTR) :: DATA
INTEGER(C_SIZE_T) :: ARR_SIZE
INTEGER :: ISHAPE(${ft.rank}$)
INTEGER :: UBOUNDS(${ft.rank}$)
INTEGER :: LBOUNDS(${ft.rank}$)
UBOUNDS = UBOUND(HST)
LBOUNDS = LBOUND(HST)
ARR_SIZE = KIND(HST)
#:for r in range(ft.rank)
ISHAPE(${r+1}$) = UBOUNDS(${r+1}$) - LBOUNDS(${r+1}$) + 1
ARR_SIZE = ARR_SIZE * ISHAPE(${r+1}$)
#:endfor
IF (FIELD_STATISTICS_ENABLE) CALL FIELD_STATISTICS_HOST_DEALLOCATE (SIZE (HST, KIND=JPIB) * INT (KIND (HST), KIND=JPIB))
IF(ARR_SIZE > 0)THEN
CALL HOST_POOL%DEALLOC(BLKID)
ELSE
DEALLOCATE(HST)
ENDIF
NULLIFY(HST)
END SUBROUTINE ${ft.name}$_HOST_DEALLOC_POOLED
#:endfor
END MODULE HOST_ALLOC_MODULE