Line data Source code
1 : /**
2 : * \file dart_globmem.c
3 : *
4 : * Implementation of all the related global pointer operations
5 : *
6 : * All the following functions are implemented with the underlying *MPI-3*
7 : * one-sided runtime system.
8 : */
9 :
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <dash/dart/base/logging.h>
#include <dash/dart/if/dart_types.h>
#include <dash/dart/if/dart_globmem.h>
#include <dash/dart/if/dart_team_group.h>
#include <dash/dart/if/dart_communication.h>
#include <dash/dart/mpi/dart_mpi_util.h>
#include <dash/dart/mpi/dart_mem.h>
#include <dash/dart/mpi/dart_translation.h>
#include <dash/dart/mpi/dart_team_private.h>

/* For PRIu64, uint64_t in printf */
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
25 :
/**
 * @note For DART collective allocation/free: the offset in the returned gptr
 *       is the displacement relative to the beginning of the sub-memory
 *       spanned by that particular collective allocation.
 *       For DART local allocation/free: the offset in the returned gptr is
 *       the displacement relative to the base address of the memory region
 *       reserved for DART local allocation/free.
 */
/* Segment id handed out to the next collective allocation: it is stored as
 * the segid of the gptr returned by dart_team_memalloc_aligned and incremented
 * afterwards. The initial value is assigned outside of this file. */
int16_t dart_memid;

/* Segment id handed out to the next registered (user-provided) memory region:
 * it is stored as the segid by dart_team_memregister and
 * dart_team_memregister_aligned and decremented afterwards. The initial value
 * is assigned outside of this file. */
int16_t dart_registermemid;
37 :
38 2410 : dart_ret_t dart_gptr_getaddr(const dart_gptr_t gptr, void **addr)
39 : {
40 2410 : int16_t seg_id = gptr.segid;
41 2410 : uint64_t offset = gptr.addr_or_offs.offset;
42 : dart_unit_t myid;
43 2410 : dart_myid (&myid);
44 :
45 2410 : if (myid == gptr.unitid) {
46 2410 : if (seg_id) {
47 2362 : if (dart_adapt_transtable_get_selfbaseptr(seg_id, (char **)addr) == -1) {
48 0 : return DART_ERR_INVAL;}
49 :
50 2362 : *addr = offset + (char *)(*addr);
51 : } else {
52 48 : if (myid == gptr.unitid) {
53 48 : *addr = offset + dart_mempool_localalloc;
54 : }
55 : }
56 : } else {
57 0 : *addr = NULL;
58 : }
59 2410 : return DART_OK;
60 : }
61 :
62 0 : dart_ret_t dart_gptr_setaddr(dart_gptr_t* gptr, void* addr)
63 : {
64 0 : int16_t seg_id = gptr->segid;
65 : /* The modification to addr is reflected in the fact that modifying
66 : * the offset.
67 : */
68 0 : if (seg_id) {
69 : char* addr_base;
70 0 : if (dart_adapt_transtable_get_selfbaseptr(seg_id, &addr_base) == -1) {
71 0 : return DART_ERR_INVAL;
72 : }
73 0 : gptr->addr_or_offs.offset = (char *)addr - addr_base;
74 : } else {
75 0 : gptr->addr_or_offs.offset = (char *)addr - dart_mempool_localalloc;
76 : }
77 0 : return DART_OK;
78 : }
79 :
80 12806075 : dart_ret_t dart_gptr_incaddr (dart_gptr_t* gptr, int offs)
81 : {
82 12806075 : gptr -> addr_or_offs.offset += offs;
83 12806075 : return DART_OK;
84 : }
85 :
86 :
87 12800830 : dart_ret_t dart_gptr_setunit (dart_gptr_t* gptr, dart_unit_t unit_id)
88 : {
89 12800830 : gptr->unitid = unit_id;
90 12800830 : return DART_OK;
91 : }
92 :
93 24 : dart_ret_t dart_memalloc (size_t nbytes, dart_gptr_t *gptr)
94 : {
95 : dart_unit_t unitid;
96 24 : dart_myid (&unitid);
97 24 : gptr->unitid = unitid;
98 24 : gptr->segid = 0; /* For local allocation, the segid is marked as '0'. */
99 24 : gptr->flags = 0; /* For local allocation, the flag is marked as '0'. */
100 24 : gptr->addr_or_offs.offset = dart_buddy_alloc(dart_localpool, nbytes);
101 24 : if (gptr->addr_or_offs.offset == (uint64_t)(-1)) {
102 0 : DART_LOG_ERROR("dart_memalloc: Out of bounds "
103 : "(dart_buddy_alloc %zu bytes): global memory exhausted",
104 : nbytes);
105 0 : return DART_ERR_OTHER;
106 : }
107 : DART_LOG_DEBUG("dart_memalloc: local alloc nbytes:%lu offset:%"PRIu64"",
108 : nbytes, gptr->addr_or_offs.offset);
109 24 : return DART_OK;
110 : }
111 :
112 24 : dart_ret_t dart_memfree (dart_gptr_t gptr)
113 : {
114 24 : if (dart_buddy_free(dart_localpool, gptr.addr_or_offs.offset) == -1) {
115 0 : DART_LOG_ERROR("dart_memfree: invalid local global pointer: "
116 : "invalid offset: %"PRIu64"",
117 : gptr.addr_or_offs.offset);
118 0 : return DART_ERR_INVAL;
119 : }
120 : DART_LOG_DEBUG("dart_memfree: local free, gptr.unitid:%2d offset:%"PRIu64"",
121 : gptr.unitid, gptr.addr_or_offs.offset);
122 24 : return DART_OK;
123 : }
124 :
125 : dart_ret_t
126 636 : dart_team_memalloc_aligned(
127 : dart_team_t teamid,
128 : size_t nbytes,
129 : dart_gptr_t * gptr)
130 : {
131 : size_t team_size;
132 : dart_unit_t unitid;
133 636 : dart_unit_t gptr_unitid = -1;
134 636 : dart_team_myid(teamid, &unitid);
135 636 : dart_team_size(teamid, &team_size);
136 :
137 : char * sub_mem;
138 :
139 : /* The units belonging to the specified team are eligible to participate
140 : * below codes enclosed.
141 : */
142 :
143 : MPI_Win win;
144 : MPI_Comm comm;
145 : MPI_Aint disp;
146 636 : MPI_Aint * disp_set = (MPI_Aint*)(malloc(team_size * sizeof (MPI_Aint)));
147 :
148 : uint16_t index;
149 636 : int result = dart_adapt_teamlist_convert(teamid, &index);
150 : DART_LOG_DEBUG(
151 : "dart_team_memalloc_aligned: dart_adapt_teamlist_convert completed, "
152 : "index:%d", index);
153 :
154 636 : if (result == -1) {
155 0 : return DART_ERR_INVAL;
156 : }
157 636 : comm = dart_teams[index];
158 : #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
159 : MPI_Win sharedmem_win;
160 : MPI_Comm sharedmem_comm;
161 636 : sharedmem_comm = dart_sharedmem_comm_list[index];
162 : #endif
163 636 : dart_unit_t localid = 0;
164 636 : if (index == 0) {
165 636 : gptr_unitid = localid;
166 : } else {
167 : MPI_Group group;
168 : MPI_Group group_all;
169 0 : MPI_Comm_group(comm, &group);
170 0 : MPI_Comm_group(MPI_COMM_WORLD, &group_all);
171 0 : MPI_Group_translate_ranks(group, 1, &localid, group_all, &gptr_unitid);
172 : }
173 : #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
174 : MPI_Info win_info;
175 636 : MPI_Info_create(&win_info);
176 636 : MPI_Info_set(win_info, "alloc_shared_noncontig", "true");
177 :
178 : /* Allocate shared memory on sharedmem_comm, and create the related
179 : * sharedmem_win */
180 :
181 : /* NOTE:
182 : * Windows should definitely be optimized for the concrete value type i.e.
183 : * via MPI_Type_create_index_block as this greatly improves performance of
184 : * MPI_Get, MPI_Put and other RMA friends.
185 : *
186 : * !!! BUG IN INTEL-MPI 5.0
187 : * !!!
188 : * !!! See:
189 : * !!! https://software.intel.com/de-de/forums/intel-clusters-and-hpc-technology/topic/519995
190 : * !!!
191 : * !!! Quote:
192 : * !!! "[When allocating, e.g., an] integer*4-array of array dimension N,
193 : * !!! then use it by the MPI-processes (on the same node), and then
194 : * !!! repeats the same for the next shared allocation [...] the number of
195 : * !!! shared windows do accumulate in the run, because I do not free the
196 : * !!! shared windows allocated so far. This allocation of shared windows
197 : * !!! works, but only until the total number of allocated memory exceeds
198 : * !!! a limit of ~30 millions of Integer*4 numbers (~120 MB).
199 : * !!! When that limit is reached, the next call of
200 : * !!! MPI_WIN_ALLOCATE_SHARED, MPI_WIN_SHARED_QUERY to allocated one
201 : * !!! more shared window do not give an error message, but the 1st
202 : * !!! attempt to use that allocated shared array results in a bus error
203 : * !!! (because the shared array has not been allocated correctly)."
204 : * !!!
205 : * !!! Reproduced on SuperMUC and mpich3.1 on projekt03.
206 : *
207 : * Related support ticket of MPICH:
208 : * http://trac.mpich.org/projects/mpich/ticket/2178
209 : */
210 : DART_LOG_DEBUG("dart_team_memalloc_aligned: "
211 : "MPI_Win_allocate_shared(nbytes:%ld)", nbytes);
212 636 : if (sharedmem_comm != MPI_COMM_NULL) {
213 636 : int ret = MPI_Win_allocate_shared(
214 : nbytes,
215 : sizeof(char),
216 : win_info,
217 : sharedmem_comm,
218 : &sub_mem,
219 : &sharedmem_win);
220 636 : if (ret != MPI_SUCCESS) {
221 0 : DART_LOG_ERROR("dart_team_memalloc_aligned: "
222 : "MPI_Win_allocate_shared failed, error %d (%s)",
223 : ret, DART__MPI__ERROR_STR(ret));
224 0 : return DART_ERR_OTHER;
225 : }
226 : } else {
227 0 : DART_LOG_ERROR("dart_team_memalloc_aligned: "
228 : "Shared memory communicator is MPI_COMM_NULL, "
229 : "cannot call MPI_Win_allocate_shared");
230 0 : return DART_ERR_OTHER;
231 : }
232 :
233 : int sharedmem_unitid;
234 : MPI_Aint winseg_size;
235 : char ** baseptr_set;
236 : char * baseptr;
237 : int disp_unit, i;
238 636 : MPI_Comm_rank (sharedmem_comm, &sharedmem_unitid);
239 636 : baseptr_set = (char**)malloc(sizeof(char*) * dart_sharedmemnode_size[index]);
240 :
241 3180 : for (i = 0; i < dart_sharedmemnode_size[index]; i++) {
242 2544 : if (sharedmem_unitid != i) {
243 1908 : MPI_Win_shared_query(sharedmem_win, i,
244 : &winseg_size, &disp_unit, &baseptr);
245 1908 : baseptr_set[i] = baseptr;
246 : } else {
247 636 : baseptr_set[i] = sub_mem;
248 : }
249 : }
250 : #else
251 : if (MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &sub_mem) != MPI_SUCCESS) {
252 : DART_LOG_ERROR(
253 : "dart_team_memalloc_aligned: bytes:%lu MPI_Alloc_mem failed", nbytes);
254 : return DART_ERR_OTHER;
255 : }
256 : #endif
257 :
258 636 : win = dart_win_lists[index];
259 : /* Attach the allocated shared memory to win */
260 636 : if (MPI_Win_attach(win, sub_mem, nbytes) != MPI_SUCCESS) {
261 0 : DART_LOG_ERROR(
262 : "dart_team_memalloc_aligned: bytes:%lu MPI_Win_attach failed", nbytes);
263 0 : return DART_ERR_OTHER;
264 : }
265 636 : if (MPI_Get_address(sub_mem, &disp) != MPI_SUCCESS) {
266 0 : DART_LOG_ERROR(
267 : "dart_team_memalloc_aligned: bytes:%lu MPI_Get_address failed", nbytes);
268 0 : return DART_ERR_OTHER;
269 : }
270 :
271 : /* Collect the disp information from all the ranks in comm */
272 636 : MPI_Allgather(&disp, 1, MPI_AINT, disp_set, 1, MPI_AINT, comm);
273 :
274 : /* -- Updating infos on gptr -- */
275 636 : gptr->unitid = gptr_unitid;
276 : /* Segid equals to dart_memid (always a positive integer), identifies an
277 : * unique collective global memory. */
278 636 : gptr->segid = dart_memid;
279 : /* For collective allocation, the flag is marked as 'index' */
280 636 : gptr->flags = index;
281 636 : gptr->addr_or_offs.offset = 0;
282 :
283 : /* Updating the translation table of teamid with the created
284 : * (offset, win) infos */
285 : info_t item;
286 636 : item.seg_id = dart_memid;
287 636 : item.size = nbytes;
288 636 : item.disp = disp_set;
289 : #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
290 636 : item.win = sharedmem_win;
291 636 : item.baseptr = baseptr_set;
292 : #else
293 : item.win = MPI_WIN_NULL;
294 : item.baseptr = NULL;
295 : #endif
296 636 : item.selfbaseptr = sub_mem;
297 : /* Add this newly generated correspondence relationship record into the
298 : * translation table. */
299 636 : dart_adapt_transtable_add(item);
300 : #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
301 636 : MPI_Info_free(&win_info);
302 : #endif
303 636 : dart_memid++;
304 :
305 : DART_LOG_DEBUG(
306 : "dart_team_memalloc_aligned: bytes:%lu offset:%d gptr_unitid:%d "
307 : "across team %d",
308 : nbytes, 0, gptr_unitid, teamid);
309 :
310 636 : return DART_OK;
311 : }
312 :
313 632 : dart_ret_t dart_team_memfree(
314 : dart_team_t teamid,
315 : dart_gptr_t gptr)
316 : {
317 : dart_unit_t unitid;
318 632 : uint16_t index = gptr.flags;
319 632 : int16_t seg_id = gptr.segid;
320 : char * sub_mem;
321 : MPI_Win win;
322 :
323 632 : dart_team_myid(teamid, &unitid);
324 :
325 632 : win = dart_win_lists[index];
326 :
327 632 : if (dart_adapt_transtable_get_selfbaseptr(seg_id, &sub_mem) == -1) {
328 0 : return DART_ERR_INVAL;
329 : }
330 :
331 : /* Detach the window associated with sub-memory to be freed:
332 : */
333 632 : MPI_Win_detach(win, sub_mem);
334 :
335 : /* Free the window's associated sub-memory:
336 : */
337 : #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
338 : MPI_Win sharedmem_win;
339 632 : if (dart_adapt_transtable_get_win(seg_id, &sharedmem_win) == -1) {
340 0 : return DART_ERR_OTHER;
341 : }
342 632 : if (MPI_Win_free(&sharedmem_win) != MPI_SUCCESS) {
343 0 : DART_LOG_ERROR("dart_team_memfree: MPI_Win_free failed");
344 0 : return DART_ERR_OTHER;
345 : }
346 : #else
347 : if (MPI_Free_mem(sub_mem) != MPI_SUCCESS) {
348 : DART_LOG_ERROR("dart_team_memfree: MPI_Free_mem failed");
349 : return DART_ERR_OTHER;
350 : }
351 : #endif
352 : DART_LOG_DEBUG("dart_team_memfree: collective free, team unit id: %2d "
353 : "offset:%"PRIu64" gptr_unitid:%d across team %d",
354 : unitid, gptr.addr_or_offs.offset, gptr.unitid, teamid);
355 : /* Remove the related correspondence relation record from the related
356 : * translation table. */
357 632 : if (dart_adapt_transtable_remove(seg_id) == -1) {
358 0 : return DART_ERR_INVAL;
359 : }
360 632 : return DART_OK;
361 : }
362 :
363 : dart_ret_t
364 0 : dart_team_memregister_aligned(
365 : dart_team_t teamid,
366 : size_t nbytes,
367 : void * addr,
368 : dart_gptr_t * gptr)
369 : {
370 : size_t size;
371 : dart_unit_t unitid;
372 0 : dart_unit_t gptr_unitid = -1;
373 0 : dart_team_myid(teamid, &unitid);
374 0 : dart_team_size(teamid, &size);
375 :
376 : MPI_Win win;
377 : MPI_Comm comm;
378 : MPI_Aint disp;
379 0 : MPI_Aint * disp_set = (MPI_Aint*)malloc(size * sizeof (MPI_Aint));
380 : uint16_t index;
381 0 : int result = dart_adapt_teamlist_convert(teamid, &index);
382 :
383 0 : if (result == -1) {
384 0 : return DART_ERR_INVAL;
385 : }
386 0 : comm = dart_teams[index];
387 0 : dart_unit_t localid = 0;
388 0 : if (index == 0) {
389 0 : gptr_unitid = localid;
390 : } else {
391 : MPI_Group group;
392 : MPI_Group group_all;
393 0 : MPI_Comm_group(comm, &group);
394 0 : MPI_Comm_group(MPI_COMM_WORLD, &group_all);
395 0 : MPI_Group_translate_ranks(group, 1, &localid,
396 : group_all, &gptr_unitid);
397 : }
398 0 : win = dart_win_lists[index];
399 0 : MPI_Win_attach(win, (char*)addr, nbytes);
400 0 : MPI_Get_address((char*)addr, &disp);
401 0 : MPI_Allgather(&disp, 1, MPI_AINT, disp_set, 1, MPI_AINT, comm);
402 0 : gptr->unitid = gptr_unitid;
403 0 : gptr->segid = dart_registermemid;
404 0 : gptr->flags = index;
405 0 : gptr->addr_or_offs.offset = 0;
406 : info_t item;
407 0 : item.seg_id = dart_registermemid;
408 0 : item.size = nbytes;
409 0 : item.disp = disp_set;
410 0 : item.win = MPI_WIN_NULL;
411 0 : item.baseptr = NULL;
412 0 : item.selfbaseptr = (char*)addr;
413 0 : dart_adapt_transtable_add(item);
414 0 : dart_registermemid--;
415 : DART_LOG_DEBUG("dart_team_memregister_aligned: collective alloc, "
416 : "unit:%2d, nbytes:%lu offset:%d gptr_unitid:%d "
417 : "across team %d",
418 : unitid, nbytes, 0, gptr_unitid, teamid);
419 0 : return DART_OK;
420 : }
421 :
422 : dart_ret_t
423 208 : dart_team_memregister(
424 : dart_team_t teamid,
425 : size_t nbytes,
426 : void * addr,
427 : dart_gptr_t * gptr)
428 : {
429 : size_t size;
430 : dart_unit_t unitid;
431 208 : dart_unit_t gptr_unitid = -1;
432 208 : dart_team_myid(teamid, &unitid);
433 208 : dart_team_size(teamid, &size);
434 :
435 : MPI_Win win;
436 : MPI_Comm comm;
437 : MPI_Aint disp;
438 208 : MPI_Aint * disp_set = (MPI_Aint*)malloc(size * sizeof (MPI_Aint));
439 : uint16_t index;
440 208 : int result = dart_adapt_teamlist_convert(teamid, &index);
441 : int nil;
442 :
443 208 : if (nbytes == 0) {
444 : // Attaching empty memory region, set sendbuf to valid dummy pointer:
445 72 : addr = (void*)(&nil);
446 : }
447 :
448 208 : if (result == -1) {
449 0 : return DART_ERR_INVAL;
450 : }
451 208 : comm = dart_teams[index];
452 208 : dart_unit_t localid = 0;
453 208 : if (index == 0) {
454 208 : gptr_unitid = localid;
455 : } else {
456 : MPI_Group group;
457 : MPI_Group group_all;
458 0 : MPI_Comm_group(comm, &group);
459 0 : MPI_Comm_group(MPI_COMM_WORLD, &group_all);
460 0 : MPI_Group_translate_ranks(group, 1, &localid,
461 : group_all, &gptr_unitid);
462 : }
463 208 : win = dart_win_lists[index];
464 208 : MPI_Win_attach(win, (char*)addr, nbytes);
465 208 : MPI_Get_address((char*)addr, &disp);
466 208 : MPI_Allgather(&disp, 1, MPI_AINT, disp_set, 1, MPI_AINT, comm);
467 208 : gptr->unitid = gptr_unitid;
468 208 : gptr->segid = dart_registermemid;
469 208 : gptr->flags = index;
470 208 : gptr->addr_or_offs.offset = 0;
471 : info_t item;
472 208 : item.seg_id = dart_registermemid;
473 208 : item.size = nbytes;
474 208 : item.disp = disp_set;
475 208 : item.win = MPI_WIN_NULL;
476 208 : item.baseptr = NULL;
477 208 : item.selfbaseptr = (char*)addr;
478 208 : dart_adapt_transtable_add(item);
479 208 : dart_registermemid--;
480 :
481 : DART_LOG_DEBUG("dart_team_memregister: collective alloc, "
482 : "unit:%2d, nbytes:%lu offset:%d gptr_unitid:%d "
483 : "across team %d",
484 : unitid, nbytes, 0, gptr_unitid, teamid);
485 208 : return DART_OK;
486 : }
487 :
488 : dart_ret_t
489 208 : dart_team_memderegister(
490 : dart_team_t teamid,
491 : dart_gptr_t gptr)
492 : {
493 : dart_unit_t unitid;
494 208 : uint16_t index = gptr.flags;
495 208 : int16_t seg_id = gptr.segid;
496 : char * sub_mem;
497 :
498 : MPI_Win win;
499 :
500 208 : dart_team_myid(teamid, &unitid);
501 :
502 208 : win = dart_win_lists[index];
503 :
504 208 : if (dart_adapt_transtable_get_selfbaseptr (seg_id, &sub_mem) == -1) {
505 0 : return DART_ERR_INVAL;
506 : }
507 208 : MPI_Win_detach(win, sub_mem);
508 208 : if (dart_adapt_transtable_remove (seg_id) == -1){
509 0 : return DART_ERR_INVAL;
510 : }
511 : DART_LOG_DEBUG("dart_team_memderegister: collective free, "
512 : "team unit %2d offset:%"PRIu64" gptr_unitid:%d"
513 : "across team %d",
514 : unitid, gptr.addr_or_offs.offset, gptr.unitid, teamid);
515 208 : return DART_OK;
516 : }
517 :
518 :
|