StarPU Internal Handbook
starpu_mpi_fxt.h
Go to the documentation of this file.
1 /* StarPU --- Runtime system for heterogeneous multicore architectures.
2  *
3  * Copyright (C) 2010-2022 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
4  * Copyright (C) 2019 Federal University of Rio Grande do Sul (UFRGS)
5  *
6  * StarPU is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation; either version 2.1 of the License, or (at
9  * your option) any later version.
10  *
11  * StarPU is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14  *
15  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
16  */
17 
18 #ifndef __STARPU_MPI_FXT_H__
19 #define __STARPU_MPI_FXT_H__
20 
21 #include <starpu.h>
22 #include <common/config.h>
23 #include <common/fxt.h>
24 
27 #ifdef __cplusplus
28 extern "C"
29 {
30 #endif
31 
/*
 * Identifiers distinguishing the kind of send being traced.
 * NOTE(review): presumably these are the values passed as the `type`
 * argument of _STARPU_MPI_TRACE_ISEND_SUBMIT_END -- confirm at the call sites.
 */
#define _STARPU_MPI_FUT_POINT_TO_POINT_SEND	0x100
#define _STARPU_MPI_FUT_COLLECTIVE_SEND		0x101
34 
/*
 * FxT event codes emitted by the StarPU-MPI trace probes.
 *
 * The codes are listed in increasing numeric order: they occupy the contiguous
 * range 0x5201..0x5223 and each value is used exactly once.  When adding an
 * event, append the next free value at the end of the list.
 */
#define _STARPU_MPI_FUT_START			0x5201
#define _STARPU_MPI_FUT_STOP			0x5202
#define _STARPU_MPI_FUT_BARRIER			0x5203
#define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN	0x5204
#define _STARPU_MPI_FUT_ISEND_SUBMIT_END	0x5205
#define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN	0x5206
#define _STARPU_MPI_FUT_IRECV_SUBMIT_END	0x5207
#define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN	0x5208
#define _STARPU_MPI_FUT_ISEND_COMPLETE_END	0x5209
#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN	0x520a
#define _STARPU_MPI_FUT_IRECV_COMPLETE_END	0x520b
#define _STARPU_MPI_FUT_SLEEP_BEGIN		0x520c
#define _STARPU_MPI_FUT_SLEEP_END		0x520d
#define _STARPU_MPI_FUT_DTESTING_BEGIN		0x520e
#define _STARPU_MPI_FUT_DTESTING_END		0x520f
#define _STARPU_MPI_FUT_UTESTING_BEGIN		0x5210
#define _STARPU_MPI_FUT_UTESTING_END		0x5211
#define _STARPU_MPI_FUT_UWAIT_BEGIN		0x5212
#define _STARPU_MPI_FUT_UWAIT_END		0x5213
#define _STARPU_MPI_FUT_POLLING_BEGIN		0x5214
#define _STARPU_MPI_FUT_POLLING_END		0x5215
#define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN	0x5216
#define _STARPU_MPI_FUT_DRIVER_RUN_END		0x5217
#define _STARPU_MPI_FUT_DATA_SET_TAG		0x5218
#define _STARPU_MPI_FUT_IRECV_NUMA_NODE		0x5219
#define _STARPU_MPI_FUT_DATA_SET_RANK		0x521a
#define _STARPU_MPI_FUT_IRECV_TERMINATED	0x521b
#define _STARPU_MPI_FUT_ISEND_TERMINATED	0x521c
#define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN	0x521d
#define _STARPU_MPI_FUT_TESTING_DETACHED_END	0x521e
#define _STARPU_MPI_FUT_TEST_BEGIN		0x521f
#define _STARPU_MPI_FUT_TEST_END		0x5220
#define _STARPU_MPI_FUT_ISEND_NUMA_NODE		0x5221
#define _STARPU_MPI_FUT_CHECKPOINT_BEGIN	0x5222
#define _STARPU_MPI_FUT_CHECKPOINT_END		0x5223
70 
71 #ifdef STARPU_USE_FXT
72 
/*
 * Lifecycle probes.
 *
 * START and STOP record (rank, worldsize, _starpu_gettid()) through
 * FUT_DO_ALWAYS_PROBE3; they do not look at _starpu_fxt_started.
 * BARRIER records (rank, worldsize, key, local_time, _starpu_gettid())
 * through FUT_DO_ALWAYS_PROBE5, but only while _starpu_fxt_started is non-zero.
 *
 * Every macro expands to exactly one do { } while (0) statement without a
 * trailing semicolon: the call site supplies the ';', which keeps the macros
 * usable as the body of an unbraced if/else.
 */
#define _STARPU_MPI_TRACE_START(rank, worldsize) do { \
	FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_STOP(rank, worldsize) do { \
	FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key, local_time) do { \
	if (_starpu_fxt_started) { \
		FUT_DO_ALWAYS_PROBE5(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), (local_time), _starpu_gettid()); \
	} \
} while (0)
/*
 * Probes surrounding the submission of a send or a receive request.
 *
 * All of them go through FUT_FULL_PROBEn with the _STARPU_FUT_KEYMASK_MPI key
 * mask and pass _starpu_gettid() as last recorded value.
 *
 * _STARPU_MPI_TRACE_ISEND_SUBMIT_END emits two events:
 *  - _STARPU_MPI_FUT_ISEND_SUBMIT_END with type, the rank and data tag read
 *    from req->node_tag, the size returned by starpu_data_get_size() for
 *    req->data_handle, req->pre_sync_jobid, req->data_handle and prio;
 *  - _STARPU_MPI_FUT_ISEND_NUMA_NODE (key mask
 *    _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA) with the rank, the job id and the
 *    result of starpu_get_memory_location_bitmap() on req->ptr.
 * Both events are grouped in one do { } while (0) so that an unbraced
 * `if (cond) _STARPU_MPI_TRACE_ISEND_SUBMIT_END(...);` guards both of them.
 * The req argument is expanded several times: pass an expression without
 * side effects.
 *
 * The macros carry no trailing semicolon; the call site supplies it.
 */
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, data_tag, size) do { \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (data_tag), (size), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(type, req, prio) do { \
	FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_END, (type), (req)->node_tag.node.rank, (req)->node_tag.data_tag, starpu_data_get_size((req)->data_handle), (req)->pre_sync_jobid, (req)->data_handle, (prio), _starpu_gettid()); \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_ISEND_NUMA_NODE, (req)->node_tag.node.rank, (req)->pre_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (data_tag), _starpu_gettid()); \
} while (0)
/*
 * Probes surrounding the completion of a send or a receive request.
 *
 * The ISEND_ and IRECV_ variants each emit one event through FUT_FULL_PROBEn
 * with the _STARPU_FUT_KEYMASK_MPI key mask; _starpu_gettid() is the last
 * recorded value.
 *
 * _STARPU_MPI_TRACE_COMPLETE_BEGIN / _END select the variant from type:
 * RECV_REQ maps to the IRECV probe, SEND_REQ maps to the ISEND probe with a
 * size of 0, and any other value emits nothing.  type is parenthesised so
 * that a compound expression (e.g. a conditional expression) is compared as a
 * whole, and the if/else chain is enclosed in do { } while (0) so that it
 * cannot capture an `else` written at the call site.
 *
 * The macros carry no trailing semicolon; the call site supplies it.
 */
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, size) do { \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (data_tag), (size), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, size) do { \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (data_tag), (size), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, data_tag) do { \
	if ((type) == RECV_REQ) { \
		_STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (data_tag)); \
	} else if ((type) == SEND_REQ) { \
		_STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), (data_tag), 0); \
	} \
} while (0)
#define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, data_tag) do { \
	if ((type) == RECV_REQ) { \
		_STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (data_tag)); \
	} else if ((type) == SEND_REQ) { \
		_STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (data_tag), 0); \
	} \
} while (0)
/*
 * Record the termination of request req, depending on req->request_type:
 *  - RECV_REQ: emits _STARPU_MPI_FUT_IRECV_TERMINATED (rank and data tag from
 *    req->node_tag, req->post_sync_jobid, _starpu_gettid(), req->data_handle),
 *    then _STARPU_MPI_FUT_IRECV_NUMA_NODE under the
 *    _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA key mask, with the result of
 *    starpu_get_memory_location_bitmap() on req->ptr;
 *  - SEND_REQ: emits _STARPU_MPI_FUT_ISEND_TERMINATED (rank, data tag,
 *    _starpu_gettid());
 *  - any other type: emits nothing.
 *
 * The whole if/else chain is enclosed in do { } while (0) so that it cannot
 * capture an `else` written at the call site.  req is expanded several times:
 * pass an expression without side effects.  No trailing semicolon; the call
 * site supplies it.
 */
#define _STARPU_MPI_TRACE_TERMINATED(req) do { \
	if ((req)->request_type == RECV_REQ) { \
		FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, (req)->post_sync_jobid, _starpu_gettid(), (req)->data_handle); \
		FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_IRECV_NUMA_NODE, (req)->node_tag.node.rank, (req)->post_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); \
	} else if ((req)->request_type == SEND_REQ) { \
		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, _starpu_gettid()); \
	} \
} while (0)
/*
 * Single-event probes for the sleep, detached-testing (DTESTING),
 * UTESTING / UWAIT phases and for the rank / tag attached to a data handle.
 *
 * Each macro emits exactly one event through FUT_FULL_PROBEn with the
 * _STARPU_FUT_KEYMASK_MPI key mask; _starpu_gettid() is always the last
 * recorded value, preceded by the macro arguments in declaration order.
 *
 * The expansions are do { } while (0) statements without a trailing
 * semicolon: the call site supplies the ';', which keeps the macros usable as
 * the body of an unbraced if/else.
 */
#define _STARPU_MPI_TRACE_SLEEP_BEGIN() do { \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_BEGIN, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_SLEEP_END() do { \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_END, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_DTESTING_BEGIN() do { \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_BEGIN, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_DTESTING_END() do { \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_END, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_BEGIN, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_UTESTING_END(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_END, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_BEGIN, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_UWAIT_END(src, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_DATA_SET_TAG(handle, data_tag) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_TAG, (handle), (data_tag), _starpu_gettid()); \
} while (0)
/*
 * Probes around the testing of detached requests and around individual
 * tests.  They are compiled out by default (see the comment below); change
 * the `#if 0` to `#if 1` locally to get them in the trace.
 *
 * Both branches expand to a single do { } while (0) statement without a
 * trailing semicolon, so enabling the probes does not change how the call
 * sites parse.
 */
#if 0
/* This is very expensive in the trace, only enable for debugging */
#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() do { \
	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_BEGIN, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_END() do { \
	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_END, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) do { \
	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (data_tag), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) do { \
	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (data_tag), _starpu_gettid()); \
} while (0)
#else
#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()	do {} while(0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()	do {} while(0)
#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)	do {} while(0)
#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)	do {} while(0)
#endif
/*
 * Probes delimiting a polling phase.
 *
 * Both macros read and write a variable named trace_loop that must be in
 * scope at the call site; it is not declared by this header.  POLLING_BEGIN
 * emits _STARPU_MPI_FUT_POLLING_BEGIN and sets trace_loop to 1 only when it
 * was 0; POLLING_END emits _STARPU_MPI_FUT_POLLING_END and resets it to 0
 * only when it was non-zero.  Repeated BEGIN (or END) invocations therefore
 * produce a single event.
 *
 * The `if` is enclosed in do { } while (0) so that it cannot capture an
 * `else` written at the call site.  No trailing semicolon; the call site
 * supplies it.
 */
#define _STARPU_MPI_TRACE_POLLING_BEGIN() do { \
	if (!trace_loop) { \
		trace_loop = 1; \
		FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_BEGIN, _starpu_gettid()); \
	} \
} while (0)
#define _STARPU_MPI_TRACE_POLLING_END() do { \
	if (trace_loop) { \
		trace_loop = 0; \
		FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_END, _starpu_gettid()); \
	} \
} while (0)
/*
 * Probes delimiting a driver run and a checkpoint.
 *
 * DRIVER_RUN_BEGIN / _END emit one event with _starpu_gettid() under the
 * _STARPU_FUT_KEYMASK_MPI key mask.  CHECKPOINT_BEGIN / _END emit
 * (cp_instance, cp_domain, _starpu_gettid()) under the
 * _STARPU_FUT_KEYMASK_EVENT key mask -- unlike every other probe of this
 * header, which uses an MPI key mask.
 *
 * The expansions are do { } while (0) statements without a trailing
 * semicolon; the call site supplies the ';'.
 */
#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN() do { \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_BEGIN, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_DRIVER_RUN_END() do { \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_BEGIN, (cp_instance), (cp_domain), _starpu_gettid()); \
} while (0)
#define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain) do { \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_END, (cp_instance), (cp_domain), _starpu_gettid()); \
} while (0)
/* Defined only in the STARPU_USE_FXT branch.
 * NOTE(review): presumably tested elsewhere to know that tracing is compiled
 * in -- confirm the users of this very generic name. */
#define TRACE
163 #else
/*
 * Fallbacks used when StarPU is built without FxT support: every trace macro
 * expands to an empty statement and its arguments are not evaluated.
 *
 * None of the definitions ends with a semicolon: the call site supplies the
 * ';', so that `if (c) _STARPU_MPI_TRACE_x(...); else ...` parses as one
 * if/else statement.
 */
#define _STARPU_MPI_TRACE_START(a, b)					do {} while(0)
#define _STARPU_MPI_TRACE_STOP(a, b)					do {} while(0)
#define _STARPU_MPI_TRACE_BARRIER(a, b, c, d)				do {} while(0)
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(a, b, c)			do {} while(0)
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(a, b, c)			do {} while(0)
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(a, b)			do {} while(0)
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(a, b)			do {} while(0)
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(a, b, c)			do {} while(0)
#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(a, b, c)			do {} while(0)
#define _STARPU_MPI_TRACE_COMPLETE_END(a, b, c)				do {} while(0)
#define _STARPU_MPI_TRACE_TERMINATED(a)					do {} while(0)
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(a, b, c)			do {} while(0)
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(a, b)			do {} while(0)
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(a, b)			do {} while(0)
#define _STARPU_MPI_TRACE_SLEEP_BEGIN()					do {} while(0)
#define _STARPU_MPI_TRACE_SLEEP_END()					do {} while(0)
#define _STARPU_MPI_TRACE_DTESTING_BEGIN()				do {} while(0)
#define _STARPU_MPI_TRACE_DTESTING_END()				do {} while(0)
#define _STARPU_MPI_TRACE_UTESTING_BEGIN(a, b)				do {} while(0)
#define _STARPU_MPI_TRACE_UTESTING_END(a, b)				do {} while(0)
#define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b)				do {} while(0)
#define _STARPU_MPI_TRACE_UWAIT_END(a, b)				do {} while(0)
#define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b)				do {} while(0)
#define _STARPU_MPI_TRACE_DATA_SET_TAG(a, b)				do {} while(0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()			do {} while(0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()			do {} while(0)
#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)			do {} while(0)
#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)			do {} while(0)
#define _STARPU_MPI_TRACE_POLLING_BEGIN()				do {} while(0)
#define _STARPU_MPI_TRACE_POLLING_END()					do {} while(0)
#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN()				do {} while(0)
#define _STARPU_MPI_TRACE_DRIVER_RUN_END()				do {} while(0)
#define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain)	do {} while(0)
#define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain)	do {} while(0)
198 #endif
199 
/*
 * Entry points of the MPI FxT tracing layer; both are defined outside this
 * header.
 * NOTE(review): presumably called once at MPI start-up and once at shutdown;
 * the meaning of arg is not visible from here -- confirm against the
 * definitions.
 *
 * The shutdown function is declared with (void): in C, an empty parameter
 * list `()` is not a prototype and leaves the arguments of a call unchecked.
 */
void _starpu_mpi_fxt_init(void *arg);
void _starpu_mpi_fxt_shutdown(void);
202 
203 #ifdef __cplusplus
204 }
205 #endif
206 
207 
208 #endif // __STARPU_MPI_FXT_H__