Line data Source code
1 : /* SPDX-License-Identifier: LGPL-2.1+ */
2 :
3 : /*
4 : * IPC barrier tests
5 : * These tests verify the correct behavior of the IPC Barrier implementation.
6 : * Note that the tests use alarm-timers to verify dead-locks and timeouts. These
7 : * might not work on slow machines where 20ms are too short to perform specific
8 : * operations (though, very unlikely). In case that turns out true, we have to
9 : * increase it at the slight cost of lengthening the test duration on other machines.
10 : */
11 :
12 : #include <stdio.h>
13 : #include <sys/time.h>
14 : #include <sys/wait.h>
15 : #include <unistd.h>
16 :
17 : #include "barrier.h"
18 : #include "util.h"
19 : #include "tests.h"
20 : #include "virt.h"
21 : #include "time-util.h"
22 :
/* Base unit for every alarm and sleep in this file: 20ms. All timings below
 * are multiples of this constant. Should this turn out to be too short for
 * slow machines to perform the requested operations, it has to be increased.
 * On an i7 a 1ms base time works fine, so 20ms is expected to be sufficient
 * everywhere. */
#define BASE_TIME (USEC_PER_MSEC * 20)
28 :
29 0 : static void set_alarm(usec_t usecs) {
30 0 : struct itimerval v = { };
31 :
32 0 : timeval_store(&v.it_value, usecs);
33 0 : assert_se(setitimer(ITIMER_REAL, &v, NULL) >= 0);
34 0 : }
35 :
36 0 : static void sleep_for(usec_t usecs) {
37 : /* stupid usleep() might fail if >1000000 */
38 0 : assert_se(usecs < USEC_PER_SEC);
39 0 : usleep(usecs);
40 0 : }
41 :
/*
 * TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT)
 *
 * Defines a test function `static void _FUNCTION(void)` that:
 *   1. creates a Barrier `b` and sanity-checks its members,
 *   2. forks a first process (pid1) which takes the BARRIER_CHILD role, runs
 *      _CHILD_CODE and exits with status 42,
 *   3. forks a second process (pid2) which takes the BARRIER_PARENT role, runs
 *      _PARENT_CODE and exits with status 42,
 *   4. in the calling process: destroys its copy of the barrier, arms a
 *      999999us safety alarm, runs _WAIT_CHILD and then _WAIT_PARENT, and
 *      finally disarms the alarm.
 *
 * The code arguments are expanded inside the function and may therefore refer
 * to `b`, `pid1` and `pid2`.
 *
 * The sanity checks accept 0: it is a valid file-descriptor number (e.g. when
 * the test is started with stdin closed), so only negative values are treated
 * as "not set up".
 * NOTE(review): this assumes me/them/pipe[] hold file descriptors and that
 * BARRIER_NULL marks them unset with negative values - confirm in barrier.h.
 */
#define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT) \
        static void _FUNCTION(void) {                                   \
                Barrier b = BARRIER_NULL;                               \
                pid_t pid1, pid2;                                       \
                                                                        \
                assert_se(barrier_create(&b) >= 0);                     \
                assert_se(b.me >= 0);                                   \
                assert_se(b.them >= 0);                                 \
                assert_se(b.pipe[0] >= 0);                              \
                assert_se(b.pipe[1] >= 0);                              \
                                                                        \
                pid1 = fork();                                          \
                assert_se(pid1 >= 0);                                   \
                if (pid1 == 0) {                                        \
                        barrier_set_role(&b, BARRIER_CHILD);            \
                        { _CHILD_CODE; }                                \
                        exit(42);                                       \
                }                                                       \
                                                                        \
                pid2 = fork();                                          \
                assert_se(pid2 >= 0);                                   \
                if (pid2 == 0) {                                        \
                        barrier_set_role(&b, BARRIER_PARENT);           \
                        { _PARENT_CODE; }                               \
                        exit(42);                                       \
                }                                                       \
                                                                        \
                barrier_destroy(&b);                                    \
                set_alarm(999999);                                      \
                { _WAIT_CHILD; }                                        \
                { _WAIT_PARENT; }                                       \
                set_alarm(0);                                           \
        }
75 :
/*
 * TEST_BARRIER_WAIT_SUCCESS(_pid)
 *
 * Blocks in waitpid() until the process _pid terminates and asserts that it
 * exited normally with status 42 (the status used by TEST_BARRIER() for its
 * forked processes).
 *
 * _pid is evaluated exactly once and stored in a local, so arbitrary
 * expressions (including ones with low-precedence operators or side effects)
 * can be passed safely.
 */
#define TEST_BARRIER_WAIT_SUCCESS(_pid)                                 \
        ({                                                              \
                pid_t _expected = (_pid), _reaped;                      \
                int _status;                                            \
                _reaped = waitpid(_expected, &_status, 0);              \
                assert_se(_reaped == _expected);                        \
                assert_se(WIFEXITED(_status));                          \
                assert_se(WEXITSTATUS(_status) == 42);                  \
        })
84 :
/*
 * TEST_BARRIER_WAIT_ALARM(_pid)
 *
 * Blocks in waitpid() until the process _pid terminates and asserts that it
 * was killed by SIGALRM, i.e. that the alarm armed via set_alarm() fired
 * before the process finished on its own.
 *
 * _pid is evaluated exactly once and stored in a local, so arbitrary
 * expressions (including ones with low-precedence operators or side effects)
 * can be passed safely.
 */
#define TEST_BARRIER_WAIT_ALARM(_pid)                                   \
        ({                                                              \
                pid_t _expected = (_pid), _reaped;                      \
                int _status;                                            \
                _reaped = waitpid(_expected, &_status, 0);              \
                assert_se(_reaped == _expected);                        \
                assert_se(WIFSIGNALED(_status));                        \
                assert_se(WTERMSIG(_status) == SIGALRM);                \
        })
93 :
94 : /*
95 : * Test basic sync points
96 : * This places a barrier in both processes and waits synchronously for them.
97 : * The timeout makes sure the sync works as expected. The sleep_for() on one side
98 : * makes sure the exit of the parent does not overwrite previous barriers. Due
99 : * to the sleep_for(), we know that the parent already exited, thus there's a
100 : * pending HUP on the pipe. However, the barrier_sync() prefers reads on the
101 : * eventfd, thus we can safely wait on the barrier.
102 : */
103 0 : TEST_BARRIER(test_barrier_sync,
104 : ({
105 : set_alarm(BASE_TIME * 10);
106 : assert_se(barrier_place(&b));
107 : sleep_for(BASE_TIME * 2);
108 : assert_se(barrier_sync(&b));
109 : }),
110 : TEST_BARRIER_WAIT_SUCCESS(pid1),
111 : ({
112 : set_alarm(BASE_TIME * 10);
113 : assert_se(barrier_place(&b));
114 : assert_se(barrier_sync(&b));
115 : }),
116 : TEST_BARRIER_WAIT_SUCCESS(pid2));
117 :
118 : /*
119 : * Test wait_next()
120 : * This places a barrier in the parent and syncs on it. The child sleeps while
121 : * the parent places the barrier and then waits for a barrier. The wait will
122 : * succeed as the child hasn't read the parent's barrier, yet. The following
123 : * barrier and sync synchronize the exit.
124 : */
125 0 : TEST_BARRIER(test_barrier_wait_next,
126 : ({
127 : sleep_for(BASE_TIME);
128 : set_alarm(BASE_TIME * 10);
129 : assert_se(barrier_wait_next(&b));
130 : assert_se(barrier_place(&b));
131 : assert_se(barrier_sync(&b));
132 : }),
133 : TEST_BARRIER_WAIT_SUCCESS(pid1),
134 : ({
135 : set_alarm(BASE_TIME * 4);
136 : assert_se(barrier_place(&b));
137 : assert_se(barrier_sync(&b));
138 : }),
139 : TEST_BARRIER_WAIT_SUCCESS(pid2));
140 :
141 : /*
142 : * Test wait_next() multiple times
143 : * This places two barriers in the parent and waits for the child to exit. The
144 : * child sleeps 20ms so both barriers _should_ be in place. It then waits for
145 : * the parent to place the next barrier twice. The first call will fetch both
146 : * barriers and return. However, the second call will stall as the parent does
147 : * not place a 3rd barrier (the sleep caught two barriers). wait_next() is does
148 : * not look at barrier-links so this stall is expected. Thus this test times
149 : * out.
150 : */
151 0 : TEST_BARRIER(test_barrier_wait_next_twice,
152 : ({
153 : sleep_for(BASE_TIME);
154 : set_alarm(BASE_TIME);
155 : assert_se(barrier_wait_next(&b));
156 : assert_se(barrier_wait_next(&b));
157 : assert_se(0);
158 : }),
159 : TEST_BARRIER_WAIT_ALARM(pid1),
160 : ({
161 : set_alarm(BASE_TIME * 10);
162 : assert_se(barrier_place(&b));
163 : assert_se(barrier_place(&b));
164 : sleep_for(BASE_TIME * 4);
165 : }),
166 : TEST_BARRIER_WAIT_SUCCESS(pid2));
167 :
168 : /*
169 : * Test wait_next() with local barriers
170 : * This is the same as test_barrier_wait_next_twice, but places local barriers
171 : * between both waits. This does not have any effect on the wait so it times out
172 : * like the other test.
173 : */
174 0 : TEST_BARRIER(test_barrier_wait_next_twice_local,
175 : ({
176 : sleep_for(BASE_TIME);
177 : set_alarm(BASE_TIME);
178 : assert_se(barrier_wait_next(&b));
179 : assert_se(barrier_place(&b));
180 : assert_se(barrier_place(&b));
181 : assert_se(barrier_wait_next(&b));
182 : assert_se(0);
183 : }),
184 : TEST_BARRIER_WAIT_ALARM(pid1),
185 : ({
186 : set_alarm(BASE_TIME * 10);
187 : assert_se(barrier_place(&b));
188 : assert_se(barrier_place(&b));
189 : sleep_for(BASE_TIME * 4);
190 : }),
191 : TEST_BARRIER_WAIT_SUCCESS(pid2));
192 :
193 : /*
194 : * Test wait_next() with sync_next()
195 : * This is again the same as test_barrier_wait_next_twice but uses a
196 : * synced wait as the second wait. This works just fine because the local state
197 : * has no barriers placed, therefore, the remote is always in sync.
198 : */
199 0 : TEST_BARRIER(test_barrier_wait_next_twice_sync,
200 : ({
201 : sleep_for(BASE_TIME);
202 : set_alarm(BASE_TIME);
203 : assert_se(barrier_wait_next(&b));
204 : assert_se(barrier_sync_next(&b));
205 : }),
206 : TEST_BARRIER_WAIT_SUCCESS(pid1),
207 : ({
208 : set_alarm(BASE_TIME * 10);
209 : assert_se(barrier_place(&b));
210 : assert_se(barrier_place(&b));
211 : }),
212 : TEST_BARRIER_WAIT_SUCCESS(pid2));
213 :
214 : /*
215 : * Test wait_next() with sync_next() and local barriers
216 : * This is again the same as test_barrier_wait_next_twice_local but uses a
217 : * synced wait as the second wait. This works just fine because the local state
218 : * is in sync with the remote.
219 : */
220 0 : TEST_BARRIER(test_barrier_wait_next_twice_local_sync,
221 : ({
222 : sleep_for(BASE_TIME);
223 : set_alarm(BASE_TIME);
224 : assert_se(barrier_wait_next(&b));
225 : assert_se(barrier_place(&b));
226 : assert_se(barrier_place(&b));
227 : assert_se(barrier_sync_next(&b));
228 : }),
229 : TEST_BARRIER_WAIT_SUCCESS(pid1),
230 : ({
231 : set_alarm(BASE_TIME * 10);
232 : assert_se(barrier_place(&b));
233 : assert_se(barrier_place(&b));
234 : }),
235 : TEST_BARRIER_WAIT_SUCCESS(pid2));
236 :
237 : /*
238 : * Test sync_next() and sync()
239 : * This tests sync_*() synchronizations and makes sure they work fine if the
240 : * local state is behind the remote state.
241 : */
242 0 : TEST_BARRIER(test_barrier_sync_next,
243 : ({
244 : set_alarm(BASE_TIME * 10);
245 : assert_se(barrier_sync_next(&b));
246 : assert_se(barrier_sync(&b));
247 : assert_se(barrier_place(&b));
248 : assert_se(barrier_place(&b));
249 : assert_se(barrier_sync_next(&b));
250 : assert_se(barrier_sync_next(&b));
251 : assert_se(barrier_sync(&b));
252 : }),
253 : TEST_BARRIER_WAIT_SUCCESS(pid1),
254 : ({
255 : set_alarm(BASE_TIME * 10);
256 : sleep_for(BASE_TIME);
257 : assert_se(barrier_place(&b));
258 : assert_se(barrier_place(&b));
259 : assert_se(barrier_sync(&b));
260 : }),
261 : TEST_BARRIER_WAIT_SUCCESS(pid2));
262 :
263 : /*
264 : * Test sync_next() and sync() with local barriers
265 : * This tests timeouts if sync_*() is used if local barriers are placed but the
266 : * remote didn't place any.
267 : */
268 0 : TEST_BARRIER(test_barrier_sync_next_local,
269 : ({
270 : set_alarm(BASE_TIME);
271 : assert_se(barrier_place(&b));
272 : assert_se(barrier_sync_next(&b));
273 : assert_se(0);
274 : }),
275 : TEST_BARRIER_WAIT_ALARM(pid1),
276 : ({
277 : sleep_for(BASE_TIME * 2);
278 : }),
279 : TEST_BARRIER_WAIT_SUCCESS(pid2));
280 :
281 : /*
282 : * Test sync_next() and sync() with local barriers and abortion
283 : * This is the same as test_barrier_sync_next_local but aborts the sync in the
284 : * parent. Therefore, the sync_next() succeeds just fine due to the abortion.
285 : */
286 0 : TEST_BARRIER(test_barrier_sync_next_local_abort,
287 : ({
288 : set_alarm(BASE_TIME * 10);
289 : assert_se(barrier_place(&b));
290 : assert_se(!barrier_sync_next(&b));
291 : }),
292 : TEST_BARRIER_WAIT_SUCCESS(pid1),
293 : ({
294 : assert_se(barrier_abort(&b));
295 : }),
296 : TEST_BARRIER_WAIT_SUCCESS(pid2));
297 :
298 : /*
299 : * Test matched wait_abortion()
300 : * This runs wait_abortion() with remote abortion.
301 : */
302 0 : TEST_BARRIER(test_barrier_wait_abortion,
303 : ({
304 : set_alarm(BASE_TIME * 10);
305 : assert_se(barrier_wait_abortion(&b));
306 : }),
307 : TEST_BARRIER_WAIT_SUCCESS(pid1),
308 : ({
309 : assert_se(barrier_abort(&b));
310 : }),
311 : TEST_BARRIER_WAIT_SUCCESS(pid2));
312 :
313 : /*
314 : * Test unmatched wait_abortion()
315 : * This runs wait_abortion() without any remote abortion going on. It thus must
316 : * timeout.
317 : */
318 0 : TEST_BARRIER(test_barrier_wait_abortion_unmatched,
319 : ({
320 : set_alarm(BASE_TIME);
321 : assert_se(barrier_wait_abortion(&b));
322 : assert_se(0);
323 : }),
324 : TEST_BARRIER_WAIT_ALARM(pid1),
325 : ({
326 : sleep_for(BASE_TIME * 2);
327 : }),
328 : TEST_BARRIER_WAIT_SUCCESS(pid2));
329 :
330 : /*
331 : * Test matched wait_abortion() with local abortion
332 : * This runs wait_abortion() with local and remote abortion.
333 : */
334 0 : TEST_BARRIER(test_barrier_wait_abortion_local,
335 : ({
336 : set_alarm(BASE_TIME * 10);
337 : assert_se(barrier_abort(&b));
338 : assert_se(!barrier_wait_abortion(&b));
339 : }),
340 : TEST_BARRIER_WAIT_SUCCESS(pid1),
341 : ({
342 : assert_se(barrier_abort(&b));
343 : }),
344 : TEST_BARRIER_WAIT_SUCCESS(pid2));
345 :
346 : /*
347 : * Test unmatched wait_abortion() with local abortion
348 : * This runs wait_abortion() with only local abortion. This must time out.
349 : */
350 0 : TEST_BARRIER(test_barrier_wait_abortion_local_unmatched,
351 : ({
352 : set_alarm(BASE_TIME);
353 : assert_se(barrier_abort(&b));
354 : assert_se(!barrier_wait_abortion(&b));
355 : assert_se(0);
356 : }),
357 : TEST_BARRIER_WAIT_ALARM(pid1),
358 : ({
359 : sleep_for(BASE_TIME * 2);
360 : }),
361 : TEST_BARRIER_WAIT_SUCCESS(pid2));
362 :
363 : /*
364 : * Test child exit
365 : * Place barrier and sync with the child. The child only exits()s, which should
366 : * cause an implicit abortion and wake the parent.
367 : */
368 0 : TEST_BARRIER(test_barrier_exit,
369 : ({
370 : }),
371 : TEST_BARRIER_WAIT_SUCCESS(pid1),
372 : ({
373 : set_alarm(BASE_TIME * 10);
374 : assert_se(barrier_place(&b));
375 : assert_se(!barrier_sync(&b));
376 : }),
377 : TEST_BARRIER_WAIT_SUCCESS(pid2));
378 :
379 : /*
380 : * Test child exit with sleep
381 : * Same as test_barrier_exit but verifies the test really works due to the
382 : * child-exit. We add a usleep() which triggers the alarm in the parent and
383 : * causes the test to time out.
384 : */
385 0 : TEST_BARRIER(test_barrier_no_exit,
386 : ({
387 : sleep_for(BASE_TIME * 2);
388 : }),
389 : TEST_BARRIER_WAIT_SUCCESS(pid1),
390 : ({
391 : set_alarm(BASE_TIME);
392 : assert_se(barrier_place(&b));
393 : assert_se(!barrier_sync(&b));
394 : }),
395 : TEST_BARRIER_WAIT_ALARM(pid2));
396 :
397 : /*
398 : * Test pending exit against sync
399 : * The parent places a barrier *and* exits. The 20ms wait in the child
400 : * guarantees both are pending. However, our logic prefers pending barriers over
401 : * pending exit-abortions (unlike normal abortions), thus the wait_next() must
402 : * succeed, same for the sync_next() as our local barrier-count is smaller than
403 : * the remote. Once we place a barrier our count is equal, so the sync still
404 : * succeeds. Only if we place one more barrier, we're ahead of the remote, thus
405 : * we will fail due to HUP on the pipe.
406 : */
407 0 : TEST_BARRIER(test_barrier_pending_exit,
408 : ({
409 : set_alarm(BASE_TIME * 4);
410 : sleep_for(BASE_TIME * 2);
411 : assert_se(barrier_wait_next(&b));
412 : assert_se(barrier_sync_next(&b));
413 : assert_se(barrier_place(&b));
414 : assert_se(barrier_sync_next(&b));
415 : assert_se(barrier_place(&b));
416 : assert_se(!barrier_sync_next(&b));
417 : }),
418 : TEST_BARRIER_WAIT_SUCCESS(pid1),
419 : ({
420 : assert_se(barrier_place(&b));
421 : }),
422 : TEST_BARRIER_WAIT_SUCCESS(pid2));
423 :
424 1 : int main(int argc, char *argv[]) {
425 : int v;
426 1 : test_setup_logging(LOG_INFO);
427 :
428 1 : if (!slow_tests_enabled())
429 1 : return log_tests_skipped("slow tests are disabled");
430 :
431 : /*
432 : * This test uses real-time alarms and sleeps to test for CPU races
433 : * explicitly. This is highly fragile if your system is under load. We
434 : * already increased the BASE_TIME value to make the tests more robust,
435 : * but that just makes the test take significantly longer. Given the recent
436 : * issues when running the test in a virtualized environments, limit it
437 : * to bare metal machines only, to minimize false-positives in CIs.
438 : */
439 0 : v = detect_virtualization();
440 0 : if (IN_SET(v, -EPERM, -EACCES))
441 0 : return log_tests_skipped("Cannot detect virtualization");
442 :
443 0 : if (v != VIRTUALIZATION_NONE)
444 0 : return log_tests_skipped("This test requires a baremetal machine");
445 :
446 0 : test_barrier_sync();
447 0 : test_barrier_wait_next();
448 0 : test_barrier_wait_next_twice();
449 0 : test_barrier_wait_next_twice_sync();
450 0 : test_barrier_wait_next_twice_local();
451 0 : test_barrier_wait_next_twice_local_sync();
452 0 : test_barrier_sync_next();
453 0 : test_barrier_sync_next_local();
454 0 : test_barrier_sync_next_local_abort();
455 0 : test_barrier_wait_abortion();
456 0 : test_barrier_wait_abortion_unmatched();
457 0 : test_barrier_wait_abortion_local();
458 0 : test_barrier_wait_abortion_local_unmatched();
459 0 : test_barrier_exit();
460 0 : test_barrier_no_exit();
461 0 : test_barrier_pending_exit();
462 :
463 0 : return 0;
464 : }
|