affinity-1.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156
  1. /* Affinity tests.
  2. Copyright (C) 2013-2022 Free Software Foundation, Inc.
  3. GCC is free software; you can redistribute it and/or modify it under
  4. the terms of the GNU General Public License as published by the Free
  5. Software Foundation; either version 3, or (at your option) any later
  6. version.
  7. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  8. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  9. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  10. for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with GCC; see the file COPYING3. If not see
  13. <http://www.gnu.org/licenses/>. */
  14. /* { dg-do run } */
  15. /* { dg-set-target-env-var OMP_PROC_BIND "false" } */
  16. /* { dg-additional-options "-Wno-deprecated-declarations" } */
  17. /* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl -Wno-deprecated-declarations" { target *-*-linux* } } */
  18. #ifndef _GNU_SOURCE
  19. #define _GNU_SOURCE
  20. #endif
  21. #include "config.h"
  22. #include <omp.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <unistd.h>
  27. #ifdef DO_FORK
  28. #include <signal.h>
  29. #include <sys/wait.h>
  30. #endif
  31. #ifdef HAVE_PTHREAD_AFFINITY_NP
  32. #include <sched.h>
  33. #include <pthread.h>
  34. #ifdef INTERPOSE_GETAFFINITY
  35. #include <dlfcn.h>
  36. #endif
  37. #endif
  /* One place, described as a run of consecutive CPU numbers.  */
  struct place
  {
    int start;	/* First CPU number of the run; -1 in the placeholder entry.  */
    int len;	/* Number of consecutive CPUs in the run.  */
  };

  /* Table of OMP_PLACES settings exercised by the test.  NAME is the string
     exported as OMP_PLACES (and matched against it again in the re-executed
     child), COUNT is the number of entries of PLACES that are in use, and
     PLACES lists the expected CPU run of each place.  Entry 0 is the
     "nothing to verify" placeholder: its single place is { -1, -1 }.  */
  struct places
  {
    const char *name;
    int count;
    struct place places[8];
  } places_array[] = {
    [0] = { .name = "",
	    .count = 1,
	    .places = { { -1, -1 } } },
    [1] = { .name = "{0}:8",
	    .count = 8,
	    .places = { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
			{ 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
    [2] = { .name = "{7,6}:2:-3",
	    .count = 2,
	    .places = { { 6, 2 }, { 3, 2 } } },
    [3] = { .name = "{6,7}:4:-2,!{2,3}",
	    .count = 3,
	    .places = { { 6, 2 }, { 4, 2 }, { 0, 2 } } },
    [4] = { .name = "{1}:7:1",
	    .count = 7,
	    .places = { { 1, 1 }, { 2, 1 }, { 3, 1 },
			{ 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
    [5] = { .name = "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}",
	    .count = 5,
	    .places = { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
    [6] = { .name = "1,2,{2,3,!2},3,3,!3,!{5:3:-1,!4,!5},{4},5,!4,!5,"
		    "1:2,!{1},!2,7:3:-2,!{5},!7,!3",
	    .count = 3,
	    .places = { { 1, 1 }, { 2, 1 }, { 3, 1 } } }
  };
  /* Number of consecutive CPUs, counted from CPU 0, that were set in an
     affinity mask seen by the interposed pthread_getaffinity_np.  It stays 0
     when that interposer is not built or has not recorded anything; main only
     runs the place checks when this is at least 8.  */
  unsigned long contig_cpucount;

  /* The cpusetsize argument of the call that produced contig_cpucount.
     print_affinity reuses it as its mask size when it is non-zero.  */
  unsigned long min_cpusetsize;
  65. #if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \
  66. && defined (CPU_ALLOC_SIZE)
  67. #if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY)
  68. int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *);
  69. int
  70. pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
  71. {
  72. int ret;
  73. unsigned long i, max;
  74. if (orig_getaffinity_np == NULL)
  75. {
  76. orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *))
  77. dlsym (RTLD_NEXT, "pthread_getaffinity_np");
  78. if (orig_getaffinity_np == NULL)
  79. exit (0);
  80. }
  81. ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
  82. if (ret != 0)
  83. return ret;
  84. if (contig_cpucount == 0)
  85. {
  86. max = 8 * cpusetsize;
  87. for (i = 0; i < max; i++)
  88. if (!CPU_ISSET_S (i, cpusetsize, cpuset))
  89. break;
  90. contig_cpucount = i;
  91. min_cpusetsize = cpusetsize;
  92. }
  93. return ret;
  94. }
  95. #endif
  96. void
  97. print_affinity (struct place p)
  98. {
  99. static unsigned long size;
  100. if (size == 0)
  101. {
  102. if (min_cpusetsize)
  103. size = min_cpusetsize;
  104. else
  105. {
  106. size = sysconf (_SC_NPROCESSORS_CONF);
  107. size = CPU_ALLOC_SIZE (size);
  108. if (size < sizeof (cpu_set_t))
  109. size = sizeof (cpu_set_t);
  110. }
  111. }
  112. cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size);
  113. if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0)
  114. {
  115. unsigned long i, len, max = 8 * size;
  116. int notfirst = 0, unexpected = 1;
  117. printf (" bound to {");
  118. for (i = 0, len = 0; i < max; i++)
  119. if (CPU_ISSET_S (i, size, cpusetp))
  120. {
  121. if (len == 0)
  122. {
  123. if (notfirst)
  124. {
  125. unexpected = 1;
  126. printf (",");
  127. }
  128. else if (i == (unsigned long) p.start)
  129. unexpected = 0;
  130. notfirst = 1;
  131. printf ("%lu", i);
  132. }
  133. ++len;
  134. }
  135. else
  136. {
  137. if (len && len != (unsigned long) p.len)
  138. unexpected = 1;
  139. if (len > 1)
  140. printf (":%lu", len);
  141. len = 0;
  142. }
  143. if (len && len != (unsigned long) p.len)
  144. unexpected = 1;
  145. if (len > 1)
  146. printf (":%lu", len);
  147. printf ("}");
  148. if (p.start != -1 && unexpected)
  149. {
  150. printf (", expected {%d", p.start);
  151. if (p.len != 1)
  152. printf (":%d", p.len);
  153. printf ("} instead");
  154. }
  155. else if (p.start != -1)
  156. printf (", verified");
  157. }
  158. }
  159. #else
  160. void
  161. print_affinity (struct place p)
  162. {
  163. (void) p.start;
  164. (void) p.len;
  165. }
  166. #endif
  167. int
  168. main ()
  169. {
  170. char *env_proc_bind = getenv ("OMP_PROC_BIND");
  171. int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0;
  172. int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0;
  173. int test_spread_master_close
  174. = (env_proc_bind
  175. && (strcmp (env_proc_bind, "spread,master,close") == 0
  176. || strcmp (env_proc_bind, "spread,primary,close") == 0));
  177. char *env_places = getenv ("OMP_PLACES");
  178. int test_places = 0;
  179. if (omp_proc_bind_master != omp_proc_bind_primary)
  180. abort ();
  181. #ifdef DO_FORK
  182. if (env_places == NULL && contig_cpucount >= 8 && test_false
  183. && getenv ("GOMP_AFFINITY") == NULL)
  184. {
  185. int i, j, status;
  186. pid_t pid;
  187. for (j = 0; j < 3; j++)
  188. {
  189. if (setenv ("OMP_PROC_BIND",
  190. j > 1 ? "spread,primary,close"
  191. : (j ? "spread,master,close" : "true"), 1) < 0)
  192. break;
  193. for (i = sizeof (places_array) / sizeof (places_array[0]) - 1;
  194. i; --i)
  195. {
  196. if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0)
  197. break;
  198. pid = fork ();
  199. if (pid == -1)
  200. break;
  201. if (pid == 0)
  202. {
  203. execl ("/proc/self/exe", "affinity-1.exe", NULL);
  204. _exit (1);
  205. }
  206. if (waitpid (pid, &status, 0) < 0)
  207. break;
  208. if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT)
  209. abort ();
  210. else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0)
  211. break;
  212. }
  213. if (i)
  214. break;
  215. }
  216. }
  217. #endif
  218. int first = 1;
  219. if (env_proc_bind)
  220. {
  221. printf ("OMP_PROC_BIND='%s'", env_proc_bind);
  222. first = 0;
  223. }
  224. if (env_places)
  225. printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places);
  226. printf ("\n");
  227. if (env_places && contig_cpucount >= 8
  228. && (test_true || test_spread_master_close))
  229. {
  230. for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1;
  231. test_places; --test_places)
  232. if (strcmp (env_places, places_array[test_places].name) == 0)
  233. break;
  234. }
  235. #define verify(if_true, if_s_m_c) \
  236. if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \
  237. abort (); \
  238. if (test_true && omp_get_proc_bind () != if_true) \
  239. abort (); \
  240. if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \
  241. abort ();
  242. verify (omp_proc_bind_true, omp_proc_bind_spread);
  243. printf ("Initial thread");
  244. print_affinity (places_array[test_places].places[0]);
  245. printf ("\n");
  246. omp_set_nested (1);
  247. omp_set_dynamic (0);
  248. #pragma omp parallel if (0)
  249. {
  250. verify (omp_proc_bind_true, omp_proc_bind_master);
  251. #pragma omp parallel if (0)
  252. {
  253. verify (omp_proc_bind_true, omp_proc_bind_close);
  254. #pragma omp parallel if (0)
  255. {
  256. verify (omp_proc_bind_true, omp_proc_bind_close);
  257. }
  258. #pragma omp parallel if (0) proc_bind (spread)
  259. {
  260. verify (omp_proc_bind_spread, omp_proc_bind_spread);
  261. }
  262. }
  263. #pragma omp parallel if (0) proc_bind (master)
  264. {
  265. verify (omp_proc_bind_master, omp_proc_bind_close);
  266. #pragma omp parallel if (0)
  267. {
  268. verify (omp_proc_bind_master, omp_proc_bind_close);
  269. }
  270. #pragma omp parallel if (0) proc_bind (spread)
  271. {
  272. verify (omp_proc_bind_spread, omp_proc_bind_spread);
  273. }
  274. }
  275. }
  276. /* True/spread */
  277. #pragma omp parallel num_threads (4)
  278. {
  279. verify (omp_proc_bind_true, omp_proc_bind_master);
  280. #pragma omp critical
  281. {
  282. struct place p = places_array[0].places[0];
  283. int thr = omp_get_thread_num ();
  284. printf ("#1 thread %d", thr);
  285. if (omp_get_num_threads () == 4 && test_spread_master_close)
  286. switch (places_array[test_places].count)
  287. {
  288. case 8:
  289. /* T = 4, P = 8, each subpartition has 2 places. */
  290. case 7:
  291. /* T = 4, P = 7, each subpartition has 2 places, but
  292. last partition, which has just one place. */
  293. p = places_array[test_places].places[2 * thr];
  294. break;
  295. case 5:
  296. /* T = 4, P = 5, first subpartition has 2 places, the
  297. rest just one. */
  298. p = places_array[test_places].places[thr ? 1 + thr : 0];
  299. break;
  300. case 3:
  301. /* T = 4, P = 3, unit sized subpartitions, first gets
  302. thr0 and thr3, second thr1, third thr2. */
  303. p = places_array[test_places].places[thr == 3 ? 0 : thr];
  304. break;
  305. case 2:
  306. /* T = 4, P = 2, unit sized subpartitions, each with
  307. 2 threads. */
  308. p = places_array[test_places].places[thr / 2];
  309. break;
  310. }
  311. print_affinity (p);
  312. printf ("\n");
  313. }
  314. #pragma omp barrier
  315. if (omp_get_thread_num () == 3)
  316. {
  317. /* True/spread, true/master. */
  318. #pragma omp parallel num_threads (3)
  319. {
  320. verify (omp_proc_bind_true, omp_proc_bind_close);
  321. #pragma omp critical
  322. {
  323. struct place p = places_array[0].places[0];
  324. int thr = omp_get_thread_num ();
  325. printf ("#1,#1 thread 3,%d", thr);
  326. if (omp_get_num_threads () == 3 && test_spread_master_close)
  327. /* Outer is spread, inner master, so just bind to the
  328. place of the master thread, which is thr 3 above. */
  329. switch (places_array[test_places].count)
  330. {
  331. case 8:
  332. case 7:
  333. p = places_array[test_places].places[6];
  334. break;
  335. case 5:
  336. p = places_array[test_places].places[4];
  337. break;
  338. case 3:
  339. p = places_array[test_places].places[0];
  340. break;
  341. case 2:
  342. p = places_array[test_places].places[1];
  343. break;
  344. }
  345. print_affinity (p);
  346. printf ("\n");
  347. }
  348. }
  349. /* True/spread, spread. */
  350. #pragma omp parallel num_threads (5) proc_bind (spread)
  351. {
  352. verify (omp_proc_bind_spread, omp_proc_bind_close);
  353. #pragma omp critical
  354. {
  355. struct place p = places_array[0].places[0];
  356. int thr = omp_get_thread_num ();
  357. printf ("#1,#2 thread 3,%d", thr);
  358. if (omp_get_num_threads () == 5 && test_spread_master_close)
  359. /* Outer is spread, inner spread. */
  360. switch (places_array[test_places].count)
  361. {
  362. case 8:
  363. /* T = 5, P = 2, unit sized subpartitions. */
  364. p = places_array[test_places].places[thr == 4 ? 6
  365. : 6 + thr / 2];
  366. break;
  367. /* The rest are T = 5, P = 1. */
  368. case 7:
  369. p = places_array[test_places].places[6];
  370. break;
  371. case 5:
  372. p = places_array[test_places].places[4];
  373. break;
  374. case 3:
  375. p = places_array[test_places].places[0];
  376. break;
  377. case 2:
  378. p = places_array[test_places].places[1];
  379. break;
  380. }
  381. print_affinity (p);
  382. printf ("\n");
  383. }
  384. #pragma omp barrier
  385. if (omp_get_thread_num () == 3)
  386. {
  387. /* True/spread, spread, close. */
  388. #pragma omp parallel num_threads (5) proc_bind (close)
  389. {
  390. verify (omp_proc_bind_close, omp_proc_bind_close);
  391. #pragma omp critical
  392. {
  393. struct place p = places_array[0].places[0];
  394. int thr = omp_get_thread_num ();
  395. printf ("#1,#2,#1 thread 3,3,%d", thr);
  396. if (omp_get_num_threads () == 5 && test_spread_master_close)
  397. /* Outer is spread, inner spread, innermost close. */
  398. switch (places_array[test_places].count)
  399. {
  400. /* All are T = 5, P = 1. */
  401. case 8:
  402. p = places_array[test_places].places[7];
  403. break;
  404. case 7:
  405. p = places_array[test_places].places[6];
  406. break;
  407. case 5:
  408. p = places_array[test_places].places[4];
  409. break;
  410. case 3:
  411. p = places_array[test_places].places[0];
  412. break;
  413. case 2:
  414. p = places_array[test_places].places[1];
  415. break;
  416. }
  417. print_affinity (p);
  418. printf ("\n");
  419. }
  420. }
  421. }
  422. }
  423. /* True/spread, master. */
  424. #pragma omp parallel num_threads (4) proc_bind(master)
  425. {
  426. verify (omp_proc_bind_master, omp_proc_bind_close);
  427. #pragma omp critical
  428. {
  429. struct place p = places_array[0].places[0];
  430. int thr = omp_get_thread_num ();
  431. printf ("#1,#3 thread 3,%d", thr);
  432. if (omp_get_num_threads () == 4 && test_spread_master_close)
  433. /* Outer is spread, inner master, so just bind to the
  434. place of the master thread, which is thr 3 above. */
  435. switch (places_array[test_places].count)
  436. {
  437. case 8:
  438. case 7:
  439. p = places_array[test_places].places[6];
  440. break;
  441. case 5:
  442. p = places_array[test_places].places[4];
  443. break;
  444. case 3:
  445. p = places_array[test_places].places[0];
  446. break;
  447. case 2:
  448. p = places_array[test_places].places[1];
  449. break;
  450. }
  451. print_affinity (p);
  452. printf ("\n");
  453. }
  454. }
  455. /* True/spread, close. */
  456. #pragma omp parallel num_threads (6) proc_bind (close)
  457. {
  458. verify (omp_proc_bind_close, omp_proc_bind_close);
  459. #pragma omp critical
  460. {
  461. struct place p = places_array[0].places[0];
  462. int thr = omp_get_thread_num ();
  463. printf ("#1,#4 thread 3,%d", thr);
  464. if (omp_get_num_threads () == 6 && test_spread_master_close)
  465. /* Outer is spread, inner close. */
  466. switch (places_array[test_places].count)
  467. {
  468. case 8:
  469. /* T = 6, P = 2, unit sized subpartitions. */
  470. p = places_array[test_places].places[6 + thr / 3];
  471. break;
  472. /* The rest are T = 6, P = 1. */
  473. case 7:
  474. p = places_array[test_places].places[6];
  475. break;
  476. case 5:
  477. p = places_array[test_places].places[4];
  478. break;
  479. case 3:
  480. p = places_array[test_places].places[0];
  481. break;
  482. case 2:
  483. p = places_array[test_places].places[1];
  484. break;
  485. }
  486. print_affinity (p);
  487. printf ("\n");
  488. }
  489. }
  490. }
  491. }
  492. /* Spread. */
  493. #pragma omp parallel num_threads (5) proc_bind(spread)
  494. {
  495. verify (omp_proc_bind_spread, omp_proc_bind_master);
  496. #pragma omp critical
  497. {
  498. struct place p = places_array[0].places[0];
  499. int thr = omp_get_thread_num ();
  500. printf ("#2 thread %d", thr);
  501. if (omp_get_num_threads () == 5
  502. && (test_spread_master_close || test_true))
  503. switch (places_array[test_places].count)
  504. {
  505. case 8:
  506. /* T = 5, P = 8, first 3 subpartitions have 2 places, last
  507. 2 one place. */
  508. p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr];
  509. break;
  510. case 7:
  511. /* T = 5, P = 7, first 2 subpartitions have 2 places, last
  512. 3 one place. */
  513. p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr];
  514. break;
  515. case 5:
  516. /* T = 5, P = 5, unit sized subpartitions, each one with one
  517. thread. */
  518. p = places_array[test_places].places[thr];
  519. break;
  520. case 3:
  521. /* T = 5, P = 3, unit sized subpartitions, first gets
  522. thr0 and thr3, second thr1 and thr4, third thr2. */
  523. p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
  524. break;
  525. case 2:
  526. /* T = 5, P = 2, unit sized subpartitions, first with
  527. thr{0,1,4} and second with thr{2,3}. */
  528. p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
  529. break;
  530. }
  531. print_affinity (p);
  532. printf ("\n");
  533. }
  534. #pragma omp barrier
  535. if (omp_get_thread_num () == 3)
  536. {
  537. int pp = 0;
  538. switch (places_array[test_places].count)
  539. {
  540. case 8: pp = 6; break;
  541. case 7: pp = 5; break;
  542. case 5: pp = 3; break;
  543. case 2: pp = 1; break;
  544. }
  545. /* Spread, spread/master. */
  546. #pragma omp parallel num_threads (3) firstprivate (pp)
  547. {
  548. verify (omp_proc_bind_spread, omp_proc_bind_close);
  549. #pragma omp critical
  550. {
  551. struct place p = places_array[0].places[0];
  552. int thr = omp_get_thread_num ();
  553. printf ("#2,#1 thread 3,%d", thr);
  554. if (test_spread_master_close || test_true)
  555. /* Outer is spread, inner spread resp. master, but we have
  556. just unit sized partitions. */
  557. p = places_array[test_places].places[pp];
  558. print_affinity (p);
  559. printf ("\n");
  560. }
  561. }
  562. /* Spread, spread. */
  563. #pragma omp parallel num_threads (5) proc_bind (spread) \
  564. firstprivate (pp)
  565. {
  566. verify (omp_proc_bind_spread, omp_proc_bind_close);
  567. #pragma omp critical
  568. {
  569. struct place p = places_array[0].places[0];
  570. int thr = omp_get_thread_num ();
  571. printf ("#2,#2 thread 3,%d", thr);
  572. if (test_spread_master_close || test_true)
  573. /* Outer is spread, inner spread, but we have
  574. just unit sized partitions. */
  575. p = places_array[test_places].places[pp];
  576. print_affinity (p);
  577. printf ("\n");
  578. }
  579. }
  580. /* Spread, master. */
  581. #pragma omp parallel num_threads (4) proc_bind(master) \
  582. firstprivate(pp)
  583. {
  584. verify (omp_proc_bind_master, omp_proc_bind_close);
  585. #pragma omp critical
  586. {
  587. struct place p = places_array[0].places[0];
  588. int thr = omp_get_thread_num ();
  589. printf ("#2,#3 thread 3,%d", thr);
  590. if (test_spread_master_close || test_true)
  591. /* Outer is spread, inner master, but we have
  592. just unit sized partitions. */
  593. p = places_array[test_places].places[pp];
  594. print_affinity (p);
  595. printf ("\n");
  596. }
  597. }
  598. /* Spread, close. */
  599. #pragma omp parallel num_threads (6) proc_bind (close) \
  600. firstprivate (pp)
  601. {
  602. verify (omp_proc_bind_close, omp_proc_bind_close);
  603. #pragma omp critical
  604. {
  605. struct place p = places_array[0].places[0];
  606. int thr = omp_get_thread_num ();
  607. printf ("#2,#4 thread 3,%d", thr);
  608. if (test_spread_master_close || test_true)
  609. /* Outer is spread, inner close, but we have
  610. just unit sized partitions. */
  611. p = places_array[test_places].places[pp];
  612. print_affinity (p);
  613. printf ("\n");
  614. }
  615. }
  616. }
  617. }
  618. /* Master. */
  619. #pragma omp parallel num_threads (3) proc_bind(master)
  620. {
  621. verify (omp_proc_bind_master, omp_proc_bind_master);
  622. #pragma omp critical
  623. {
  624. struct place p = places_array[0].places[0];
  625. int thr = omp_get_thread_num ();
  626. printf ("#3 thread %d", thr);
  627. if (test_spread_master_close || test_true)
  628. p = places_array[test_places].places[0];
  629. print_affinity (p);
  630. printf ("\n");
  631. }
  632. #pragma omp barrier
  633. if (omp_get_thread_num () == 2)
  634. {
  635. /* Master, master. */
  636. #pragma omp parallel num_threads (4)
  637. {
  638. verify (omp_proc_bind_master, omp_proc_bind_close);
  639. #pragma omp critical
  640. {
  641. struct place p = places_array[0].places[0];
  642. int thr = omp_get_thread_num ();
  643. printf ("#3,#1 thread 2,%d", thr);
  644. if (test_spread_master_close || test_true)
  645. /* Outer is master, inner is master. */
  646. p = places_array[test_places].places[0];
  647. print_affinity (p);
  648. printf ("\n");
  649. }
  650. }
  651. /* Master, spread. */
  652. #pragma omp parallel num_threads (4) proc_bind (spread)
  653. {
  654. verify (omp_proc_bind_spread, omp_proc_bind_close);
  655. #pragma omp critical
  656. {
  657. struct place p = places_array[0].places[0];
  658. int thr = omp_get_thread_num ();
  659. printf ("#3,#2 thread 2,%d", thr);
  660. if (omp_get_num_threads () == 4
  661. && (test_spread_master_close || test_true))
  662. /* Outer is master, inner is spread. */
  663. switch (places_array[test_places].count)
  664. {
  665. case 8:
  666. /* T = 4, P = 8, each subpartition has 2 places. */
  667. case 7:
  668. /* T = 4, P = 7, each subpartition has 2 places, but
  669. last partition, which has just one place. */
  670. p = places_array[test_places].places[2 * thr];
  671. break;
  672. case 5:
  673. /* T = 4, P = 5, first subpartition has 2 places, the
  674. rest just one. */
  675. p = places_array[test_places].places[thr ? 1 + thr : 0];
  676. break;
  677. case 3:
  678. /* T = 4, P = 3, unit sized subpartitions, first gets
  679. thr0 and thr3, second thr1, third thr2. */
  680. p = places_array[test_places].places[thr == 3 ? 0 : thr];
  681. break;
  682. case 2:
  683. /* T = 4, P = 2, unit sized subpartitions, each with
  684. 2 threads. */
  685. p = places_array[test_places].places[thr / 2];
  686. break;
  687. }
  688. print_affinity (p);
  689. printf ("\n");
  690. }
  691. #pragma omp barrier
  692. if (omp_get_thread_num () == 0)
  693. {
  694. /* Master, spread, close. */
  695. #pragma omp parallel num_threads (5) proc_bind (close)
  696. {
  697. verify (omp_proc_bind_close, omp_proc_bind_close);
  698. #pragma omp critical
  699. {
  700. struct place p = places_array[0].places[0];
  701. int thr = omp_get_thread_num ();
  702. printf ("#3,#2,#1 thread 2,0,%d", thr);
  703. if (omp_get_num_threads () == 5
  704. && (test_spread_master_close || test_true))
  705. /* Outer is master, inner spread, innermost close. */
  706. switch (places_array[test_places].count)
  707. {
  708. /* First 3 are T = 5, P = 2. */
  709. case 8:
  710. case 7:
  711. case 5:
  712. p = places_array[test_places].places[(thr & 2) / 2];
  713. break;
  714. /* All the rest are T = 5, P = 1. */
  715. case 3:
  716. case 2:
  717. p = places_array[test_places].places[0];
  718. break;
  719. }
  720. print_affinity (p);
  721. printf ("\n");
  722. }
  723. }
  724. }
  725. #pragma omp barrier
  726. if (omp_get_thread_num () == 3)
  727. {
  728. /* Master, spread, close. */
  729. #pragma omp parallel num_threads (5) proc_bind (close)
  730. {
  731. verify (omp_proc_bind_close, omp_proc_bind_close);
  732. #pragma omp critical
  733. {
  734. struct place p = places_array[0].places[0];
  735. int thr = omp_get_thread_num ();
  736. printf ("#3,#2,#2 thread 2,3,%d", thr);
  737. if (omp_get_num_threads () == 5
  738. && (test_spread_master_close || test_true))
  739. /* Outer is master, inner spread, innermost close. */
  740. switch (places_array[test_places].count)
  741. {
  742. case 8:
  743. /* T = 5, P = 2. */
  744. p = places_array[test_places].places[6
  745. + (thr & 2) / 2];
  746. break;
  747. /* All the rest are T = 5, P = 1. */
  748. case 7:
  749. p = places_array[test_places].places[6];
  750. break;
  751. case 5:
  752. p = places_array[test_places].places[4];
  753. break;
  754. case 3:
  755. p = places_array[test_places].places[0];
  756. break;
  757. case 2:
  758. p = places_array[test_places].places[1];
  759. break;
  760. }
  761. print_affinity (p);
  762. printf ("\n");
  763. }
  764. }
  765. }
  766. }
  767. /* Master, master. */
  768. #pragma omp parallel num_threads (4) proc_bind(master)
  769. {
  770. verify (omp_proc_bind_master, omp_proc_bind_close);
  771. #pragma omp critical
  772. {
  773. struct place p = places_array[0].places[0];
  774. int thr = omp_get_thread_num ();
  775. printf ("#3,#3 thread 2,%d", thr);
  776. if (test_spread_master_close || test_true)
  777. /* Outer is master, inner master. */
  778. p = places_array[test_places].places[0];
  779. print_affinity (p);
  780. printf ("\n");
  781. }
  782. }
  783. /* Master, close. */
  784. #pragma omp parallel num_threads (6) proc_bind (close)
  785. {
  786. verify (omp_proc_bind_close, omp_proc_bind_close);
  787. #pragma omp critical
  788. {
  789. struct place p = places_array[0].places[0];
  790. int thr = omp_get_thread_num ();
  791. printf ("#3,#4 thread 2,%d", thr);
  792. if (omp_get_num_threads () == 6
  793. && (test_spread_master_close || test_true))
  794. switch (places_array[test_places].count)
  795. {
  796. case 8:
  797. /* T = 6, P = 8. */
  798. case 7:
  799. /* T = 6, P = 7. */
  800. p = places_array[test_places].places[thr];
  801. break;
  802. case 5:
  803. /* T = 6, P = 5. thr{0,5} go into the first place. */
  804. p = places_array[test_places].places[thr == 5 ? 0 : thr];
  805. break;
  806. case 3:
  807. /* T = 6, P = 3, two threads into each place. */
  808. p = places_array[test_places].places[thr / 2];
  809. break;
  810. case 2:
  811. /* T = 6, P = 2, 3 threads into each place. */
  812. p = places_array[test_places].places[thr / 3];
  813. break;
  814. }
  815. print_affinity (p);
  816. printf ("\n");
  817. }
  818. }
  819. }
  820. }
  821. #pragma omp parallel num_threads (5) proc_bind(close)
  822. {
  823. verify (omp_proc_bind_close, omp_proc_bind_master);
  824. #pragma omp critical
  825. {
  826. struct place p = places_array[0].places[0];
  827. int thr = omp_get_thread_num ();
  828. printf ("#4 thread %d", thr);
  829. if (omp_get_num_threads () == 5
  830. && (test_spread_master_close || test_true))
  831. switch (places_array[test_places].count)
  832. {
  833. case 8:
  834. /* T = 5, P = 8. */
  835. case 7:
  836. /* T = 5, P = 7. */
  837. case 5:
  838. /* T = 5, P = 5. */
  839. p = places_array[test_places].places[thr];
  840. break;
  841. case 3:
  842. /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second,
  843. thr2 in third. */
  844. p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
  845. break;
  846. case 2:
  847. /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second. */
  848. p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
  849. break;
  850. }
  851. print_affinity (p);
  852. printf ("\n");
  853. }
  854. #pragma omp barrier
  855. if (omp_get_thread_num () == 2)
  856. {
  857. int pp = 0;
  858. switch (places_array[test_places].count)
  859. {
  860. case 8:
  861. case 7:
  862. case 5:
  863. case 3:
  864. pp = 2;
  865. break;
  866. case 2:
  867. pp = 1;
  868. break;
  869. }
  870. /* Close, close/master. */
  871. #pragma omp parallel num_threads (4) firstprivate (pp)
  872. {
  873. verify (omp_proc_bind_close, omp_proc_bind_close);
  874. #pragma omp critical
  875. {
  876. struct place p = places_array[0].places[0];
  877. int thr = omp_get_thread_num ();
  878. printf ("#4,#1 thread 2,%d", thr);
  879. if (test_spread_master_close)
  880. /* Outer is close, inner is master. */
  881. p = places_array[test_places].places[pp];
  882. else if (omp_get_num_threads () == 4 && test_true)
  883. /* Outer is close, inner is close. */
  884. switch (places_array[test_places].count)
  885. {
  886. case 8:
  887. /* T = 4, P = 8. */
  888. case 7:
  889. /* T = 4, P = 7. */
  890. p = places_array[test_places].places[2 + thr];
  891. break;
  892. case 5:
  893. /* T = 4, P = 5. There is wrap-around for thr3. */
  894. p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr];
  895. break;
  896. case 3:
  897. /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2
  898. into p1. */
  899. p = places_array[test_places].places[(2 + thr) % 3];
  900. break;
  901. case 2:
  902. /* T = 4, P = 2, 2 threads into each place. */
  903. p = places_array[test_places].places[1 - thr / 2];
  904. break;
  905. }
  906. print_affinity (p);
  907. printf ("\n");
  908. }
  909. }
  910. /* Close, spread. */
  911. #pragma omp parallel num_threads (4) proc_bind (spread)
  912. {
  913. verify (omp_proc_bind_spread, omp_proc_bind_close);
  914. #pragma omp critical
  915. {
  916. struct place p = places_array[0].places[0];
  917. int thr = omp_get_thread_num ();
  918. printf ("#4,#2 thread 2,%d", thr);
  919. if (omp_get_num_threads () == 4
  920. && (test_spread_master_close || test_true))
  921. /* Outer is close, inner is spread. */
  922. switch (places_array[test_places].count)
  923. {
  924. case 8:
  925. /* T = 4, P = 8, each subpartition has 2 places. */
  926. case 7:
  927. /* T = 4, P = 7, each subpartition has 2 places, but
  928. last partition, which has just one place. */
  929. p = places_array[test_places].places[thr == 3 ? 0
  930. : 2 + 2 * thr];
  931. break;
  932. case 5:
  933. /* T = 4, P = 5, first subpartition has 2 places, the
  934. rest just one. */
  935. p = places_array[test_places].places[thr == 3 ? 0
  936. : 2 + thr];
  937. break;
  938. case 3:
  939. /* T = 4, P = 3, unit sized subpartitions, third gets
  940. thr0 and thr3, first thr1, second thr2. */
  941. p = places_array[test_places].places[thr == 0 ? 2 : thr - 1];
  942. break;
  943. case 2:
  944. /* T = 4, P = 2, unit sized subpartitions, each with
  945. 2 threads. */
  946. p = places_array[test_places].places[1 - thr / 2];
  947. break;
  948. }
  949. print_affinity (p);
  950. printf ("\n");
  951. }
  952. #pragma omp barrier
  953. if (omp_get_thread_num () == 0)
  954. {
  955. /* Close, spread, close. */
  956. #pragma omp parallel num_threads (5) proc_bind (close)
  957. {
  958. verify (omp_proc_bind_close, omp_proc_bind_close);
  959. #pragma omp critical
  960. {
  961. struct place p = places_array[0].places[0];
  962. int thr = omp_get_thread_num ();
  963. printf ("#4,#2,#1 thread 2,0,%d", thr);
  964. if (omp_get_num_threads () == 5
  965. && (test_spread_master_close || test_true))
  966. /* Outer is close, inner spread, innermost close. */
  967. switch (places_array[test_places].count)
  968. {
  969. case 8:
  970. case 7:
  971. /* T = 5, P = 2. */
  972. p = places_array[test_places].places[2
  973. + (thr & 2) / 2];
  974. break;
  975. /* All the rest are T = 5, P = 1. */
  976. case 5:
  977. case 3:
  978. p = places_array[test_places].places[2];
  979. break;
  980. case 2:
  981. p = places_array[test_places].places[1];
  982. break;
  983. }
  984. print_affinity (p);
  985. printf ("\n");
  986. }
  987. }
  988. }
  989. #pragma omp barrier
  990. if (omp_get_thread_num () == 2)
  991. {
  992. /* Close, spread, close. */
  993. #pragma omp parallel num_threads (5) proc_bind (close)
  994. {
  995. verify (omp_proc_bind_close, omp_proc_bind_close);
  996. #pragma omp critical
  997. {
  998. struct place p = places_array[0].places[0];
  999. int thr = omp_get_thread_num ();
  1000. printf ("#4,#2,#2 thread 2,2,%d", thr);
  1001. if (omp_get_num_threads () == 5
  1002. && (test_spread_master_close || test_true))
  1003. /* Outer is close, inner spread, innermost close. */
  1004. switch (places_array[test_places].count)
  1005. {
  1006. case 8:
  1007. /* T = 5, P = 2. */
  1008. p = places_array[test_places].places[6
  1009. + (thr & 2) / 2];
  1010. break;
  1011. /* All the rest are T = 5, P = 1. */
  1012. case 7:
  1013. p = places_array[test_places].places[6];
  1014. break;
  1015. case 5:
  1016. p = places_array[test_places].places[4];
  1017. break;
  1018. case 3:
  1019. p = places_array[test_places].places[1];
  1020. break;
  1021. case 2:
  1022. p = places_array[test_places].places[0];
  1023. break;
  1024. }
  1025. print_affinity (p);
  1026. printf ("\n");
  1027. }
  1028. }
  1029. }
  1030. #pragma omp barrier
  1031. if (omp_get_thread_num () == 3)
  1032. {
  1033. /* Close, spread, close. */
  1034. #pragma omp parallel num_threads (5) proc_bind (close)
  1035. {
  1036. verify (omp_proc_bind_close, omp_proc_bind_close);
  1037. #pragma omp critical
  1038. {
  1039. struct place p = places_array[0].places[0];
  1040. int thr = omp_get_thread_num ();
  1041. printf ("#4,#2,#3 thread 2,3,%d", thr);
  1042. if (omp_get_num_threads () == 5
  1043. && (test_spread_master_close || test_true))
  1044. /* Outer is close, inner spread, innermost close. */
  1045. switch (places_array[test_places].count)
  1046. {
  1047. case 8:
  1048. case 7:
  1049. case 5:
  1050. /* T = 5, P = 2. */
  1051. p = places_array[test_places].places[(thr & 2) / 2];
  1052. break;
  1053. /* All the rest are T = 5, P = 1. */
  1054. case 3:
  1055. p = places_array[test_places].places[2];
  1056. break;
  1057. case 2:
  1058. p = places_array[test_places].places[0];
  1059. break;
  1060. }
  1061. print_affinity (p);
  1062. printf ("\n");
  1063. }
  1064. }
  1065. }
  1066. }
  1067. /* Close, master. */
  1068. #pragma omp parallel num_threads (4) proc_bind(master) \
  1069. firstprivate (pp)
  1070. {
  1071. verify (omp_proc_bind_master, omp_proc_bind_close);
  1072. #pragma omp critical
  1073. {
  1074. struct place p = places_array[0].places[0];
  1075. int thr = omp_get_thread_num ();
  1076. printf ("#4,#3 thread 2,%d", thr);
  1077. if (test_spread_master_close || test_true)
  1078. /* Outer is close, inner master. */
  1079. p = places_array[test_places].places[pp];
  1080. print_affinity (p);
  1081. printf ("\n");
  1082. }
  1083. }
  1084. /* Close, close. */
  1085. #pragma omp parallel num_threads (6) proc_bind (close)
  1086. {
  1087. verify (omp_proc_bind_close, omp_proc_bind_close);
  1088. #pragma omp critical
  1089. {
  1090. struct place p = places_array[0].places[0];
  1091. int thr = omp_get_thread_num ();
  1092. printf ("#4,#4 thread 2,%d", thr);
  1093. if (omp_get_num_threads () == 6
  1094. && (test_spread_master_close || test_true))
  1095. switch (places_array[test_places].count)
  1096. {
  1097. case 8:
  1098. /* T = 6, P = 8. */
  1099. p = places_array[test_places].places[2 + thr];
  1100. break;
  1101. case 7:
  1102. /* T = 6, P = 7. */
  1103. p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr];
  1104. break;
  1105. case 5:
  1106. /* T = 6, P = 5. thr{0,5} go into the third place. */
  1107. p = places_array[test_places].places[thr >= 3 ? thr - 3
  1108. : 2 + thr];
  1109. break;
  1110. case 3:
  1111. /* T = 6, P = 3, two threads into each place. */
  1112. p = places_array[test_places].places[thr < 2 ? 2
  1113. : thr / 2 - 1];
  1114. break;
  1115. case 2:
  1116. /* T = 6, P = 2, 3 threads into each place. */
  1117. p = places_array[test_places].places[1 - thr / 3];
  1118. break;
  1119. }
  1120. print_affinity (p);
  1121. printf ("\n");
  1122. }
  1123. }
  1124. }
  1125. }
  1126. return 0;
  1127. }