loop_ull.c

/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
                    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
                    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
                ? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (up, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size_ull)
                                  < 1ULL << (sizeof (gomp_ull)
                                             * __CHAR_BIT__ / 2 - 1), 1))
              ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
                                        - (nthreads + 1) * ws->chunk_size_ull);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
                                   < 1ULL << (sizeof (gomp_ull)
                                              * __CHAR_BIT__ / 2 - 1), 1))
          ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
                                    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
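
/* Illustration (editorial note, not part of the library): on an LP64 target
   with, say, 8 threads and schedule(dynamic,16) on an upward loop with
   incr == 1, the test above sets ws->mode to 1 whenever
   end_ull < ULLONG_MAX - (8 + 1) * 16, i.e. whenever no thread can wrap the
   iterator while claiming its next chunk, which lets the dynamic iterator
   use a plain atomic add without extra overflow checks.  Bit 1 of ws->mode
   records a downward loop (the ws->mode |= 2 above).  The exact encoding is
   an internal detail and may change between libgomp versions.  */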

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
                                          icv->run_sched_chunk_size,
                                          istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         0, istart, iend);
    default:
      abort ();
    }
}
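
/* Usage sketch (illustrative only, not code emitted by this file): for a
   loop such as

     #pragma omp for schedule(runtime)
     for (unsigned long long i = 0; i < n; i += 4)
       body (i);

   a compiler typically emits something along the lines of

     unsigned long long istart, iend, i;
     if (GOMP_loop_ull_runtime_start (true, 0, n, 4, &istart, &iend))
       do
         for (i = istart; i < iend; i += 4)
           body (i);
       while (GOMP_loop_ull_runtime_next (&istart, &iend));
     GOMP_loop_end ();

   Each thread repeatedly asks for an iteration block until the *_next
   routine returns false, then joins the barrier in GOMP_loop_end (or
   GOMP_loop_end_nowait for a nowait loop, both defined in loop.c).  The
   exact lowering is compiler- and version-dependent.  */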

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            sched = GFS_STATIC;
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}
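
/* Example of the mapping above (illustrative): with OMP_SCHEDULE="guided,7"
   the run-sched ICV is GFS_GUIDED with chunk size 7, so
   gomp_adjust_sched (GFS_RUNTIME | GFS_MONOTONIC, &chunk_size) returns
   GFS_GUIDED and stores 7 into *chunk_size; with OMP_SCHEDULE="auto" it
   returns GFS_STATIC with a chunk size of 0, matching the schedule(auto)
   fallback used elsewhere in this file.  */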

bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
                     gomp_ull incr, long sched, gomp_ull chunk_size,
                     gomp_ull *istart, gomp_ull *iend,
                     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
          if (size > (sizeof (struct gomp_work_share)
                      - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 0, istart, iend);
    default:
      abort ();
    }
}
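
/* Usage sketch (illustrative only): a loop such as

     #pragma omp for ordered schedule(runtime)
     for (unsigned long long i = 0; i < n; i++)
       {
         pre (i);
         #pragma omp ordered
         ord (i);
         post (i);
       }

   is typically lowered to

     unsigned long long istart, iend, i;
     if (GOMP_loop_ull_ordered_runtime_start (true, 0, n, 1, &istart, &iend))
       do
         for (i = istart; i < iend; i++)
           {
             pre (i);
             GOMP_ordered_start ();
             ord (i);
             GOMP_ordered_end ();
             post (i);
           }
       while (GOMP_loop_ull_ordered_runtime_next (&istart, &iend));
     GOMP_loop_end ();

   GOMP_ordered_start and GOMP_ordered_end are defined in ordered.c; the
   exact lowering is compiler-dependent.  */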

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, long sched, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend,
                             uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
   and other COUNTS array elements tell the library number of iterations
   in the ordered inner loops.  */

static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
                                                   icv->run_sched_chunk_size,
                                                   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
                              long sched, gomp_ull chunk_size,
                              gomp_ull *istart, gomp_ull *iend,
                              uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}
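
/* Usage sketch (illustrative only): for a doacross loop nest such as

     #pragma omp for ordered(2)
     for (unsigned long long i = 0; i < n; i++)
       for (unsigned long long j = 0; j < m; j++)
         {
           #pragma omp ordered depend(sink: i - 1, j)
           work (i, j);
           #pragma omp ordered depend(source)
         }

   the compiler passes counts[] = { n, m } with ncounts == 2 to one of the
   *_doacross_*_start entry points above, so the workshared loop runs over
   0 .. counts[0] - 1, while the depend clauses map to GOMP_doacross_ull_wait
   and GOMP_doacross_ull_post calls handled in ordered.c.  The exact lowering
   is compiler-dependent.  */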

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
        __attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
        __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
        __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
        __attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
        __attribute__((alias ("gomp_loop_ull_doacross_guided_start")));
extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
        __attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
        __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
        __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull chunk_size,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
                                         gomp_ull incr, gomp_ull chunk_size,
                                         gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                                gomp_ull end, gomp_ull incr,
                                                gomp_ull *istart,
                                                gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
                                               istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
                                               gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif