/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */
#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
/* Attributes applied to threads created for team members.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;

/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
/* No compiler TLS available: fall back to pthread-specific storage.  */
pthread_key_t gomp_tls_key;
#endif

/* This structure is used to communicate across pthread_create.  The
   creating thread fills it in and the new thread copies the fields into
   its own gomp_thread in gomp_thread_start.  */
struct gomp_thread_start_data
{
  void (*fn) (void *);			/* Outlined region body to run.  */
  void *fn_data;			/* Argument passed to FN.  */
  struct gomp_team_state ts;		/* Initial team state for the thread.  */
  struct gomp_task *task;		/* Implicit task of the new thread.  */
  struct gomp_thread_pool *thread_pool;	/* Pool the thread docks in.  */
  unsigned int place;			/* 1-based place number, 0 = none.  */
  unsigned int num_teams;
  unsigned int team_num;
  bool nested;				/* True for a nested parallel region;
					   nested threads exit after one
					   region instead of docking.  */
  pthread_t handle;
};
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.
   XDATA points at a gomp_thread_start_data that is only guaranteed valid
   until the first dock/team barrier is passed, so everything needed is
   copied out of it first.  Returns NULL (the pthread exit value is
   unused).  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  /* Without TLS the per-thread structure lives on this thread's stack
     and is published via the pthread key below.  */
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  /* Publish our release semaphore so ordered sections can signal us.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      /* Nested team member: run exactly one region, then terminate.  */
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      /* DATA is shared with the creating thread's stack; it becomes
	 invalid once this barrier is passed.  */
      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Top-level team member: run the region, then dock in the pool and
	 wait to be handed the next region via thr->fn/thr->data.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  /* A NULL fn left behind by the master tells us to exit the
	     idle loop and terminate.  */
	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
  120. static inline struct gomp_team *
  121. get_last_team (unsigned nthreads)
  122. {
  123. struct gomp_thread *thr = gomp_thread ();
  124. if (thr->ts.team == NULL)
  125. {
  126. struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
  127. struct gomp_team *last_team = pool->last_team;
  128. if (last_team != NULL && last_team->nthreads == nthreads)
  129. {
  130. pool->last_team = NULL;
  131. return last_team;
  132. }
  133. }
  134. return NULL;
  135. }
/* Create a new team data structure for NTHREADS threads, either by
   recycling the pool's cached team or by allocating a fresh one.  The
   allocation carries two trailing per-thread arrays: implicit_task[]
   (the flexible member) followed by the ordered_release pointer array.
   Returns the initialized team.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      /* Per-thread space for one ordered_release slot and one implicit
	 task, appended after the struct itself.  */
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      team = gomp_aligned_alloc (__alignof (struct gomp_team),
				 sizeof (*team) + nthreads * extra);
#else
      team = team_malloc (sizeof (*team) + nthreads * extra);
#endif

      /* One-time initialization for a freshly allocated team; a recycled
	 team keeps its barrier, locks and nthreads.  */
#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);
      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  /* Chain the remaining inline work shares into the allocation free
     list; after the loop I == 7, terminating the list at the last
     inline entry.  */
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  /* ordered_release[] lives directly after implicit_task[nthreads].  */
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;
  team->task_detach_count = 0;

  return team;
}
/* Free a team data structure.  Destroys the team's synchronization
   objects and task queue before releasing the memory itself; TEAM must
   no longer be in use by any thread.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}
/* Shutdown function run by a docked pool thread when the pool is being
   torn down (installed as thr->fn by gomp_free_thread).  THREAD_POOL is
   the pool being destroyed.  Signals the master via the dock barrier,
   cleans up this thread's state, then terminates the thread in a
   target-specific way — this function never returns.  */

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  /* Let the master's second threads_dock wait observe our arrival.  */
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  /* Flush the data cache before ending the wavefront.  */
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
/* Free a thread pool and release its threads.  Also used as the
   gomp_thread_destructor, hence the unused ARG.  Wakes every docked
   pool thread with gomp_free_pool_helper, waits for them to terminate,
   then frees the pool's resources and, finally, this thread's own
   dangling team/task state.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  /* Hand each docked thread the pool-teardown function instead
	     of a parallel region body.  */
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
/* Launch a team.  FN/DATA is the outlined parallel region body and its
   argument, NTHREADS the requested team size, FLAGS carries the
   proc_bind clause in its low 3 bits, TEAM the structure from
   gomp_new_team, and TASKGROUP the enclosing taskgroup (or NULL).
   The calling thread becomes team member 0; idle pool threads are
   reused for a non-nested region and any remaining members are created
   with pthread_create.  On return all members have been released to run
   FN.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data = NULL;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  /* Place-computation state: S = threads (or places) per subpartition,
     REST = remainder, P = current place (0-based), K = position within
     the current place/subpartition.  */
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  /* Any nonzero nesting level means this is a nested team.  */
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
				      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  /* Install the new team state on the master (member 0).  */
  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  /* Resolve the nthreads-var and bind-var ICVs for the new nesting
     level; a proc_bind clause in FLAGS overrides the bind ICV.  */
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  /* A team of one needs no helpers released or created.  */
  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REM threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));

	  /* Add current (master) thread to threads[].  */
	  pool->threads[0] = thr;
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      /* Advance P/K/S to member I's place, mirroring the
		 initial switch (bind) above.  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      /* If the thread currently in slot I isn't already on an
		 acceptable place, go find (or wait to create) one that
		 is, bucketing displaced threads by place in
		 AFFINITY_THR.  */
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  force_display = true;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      /* Stack allocation for small partitions, heap
			 for large ones (freed at function end).  */
		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      /* Bucket the not-yet-assigned pool threads by
			 their place, chaining same-place threads
			 through thr->data.  */
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      /* For proc_bind true, settle for any thread
			 already inside this member's subpartition.  */
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  /* Pop a suitable thread from the place-L bucket.  */
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  /* Initialize member I's team state and hand it FN/DATA; it is
	     released when the dock barrier is passed below.  */
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
	  nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->num_teams = thr->num_teams;
	  nthr->team_num = thr->team_num;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->task->taskgroup = taskgroup;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  /* Account for the threads about to be created.  When the pool was
     empty the master is one of the NTHREADS, hence --diff.  */
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Per-thread affinity needs a private attr object; copy over the
	 configured stack size.  */
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  /* One start-data slot per thread still to be created; alloca is safe
     here because gomp_thread_start copies everything out before the
     team/dock barrier lets this frame unwind.  */
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  /* Same place-advancement logic as in the reuse loop above,
	     but recorded into START_DATA.  */
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  /* Slot already filled by a reused thread on this place.  */
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->num_teams = thr->num_teams;
      start_data->team_num = thr->team_num;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
			    start_data);
      start_data++;
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  /* Release the whole team to start executing FN.  */
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
	  || nthreads != old_threads_used
	  || force_display)
	{
	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
					thr->place);
	  if (nested)
	    {
	      /* Rewind START_DATA to the first created member; nested
		 teams create all NTHREADS - 1 helpers.  */
	      start_data -= nthreads - 1;
	      for (i = 1; i < nthreads; ++i)
		{
		  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
						start_data->handle,
#else
						gomp_thread_self (),
#endif
						&start_data->ts,
						start_data->place);
		  start_data++;
		}
	    }
	  else
	    {
	      for (i = 1; i < nthreads; ++i)
		{
		  gomp_thread_handle handle
		    = gomp_thread_to_pthread_t (pool->threads[i]);
		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
						pool->threads[i]->place);
		}
	    }
	}
    }
  /* Only the heap-allocated variant needs freeing; the <= 64 case was
     alloca'ed.  */
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif
/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might get awaited count in
     team->barrier in a inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      /* Cancelled team: walk the explicit list of work shares still
	 pending release.  A work share whose next_ws ptrlock was never
	 set is pointed at itself first (gomp_ptrlock_set) before being
	 finalized — NOTE(review): presumably so a later ptrlock_get
	 does not block; confirm against the ptrlock implementation.  */
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    /* Normal completion: only the current work share needs finishing.  */
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  /* Pop back to the parent team's state.  */
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
      /* Nested team: the extra threads terminate here, so drop the
	 global managed-thread count by nthreads - 1.  */
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  /* Free any work-share blocks allocated beyond the inline
     work_shares[] array inside the team.  */
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    /* Nested team, or a team of one: nothing will reuse it, free now.  */
    free_team (team);
  else
    {
      /* Top-level team: cache it in the pool for the next parallel
	 region, freeing whatever team was cached before.  */
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
  926. #ifdef LIBGOMP_USE_PTHREADS
  927. /* Constructors for this file. */
  928. static void __attribute__((constructor))
  929. initialize_team (void)
  930. {
  931. #if !defined HAVE_TLS && !defined USE_EMUTLS
  932. static struct gomp_thread initial_thread_tls_data;
  933. pthread_key_create (&gomp_tls_key, NULL);
  934. pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
  935. #endif
  936. if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
  937. gomp_fatal ("could not create thread pool destructor.");
  938. }
/* Library destructor: runs when libgomp is unloaded and removes the
   per-thread destructor key registered in initialize_team.  */
static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
/* Similar to gomp_free_pool_helper, but don't detach itself,
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  /* Signal gomp_pause_host's second barrier wait that this thread has
     reached the teardown point, then release per-thread resources and
     exit without detaching so the pthread_t stays joinable.  */
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}
/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;

  /* Only allowed from the initial thread outside any parallel region.  */
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  /* Collect joinable handles up front: once the threads are
	     undocked below they will exit, so the gomp_thread structs
	     must not be consulted afterwards.  Index 0 is this thread
	     and is skipped.  */
	  pthread_t *thrs
	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_pause_pool_helper;
	      nthr->data = pool;
	      thrs[i] = gomp_thread_to_pthread_t (nthr);
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_pause_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);
	  /* All pool threads but this one are terminating: drop the
	     global managed-thread count accordingly.  */
#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	  /* Reap the helper threads; gomp_pause_pool_helper did not
	     detach them precisely so this join is possible.  */
	  for (i = 1; i < pool->threads_used; i++)
	    pthread_join (thrs[i], NULL);
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
  1011. #endif
  1012. struct gomp_task_icv *
  1013. gomp_new_icv (void)
  1014. {
  1015. struct gomp_thread *thr = gomp_thread ();
  1016. struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  1017. gomp_init_task (task, NULL, &gomp_global_icv);
  1018. thr->task = task;
  1019. #ifdef LIBGOMP_USE_PTHREADS
  1020. pthread_setspecific (gomp_thread_destructor, thr);
  1021. #endif
  1022. return &task->icv;
  1023. }