sort-1.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /* Test and benchmark of a couple of parallel sorting algorithms.
  2. Copyright (C) 2008-2022 Free Software Foundation, Inc.
  3. GCC is free software; you can redistribute it and/or modify it under
  4. the terms of the GNU General Public License as published by the Free
  5. Software Foundation; either version 3, or (at your option) any later
  6. version.
  7. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  8. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  9. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  10. for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with GCC; see the file COPYING3. If not see
  13. <http://www.gnu.org/licenses/>. */
  14. /* { dg-additional-options "-Wno-deprecated-declarations" } */
  15. #include <limits.h>
  16. #include <omp.h>
  17. #include <stdbool.h>
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. int failures;
  22. #define THRESHOLD 100
  23. static void
  24. verify (const char *name, double stime, int *array, int count)
  25. {
  26. int i;
  27. double etime = omp_get_wtime ();
  28. printf ("%s: %g\n", name, etime - stime);
  29. for (i = 1; i < count; i++)
  30. if (array[i] < array[i - 1])
  31. {
  32. printf ("%s: incorrectly sorted\n", name);
  33. failures = 1;
  34. }
  35. }
  36. static void
  37. insertsort (int *array, int s, int e)
  38. {
  39. int i, j, val;
  40. for (i = s + 1; i <= e; i++)
  41. {
  42. val = array[i];
  43. j = i;
  44. while (j-- > s && val < array[j])
  45. array[j + 1] = array[j];
  46. array[j + 1] = val;
  47. }
  48. }
  49. struct int_pair
  50. {
  51. int lo;
  52. int hi;
  53. };
  54. struct int_pair_stack
  55. {
  56. struct int_pair *top;
  57. #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
  58. struct int_pair arr[STACK_SIZE];
  59. };
  60. static inline void
  61. init_int_pair_stack (struct int_pair_stack *stack)
  62. {
  63. stack->top = &stack->arr[0];
  64. }
  65. static inline void
  66. push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
  67. {
  68. stack->top->lo = lo;
  69. stack->top->hi = hi;
  70. stack->top++;
  71. }
  72. static inline void
  73. pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
  74. {
  75. stack->top--;
  76. *lo = stack->top->lo;
  77. *hi = stack->top->hi;
  78. }
  79. static inline int
  80. size_int_pair_stack (struct int_pair_stack *stack)
  81. {
  82. return stack->top - &stack->arr[0];
  83. }
  84. static inline void
  85. busy_wait (void)
  86. {
  87. #if defined __i386__ || defined __x86_64__
  88. __builtin_ia32_pause ();
  89. #elif defined __ia64__
  90. __asm volatile ("hint @pause" : : : "memory");
  91. #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
  92. __asm volatile ("membar #LoadLoad" : : : "memory");
  93. #else
  94. __asm volatile ("" : : : "memory");
  95. #endif
  96. }
  97. static inline void
  98. swap (int *array, int a, int b)
  99. {
  100. int val = array[a];
  101. array[a] = array[b];
  102. array[b] = val;
  103. }
  104. static inline int
  105. choose_pivot (int *array, int lo, int hi)
  106. {
  107. int mid = (lo + hi) / 2;
  108. if (array[mid] < array[lo])
  109. swap (array, lo, mid);
  110. if (array[hi] < array[mid])
  111. {
  112. swap (array, mid, hi);
  113. if (array[mid] < array[lo])
  114. swap (array, lo, mid);
  115. }
  116. return array[mid];
  117. }
  118. static inline int
  119. partition (int *array, int lo, int hi)
  120. {
  121. int pivot = choose_pivot (array, lo, hi);
  122. int left = lo;
  123. int right = hi;
  124. for (;;)
  125. {
  126. while (array[++left] < pivot);
  127. while (array[--right] > pivot);
  128. if (left >= right)
  129. break;
  130. swap (array, left, right);
  131. }
  132. return left;
  133. }
  134. static void
  135. sort1 (int *array, int count)
  136. {
  137. omp_lock_t lock;
  138. struct int_pair_stack global_stack;
  139. int busy = 1;
  140. int num_threads;
  141. omp_init_lock (&lock);
  142. init_int_pair_stack (&global_stack);
  143. #pragma omp parallel firstprivate (array, count)
  144. {
  145. int lo = 0, hi = 0, mid, next_lo, next_hi;
  146. bool idle = true;
  147. struct int_pair_stack local_stack;
  148. init_int_pair_stack (&local_stack);
  149. if (omp_get_thread_num () == 0)
  150. {
  151. num_threads = omp_get_num_threads ();
  152. hi = count - 1;
  153. idle = false;
  154. }
  155. for (;;)
  156. {
  157. if (hi - lo < THRESHOLD)
  158. {
  159. insertsort (array, lo, hi);
  160. lo = hi;
  161. }
  162. if (lo >= hi)
  163. {
  164. if (size_int_pair_stack (&local_stack) == 0)
  165. {
  166. again:
  167. omp_set_lock (&lock);
  168. if (size_int_pair_stack (&global_stack) == 0)
  169. {
  170. if (!idle)
  171. busy--;
  172. if (busy == 0)
  173. {
  174. omp_unset_lock (&lock);
  175. break;
  176. }
  177. omp_unset_lock (&lock);
  178. idle = true;
  179. while (size_int_pair_stack (&global_stack) == 0
  180. && busy)
  181. busy_wait ();
  182. goto again;
  183. }
  184. if (idle)
  185. busy++;
  186. pop_int_pair_stack (&global_stack, &lo, &hi);
  187. omp_unset_lock (&lock);
  188. idle = false;
  189. }
  190. else
  191. pop_int_pair_stack (&local_stack, &lo, &hi);
  192. }
  193. mid = partition (array, lo, hi);
  194. if (mid - lo < hi - mid)
  195. {
  196. next_lo = mid;
  197. next_hi = hi;
  198. hi = mid - 1;
  199. }
  200. else
  201. {
  202. next_lo = lo;
  203. next_hi = mid - 1;
  204. lo = mid;
  205. }
  206. if (next_hi - next_lo < THRESHOLD)
  207. insertsort (array, next_lo, next_hi);
  208. else
  209. {
  210. if (size_int_pair_stack (&global_stack) < num_threads - 1)
  211. {
  212. int size;
  213. omp_set_lock (&lock);
  214. size = size_int_pair_stack (&global_stack);
  215. if (size < num_threads - 1 && size < STACK_SIZE)
  216. push_int_pair_stack (&global_stack, next_lo, next_hi);
  217. else
  218. push_int_pair_stack (&local_stack, next_lo, next_hi);
  219. omp_unset_lock (&lock);
  220. }
  221. else
  222. push_int_pair_stack (&local_stack, next_lo, next_hi);
  223. }
  224. }
  225. }
  226. omp_destroy_lock (&lock);
  227. }
  228. static void
  229. sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
  230. {
  231. int mid;
  232. if (hi - lo < THRESHOLD)
  233. {
  234. insertsort (array, lo, hi);
  235. return;
  236. }
  237. mid = partition (array, lo, hi);
  238. if (*busy >= num_threads)
  239. {
  240. sort2_1 (array, lo, mid - 1, num_threads, busy);
  241. sort2_1 (array, mid, hi, num_threads, busy);
  242. return;
  243. }
  244. #pragma omp atomic
  245. *busy += 1;
  246. #pragma omp parallel num_threads (2) \
  247. firstprivate (array, lo, hi, mid, num_threads, busy)
  248. {
  249. if (omp_get_thread_num () == 0)
  250. sort2_1 (array, lo, mid - 1, num_threads, busy);
  251. else
  252. {
  253. sort2_1 (array, mid, hi, num_threads, busy);
  254. #pragma omp atomic
  255. *busy -= 1;
  256. }
  257. }
  258. }
  259. static void
  260. sort2 (int *array, int count)
  261. {
  262. int num_threads;
  263. int busy = 1;
  264. #pragma omp parallel
  265. #pragma omp single nowait
  266. num_threads = omp_get_num_threads ();
  267. sort2_1 (array, 0, count - 1, num_threads, &busy);
  268. }
  269. #if _OPENMP >= 200805
  270. static void
  271. sort3_1 (int *array, int lo, int hi)
  272. {
  273. int mid;
  274. if (hi - lo < THRESHOLD)
  275. {
  276. insertsort (array, lo, hi);
  277. return;
  278. }
  279. mid = partition (array, lo, hi);
  280. #pragma omp task
  281. sort3_1 (array, lo, mid - 1);
  282. sort3_1 (array, mid, hi);
  283. }
  284. static void
  285. sort3 (int *array, int count)
  286. {
  287. #pragma omp parallel
  288. #pragma omp single
  289. sort3_1 (array, 0, count - 1);
  290. }
  291. #endif
  292. int
  293. main (int argc, char **argv)
  294. {
  295. int i, count = 1000000;
  296. double stime;
  297. int *unsorted, *sorted, num_threads;
  298. if (argc >= 2)
  299. count = strtoul (argv[1], NULL, 0);
  300. unsorted = malloc (count * sizeof (int));
  301. sorted = malloc (count * sizeof (int));
  302. if (unsorted == NULL || sorted == NULL)
  303. {
  304. puts ("allocation failure");
  305. exit (1);
  306. }
  307. srand (0xdeadbeef);
  308. for (i = 0; i < count; i++)
  309. unsorted[i] = rand ();
  310. omp_set_nested (1);
  311. omp_set_dynamic (0);
  312. #pragma omp parallel
  313. #pragma omp single nowait
  314. num_threads = omp_get_num_threads ();
  315. printf ("Threads: %d\n", num_threads);
  316. memcpy (sorted, unsorted, count * sizeof (int));
  317. stime = omp_get_wtime ();
  318. sort1 (sorted, count);
  319. verify ("sort1", stime, sorted, count);
  320. memcpy (sorted, unsorted, count * sizeof (int));
  321. stime = omp_get_wtime ();
  322. sort2 (sorted, count);
  323. verify ("sort2", stime, sorted, count);
  324. #if _OPENMP >= 200805
  325. memcpy (sorted, unsorted, count * sizeof (int));
  326. stime = omp_get_wtime ();
  327. sort3 (sorted, count);
  328. verify ("sort3", stime, sorted, count);
  329. #endif
  330. return 0;
  331. }