scan-18.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. /* { dg-require-effective-target size32plus } */
  2. /* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
  3. /* { dg-additional-options "-msse2" { target sse2_runtime } } */
  4. /* { dg-additional-options "-mavx" { target avx_runtime } } */
  5. /* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */
  /* abort is declared by hand; no header is included in the visible part
     of this file.  */
  6. extern void abort (void);
  /* Shared test data.  a[] is the input (main fills it with a[i] = i).
     b[], b2[] and b3[] receive the per-iteration prefix values in int,
     unsigned short and unsigned char width respectively.  r, r2 and r3
     are the scan accumulators used by foo and baz; being file-scope
     objects they start out as zero.  */
  7. int r, a[1024], b[1024];
  8. unsigned short r2, b2[1024];
  9. unsigned char r3, b3[1024];
  /* Exclusive prefix sum over 1024 elements using an orphaned
     `omp for simd' worksharing loop with an inscan reduction on the
     file-scope accumulators r, r2 and r3.

     Each iteration first stores the current accumulator values into
     b[i], b2[i] and b3[i] and then, after the scan directive, adds a[i]
     to every accumulator.  The narrower accumulators r2 and r3 take the
     same addend converted to unsigned short / unsigned char.

     The parameters a, b, b2 and b3 shadow the file-scope arrays of the
     same names.  There is no `parallel' construct in this function; the
     caller is expected to provide the enclosing parallel region.

     NOTE(review): noipa is the GCC attribute that disables
     interprocedural optimizations for this function -- see the GCC
     function attribute documentation.  */
  10. __attribute__((noipa)) void
  11. foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
  12. {
  13. #pragma omp for simd reduction (inscan, +:r, r2, r3)
  14. for (int i = 0; i < 1024; i++)
  15. {
  /* Before the scan directive: record the running totals.  */
  16. {
  17. b[i] = r;
  18. b2[i] = r2;
  19. b3[i] = r3;
  20. }
  21. #pragma omp scan exclusive(r, r2, r3)
  /* After the scan directive: fold this iteration's input into the
     accumulators.  */
  22. { r += a[i]; r2 += a[i]; r3 += a[i]; }
  23. }
  24. }
  /* Exclusive prefix sum of 2 * a[i] over the file-scope arrays, using
     local accumulators s, s2 and s3 (all starting at zero) in an
     `omp for simd' inscan reduction nested directly inside an
     `omp parallel' region opened by this function.

     Each iteration stores the current accumulator values into b[i],
     b2[i] and b3[i], then adds 2 * a[i] to every accumulator.

     On return *s2p and *s3p hold the final unsigned short and unsigned
     char accumulators; the final int accumulator is the return value.  */
  25. __attribute__((noipa)) int
  26. bar (unsigned short *s2p, unsigned char *s3p)
  27. {
  28. int s = 0;
  29. unsigned short s2 = 0;
  30. unsigned char s3 = 0;
  31. #pragma omp parallel
  32. #pragma omp for simd reduction (inscan, +:s, s2, s3)
  33. for (int i = 0; i < 1024; i++)
  34. {
  /* Before the scan directive: record the running totals.  */
  35. { b[i] = s; b2[i] = s2; b3[i] = s3; }
  36. #pragma omp scan exclusive(s, s2, s3)
  /* After the scan directive: accumulate twice the input element.  */
  37. {
  38. s += 2 * a[i];
  39. s2 += 2 * a[i];
  40. s3 += 2 * a[i];
  41. }
  42. }
  /* Hand the narrow totals back through the out-parameters.  */
  43. *s2p = s2;
  44. *s3p = s3;
  45. return s;
  46. }
  /* Same exclusive prefix sum as an inscan reduction on the file-scope
     accumulators r, r2 and r3, but written as a single combined
     `parallel for simd' construct carrying `if (simd: 0)', i.e. an if
     clause restricted to the simd part whose condition is constant
     false.

     NOTE(review): per the OpenMP specification a false simd if-clause
     should make the simd part process one iteration at a time --
     confirm against the spec version targeted by this test.

     Each iteration stores the current accumulator values into b[i],
     b2[i] and b3[i], then adds a[i] to every accumulator.  The
     parameters shadow the file-scope arrays of the same names.  The
     accumulators are not reset here; whatever value they hold on entry
     is the starting point of the scan.  */
  47. __attribute__((noipa)) void
  48. baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
  49. {
  50. #pragma omp parallel for simd reduction (inscan, +:r, r2, r3) if (simd: 0)
  51. for (int i = 0; i < 1024; i++)
  52. {
  /* Before the scan directive: record the running totals.  */
  53. {
  54. b[i] = r;
  55. b2[i] = r2;
  56. b3[i] = r3;
  57. }
  58. #pragma omp scan exclusive(r, r2, r3)
  /* After the scan directive: fold this iteration's input into the
     accumulators.  */
  59. {
  60. r += a[i];
  61. r2 += a[i];
  62. r3 += a[i];
  63. }
  64. }
  65. }
  /* Exclusive prefix sum of 2 * a[i] over the file-scope arrays with
     local accumulators s, s2 and s3 (all starting at zero), written as
     a combined `parallel for simd' construct with `simdlen (1)'.

     NOTE(review): simdlen (1) requests a preferred SIMD length of one
     iteration -- confirm the exact wording in the OpenMP specification.

     Each iteration stores the current accumulator values into b[i],
     b2[i] and b3[i], then adds 2 * a[i] to every accumulator.

     On return *s2p and *s3p hold the final unsigned short and unsigned
     char accumulators; the final int accumulator is the return value.  */
  66. __attribute__((noipa)) int
  67. qux (unsigned short *s2p, unsigned char *s3p)
  68. {
  69. int s = 0;
  70. unsigned short s2 = 0;
  71. unsigned char s3 = 0;
  72. #pragma omp parallel for simd simdlen (1) reduction (inscan, +:s, s2, s3)
  73. for (int i = 0; i < 1024; i++)
  74. {
  /* Before the scan directive: record the running totals.  */
  75. { b[i] = s; b2[i] = s2; b3[i] = s3; }
  76. #pragma omp scan exclusive(s, s2, s3)
  /* After the scan directive: accumulate twice the input element.  */
  77. { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
  78. }
  /* Hand the narrow totals back through the out-parameters.  */
  79. *s2p = s2;
  80. *s3p = s3;
  81. return s;
  82. }
  /* Test driver.  Fills the input, runs foo, bar, baz and qux in turn,
     and after each one checks both the final accumulator values and
     every stored prefix value against a serially computed exclusive
     prefix sum.  Any mismatch calls abort; success returns 0.  */
  83. int
  84. main ()
  85. {
  /* s is the serially computed expected prefix value in the check
     loops below.  s2 and s3 receive the narrow totals from bar and qux;
     bar assigns both before they are first read.  */
  86. int s = 0;
  87. unsigned short s2;
  88. unsigned char s3;
  /* Input is a[i] = i; the output arrays start at -1 (all bits set in
     the unsigned ones).  */
  89. for (int i = 0; i < 1024; ++i)
  90. {
  91. a[i] = i;
  92. b[i] = -1;
  93. b2[i] = -1;
  94. b3[i] = -1;
  /* Empty asm that claims to read and write i.
     NOTE(review): presumably here to keep the compiler from vectorizing
     or otherwise transforming this init loop -- confirm.  */
  95. asm ("" : "+g" (i));
  96. }
  /* foo only contains an orphaned worksharing loop, so the parallel
     region is supplied here.  */
  97. #pragma omp parallel
  98. foo (a, b, b2, b3);
  /* 1024 * 1023 / 2 is the sum 0 + 1 + ... + 1023; the narrow
     accumulators must equal that total truncated to their width.  */
  99. if (r != 1024 * 1023 / 2
  100. || r2 != (unsigned short) r
  101. || r3 != (unsigned char) r)
  102. abort ();
  /* Exclusive scan: element i must hold the sum of a[0..i-1], so s is
     compared first and advanced afterwards.  */
  103. for (int i = 0; i < 1024; ++i)
  104. {
  105. if (b[i] != s
  106. || b2[i] != (unsigned short) s
  107. || b3[i] != (unsigned char) s)
  108. abort ();
  109. else
  110. {
  /* NOTE(review): presumably overwritten with fresh filler values so
     results left over from foo cannot satisfy the check after bar --
     confirm.  */
  111. b[i] = 25;
  112. b2[i] = 24;
  113. b3[i] = 26;
  114. }
  115. s += i;
  116. }
  /* bar accumulates 2 * a[i], so its total is twice the sum above.  */
  117. if (bar (&s2, &s3) != 1024 * 1023)
  118. abort ();
  119. if (s2 != (unsigned short) (1024 * 1023)
  120. || s3 != (unsigned char) (1024 * 1023))
  121. abort ();
  /* Restart the serial reference and verify bar's prefix values.  */
  122. s = 0;
  123. for (int i = 0; i < 1024; ++i)
  124. {
  125. if (b[i] != s
  126. || b2[i] != (unsigned short) s
  127. || b3[i] != (unsigned char) s)
  128. abort ();
  129. else
  130. {
  131. b[i] = -1;
  132. b2[i] = -1;
  133. b3[i] = -1;
  134. }
  135. s += 2 * i;
  136. }
  /* baz scans on the file-scope accumulators and does not reset them,
     so clear what foo left behind.  */
  137. r = 0;
  138. r2 = 0;
  139. r3 = 0;
  140. baz (a, b, b2, b3);
  141. if (r != 1024 * 1023 / 2
  142. || r2 != (unsigned short) r
  143. || r3 != (unsigned char) r)
  144. abort ();
  /* Restart the serial reference and verify baz's prefix values.  */
  145. s = 0;
  146. for (int i = 0; i < 1024; ++i)
  147. {
  148. if (b[i] != s
  149. || b2[i] != (unsigned short) s
  150. || b3[i] != (unsigned char) s)
  151. abort ();
  152. else
  153. {
  154. b[i] = 25;
  155. b2[i] = 24;
  156. b3[i] = 26;
  157. }
  158. s += i;
  159. }
  /* Clear the out-parameters so values left by bar cannot pass the
     check after qux.  */
  160. s2 = 0;
  161. s3 = 0;
  162. if (qux (&s2, &s3) != 1024 * 1023)
  163. abort ();
  164. if (s2 != (unsigned short) (1024 * 1023)
  165. || s3 != (unsigned char) (1024 * 1023))
  166. abort ();
  /* Restart the serial reference and verify qux's prefix values; this
     is the last run, so the arrays are not refilled.  */
  167. s = 0;
  168. for (int i = 0; i < 1024; ++i)
  169. {
  170. if (b[i] != s
  171. || b2[i] != (unsigned short) s
  172. || b3[i] != (unsigned char) s)
  173. abort ();
  174. s += 2 * i;
  175. }
  176. return 0;
  177. }