/* scan-15.c */
  /* { dg-require-effective-target size32plus } */
  /* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
  /* { dg-additional-options "-msse2" { target sse2_runtime } } */
  /* { dg-additional-options "-mavx" { target avx_runtime } } */
  /* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */
  /* Declared by hand; this file includes no headers.  */
  extern void abort (void);

  /* r: file-scope accumulator used as the inscan reduction variable by
        foo and baz.
     a: input values, filled by main.
     b: output array receiving the exclusive-scan results.  */
  int r, a[1024], b[1024];
  8. __attribute__((noipa)) void
  9. foo (int *a, int *b)
  10. {
  11. #pragma omp for simd reduction (inscan, +:r)
  12. for (int i = 0; i < 1024; i++)
  13. {
  14. b[i] = r;
  15. #pragma omp scan exclusive(r)
  16. r += a[i];
  17. }
  18. }
  19. __attribute__((noipa)) int
  20. bar (void)
  21. {
  22. int s = 0;
  23. #pragma omp parallel
  24. #pragma omp for simd reduction (inscan, +:s)
  25. for (int i = 0; i < 1024; i++)
  26. {
  27. b[i] = s;
  28. #pragma omp scan exclusive(s)
  29. s += 2 * a[i];
  30. }
  31. return s;
  32. }
  33. __attribute__((noipa)) void
  34. baz (int *a, int *b)
  35. {
  36. #pragma omp parallel for simd simdlen (1) reduction (inscan, +:r)
  37. for (int i = 0; i < 1024; i++)
  38. {
  39. b[i] = r;
  40. #pragma omp scan exclusive(r)
  41. r += a[i];
  42. }
  43. }
  44. __attribute__((noipa)) int
  45. qux (void)
  46. {
  47. int s = 0;
  48. #pragma omp parallel for simd if (simd: 0) reduction (inscan, +:s)
  49. for (int i = 0; i < 1024; i++)
  50. {
  51. b[i] = s;
  52. #pragma omp scan exclusive(s)
  53. s += 2 * a[i];
  54. }
  55. return s;
  56. }
  57. int
  58. main ()
  59. {
  60. int s = 0;
  61. for (int i = 0; i < 1024; ++i)
  62. {
  63. a[i] = i;
  64. b[i] = -1;
  65. asm ("" : "+g" (i));
  66. }
  67. #pragma omp parallel
  68. foo (a, b);
  69. if (r != 1024 * 1023 / 2)
  70. abort ();
  71. for (int i = 0; i < 1024; ++i)
  72. {
  73. if (b[i] != s)
  74. abort ();
  75. else
  76. b[i] = 25;
  77. s += i;
  78. }
  79. if (bar () != 1024 * 1023)
  80. abort ();
  81. s = 0;
  82. for (int i = 0; i < 1024; ++i)
  83. {
  84. if (b[i] != s)
  85. abort ();
  86. else
  87. b[i] = -1;
  88. s += 2 * i;
  89. }
  90. r = 0;
  91. baz (a, b);
  92. if (r != 1024 * 1023 / 2)
  93. abort ();
  94. s = 0;
  95. for (int i = 0; i < 1024; ++i)
  96. {
  97. if (b[i] != s)
  98. abort ();
  99. else
  100. b[i] = -25;
  101. s += i;
  102. }
  103. if (qux () != 1024 * 1023)
  104. abort ();
  105. s = 0;
  106. for (int i = 0; i < 1024; ++i)
  107. {
  108. if (b[i] != s)
  109. abort ();
  110. s += 2 * i;
  111. }
  112. return 0;
  113. }