scan-16.c 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /* { dg-require-effective-target size32plus } */
  2. /* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
  3. /* { dg-additional-options "-msse2" { target sse2_runtime } } */
  4. /* { dg-additional-options "-mavx" { target avx_runtime } } */
  5. /* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */
  /* File-scope test state.  `r` is the scan accumulator that foo() and baz()
     reduce into; main() fills `a` with the input values and checks the
     exclusive prefix sums that the scan loops store into `b`.  abort() is
     declared by hand, so no header is included for it.  */
  6. extern void abort (void);
  7. int r, a[1024], b[1024];
  /* User-defined reduction named `foo` over int: partial results are combined
     with `+=` and every private copy starts at 0.  All four scan loops below
     use this identifier in their `reduction (inscan, foo:...)` clauses.  */
  8. #pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
  /* Exclusive prefix sum of a[0..1023] into b[0..1023], accumulated in the
     file-scope variable `r` via the user-defined inscan reduction `foo`.

     The function contains only an `omp for simd` worksharing loop and no
     parallel region of its own; main() calls it from inside a
     `#pragma omp parallel`.  The parameters `a` and `b` shadow the globals of
     the same name.  On return `r` has grown by the sum of all a[i]; it is not
     reset here.  The noipa attribute is GCC's request to keep the function
     out of interprocedural optimization (presumably so the loop is compiled
     and vectorized in isolation -- see the dg-final directive).  */
  9. __attribute__((noipa)) void
  10. foo (int *a, int *b)
  11. {
  12. #pragma omp for simd reduction (inscan, foo:r)
  13. for (int i = 0; i < 1024; i++)
  14. {
  /* Statement before the scan directive: store the running total that does
     not yet include a[i].  */
  15. b[i] = r;
  16. #pragma omp scan exclusive(r)
  /* Statement after the scan directive: fold a[i] into the running total.  */
  17. r += a[i];
  18. }
  19. }
  /* Exclusive prefix sum of the doubled inputs 2 * a[i], written to the
     global b[], using a local accumulator `s` that starts at 0.

     Opens its own parallel region and runs the loop as a separate
     `for simd` construct with a `simdlen (1)` clause.
     NOTE(review): simdlen (1) presumably makes this the non-vectorized
     variant of the test; confirm against the expected "vectorized" dump count.

     Returns the final value of `s`, i.e. the sum of 2 * a[i] over all 1024
     elements.  */
  20. __attribute__((noipa)) int
  21. bar (void)
  22. {
  23. int s = 0;
  24. #pragma omp parallel
  25. #pragma omp for simd simdlen (1) reduction (inscan, foo:s)
  26. for (int i = 0; i < 1024; i++)
  27. {
  /* Before the scan directive: the total excluding the current element.  */
  28. b[i] = s;
  29. #pragma omp scan exclusive(s)
  /* After the scan directive: add the doubled current element.  */
  30. s += 2 * a[i];
  31. }
  32. return s;
  33. }
  /* Same computation as foo() -- exclusive prefix sum of a[] into b[],
     accumulated in the file-scope `r` -- but written as a single combined
     `parallel for simd` construct whose simd part is switched off by the
     constant clause `if (simd: 0)`.

     The parameters `a` and `b` shadow the globals of the same name.  `r` is
     not initialized here; main() sets it to 0 before the call.  */
  34. __attribute__((noipa)) void
  35. baz (int *a, int *b)
  36. {
  37. #pragma omp parallel for simd if (simd: 0) reduction (inscan, foo:r)
  38. for (int i = 0; i < 1024; i++)
  39. {
  /* Before the scan directive: the total excluding a[i].  */
  40. b[i] = r;
  41. #pragma omp scan exclusive(r)
  /* After the scan directive: fold a[i] into the running total.  */
  42. r += a[i];
  43. }
  44. }
  /* Same computation as bar() -- exclusive prefix sum of 2 * a[i] into the
     global b[], with a local accumulator `s` starting at 0 -- but expressed
     as one combined `parallel for simd` construct with no simdlen or if
     clause.

     Returns the final value of `s`, i.e. the sum of 2 * a[i] over all 1024
     elements.  */
  45. __attribute__((noipa)) int
  46. qux (void)
  47. {
  48. int s = 0;
  49. #pragma omp parallel for simd reduction (inscan, foo:s)
  50. for (int i = 0; i < 1024; i++)
  51. {
  /* Before the scan directive: the total excluding the current element.  */
  52. b[i] = s;
  53. #pragma omp scan exclusive(s)
  /* After the scan directive: add the doubled current element.  */
  54. s += 2 * a[i];
  55. }
  56. return s;
  57. }
  /* Test driver: runs the four scan variants in turn and aborts on the first
     mismatch.  After each variant it checks the final total and then compares
     every b[i] against a serially recomputed exclusive prefix sum held in
     `s`.  Returns 0 when every check passes.  */
  58. int
  59. main ()
  60. {
  61. int s = 0;
  /* Inputs are a[i] = i; b[] starts at -1, a value no prefix sum can take,
     so an element that was never written is detected.  */
  62. for (int i = 0; i < 1024; ++i)
  63. {
  64. a[i] = i;
  65. b[i] = -1;
  /* Empty asm that claims to read and modify `i`.
     NOTE(review): presumably an optimization barrier so this setup loop is
     not itself vectorized and counted in the vect dump -- confirm.  */
  66. asm ("" : "+g" (i));
  67. }
  /* Phase 1: foo() has no parallel region of its own, so one is supplied
     here.  `r` is a zero-initialized global, so afterwards it must equal
     0 + 1 + ... + 1023 = 1024 * 1023 / 2.  */
  68. #pragma omp parallel
  69. foo (a, b);
  70. if (r != 1024 * 1023 / 2)
  71. abort ();
  /* b[i] must be the sum of all inputs before index i.  Each verified element
     is overwritten with the sentinel 25 so the next phase has to store it
     again.  */
  72. for (int i = 0; i < 1024; ++i)
  73. {
  74. if (b[i] != s)
  75. abort ();
  76. else
  77. b[i] = 25;
  78. s += i;
  79. }
  /* Phase 2: bar() scans the doubled inputs, so its total is twice the
     phase 1 total.  */
  80. if (bar () != 1024 * 1023)
  81. abort ();
  82. s = 0;
  /* Verify against the running sum of 2 * i; reset each element to -1.  */
  83. for (int i = 0; i < 1024; ++i)
  84. {
  85. if (b[i] != s)
  86. abort ();
  87. else
  88. b[i] = -1;
  89. s += 2 * i;
  90. }
  /* Phase 3: baz() accumulates into the global `r`, which still holds the
     phase 1 total, so it is cleared first.  */
  91. r = 0;
  92. baz (a, b);
  93. if (r != 1024 * 1023 / 2)
  94. abort ();
  95. s = 0;
  /* Verify against the running sum of i; reset each element to -25.  */
  96. for (int i = 0; i < 1024; ++i)
  97. {
  98. if (b[i] != s)
  99. abort ();
  100. else
  101. b[i] = -25;
  102. s += i;
  103. }
  /* Phase 4: qux() scans the doubled inputs again.  */
  104. if (qux () != 1024 * 1023)
  105. abort ();
  106. s = 0;
  /* Final verification; no sentinel reset is needed after the last phase.  */
  107. for (int i = 0; i < 1024; ++i)
  108. {
  109. if (b[i] != s)
  110. abort ();
  111. s += 2 * i;
  112. }
  113. return 0;
  114. }
  114. }