/* simd-3.c (3.0 KB) */

  1. /* { dg-do run } */
  2. /* { dg-additional-options "-msse2" { target sse2_runtime } } */
  3. /* { dg-additional-options "-mavx" { target avx_runtime } } */
  4. extern void abort ();
  5. int a[1024] __attribute__((aligned (32))) = { 1 };
  6. int b[1024] __attribute__((aligned (32))) = { 1 };
  7. unsigned char c[1024] __attribute__((aligned (32))) = { 1 };
  8. int k, m;
  9. __UINTPTR_TYPE__ u, u2, u3;
  10. __attribute__((noinline, noclone)) int
  11. foo (int *p)
  12. {
  13. int i, s = 0, s2 = 0, t, t2;
  14. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
  15. lastprivate (t2)
  16. for (i = 0; i < 512; i++)
  17. {
  18. a[i] *= p[i];
  19. t2 = k + p[i];
  20. k += m + 1;
  21. s += p[i] + k;
  22. c[i]++;
  23. }
  24. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
  25. lastprivate (t, u, u2, u3)
  26. for (i = 512; i < 1024; i++)
  27. {
  28. a[i] *= p[i];
  29. k += m + 1;
  30. t = k + p[i];
  31. u = (__UINTPTR_TYPE__) &k;
  32. u2 = (__UINTPTR_TYPE__) &s2;
  33. u3 = (__UINTPTR_TYPE__) &t;
  34. s2 += t;
  35. c[i]++;
  36. }
  37. return s + s2 + t + t2;
  38. }
  39. __attribute__((noinline, noclone)) long int
  40. bar (int *p, long int n, long int o)
  41. {
  42. long int i, s = 0, s2 = 0, t, t2;
  43. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
  44. lastprivate (t2)
  45. for (i = 0; i < n; i++)
  46. {
  47. a[i] *= p[i];
  48. t2 = k + p[i];
  49. k += m + 1;
  50. s += p[i] + k;
  51. c[i]++;
  52. }
  53. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
  54. lastprivate (t, u, u2, u3)
  55. for (i = n; i < o; i++)
  56. {
  57. a[i] *= p[i];
  58. k += m + 1;
  59. t = k + p[i];
  60. u = (__UINTPTR_TYPE__) &k;
  61. u2 = (__UINTPTR_TYPE__) &s2;
  62. u3 = (__UINTPTR_TYPE__) &t;
  63. s2 += t;
  64. c[i]++;
  65. }
  66. return s + s2 + t + t2;
  67. }
  68. int
  69. main ()
  70. {
  71. #if __SIZEOF_INT__ >= 4
  72. int i;
  73. k = 4;
  74. m = 2;
  75. for (i = 0; i < 1024; i++)
  76. {
  77. a[i] = i - 512;
  78. b[i] = (i - 51) % 39;
  79. c[i] = (unsigned char) i;
  80. }
  81. int s = foo (b);
  82. for (i = 0; i < 1024; i++)
  83. {
  84. if (b[i] != (i - 51) % 39
  85. || a[i] != (i - 512) * b[i]
  86. || c[i] != (unsigned char) (i + 1))
  87. abort ();
  88. a[i] = i - 512;
  89. }
  90. if (k != 4 + 3 * 1024
  91. || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
  92. abort ();
  93. k = 4;
  94. s = bar (b, 512, 1024);
  95. for (i = 0; i < 1024; i++)
  96. {
  97. if (b[i] != (i - 51) % 39
  98. || a[i] != (i - 512) * b[i]
  99. || c[i] != (unsigned char) (i + 2))
  100. abort ();
  101. a[i] = i - 512;
  102. }
  103. if (k != 4 + 3 * 1024
  104. || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
  105. abort ();
  106. k = 4;
  107. s = bar (b, 511, 1021);
  108. for (i = 0; i < 1021; i++)
  109. {
  110. if (b[i] != (i - 51) % 39
  111. || a[i] != (i - 512) * b[i]
  112. || c[i] != (unsigned char) (i + 3))
  113. abort ();
  114. a[i] = i - 512;
  115. }
  116. for (i = 1021; i < 1024; i++)
  117. if (b[i] != (i - 51) % 39
  118. || a[i] != i - 512
  119. || c[i] != (unsigned char) (i + 2))
  120. abort ();
  121. if (k != 4 + 3 * 1021
  122. || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020]))
  123. abort ();
  124. #endif
  125. return 0;
  126. }