123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476 |
- /* OpenACC Runtime Library Definitions.
- Copyright (C) 2013-2022 Free Software Foundation, Inc.
- Contributed by Mentor Embedded.
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- #include <assert.h>
- #include <string.h>
- #include "openacc.h"
- #include "libgomp.h"
- #include "oacc-int.h"
- static struct goacc_thread *
- get_goacc_thread (void)
- {
- struct goacc_thread *thr = goacc_thread ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
- return thr;
- }
- static int
- validate_async_val (int async)
- {
- if (!async_valid_p (async))
- gomp_fatal ("invalid async-argument: %d", async);
- if (async == acc_async_sync)
- return -1;
- if (async == acc_async_noval)
- return 0;
- if (async >= 0)
- /* TODO: we reserve 0 for acc_async_noval before we can clarify the
- semantics of "default_async". */
- return 1 + async;
- else
- __builtin_unreachable ();
- }
- /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
- might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
- create the asyncqueue if it doesn't exist yet.
- Unless CREATE, this will not generate any OpenACC Profiling Interface
- events. */
- attribute_hidden struct goacc_asyncqueue *
- lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
- {
- async = validate_async_val (async);
- if (async < 0)
- return NULL;
- struct goacc_asyncqueue *ret_aq = NULL;
- struct gomp_device_descr *dev = thr->dev;
- gomp_mutex_lock (&dev->openacc.async.lock);
- if (!create
- && (async >= dev->openacc.async.nasyncqueue
- || !dev->openacc.async.asyncqueue[async]))
- goto end;
- if (async >= dev->openacc.async.nasyncqueue)
- {
- int diff = async + 1 - dev->openacc.async.nasyncqueue;
- dev->openacc.async.asyncqueue
- = gomp_realloc (dev->openacc.async.asyncqueue,
- sizeof (goacc_aq) * (async + 1));
- memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
- 0, sizeof (goacc_aq) * diff);
- dev->openacc.async.nasyncqueue = async + 1;
- }
- if (!dev->openacc.async.asyncqueue[async])
- {
- dev->openacc.async.asyncqueue[async]
- = dev->openacc.async.construct_func (dev->target_id);
- if (!dev->openacc.async.asyncqueue[async])
- {
- gomp_mutex_unlock (&dev->openacc.async.lock);
- gomp_fatal ("async %d creation failed", async);
- }
-
- /* Link new async queue into active list. */
- goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
- n->aq = dev->openacc.async.asyncqueue[async];
- n->next = dev->openacc.async.active;
- dev->openacc.async.active = n;
- }
- ret_aq = dev->openacc.async.asyncqueue[async];
- end:
- gomp_mutex_unlock (&dev->openacc.async.lock);
- return ret_aq;
- }
- /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
- might return NULL if no asyncqueue is to be used. Otherwise, create the
- asyncqueue if it doesn't exist yet. */
- attribute_hidden struct goacc_asyncqueue *
- get_goacc_asyncqueue (int async)
- {
- struct goacc_thread *thr = get_goacc_thread ();
- return lookup_goacc_asyncqueue (thr, true, async);
- }
- int
- acc_async_test (int async)
- {
- struct goacc_thread *thr = goacc_thread ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
- goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
- if (!aq)
- return 1;
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- if (profiling_p)
- {
- prof_info.async = async;
- prof_info.async_queue = prof_info.async;
- }
- int res = thr->dev->openacc.async.test_func (aq);
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- return res;
- }
- int
- acc_async_test_all (void)
- {
- struct goacc_thread *thr = get_goacc_thread ();
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- int ret = 1;
- gomp_mutex_lock (&thr->dev->openacc.async.lock);
- for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
- if (!thr->dev->openacc.async.test_func (l->aq))
- {
- ret = 0;
- break;
- }
- gomp_mutex_unlock (&thr->dev->openacc.async.lock);
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- return ret;
- }
- void
- acc_wait (int async)
- {
- struct goacc_thread *thr = get_goacc_thread ();
- goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
- if (!aq)
- return;
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- if (profiling_p)
- {
- prof_info.async = async;
- prof_info.async_queue = prof_info.async;
- }
- if (!thr->dev->openacc.async.synchronize_func (aq))
- gomp_fatal ("wait on %d failed", async);
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- }
/* acc_async_wait is the OpenACC 1.0 compatibility name for acc_wait.  It is
   provided through strong_alias when HAVE_ATTRIBUTE_ALIAS is defined, and as
   a plain forwarding function otherwise.  */

#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
- void
- acc_wait_async (int async1, int async2)
- {
- struct goacc_thread *thr = get_goacc_thread ();
- goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
- /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
- we'll always be synchronous anyways? */
- if (!aq1)
- return;
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- if (profiling_p)
- {
- prof_info.async = async2;
- prof_info.async_queue = prof_info.async;
- }
- goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
- /* An async queue is always synchronized with itself. */
- if (aq1 == aq2)
- goto out_prof;
- if (aq2)
- {
- if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
- gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
- }
- else
- {
- /* TODO: Local thread synchronization.
- Necessary for the "async2 == acc_async_sync" case, or can just skip? */
- if (!thr->dev->openacc.async.synchronize_func (aq1))
- gomp_fatal ("wait on %d failed", async1);
- }
- out_prof:
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- }
- void
- acc_wait_all (void)
- {
- struct goacc_thread *thr = goacc_thread ();
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- bool ret = true;
- gomp_mutex_lock (&thr->dev->openacc.async.lock);
- for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
- ret &= thr->dev->openacc.async.synchronize_func (l->aq);
- gomp_mutex_unlock (&thr->dev->openacc.async.lock);
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- if (!ret)
- gomp_fatal ("wait all failed");
- }
/* acc_async_wait_all is the OpenACC 1.0 compatibility name for acc_wait_all.
   It is provided through strong_alias when HAVE_ATTRIBUTE_ALIAS is defined,
   and as a plain forwarding function otherwise.  */

#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
- void
- acc_wait_all_async (int async)
- {
- struct goacc_thread *thr = get_goacc_thread ();
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- if (profiling_p)
- {
- prof_info.async = async;
- prof_info.async_queue = prof_info.async;
- }
- goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
- bool ret = true;
- gomp_mutex_lock (&thr->dev->openacc.async.lock);
- for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
- {
- if (waiting_queue)
- ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
- else
- /* TODO: Local thread synchronization.
- Necessary for the "async2 == acc_async_sync" case, or can just skip? */
- ret &= thr->dev->openacc.async.synchronize_func (l->aq);
- }
- gomp_mutex_unlock (&thr->dev->openacc.async.lock);
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- if (!ret)
- gomp_fatal ("wait all async(%d) failed", async);
- }
- void
- GOACC_wait (int async, int num_waits, ...)
- {
- goacc_lazy_initialize ();
- struct goacc_thread *thr = goacc_thread ();
- /* No nesting. */
- assert (thr->prof_info == NULL);
- assert (thr->api_info == NULL);
- acc_prof_info prof_info;
- acc_api_info api_info;
- bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- if (profiling_p)
- {
- prof_info.async = async;
- prof_info.async_queue = prof_info.async;
- }
- if (num_waits)
- {
- va_list ap;
- va_start (ap, num_waits);
- goacc_wait (async, num_waits, &ap);
- va_end (ap);
- }
- else if (async == acc_async_sync)
- acc_wait_all ();
- else
- acc_wait_all_async (async);
- if (profiling_p)
- {
- thr->prof_info = NULL;
- thr->api_info = NULL;
- }
- }
- attribute_hidden void
- goacc_wait (int async, int num_waits, va_list *ap)
- {
- while (num_waits--)
- {
- int qid = va_arg (*ap, int);
- /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
- if (qid == acc_async_noval)
- {
- if (async == acc_async_sync)
- acc_wait_all ();
- else
- acc_wait_all_async (async);
- break;
- }
- if (acc_async_test (qid))
- continue;
- if (async == acc_async_sync)
- acc_wait (qid);
- else if (qid == async)
- /* If we're waiting on the same asynchronous queue as we're
- launching on, the queue itself will order work as
- required, so there's no need to wait explicitly. */
- ;
- else
- acc_wait_async (qid, async);
- }
- }
- attribute_hidden void
- goacc_async_free (struct gomp_device_descr *devicep,
- struct goacc_asyncqueue *aq, void *ptr)
- {
- if (!aq)
- free (ptr);
- else
- devicep->openacc.async.queue_callback_func (aq, free, ptr);
- }
- /* This function initializes the asyncqueues for the device specified by
- DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
- return. */
- attribute_hidden void
- goacc_init_asyncqueues (struct gomp_device_descr *devicep)
- {
- devicep->openacc.async.nasyncqueue = 0;
- devicep->openacc.async.asyncqueue = NULL;
- devicep->openacc.async.active = NULL;
- gomp_mutex_init (&devicep->openacc.async.lock);
- }
- /* This function finalizes the asyncqueues for the device specified by DEVICEP.
- TODO DEVICEP must be locked on entry, and remains locked on return. */
- attribute_hidden bool
- goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
- {
- bool ret = true;
- gomp_mutex_lock (&devicep->openacc.async.lock);
- if (devicep->openacc.async.nasyncqueue > 0)
- {
- goacc_aq_list next;
- for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
- {
- ret &= devicep->openacc.async.destruct_func (l->aq);
- next = l->next;
- free (l);
- }
- free (devicep->openacc.async.asyncqueue);
- devicep->openacc.async.nasyncqueue = 0;
- devicep->openacc.async.asyncqueue = NULL;
- devicep->openacc.async.active = NULL;
- }
- gomp_mutex_unlock (&devicep->openacc.async.lock);
- gomp_mutex_destroy (&devicep->openacc.async.lock);
- return ret;
- }
|