123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662 |
- /* OpenACC Profiling Interface
- Copyright (C) 2019-2022 Free Software Foundation, Inc.
- Contributed by Mentor, a Siemens Business.
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- #define _GNU_SOURCE
- #include "libgomp.h"
- #include "oacc-int.h"
- #include "secure_getenv.h"
- #include "acc_prof.h"
- #include <assert.h>
- #ifdef HAVE_STRING_H
- # include <string.h>
- #endif
- #ifdef PLUGIN_SUPPORT
- # include <dlfcn.h>
- #endif
- #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
- /* Statically assert that the layout of the common fields in the
- 'acc_event_info' variants matches. */
- /* 'event_type' */
- STATIC_ASSERT (offsetof (acc_event_info, event_type)
- == offsetof (acc_event_info, data_event.event_type));
- STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
- == offsetof (acc_event_info, launch_event.event_type));
- STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
- == offsetof (acc_event_info, other_event.event_type));
- /* 'valid_bytes' */
- STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
- == offsetof (acc_event_info, launch_event.valid_bytes));
- STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
- == offsetof (acc_event_info, other_event.valid_bytes));
- /* 'parent_construct' */
- STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
- == offsetof (acc_event_info, launch_event.parent_construct));
- STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
- == offsetof (acc_event_info, other_event.parent_construct));
- /* 'implicit' */
- STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
- == offsetof (acc_event_info, launch_event.implicit));
- STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
- == offsetof (acc_event_info, other_event.implicit));
- /* 'tool_info' */
- STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
- == offsetof (acc_event_info, launch_event.tool_info));
- STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
- == offsetof (acc_event_info, other_event.tool_info));
- struct goacc_prof_callback_entry
- {
- acc_prof_callback cb;
- int ref;
- bool enabled;
- struct goacc_prof_callback_entry *next;
- };
- /* Use a separate flag to minimize run-time performance impact for the (very
- common) case that profiling is not enabled.
- Once enabled, we're not going to disable this anymore, anywhere. We
- probably could, by adding appropriate logic to 'acc_prof_register',
- 'acc_prof_unregister'. */
- bool goacc_prof_enabled = false;
- /* Global state for registered callbacks.
- 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
- static bool goacc_prof_callbacks_enabled[acc_ev_last];
- static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
- /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
- 'goacc_prof_callback_entries'. */
- static gomp_mutex_t goacc_prof_lock;
- void
- goacc_profiling_initialize (void)
- {
- gomp_mutex_init (&goacc_prof_lock);
- /* Initially, all callbacks for all events are enabled. */
- for (int i = 0; i < acc_ev_last; ++i)
- goacc_prof_callbacks_enabled[i] = true;
- #ifdef PLUGIN_SUPPORT
- char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
- while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
- {
- char *acc_proflibs_sep = strchr (acc_proflibs, ';');
- char *acc_proflib;
- if (acc_proflibs_sep == acc_proflibs)
- {
- /* Stray ';' separator: make sure we don't 'dlopen' the main
- program. */
- acc_proflib = NULL;
- }
- else
- {
- if (acc_proflibs_sep != NULL)
- {
- /* Single out the first library. */
- acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
- memcpy (acc_proflib, acc_proflibs,
- acc_proflibs_sep - acc_proflibs);
- acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
- }
- else
- {
- /* No ';' separator, so only one library. */
- acc_proflib = acc_proflibs;
- }
- gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
- void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
- if (dl_handle != NULL)
- {
- typeof (&acc_register_library) a_r_l
- = dlsym (dl_handle, "acc_register_library");
- if (a_r_l == NULL)
- goto dl_fail;
- gomp_debug (0, " %s: calling %s:acc_register_library\n",
- __FUNCTION__, acc_proflib);
- a_r_l (acc_prof_register, acc_prof_unregister,
- acc_prof_lookup);
- }
- else
- {
- dl_fail:
- gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
- acc_proflib, dlerror ());
- if (dl_handle != NULL)
- {
- int err = dlclose (dl_handle);
- dl_handle = NULL;
- if (err != 0)
- goto dl_fail;
- }
- }
- }
- if (acc_proflib != acc_proflibs)
- {
- free (acc_proflib);
- acc_proflibs = acc_proflibs_sep + 1;
- }
- else
- acc_proflibs = NULL;
- }
- #endif /* PLUGIN_SUPPORT */
- }
- void
- acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
- {
- gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
- __FUNCTION__, (int) ev, (void *) cb, (int) reg);
- /* For any events to be dispatched, the user first has to register a
- callback, which makes this here a good place for enabling the whole
- machinery. */
- if (!GOACC_PROF_ENABLED)
- __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
- enum
- {
- EVENT_KIND_BOGUS,
- EVENT_KIND_NORMAL,
- /* As end events invoke callbacks in the reverse order, we register these
- in the reverse order here. */
- EVENT_KIND_END,
- } event_kind = EVENT_KIND_BOGUS;
- switch (ev)
- {
- case acc_ev_none:
- case acc_ev_device_init_start:
- case acc_ev_device_shutdown_start:
- case acc_ev_runtime_shutdown:
- case acc_ev_create:
- case acc_ev_delete:
- case acc_ev_alloc:
- case acc_ev_free:
- case acc_ev_enter_data_start:
- case acc_ev_exit_data_start:
- case acc_ev_update_start:
- case acc_ev_compute_construct_start:
- case acc_ev_enqueue_launch_start:
- case acc_ev_enqueue_upload_start:
- case acc_ev_enqueue_download_start:
- case acc_ev_wait_start:
- event_kind = EVENT_KIND_NORMAL;
- break;
- case acc_ev_device_init_end:
- case acc_ev_device_shutdown_end:
- case acc_ev_enter_data_end:
- case acc_ev_exit_data_end:
- case acc_ev_update_end:
- case acc_ev_compute_construct_end:
- case acc_ev_enqueue_launch_end:
- case acc_ev_enqueue_upload_end:
- case acc_ev_enqueue_download_end:
- case acc_ev_wait_end:
- event_kind = EVENT_KIND_END;
- break;
- case acc_ev_last:
- break;
- }
- if (event_kind == EVENT_KIND_BOGUS)
- {
- /* Silently ignore. */
- gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
- return;
- }
- bool bogus = true;
- switch (reg)
- {
- case acc_reg:
- case acc_toggle:
- case acc_toggle_per_thread:
- bogus = false;
- break;
- }
- if (bogus)
- {
- /* Silently ignore. */
- gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
- return;
- }
- /* Special cases. */
- if (reg == acc_toggle)
- {
- if (cb == NULL)
- {
- gomp_debug (0, " globally enabling callbacks\n");
- gomp_mutex_lock (&goacc_prof_lock);
- /* For 'acc_ev_none', this acts as a global toggle. */
- goacc_prof_callbacks_enabled[ev] = true;
- gomp_mutex_unlock (&goacc_prof_lock);
- return;
- }
- else if (ev == acc_ev_none && cb != NULL)
- {
- gomp_debug (0, " ignoring request\n");
- return;
- }
- }
- else if (reg == acc_toggle_per_thread)
- {
- if (ev == acc_ev_none && cb == NULL)
- {
- gomp_debug (0, " thread: enabling callbacks\n");
- goacc_lazy_initialize ();
- struct goacc_thread *thr = goacc_thread ();
- thr->prof_callbacks_enabled = true;
- return;
- }
- /* Silently ignore. */
- gomp_debug (0, " ignoring bogus request\n");
- return;
- }
- gomp_mutex_lock (&goacc_prof_lock);
- struct goacc_prof_callback_entry *it, *it_p;
- it = goacc_prof_callback_entries[ev];
- it_p = NULL;
- while (it)
- {
- if (it->cb == cb)
- break;
- it_p = it;
- it = it->next;
- }
- switch (reg)
- {
- case acc_reg:
- /* If we already have this callback registered, just increment its
- reference count. */
- if (it != NULL)
- {
- it->ref++;
- gomp_debug (0, " already registered;"
- " incrementing reference count to: %d\n", it->ref);
- }
- else
- {
- struct goacc_prof_callback_entry *e
- = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
- e->cb = cb;
- e->ref = 1;
- e->enabled = true;
- bool prepend = (event_kind == EVENT_KIND_END);
- /* If we don't have any callback registered yet, also use the
- 'prepend' code path. */
- if (it_p == NULL)
- prepend = true;
- if (prepend)
- {
- gomp_debug (0, " prepending\n");
- e->next = goacc_prof_callback_entries[ev];
- goacc_prof_callback_entries[ev] = e;
- }
- else
- {
- gomp_debug (0, " appending\n");
- e->next = NULL;
- it_p->next = e;
- }
- }
- break;
- case acc_toggle:
- if (it == NULL)
- {
- gomp_debug (0, " ignoring request: is not registered\n");
- break;
- }
- else
- {
- gomp_debug (0, " enabling\n");
- it->enabled = true;
- }
- break;
- case acc_toggle_per_thread:
- __builtin_unreachable ();
- }
- gomp_mutex_unlock (&goacc_prof_lock);
- }
- void
- acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
- {
- gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
- __FUNCTION__, (int) ev, (void *) cb, (int) reg);
- /* If profiling is not enabled, there cannot be anything to unregister. */
- if (!GOACC_PROF_ENABLED)
- return;
- if (ev < acc_ev_none
- || ev >= acc_ev_last)
- {
- /* Silently ignore. */
- gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
- return;
- }
- bool bogus = true;
- switch (reg)
- {
- case acc_reg:
- case acc_toggle:
- case acc_toggle_per_thread:
- bogus = false;
- break;
- }
- if (bogus)
- {
- /* Silently ignore. */
- gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
- return;
- }
- /* Special cases. */
- if (reg == acc_toggle)
- {
- if (cb == NULL)
- {
- gomp_debug (0, " globally disabling callbacks\n");
- gomp_mutex_lock (&goacc_prof_lock);
- /* For 'acc_ev_none', this acts as a global toggle. */
- goacc_prof_callbacks_enabled[ev] = false;
- gomp_mutex_unlock (&goacc_prof_lock);
- return;
- }
- else if (ev == acc_ev_none && cb != NULL)
- {
- gomp_debug (0, " ignoring request\n");
- return;
- }
- }
- else if (reg == acc_toggle_per_thread)
- {
- if (ev == acc_ev_none && cb == NULL)
- {
- gomp_debug (0, " thread: disabling callbacks\n");
- goacc_lazy_initialize ();
- struct goacc_thread *thr = goacc_thread ();
- thr->prof_callbacks_enabled = false;
- return;
- }
- /* Silently ignore. */
- gomp_debug (0, " ignoring bogus request\n");
- return;
- }
- gomp_mutex_lock (&goacc_prof_lock);
- struct goacc_prof_callback_entry *it, *it_p;
- it = goacc_prof_callback_entries[ev];
- it_p = NULL;
- while (it)
- {
- if (it->cb == cb)
- break;
- it_p = it;
- it = it->next;
- }
- switch (reg)
- {
- case acc_reg:
- if (it == NULL)
- {
- /* Silently ignore. */
- gomp_debug (0, " ignoring bogus request: is not registered\n");
- break;
- }
- it->ref--;
- gomp_debug (0, " decrementing reference count to: %d\n", it->ref);
- if (it->ref == 0)
- {
- if (it_p == NULL)
- goacc_prof_callback_entries[ev] = it->next;
- else
- it_p->next = it->next;
- free (it);
- }
- break;
- case acc_toggle:
- if (it == NULL)
- {
- gomp_debug (0, " ignoring request: is not registered\n");
- break;
- }
- else
- {
- gomp_debug (0, " disabling\n");
- it->enabled = false;
- }
- break;
- case acc_toggle_per_thread:
- __builtin_unreachable ();
- }
- gomp_mutex_unlock (&goacc_prof_lock);
- }
- acc_query_fn
- acc_prof_lookup (const char *name)
- {
- gomp_debug (0, "%s (%s)\n",
- __FUNCTION__, name ?: "NULL");
- return NULL;
- }
- void
- acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
- acc_prof_lookup_func lookup)
- {
- gomp_fatal ("TODO");
- }
- /* Prepare to dispatch events? */
- bool
- _goacc_profiling_dispatch_p (bool check_not_nested_p)
- {
- gomp_debug (0, "%s\n", __FUNCTION__);
- bool ret;
- struct goacc_thread *thr = goacc_thread ();
- if (__builtin_expect (thr == NULL, false))
- {
- /* If we don't have any per-thread state yet, that means that per-thread
- callback dispatch has not been explicitly disabled (which only a call
- to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
- that would have allocated per-thread state via
- 'goacc_lazy_initialize'); initially, all callbacks for all events are
- enabled. */
- gomp_debug (0, " %s: don't have any per-thread state yet\n", __FUNCTION__);
- }
- else
- {
- if (check_not_nested_p)
- {
- /* No nesting. */
- assert (thr->prof_info == NULL);
- assert (thr->api_info == NULL);
- }
- if (__builtin_expect (!thr->prof_callbacks_enabled, true))
- {
- gomp_debug (0, " %s: disabled for this thread\n", __FUNCTION__);
- ret = false;
- goto out;
- }
- }
- gomp_mutex_lock (&goacc_prof_lock);
- /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
- if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
- {
- gomp_debug (0, " %s: disabled globally\n", __FUNCTION__);
- ret = false;
- goto out_unlock;
- }
- else
- ret = true;
- out_unlock:
- gomp_mutex_unlock (&goacc_prof_lock);
- out:
- return ret;
- }
- /* Set up to dispatch events? */
- bool
- _goacc_profiling_setup_p (struct goacc_thread *thr,
- acc_prof_info *prof_info, acc_api_info *api_info)
- {
- gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
- /* If we don't have any per-thread state yet, we can't register 'prof_info'
- and 'api_info'. */
- if (__builtin_expect (thr == NULL, false))
- {
- gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
- " the current call, construct, or directive\n");
- return false;
- }
- if (thr->prof_info != NULL)
- {
- /* Profiling has already been set up for an outer construct. In this
- case, we continue to use the existing information, and thus return
- 'false' here.
- This can happen, for example, for an 'enter data' directive, which
- sets up profiling, then calls into 'acc_copyin', which should not
- again set up profiling, should not overwrite the existing
- information. */
- return false;
- }
- thr->prof_info = prof_info;
- thr->api_info = api_info;
- /* Fill in some defaults. */
- prof_info->event_type = -1; /* Must be set later. */
- prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
- prof_info->version = _ACC_PROF_INFO_VERSION;
- if (thr->dev)
- {
- prof_info->device_type = acc_device_type (thr->dev->type);
- prof_info->device_number = thr->dev->target_id;
- }
- else
- {
- prof_info->device_type = -1;
- prof_info->device_number = -1;
- }
- prof_info->thread_id = -1;
- prof_info->async = acc_async_sync;
- prof_info->async_queue = prof_info->async;
- prof_info->src_file = NULL;
- prof_info->func_name = NULL;
- prof_info->line_no = -1;
- prof_info->end_line_no = -1;
- prof_info->func_line_no = -1;
- prof_info->func_end_line_no = -1;
- api_info->device_api = acc_device_api_none;
- api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
- api_info->device_type = prof_info->device_type;
- api_info->vendor = -1;
- api_info->device_handle = NULL;
- api_info->context_handle = NULL;
- api_info->async_handle = NULL;
- return true;
- }
- /* Dispatch events.
- This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
- 'GOACC_PROFILING_SETUP_P' returned a true result. */
- void
- goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
- acc_api_info *apt_info)
- {
- acc_event_t event_type = event_info->event_type;
- gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
- assert (event_type > acc_ev_none
- && event_type < acc_ev_last);
- gomp_mutex_lock (&goacc_prof_lock);
- if (!goacc_prof_callbacks_enabled[event_type])
- {
- gomp_debug (0, " disabled for this event type\n");
- goto out_unlock;
- }
- for (struct goacc_prof_callback_entry *e
- = goacc_prof_callback_entries[event_type];
- e != NULL;
- e = e->next)
- {
- if (!e->enabled)
- {
- gomp_debug (0, " disabled for callback %p\n", e->cb);
- continue;
- }
- gomp_debug (0, " calling callback %p\n", e->cb);
- e->cb (prof_info, event_info, apt_info);
- }
- out_unlock:
- gomp_mutex_unlock (&goacc_prof_lock);
- }
|