123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128 |
- /*
- Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include "offload_engine.h"
- #include <signal.h>
- #include <errno.h>
- #include <sys/stat.h>
- #include <sys/types.h>
- #include <algorithm>
- #include <vector>
- #include "offload_host.h"
- #include "offload_table.h"
- #include "offload_iterator.h"
- #if defined(HOST_WINNT)
- #define PATH_SEPARATOR ";"
- #else
- #define PATH_SEPARATOR ":"
- #endif
- // Static members of Stream class must be described somewhere.
- // This members describe the list of all streams defined in programm
- // via call to _Offload_stream_create.
- uint64_t Stream::m_streams_count = 0;
- StreamMap Stream::all_streams;
- mutex_t Stream::m_stream_lock;
- char* mic_library_path = 0;
- const char* Engine::m_func_names[Engine::c_funcs_total] =
- {
- "server_compute",
- #ifdef MYO_SUPPORT
- "server_myoinit",
- "server_myofini",
- #endif // MYO_SUPPORT
- "server_init",
- "server_var_table_size",
- "server_var_table_copy",
- "server_set_stream_affinity"
- };
- // Symbolic representation of system signals. Fix for CQ233593
- const char* Engine::c_signal_names[Engine::c_signal_max] =
- {
- "Unknown SIGNAL",
- "SIGHUP", /* 1, Hangup (POSIX). */
- "SIGINT", /* 2, Interrupt (ANSI). */
- "SIGQUIT", /* 3, Quit (POSIX). */
- "SIGILL", /* 4, Illegal instruction (ANSI). */
- "SIGTRAP", /* 5, Trace trap (POSIX). */
- "SIGABRT", /* 6, Abort (ANSI). */
- "SIGBUS", /* 7, BUS error (4.2 BSD). */
- "SIGFPE", /* 8, Floating-point exception (ANSI). */
- "SIGKILL", /* 9, Kill, unblockable (POSIX). */
- "SIGUSR1", /* 10, User-defined signal 1 (POSIX). */
- "SIGSEGV", /* 11, Segmentation violation (ANSI). */
- "SIGUSR2", /* 12, User-defined signal 2 (POSIX). */
- "SIGPIPE", /* 13, Broken pipe (POSIX). */
- "SIGALRM", /* 14, Alarm clock (POSIX). */
- "SIGTERM", /* 15, Termination (ANSI). */
- "SIGSTKFLT", /* 16, Stack fault. */
- "SIGCHLD", /* 17, Child status has changed (POSIX). */
- "SIGCONT", /* 18, Continue (POSIX). */
- "SIGSTOP", /* 19, Stop, unblockable (POSIX). */
- "SIGTSTP", /* 20, Keyboard stop (POSIX). */
- "SIGTTIN", /* 21, Background read from tty (POSIX). */
- "SIGTTOU", /* 22, Background write to tty (POSIX). */
- "SIGURG", /* 23, Urgent condition on socket (4.2 BSD). */
- "SIGXCPU", /* 24, CPU limit exceeded (4.2 BSD). */
- "SIGXFSZ", /* 25, File size limit exceeded (4.2 BSD). */
- "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD). */
- "SIGPROF", /* 27, Profiling alarm clock (4.2 BSD). */
- "SIGWINCH", /* 28, Window size change (4.3 BSD, Sun). */
- "SIGIO", /* 29, I/O now possible (4.2 BSD). */
- "SIGPWR", /* 30, Power failure restart (System V). */
- "SIGSYS" /* 31, Bad system call. */
- };
- void Engine::init(void)
- {
- if (!m_ready) {
- mutex_locker_t locker(m_lock);
- if (!m_ready) {
- // start process if not done yet
- if (m_process == 0) {
- init_process();
- }
- // load penging images
- load_libraries();
- // and (re)build pointer table
- init_ptr_data();
- // it is ready now
- m_ready = true;
-
- // Inform the debugger
- if (__dbg_is_attached) {
- __dbg_target_so_loaded();
- }
- }
- }
- }
- void Engine::print_stream_cpu_list(const char * str)
- {
- int count = 0;
- char buffer[1024];
- CpuEl* cpu_el = m_cpu_head;
-
- OFFLOAD_DEBUG_TRACE(3,
- "%s : cpu list as Index(Count) for the streams is :\n", str);
- buffer[0] = 0;
- for (int i = 0; i < m_num_threads; i++) {
- cpu_el = m_cpus + i;
- if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) {
- count++;
- sprintf(buffer + strlen(buffer), "%d(%d) ", CPU_INDEX(cpu_el), cpu_el->count);
- if (count % 20 == 0) {
- OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer);
- buffer[0] = 0;
- }
- }
- }
- if (count % 20 != 0) {
- OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer);
- }
- }
- void Engine::init_process(void)
- {
- COIENGINE engine;
- COIRESULT res;
- const char **environ;
- char buf[4096]; // For exe path name
- char* mic_device_main = 0;
- // create environment for the target process
- environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
- if (environ != 0) {
- for (const char **p = environ; *p != 0; p++) {
- OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
- }
- }
- // Create execution context in the specified device
- OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
- m_physical_index);
- res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine);
- check_result(res, c_get_engine_handle, m_index, res);
- // Get engine info on threads and cores.
- // The values of core number and thread number will be used later at stream
- // creation by call to _Offload_stream_create(device,number_of_cpus).
- COI_ENGINE_INFO engine_info;
- res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info);
- check_result(res, c_get_engine_info, m_index, res);
- if (mic_library_path == 0 ) {
- if (engine_info.ISA == COI_DEVICE_KNC) {
- mic_library_path = knc_library_path;
- }
- else if (engine_info.ISA == COI_DEVICE_KNL) {
- mic_library_path = knl_library_path;
- }
- else {
- LIBOFFLOAD_ERROR(c_unknown_mic_device_type);
- }
- }
- // m_cpus is the list of all available threads.
- // At the begining all threads made available through OFFLOAD_DEVICES
- // or all threads existed at the engine if OFFLOAD_DEVICES isn't set.
- // m_cpu_head points to the head of the m_cpus list.
- // m_cpus is ordered by number of streams using the thread.
- // m_cpu_head points to the least used thread.
- // After creating and destroying a stream the m_cpus list must be fixed
- // to be ordered.
- m_cpus = (CpuEl*)malloc(engine_info.NumThreads * sizeof(CpuEl));
- if (m_cpus == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- memset(m_cpus, 0, engine_info.NumThreads * sizeof(CpuEl));
- CpuEl* prev_cpu = NULL;
- for (int i = 0; i < engine_info.NumThreads; i++) {
- if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) {
- if (prev_cpu) {
- prev_cpu->next = m_cpus + i;
- }
- else {
- m_cpu_head = m_cpus + i;
- }
- m_cpus[i].prev = prev_cpu;
- m_cpus[i].count = 0;
- prev_cpu = m_cpus + i;
- }
- }
- // The following values will be used at pipeline creation for streams
- m_num_cores = engine_info.NumCores;
- m_num_threads = engine_info.NumThreads;
- print_stream_cpu_list("init_process");
- // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2
- // Only the value 2 is supported in 16.0
- if (mic_dma_channel_count == 2) {
- if (COI::ProcessConfigureDMA) {
- // Set DMA channels using COI API
- COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE);
- }
- else {
- // Set environment variable COI_DMA_CHANNEL_COUNT
- // use putenv instead of setenv as Windows has no setenv.
- // Note: putenv requires its argument can't be freed or modified.
- // So no free after call to putenv or elsewhere.
- char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2");
- if (env_var == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- putenv(env_var);
- }
- }
- // Target executable is not available then use compiler provided offload_main
- if (__target_exe == 0) {
- // find target executable to be used if main application is not an
- // offload build application.
- const char *base_name = "offload_main";
- if (mic_library_path != 0) {
- char *buf = strdup(mic_library_path);
- if (buf == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- char *try_name = (char*) alloca(strlen(mic_library_path) +
- strlen(base_name) + 2);
- char *dir, *ptr;
- for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0;
- dir = strtok_r(0, PATH_SEPARATOR, &ptr)) {
- // compose a full path
- sprintf(try_name, "%s/%s", dir, base_name);
- // check if such file exists
- struct stat st;
- if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) {
- mic_device_main = strdup(try_name);
- if (mic_device_main == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- break;
- }
- }
- free(buf);
- }
- if (mic_device_main == 0) {
- LIBOFFLOAD_ERROR(c_report_no_target_exe, "offload_main");
- exit(1);
- }
- OFFLOAD_DEBUG_TRACE(2,
- "Loading target executable %s\n",mic_device_main);
- res = COI::ProcessCreateFromFile(
- engine, // in_Engine
- mic_device_main, // in_pBinaryName
- 0, // in_Argc
- 0, // in_ppArgv
- environ == 0, // in_DupEnv
- environ, // in_ppAdditionalEnv
- mic_proxy_io, // in_ProxyActive
- mic_proxy_fs_root, // in_ProxyfsRoot
- mic_buffer_size, // in_BufferSpace
- mic_library_path, // in_LibrarySearchPath
- &m_process // out_pProcess
- );
- }
- else {
- // Target executable should be available by the time when we
- // attempt to initialize the device
- // Need the full path of the FAT exe for VTUNE
- {
- #ifndef TARGET_WINNT
- ssize_t len = readlink("/proc/self/exe", buf,1000);
- #else
- int len = GetModuleFileName(NULL, buf,1000);
- #endif // TARGET_WINNT
- if (len == -1) {
- LIBOFFLOAD_ERROR(c_report_no_host_exe);
- exit(1);
- }
- else if (len > 999) {
- LIBOFFLOAD_ERROR(c_report_path_buff_overflow);
- exit(1);
- }
- buf[len] = '\0';
- }
- OFFLOAD_DEBUG_TRACE(2,
- "Loading target executable \"%s\" from %p, size %lld, host file %s\n",
- __target_exe->name, __target_exe->data, __target_exe->size,
- buf);
- res = COI::ProcessCreateFromMemory(
- engine, // in_Engine
- __target_exe->name, // in_pBinaryName
- __target_exe->data, // in_pBinaryBuffer
- __target_exe->size, // in_BinaryBufferLength,
- 0, // in_Argc
- 0, // in_ppArgv
- environ == 0, // in_DupEnv
- environ, // in_ppAdditionalEnv
- mic_proxy_io, // in_ProxyActive
- mic_proxy_fs_root, // in_ProxyfsRoot
- mic_buffer_size, // in_BufferSpace
- mic_library_path, // in_LibrarySearchPath
- buf, // in_FileOfOrigin
- -1, // in_FileOfOriginOffset use -1 to indicate to
- // COI that is is a FAT binary
- &m_process // out_pProcess
- );
- }
- check_result(res, c_process_create, m_index, res);
- if ((mic_4k_buffer_size != 0) || (mic_2m_buffer_size !=0)) {
- // available only in MPSS 4.2 and greater
- if (COI::ProcessSetCacheSize != 0 ) {
- int flags;
- // Need compiler to use MPSS 3.2 or greater to get these
- // definition so currently hardcoding it
- // COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC;
- flags = 0x00020002;
- res = COI::ProcessSetCacheSize(
- m_process, // in_Process
- mic_2m_buffer_size, // in_HugePagePoolSize
- flags, // inHugeFlags
- mic_4k_buffer_size, // in_SmallPagePoolSize
- flags, // inSmallFlags
- 0, // in_NumDependencies
- 0, // in_pDependencies
- 0 // out_PCompletion
- );
- OFFLOAD_DEBUG_TRACE(2,
- "Reserve target buffers 4K pages = %d 2M pages = %d\n",
- mic_4k_buffer_size, mic_2m_buffer_size);
- check_result(res, c_process_set_cache_size, m_index, res);
- }
- else {
- OFFLOAD_DEBUG_TRACE(2,
- "Reserve target buffers not supported in current MPSS\n");
- }
- }
- // get function handles
- res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
- m_func_names, m_funcs);
- check_result(res, c_process_get_func_handles, m_index, res);
- // initialize device side
- pid_t pid = init_device();
- // For IDB
- if (__dbg_is_attached) {
- // TODO: we have in-memory executable now.
- // Check with IDB team what should we provide them now?
- if (__target_exe == 0) {
- strcpy(__dbg_target_exe_name, "offload_main");
- }
- else {
- if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
- strcpy(__dbg_target_exe_name, __target_exe->name);
- }
- }
- __dbg_target_so_pid = pid;
- __dbg_target_id = m_physical_index;
- // The call to __dbg_target_so_loaded() is moved
- // to Engine:init so all the libraries are loaded before
- // informing debugger so debugger can access them.
- // __dbg_target_so_loaded();
- }
- }
- void Engine::fini_process(bool verbose)
- {
- if (m_process != 0) {
- uint32_t sig;
- int8_t ret;
- // destroy target process
- OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
- m_index);
- COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
- m_process = 0;
- if (res == COI_SUCCESS) {
- OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
- sig, ret);
- if (verbose) {
- if (sig != 0) {
- LIBOFFLOAD_ERROR(
- c_mic_process_exit_sig, m_index, sig,
- c_signal_names[sig >= c_signal_max ? 0 : sig]);
- }
- else {
- LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
- }
- }
- // for idb
- if (__dbg_is_attached) {
- __dbg_target_so_unloaded();
- }
- }
- else {
- if (verbose) {
- LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
- }
- }
- }
- }
- void Engine::load_libraries()
- {
- // load libraries collected so far
- for (TargetImageList::iterator it = m_images.begin();
- it != m_images.end(); it++) {
- OFFLOAD_DEBUG_TRACE(2,
- "Loading library \"%s\" from %p, size %llu, host file %s\n",
- it->name, it->data, it->size, it->origin);
- // load library to the device
- COILIBRARY lib;
- COIRESULT res;
- res = COI::ProcessLoadLibraryFromMemory(m_process,
- it->data,
- it->size,
- it->name,
- mic_library_path,
- it->origin,
- (it->origin) ? -1 : 0,
- COI_LOADLIBRARY_V1_FLAGS,
- &lib);
- m_dyn_libs.push_front(DynLib(it->name, it->data, lib));
- if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
- check_result(res, c_load_library, it->origin, m_index, res);
- }
- }
- m_images.clear();
- }
- void Engine::unload_library(const void *data, const char *name)
- {
- if (m_process == 0) {
- return;
- }
- for (DynLibList::iterator it = m_dyn_libs.begin();
- it != m_dyn_libs.end(); it++) {
- if (it->data == data) {
- COIRESULT res;
- OFFLOAD_DEBUG_TRACE(2,
- "Unloading library \"%s\"\n",name);
- res = COI::ProcessUnloadLibrary(m_process,it->lib);
- m_dyn_libs.erase(it);
- if (res != COI_SUCCESS) {
- check_result(res, c_unload_library, m_index, res);
- }
- return;
- }
- }
- }
- static bool target_entry_cmp(
- const VarList::BufEntry &l,
- const VarList::BufEntry &r
- )
- {
- const char *l_name = reinterpret_cast<const char*>(l.name);
- const char *r_name = reinterpret_cast<const char*>(r.name);
- return strcmp(l_name, r_name) < 0;
- }
- static bool host_entry_cmp(
- const VarTable::Entry *l,
- const VarTable::Entry *r
- )
- {
- return strcmp(l->name, r->name) < 0;
- }
- void Engine::init_ptr_data(void)
- {
- COIRESULT res;
- COIEVENT event;
- // Prepare table of host entries
- std::vector<const VarTable::Entry*> host_table(
- Iterator(__offload_vars.get_head()),
- Iterator());
- // no need to do anything further is host table is empty
- if (host_table.size() <= 0) {
- return;
- }
- // Get var table entries from the target.
- // First we need to get size for the buffer to copy data
- struct {
- int64_t nelems;
- int64_t length;
- } params;
- res = COI::PipelineRunFunction(get_pipeline(),
- m_funcs[c_func_var_table_size],
- 0, 0, 0,
- 0, 0,
- 0, 0,
- ¶ms, sizeof(params),
- &event);
- check_result(res, c_pipeline_run_func, m_index, res);
- res = COI::EventWait(1, &event, -1, 1, 0, 0);
- check_result(res, c_event_wait, res);
- if (params.length == 0) {
- return;
- }
- // create buffer for target entries and copy data to host
- COIBUFFER buffer;
- res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
- &m_process, &buffer);
- check_result(res, c_buf_create, m_index, res);
- COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
- res = COI::PipelineRunFunction(get_pipeline(),
- m_funcs[c_func_var_table_copy],
- 1, &buffer, &flags,
- 0, 0,
- ¶ms.nelems, sizeof(params.nelems),
- 0, 0,
- &event);
- check_result(res, c_pipeline_run_func, m_index, res);
- res = COI::EventWait(1, &event, -1, 1, 0, 0);
- check_result(res, c_event_wait, res);
- // patch names in target data
- VarList::BufEntry *target_table;
- COIMAPINSTANCE map_inst;
- res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
- 0, &map_inst,
- reinterpret_cast<void**>(&target_table));
- check_result(res, c_buf_map, res);
- VarList::table_patch_names(target_table, params.nelems);
- // and sort entries
- std::sort(target_table, target_table + params.nelems, target_entry_cmp);
- std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
- // merge host and target entries and enter matching vars map
- std::vector<const VarTable::Entry*>::const_iterator hi =
- host_table.begin();
- std::vector<const VarTable::Entry*>::const_iterator he =
- host_table.end();
- const VarList::BufEntry *ti = target_table;
- const VarList::BufEntry *te = target_table + params.nelems;
- while (hi != he && ti != te) {
- int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
- if (res == 0) {
- bool is_new;
- // add matching entry to var map
- PtrData *ptr = insert_ptr_data((*hi)->addr, (*hi)->size, is_new);
- // store address for new entries
- if (is_new) {
- ptr->mic_addr = ti->addr;
- ptr->is_static = true;
- ptr->var_alloc_type = (*hi)->var_alloc_type;
- }
- ptr->alloc_ptr_data_lock.unlock();
- hi++;
- ti++;
- }
- else if (res < 0) {
- hi++;
- }
- else {
- ti++;
- }
- }
- // cleanup
- res = COI::BufferUnmap(map_inst, 0, 0, 0);
- check_result(res, c_buf_unmap, res);
- res = COI::BufferDestroy(buffer);
- check_result(res, c_buf_destroy, res);
- }
- COIRESULT Engine::compute(
- _Offload_stream stream,
- const std::list<COIBUFFER> &buffers,
- const void* data,
- uint16_t data_size,
- void* ret,
- uint16_t ret_size,
- uint32_t num_deps,
- const COIEVENT* deps,
- COIEVENT* event
- ) /* const */
- {
- COIBUFFER *bufs;
- COI_ACCESS_FLAGS *flags;
- COIRESULT res;
- // convert buffers list to array
- int num_bufs = buffers.size();
- if (num_bufs > 0) {
- bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
- flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
- sizeof(COI_ACCESS_FLAGS));
- int i = 0;
- for (std::list<COIBUFFER>::const_iterator it = buffers.begin();
- it != buffers.end(); it++) {
- bufs[i] = *it;
- // TODO: this should be fixed
- flags[i++] = COI_SINK_WRITE;
- }
- }
- else {
- bufs = 0;
- flags = 0;
- }
- COIPIPELINE pipeline = (stream == no_stream) ?
- get_pipeline() :
- get_pipeline(stream);
- // start computation
- res = COI::PipelineRunFunction(pipeline,
- m_funcs[c_func_compute],
- num_bufs, bufs, flags,
- num_deps, deps,
- data, data_size,
- ret, ret_size,
- event);
- return res;
- }
- pid_t Engine::init_device(void)
- {
- struct init_data {
- int device_index;
- int devices_total;
- int console_level;
- int offload_report_level;
- } data;
- COIRESULT res;
- COIEVENT event;
- pid_t pid;
- OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
- "Initializing device with logical index %d "
- "and physical index %d\n",
- m_index, m_physical_index);
- // setup misc data
- data.device_index = m_index;
- data.devices_total = mic_engines_total;
- data.console_level = console_enabled;
- data.offload_report_level = offload_report_level;
- res = COI::PipelineRunFunction(get_pipeline(),
- m_funcs[c_func_init],
- 0, 0, 0, 0, 0,
- &data, sizeof(data),
- &pid, sizeof(pid),
- &event);
- check_result(res, c_pipeline_run_func, m_index, res);
- res = COI::EventWait(1, &event, -1, 1, 0, 0);
- check_result(res, c_event_wait, res);
- OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid);
- return pid;
- }
- // data associated with each thread
- struct Thread {
- Thread(long* addr_coipipe_counter) {
- m_addr_coipipe_counter = addr_coipipe_counter;
- memset(m_pipelines, 0, sizeof(m_pipelines));
- }
- ~Thread() {
- #ifndef TARGET_WINNT
- __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
- #else // TARGET_WINNT
- _InterlockedDecrement(m_addr_coipipe_counter);
- #endif // TARGET_WINNT
- for (int i = 0; i < mic_engines_total; i++) {
- if (m_pipelines[i] != 0) {
- COI::PipelineDestroy(m_pipelines[i]);
- }
- }
- }
- COIPIPELINE get_pipeline(int index) const {
- return m_pipelines[index];
- }
- void set_pipeline(int index, COIPIPELINE pipeline) {
- m_pipelines[index] = pipeline;
- }
- AutoSet& get_auto_vars() {
- return m_auto_vars;
- }
- private:
- long* m_addr_coipipe_counter;
- AutoSet m_auto_vars;
- COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
- };
- COIPIPELINE Engine::get_pipeline(void)
- {
- Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
- if (thread == 0) {
- thread = new Thread(&m_proc_number);
- thread_setspecific(mic_thread_key, thread);
- }
- COIPIPELINE pipeline = thread->get_pipeline(m_index);
- if (pipeline == 0) {
- COIRESULT res;
- int proc_num;
- #ifndef TARGET_WINNT
- proc_num = __sync_fetch_and_add(&m_proc_number, 1);
- #else // TARGET_WINNT
- proc_num = _InterlockedIncrement(&m_proc_number);
- #endif // TARGET_WINNT
- if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
- LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
- LIBOFFLOAD_ABORT;
- }
- // Create pipeline for this thread
- if (m_assigned_cpus == 0) {
- // If m_assigned_cpus is NULL, it implies all threads
- // Create the pipeline with no CPU mask
- res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
- } else {
- // Create COI CPU mask
- COI_CPU_MASK in_Mask;
- res = COI::PipelineClearCPUMask(in_Mask);
- check_result(res, c_clear_cpu_mask, m_index, res);
- int threads_per_core = m_num_threads / m_num_cores;
- // Available threads are defined by examining of m_assigned_cpus bitset.
- // We skip thread 0.
- for (int i = 1; i < m_num_threads; i++) {
- // For available thread i m_assigned_cpus[i] is equal to 1
- if ((*m_assigned_cpus)[i]) {
- COI_CPU_MASK_SET(i, in_Mask);
- }
- }
- OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this CPU thread\n"
- "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
- "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
- in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
- in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
- in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
- in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
- // Create the pipeline with allowable CPUs
- res = COI::PipelineCreate(m_process, in_Mask, mic_stack_size, &pipeline);
- }
- check_result(res, c_pipeline_create, m_index, res);
- thread->set_pipeline(m_index, pipeline);
- }
- return pipeline;
- }
- Stream* Stream::find_stream(uint64_t handle, bool remove)
- {
- Stream *stream = 0;
- m_stream_lock.lock();
- {
- StreamMap::iterator it = all_streams.find(handle);
- if (it != all_streams.end()) {
- stream = it->second;
- if (remove) {
- all_streams.erase(it);
- }
- }
- }
- m_stream_lock.unlock();
- return stream;
- }
- void Engine::move_cpu_el_after(CpuEl* cpu_what, CpuEl* cpu_after)
- {
- if (cpu_what == cpu_after) {
- return;
- }
- CpuEl* cpu_prev = cpu_what->prev;
- // remove cpu_what
- if (!cpu_prev) {
- m_cpu_head = cpu_what->next;
- }
- else {
- cpu_prev->next = cpu_what->next;
- }
- if (cpu_what->next) {
- cpu_what->next->prev = cpu_prev;
- }
- // insert cpu_what after cpu_after
- cpu_what->prev = cpu_after;
- cpu_what->next = cpu_after->next;
- if (cpu_after->next) {
- cpu_after->next->prev = cpu_what;
- }
- cpu_after->next = cpu_what;
- }
- COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
- {
- Stream * stream = Stream::find_stream(handle, false);
- if (!stream) {
- LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
- LIBOFFLOAD_ABORT;
- }
- COIPIPELINE pipeline = stream->get_pipeline();
- if (pipeline == 0) {
- COIRESULT res;
- int proc_num;
- COI_CPU_MASK in_Mask ;
- #ifndef TARGET_WINNT
- proc_num = __sync_fetch_and_add(&m_proc_number, 1);
- #else // TARGET_WINNT
- proc_num = _InterlockedIncrement(&m_proc_number);
- #endif // TARGET_WINNT
- if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
- LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
- LIBOFFLOAD_ABORT;
- }
- m_stream_lock.lock();
- // start process if not done yet
- if (m_process == 0) {
- init_process();
- }
- // create CPUmask
- res = COI::PipelineClearCPUMask(in_Mask);
- check_result(res, c_clear_cpu_mask, m_index, res);
- int stream_cpu_num = stream->get_cpu_number();
- stream->m_stream_cpus.reset();
- int threads_per_core = m_num_threads / m_num_cores;
- // Available threads is taken from m_cpus list.
- // m_cpu_head points to the head of m_cpus.
- // the elements of m_cpus is ordered by the number of usage in streams.
- CpuEl *cpu_el = m_cpu_head;
- CpuEl *cpu_used_el, *cpu_used_prev, *cpu_prev;
- for (int i = 0; i < stream_cpu_num; i++) {
- COI_CPU_MASK_SET(CPU_INDEX(cpu_el), in_Mask);
- stream->m_stream_cpus.set(CPU_INDEX(cpu_el));
- //If the number of availabale threads is less than stream_cpu_num,
- // the stream_cpu_num is restricted to this number.
- if (!cpu_el->next) {
- break;
- }
- if (i + 1 < stream_cpu_num) {
- cpu_el = cpu_el->next;
- }
- }
- // assertion : cpu_el points to the last used thread
- cpu_used_el = cpu_el;
- while (cpu_used_el) {
- cpu_used_el->count++;
- cpu_el = cpu_prev = cpu_used_el;
- cpu_used_prev = cpu_used_el->prev;
- if (!cpu_el->next) {
- cpu_used_el = cpu_used_prev;
- continue;
- }
-
- while (cpu_el) {
- if (cpu_used_el->count < cpu_el->count) {
- break;
- }
- // Equal used threads are ordered by thread number to
- // assign to a stream as contiguous threads as possible.
- else if (cpu_used_el->count == cpu_el->count &&
- CPU_INDEX(cpu_used_el) < CPU_INDEX(cpu_el)) {
- break;
- }
- cpu_prev = cpu_el;
- cpu_el = cpu_el->next;
- }
- if (cpu_used_el != cpu_prev) {
- move_cpu_el_after(cpu_used_el, cpu_prev);
- }
- cpu_used_el = cpu_used_prev;
- }
- print_stream_cpu_list("get_pipeline");
- // create pipeline for this thread
- OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this Stream\n"
- "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
- "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
- in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
- in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
- in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
- in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
- res = COI::PipelineCreate(m_process, in_Mask,
- mic_stack_size, &pipeline);
- check_result(res, c_pipeline_create, m_index, res);
- // Set stream's affinities
- {
- struct affinity_spec affinity_spec;
- char* affinity_type;
- int i;
- // "compact" by default
- affinity_spec.affinity_type = affinity_compact;
- // Check if user has specified type of affinity
- if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) !=
- NULL)
- {
- char affinity_str[16];
- int affinity_str_len;
- OFFLOAD_DEBUG_TRACE(2,
- "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
- affinity_type);
- // Set type of affinity requested
- affinity_str_len = strlen(affinity_type);
- for (i=0; i<affinity_str_len && i<15; i++)
- {
- affinity_str[i] = tolower(affinity_type[i]);
- }
- affinity_str[i] = '\0';
- if (strcmp(affinity_str, "compact") == 0) {
- affinity_spec.affinity_type = affinity_compact;
- OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
- } else if (strcmp(affinity_str, "scatter") == 0) {
- affinity_spec.affinity_type = affinity_scatter;
- OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
- } else {
- LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
- affinity_spec.affinity_type = affinity_compact;
- OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
- }
- }
- // Make flat copy of sink mask because COI's mask is opaque
- for (i=0; i<16; i++) {
- affinity_spec.sink_mask[i] = in_Mask[i];
- }
- // Set number of cores and threads
- affinity_spec.num_cores = m_num_cores;
- affinity_spec.num_threads = m_num_threads;
- COIEVENT event;
- res = COI::PipelineRunFunction(pipeline,
- m_funcs[c_func_set_stream_affinity],
- 0, 0, 0,
- 0, 0,
- &affinity_spec, sizeof(affinity_spec),
- 0, 0,
- &event);
- check_result(res, c_pipeline_run_func, m_index, res);
-
- res = COI::EventWait(1, &event, -1, 1, 0, 0);
- check_result(res, c_event_wait, res);
- }
- m_stream_lock.unlock();
- stream->set_pipeline(pipeline);
- }
- return pipeline;
- }
- void Engine::stream_destroy(_Offload_stream handle)
- {
- // get stream
- Stream * stream = Stream::find_stream(handle, true);
- if (stream) {
- // return cpus for future use
- for (int i = 0; i < m_num_threads; i++) {
- if (stream->m_stream_cpus.test(i)) {
- CpuEl *cpu_el = m_cpus + i;
- CpuEl *cpu_first_el = cpu_el;
- // decrease count of thread "i" and move its CpuEl to the
- // proper place into the ordered list
- cpu_el->count--;
- while (cpu_el->prev) {
- if (cpu_first_el->count > cpu_el->prev->count) {
- break;
- }
- else if (cpu_first_el->count == cpu_el->prev->count &&
- CPU_INDEX(cpu_first_el) > CPU_INDEX(cpu_el->prev)) {
- break;
- }
- cpu_el = cpu_el->prev;
- }
- cpu_el = cpu_el->prev;
- // If cpu_el for thread "i" must be moved in the list
- if (cpu_first_el != cpu_el) {
- // Thread "i" is used the least times. It must be set as
- // the m_cpu_head.
- if (!cpu_el) {
- if (!cpu_first_el->prev) {
- continue;
- }
- // remove cpu_el.
- cpu_first_el->prev->next = cpu_first_el->next;
- if (cpu_first_el->next) {
- cpu_first_el->next->prev = cpu_first_el->prev;
- }
- // make cpu_first_el as new m_cpu_head
- cpu_first_el->prev = NULL;
- cpu_first_el->next = m_cpu_head;
- m_cpu_head->prev = cpu_first_el;
- m_cpu_head = cpu_first_el;
- }
- else {
- move_cpu_el_after(cpu_first_el, cpu_el);
- }
- }
- }
- }
- print_stream_cpu_list("stream_destroy");
- delete stream;
- }
- else {
- LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
- LIBOFFLOAD_ABORT;
- }
- }
- uint64_t Engine::get_thread_id(void)
- {
- Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
- if (thread == 0) {
- thread = new Thread(&m_proc_number);
- thread_setspecific(mic_thread_key, thread);
- }
- return reinterpret_cast<uint64_t>(thread);
- }
- AutoSet& Engine::get_auto_vars(void)
- {
- Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
- if (thread == 0) {
- thread = new Thread(&m_proc_number);
- thread_setspecific(mic_thread_key, thread);
- }
- return thread->get_auto_vars();
- }
- void Engine::destroy_thread_data(void *data)
- {
- delete static_cast<Thread*>(data);
- }
|