12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267 |
- /*
- Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include <omp.h>
- //#include <stdlib.h>
- //#include "offload.h"
- #include "compiler_if_host.h"
- // OpenMP API
- void omp_set_default_device(int num) __GOMP_NOTHROW
- {
- if (num >= 0) {
- __omp_device_num = num;
- }
- }
- int omp_get_default_device(void) __GOMP_NOTHROW
- {
- return __omp_device_num;
- }
- int omp_get_num_devices() __GOMP_NOTHROW
- {
- __offload_init_library();
- return mic_engines_total;
- }
- // OpenMP 4.5 APIs
- // COI supports 3-dim multiD transfers
- #define MAX_ARRAY_RANK 3
- int omp_get_initial_device(
- void
- ) __GOMP_NOTHROW
- {
- return -1;
- }
- void* omp_target_alloc(
- size_t size,
- int device_num
- ) __GOMP_NOTHROW
- {
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_alloc(%lld, %d)\n", size, device_num);
- if (device_num < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- void* result = 0;
- // malloc on CPU
- if (device_num == -1) {
- // We do not check for malloc returning NULL because the
- // specification of this API includes the possibility of failure.
- // The user will check the returned result
- result = malloc(size);
- return result;
- }
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
- TARGET_MIC, device_num, 0, NULL, __func__, 0);
- if (ofld != 0) {
- VarDesc vars[2] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(size);
- vars[0].count = 1;
- vars[0].ptr = &size;
- vars[1].type.src = c_data;
- vars[1].type.dst = c_data;
- vars[1].direction.bits = c_parameter_out;
- vars[1].size = sizeof(result);
- vars[1].count = 1;
- vars[1].ptr = &result;
- OFFLOAD_OFFLOAD(ofld, "omp_target_alloc_target",
- 0, 2, vars, NULL, 0, 0, 0);
- }
- return result;
- }
- void omp_target_free(
- void *device_ptr,
- int device_num
- ) __GOMP_NOTHROW
- {
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_free(%p, %d)\n", device_ptr, device_num);
- if (device_num < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- // free on CPU
- if (device_num == -1) {
- free(device_ptr);
- return;
- }
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
- TARGET_MIC, device_num, 0, NULL, __func__, 0);
- if (ofld) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(device_ptr);
- vars[0].count = 1;
- vars[0].ptr = &device_ptr;
-
- OFFLOAD_OFFLOAD(ofld, "omp_target_free_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- int omp_target_is_present(
- void *ptr,
- int device_num
- ) __GOMP_NOTHROW
- {
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_is_present(%p, %d)\n", ptr, device_num);
- if (device_num < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- if (device_num == -1) {
- return false;
- }
- // If OpenMP allows wrap-around for device numbers, enable next line
- //device_num %= mic_engines_total;
- // lookup existing association in pointer table
- PtrData* ptr_data = mic_engines[device_num].find_ptr_data(ptr);
- if (ptr_data == 0) {
- OFFLOAD_TRACE(3, "Address %p is not mapped on device %d\n",
- ptr, device_num);
- return false;
- }
- OFFLOAD_TRACE(3, "Address %p found mapped on device %d\n",
- ptr, device_num);
- return true;
- }
- int omp_target_memcpy(
- void *dst,
- void *src,
- size_t length,
- size_t dst_offset,
- size_t src_offset,
- int dst_device,
- int src_device
- ) __GOMP_NOTHROW
- {
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_memcpy(%p, %p, %lld, %lld, %lld, %d, %d)\n",
- dst, src, length, dst_offset, src_offset, dst_device, src_device);
- if (dst_device < -1 || src_device < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- char* srcp = (char *)src + src_offset;
- char* dstp = (char *)dst + dst_offset;
- if (src_device == -1) {
- // Source is CPU
- if (dst_device == -1) {
- // CPU -> CPU
- memcpy(dstp, srcp, length);
- return 0;
- } else {
- // CPU -> MIC
- // COIBufferWrite
- // If OpenMP allows wrap-around for device numbers, enable next line
- //dst_device %= mic_engines_total;
- OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", dstp);
- COIBUFFER mic_buf;
- COIRESULT res = COI::BufferCreateFromMemory(length,
- COI_BUFFER_NORMAL, COI_SINK_MEMORY, dstp,
- 1, &mic_engines[dst_device].get_process(),
- &mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
- return 1;
- }
- res = COI::BufferWrite(mic_buf, 0, srcp, length,
- COI_COPY_UNSPECIFIED, 0, 0, 0);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_write, res);
- return 1;
- }
- res = COI::BufferDestroy(mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_destroy, res);
- return 1;
- }
- return 0;
- }
- } else {
- // Source is device
- if (dst_device == -1) {
- // MIC -> CPU
- // COIBufferRead
- // If OpenMP allows wrap-around for device numbers, enable next line
- //src_device %= mic_engines_total;
- OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", srcp);
- COIBUFFER mic_buf;
- COIRESULT res = COI::BufferCreateFromMemory(length,
- COI_BUFFER_NORMAL, COI_SINK_MEMORY, srcp,
- 1, &mic_engines[src_device].get_process(),
- &mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
- return 1;
- }
- res = COI::BufferRead(mic_buf, 0, dstp, length,
- COI_COPY_UNSPECIFIED, 0, 0, 0);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_read, res);
- return 1;
- }
- res = COI::BufferDestroy(mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_destroy, res);
- return 1;
- }
- return 0;
- } else {
- // some MIC -> some MIC
- if (src_device == dst_device) {
- // MIC local copy will be done as remote memcpy
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
- 0, NULL, __func__, 0);
- if (ofld) {
- VarDesc vars[3] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(dstp);
- vars[0].count = 1;
- vars[0].ptr = &dstp;
- vars[1].type.src = c_data;
- vars[1].type.dst = c_data;
- vars[1].direction.bits = c_parameter_in;
- vars[1].size = sizeof(srcp);
- vars[1].count = 1;
- vars[1].ptr = &srcp;
- vars[2].type.src = c_data;
- vars[2].type.dst = c_data;
- vars[2].direction.bits = c_parameter_in;
- vars[2].size = sizeof(length);
- vars[2].count = 1;
- vars[2].ptr = &length;
-
- OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_target",
- 0, 3, vars, NULL, 0, 0, 0);
- return 0;
- } else {
- return 1;
- }
- } else {
- // MICx -> MICy
- // Allocate CPU buffer
- char *cpu_mem = (char *)malloc(length);
- if (cpu_mem == 0) {
- LIBOFFLOAD_ERROR(c_malloc);
- return 1;
- }
- int retval = 1;
- if (omp_target_memcpy(
- cpu_mem, srcp, length, 0, 0, -1, src_device) == 0) {
- retval = omp_target_memcpy(
- dstp, cpu_mem, length, 0, 0, dst_device, -1);
- }
- free(cpu_mem);
- return retval;
- }
- }
- }
- }
// Returns the byte size of one index step along dimensions[0]:
//   element_size * dimensions[1] * ... * dimensions[num_dims-1].
// dimensions[0] itself is never read.  For num_dims <= 1 the result is
// element_size.
static size_t bytesize_at_this_dimension(
    size_t element_size,
    int num_dims,
    const size_t* dimensions
)
{
    size_t bytes = element_size;
    for (int d = 1; d < num_dims; d++) {
        bytes *= dimensions[d];
    }
    return bytes;
}
// Host-side copy of a rectangular sub-volume between two row-major arrays.
//
//   dst, src        base addresses of the two arrays
//   element_size    size of one element in bytes
//   num_dims        number of dimensions described by the arrays below
//   volume          extent of the copied region in each dimension
//   dst_offsets,
//   src_offsets     start index of the region in each dimension
//   dst_dimensions,
//   src_dimensions  full extent of each array in each dimension
//
// The outermost dimension is walked index by index and the function
// recurses on the remaining dimensions; the innermost dimension is copied
// with a single memcpy().  Offsets, counts and strides are kept in size_t
// so that values above INT_MAX are not truncated.
static void memcpy_rect(
    char *dst,
    char *src,
    size_t element_size,
    int num_dims,
    const size_t *volume,
    const size_t *dst_offsets,
    const size_t *src_offsets,
    const size_t *dst_dimensions,
    const size_t *src_dimensions
)
{
    if (num_dims <= 1) {
        // Innermost dimension: one contiguous run of elements
        memcpy(dst + dst_offsets[0] * element_size,
               src + src_offsets[0] * element_size,
               element_size * volume[0]);
        return;
    }

    // Bytes covered by one index step along the outermost dimension:
    // element_size times the extents of all inner dimensions.
    size_t dst_slice_bytes = element_size;
    size_t src_slice_bytes = element_size;
    for (int d = 1; d < num_dims; d++) {
        dst_slice_bytes *= dst_dimensions[d];
        src_slice_bytes *= src_dimensions[d];
    }

    for (size_t i = 0; i < volume[0]; i++) {
        memcpy_rect(dst + dst_slice_bytes * (dst_offsets[0] + i),
                    src + src_slice_bytes * (src_offsets[0] + i),
                    element_size, num_dims - 1, volume + 1,
                    dst_offsets + 1, src_offsets + 1,
                    dst_dimensions + 1, src_dimensions + 1);
    }
}
- int omp_target_memcpy_rect(
- void *dst_,
- void *src_,
- size_t element_size,
- int num_dims,
- const size_t *volume,
- const size_t *dst_offsets,
- const size_t *src_offsets,
- const size_t *dst_dimensions,
- const size_t *src_dimensions,
- int dst_device,
- int src_device
- ) __GOMP_NOTHROW
- {
- char *dst = (char *)dst_;
- char *src = (char *)src_;
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_memcpy_rect(%p, %p, %lld, %d, "
- "%p, %p, %p, %p, %p, %d, %d)\n",
- dst, src, element_size, num_dims,
- volume, dst_offsets, src_offsets,
- dst_dimensions, src_dimensions, dst_device, src_device);
-
- // MAX_ARRAY_RANK dimensions are supported
- if (dst == 0 && src == 0) {
- return MAX_ARRAY_RANK;
- }
- if (num_dims < 1 || num_dims > MAX_ARRAY_RANK ||
- element_size < 1 ||
- volume == 0 || dst_offsets == 0 || src_offsets == 0 ||
- dst_dimensions == 0 || src_dimensions == 0) {
- return 1;
- }
- if (dst_device < -1 || src_device < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- if (src_device == -1) {
- // Source is CPU
- if (dst_device == -1) {
- // CPU -> CPU
- memcpy_rect((char*)dst, (char*)src, element_size, num_dims, volume,
- dst_offsets, src_offsets,
- dst_dimensions, src_dimensions);
- return 0;
- } else {
- // CPU -> MIC
- // COIBufferWriteMultiD
- struct arr_desc dst_desc;
- struct arr_desc src_desc;
- dst_desc.base = (int64_t)dst;
- dst_desc.rank = num_dims;
-
- src_desc.base = (int64_t)src;
- src_desc.rank = num_dims;
- for (int i=0; i<num_dims; i++)
- {
- dst_desc.dim[i].size = bytesize_at_this_dimension(
- element_size,
- num_dims - i,
- dst_dimensions + i);
- dst_desc.dim[i].lindex = 0;
- dst_desc.dim[i].lower = dst_offsets[i];
- dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
- dst_desc.dim[i].stride = 1;
-
- src_desc.dim[i].size = bytesize_at_this_dimension(
- element_size,
- num_dims - i,
- src_dimensions + i);
- src_desc.dim[i].lindex = 0;
- src_desc.dim[i].lower = src_offsets[i];
- src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
- src_desc.dim[i].stride = 1;
- }
- __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
- __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
-
- // If OpenMP allows wrap-around for device numbers, enable next line
- //dst_device %= mic_engines_total;
-
- // Compute MIC buffer size
- size_t dst_length = dst_dimensions[0] * bytesize_at_this_dimension(
- element_size,
- num_dims,
- dst_dimensions);
- OFFLOAD_TRACE(3,
- "Creating buffer from sink memory %llx of size %lld\n",
- dst, dst_length);
- COIBUFFER mic_buf;
- COIRESULT res = COI::BufferCreateFromMemory(dst_length,
- COI_BUFFER_NORMAL, COI_SINK_MEMORY, dst,
- 1, &mic_engines[dst_device].get_process(),
- &mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
- return 1;
- }
- res = COI::BufferWriteMultiD(mic_buf,
- mic_engines[dst_device].get_process(),
- 0, &dst_desc, &src_desc,
- COI_COPY_UNSPECIFIED, 0, 0, 0);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_write, res);
- return 1;
- }
- res = COI::BufferDestroy(mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_destroy, res);
- return 1;
- }
- return 0;
- }
- } else {
- // Source is device
- if (dst_device == -1) {
- // COIBufferReadMultiD
- struct arr_desc dst_desc;
- struct arr_desc src_desc;
- dst_desc.base = (int64_t)dst;
- dst_desc.rank = num_dims;
-
- src_desc.base = (int64_t)src;
- src_desc.rank = num_dims;
- for (int i=0; i<num_dims; i++)
- {
- dst_desc.dim[i].size = bytesize_at_this_dimension(
- element_size,
- num_dims - i,
- dst_dimensions + i);
- dst_desc.dim[i].lindex = 0;
- dst_desc.dim[i].lower = dst_offsets[i];
- dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
- dst_desc.dim[i].stride = 1;
-
- src_desc.dim[i].size = bytesize_at_this_dimension(
- element_size,
- num_dims - i,
- src_dimensions + i);
- src_desc.dim[i].lindex = 0;
- src_desc.dim[i].lower = src_offsets[i];
- src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
- src_desc.dim[i].stride = 1;
- }
- __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
- __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
-
- // If OpenMP allows wrap-around for device numbers, enable next line
- //src_device %= mic_engines_total;
-
- // Compute MIC buffer size
- size_t src_length = src_dimensions[0] * bytesize_at_this_dimension(
- element_size,
- num_dims,
- src_dimensions);
- OFFLOAD_TRACE(3,
- "Creating buffer from sink memory %llx of size %lld\n",
- src, src_length);
- COIBUFFER mic_buf;
- COIRESULT res = COI::BufferCreateFromMemory(src_length,
- COI_BUFFER_NORMAL, COI_SINK_MEMORY, src,
- 1, &mic_engines[src_device].get_process(),
- &mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
- return 1;
- }
- res = COI::BufferReadMultiD(mic_buf, 0,
- &dst_desc, &src_desc,
- COI_COPY_UNSPECIFIED, 0, 0, 0);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_write, res);
- return 1;
- }
- res = COI::BufferDestroy(mic_buf);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_buf_destroy, res);
- return 1;
- }
- return 0;
- } else {
- // some MIC -> some MIC
- if (src_device == dst_device) {
- // MIC local copy will be done as remote memcpy_rect
- struct parameters {
- void *dst;
- void *src;
- size_t element_size;
- int num_dims;
- size_t array_info[MAX_ARRAY_RANK*5];
- } parameters = {dst, src, element_size, num_dims};
- int result;
-
- for (int i=0; i<num_dims; i++)
- {
- parameters.array_info[i] = volume[i];
- parameters.array_info[i+num_dims] = dst_offsets[i];
- parameters.array_info[i+num_dims*2] = src_offsets[i];
- parameters.array_info[i+num_dims*3] = dst_dimensions[i];
- parameters.array_info[i+num_dims*4] = src_dimensions[i];
- }
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
- 0, NULL, __func__, 0);
- if (ofld) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(parameters) -
- (MAX_ARRAY_RANK - num_dims) *
- 5 * sizeof(size_t);
- vars[0].count = 1;
- vars[0].ptr = ¶meters;
-
- OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_rect_target",
- 0, 1, vars, NULL, 0, 0, 0);
- return 0;
- } else {
- return 1;
- }
- } else {
- // MICx -> MICy
- // Compute transfer byte-count
- size_t dst_length = element_size;
- for (int i=0; i<num_dims; i++) {
- dst_length *= volume[i];
- }
- // Allocate CPU buffer
- char *cpu_mem = (char *)malloc(dst_length);
- if (cpu_mem == 0) {
- LIBOFFLOAD_ERROR(c_malloc);
- return 1;
- }
- // Create CPU offset and dimension arrays
- // The CPU array collects the data in a contiguous block
- size_t cpu_offsets[MAX_ARRAY_RANK];
- size_t cpu_dimensions[MAX_ARRAY_RANK];
- for (int i=0; i<num_dims; i++) {
- cpu_offsets[i] = 0;
- cpu_dimensions[i] = volume[i];
- }
- int retval = 1;
- if (omp_target_memcpy_rect(
- cpu_mem, src, element_size, num_dims, volume,
- cpu_offsets, src_offsets,
- cpu_dimensions, src_dimensions,
- -1, src_device) == 0) {
- retval = omp_target_memcpy_rect(
- dst, cpu_mem, element_size, num_dims, volume,
- dst_offsets, cpu_offsets,
- dst_dimensions, cpu_dimensions,
- dst_device, -1);
- }
- free(cpu_mem);
- return retval;
- }
- }
- }
- }
- // host_ptr is key in table that yields association on device
- // A COIBUFFER of specified size is created from the memory at
- // device_ptr+device_offset on device_num
- int omp_target_associate_ptr(
- void *host_ptr,
- void *device_ptr,
- size_t size,
- size_t device_offset,
- int device_num
- ) __GOMP_NOTHROW
- {
- COIRESULT res;
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_associate_ptr(%p, %p, %lld, %lld, %d)\n",
- host_ptr, device_ptr, size, device_offset, device_num);
- if (device_num < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- // Associating to CPU is treated as failure
- if (device_num == -1) {
- return 1;
- }
- // An incorrect size is treated as failure
- if (size < 0) {
- return 1;
- }
-
- // If OpenMP allows wrap-around for device numbers, enable next line
- //Engine& device = mic_engines[device_num % mic_engines_total];
- Engine& device = mic_engines[device_num];
-
- // Does host pointer have association already?
- // lookup existing association in pointer table
- PtrData* ptr_data = device.find_ptr_data(host_ptr);
- if (ptr_data != 0) {
- OFFLOAD_TRACE(3, "Address %p is already mapped on device %d\n",
- host_ptr, device_num);
- // Is current device pointer and offset same as existing?
- if ((void*)ptr_data->mic_addr == device_ptr &&
- (size_t)ptr_data->alloc_disp == device_offset) {
- return 0;
- } else {
- return 1;
- }
- }
- // Create association
- OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
- host_ptr, size);
- bool is_new;
- ptr_data = device.insert_ptr_data(host_ptr, size, is_new);
- ptr_data->is_omp_associate = true;
- // create CPU buffer
- OFFLOAD_TRACE(3,
- "Creating buffer from source memory %p, length %lld\n",
- host_ptr, size);
- // result is not checked because we can continue without cpu
- // buffer. In this case we will use COIBufferRead/Write
- // instead of COIBufferCopy.
- COI::BufferCreateFromMemory(size,
- COI_BUFFER_OPENCL,
- 0,
- host_ptr,
- 1,
- &device.get_process(),
- &ptr_data->cpu_buf);
- // create MIC buffer
- OFFLOAD_TRACE(3,
- "Creating buffer from sink memory: addr %p, size %lld\n",
- (char *)device_ptr + device_offset, size);
- res = COI::BufferCreateFromMemory(size,
- COI_BUFFER_NORMAL,
- COI_SINK_MEMORY,
- device_ptr,
- 1,
- &device.get_process(),
- &ptr_data->mic_buf);
- if (res != COI_SUCCESS) {
- ptr_data->alloc_ptr_data_lock.unlock();
- return 1;
- }
- // make buffer valid on the device.
- res = COI::BufferSetState(ptr_data->mic_buf,
- device.get_process(),
- COI_BUFFER_VALID,
- COI_BUFFER_NO_MOVE,
- 0, 0, 0);
- if (res != COI_SUCCESS) {
- ptr_data->alloc_ptr_data_lock.unlock();
- return 1;
- }
- res = COI::BufferSetState(ptr_data->mic_buf,
- COI_PROCESS_SOURCE,
- COI_BUFFER_INVALID,
- COI_BUFFER_NO_MOVE,
- 0, 0, 0);
- if (res != COI_SUCCESS) {
- ptr_data->alloc_ptr_data_lock.unlock();
- return 1;
- }
- ptr_data->alloc_disp = device_offset;
- ptr_data->alloc_ptr_data_lock.unlock();
- return 0;
- }
- int omp_target_disassociate_ptr(
- void *host_ptr,
- int device_num
- ) __GOMP_NOTHROW
- {
- COIRESULT res;
- __offload_init_library();
- OFFLOAD_TRACE(2, "omp_target_disassociate_ptr(%p, %d)\n",
- host_ptr, device_num);
- if (device_num < -1) {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
- // Dissociating from CPU is treated as failure
- if (device_num == -1) {
- return 1;
- }
-
- // If OpenMP allows wrap-around for device numbers, enable next line
- //Engine& device = mic_engines[device_num % mic_engines_total];
- Engine& device = mic_engines[device_num];
- // Lookup existing association in pointer table
- PtrData* ptr_data = device.find_ptr_data(host_ptr);
- // Attempt to disassociate unassociated pointer is a failure
- if (ptr_data == 0) {
- return 1;
- }
-
- // Destroy buffers
- if (ptr_data->cpu_buf != 0) {
- OFFLOAD_TRACE(3, "Destroying CPU buffer %p\n", ptr_data->cpu_buf);
- COI::BufferDestroy(ptr_data->cpu_buf);
- }
- if (ptr_data->mic_buf != 0) {
- OFFLOAD_TRACE(3, "Destroying MIC buffer %p\n", ptr_data->mic_buf);
- COI::BufferDestroy(ptr_data->mic_buf);
- }
-
- // Remove association from map
- OFFLOAD_TRACE(3, "Removing association for addr %p\n",
- ptr_data->cpu_addr.start());
- device.remove_ptr_data(ptr_data->cpu_addr.start());
- return 0;
- }
- // End of OpenMP 4.5 APIs
- // OpenMP API wrappers
- static void omp_set_int_target(
- TARGET_TYPE target_type,
- int target_number,
- int setting,
- const char* f_name
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- f_name, 0);
- if (ofld) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(int);
- vars[0].count = 1;
- vars[0].ptr = &setting;
- OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- static int omp_get_int_target(
- TARGET_TYPE target_type,
- int target_number,
- const char * f_name
- )
- {
- int setting = 0;
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- f_name, 0);
- if (ofld) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_out;
- vars[0].size = sizeof(int);
- vars[0].count = 1;
- vars[0].ptr = &setting;
- OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
- }
- return setting;
- }
- void omp_set_num_threads_target(
- TARGET_TYPE target_type,
- int target_number,
- int num_threads
- )
- {
- omp_set_int_target(target_type, target_number, num_threads,
- "omp_set_num_threads_target");
- }
- int omp_get_max_threads_target(
- TARGET_TYPE target_type,
- int target_number
- )
- {
- return omp_get_int_target(target_type, target_number,
- "omp_get_max_threads_target");
- }
- int omp_get_num_procs_target(
- TARGET_TYPE target_type,
- int target_number
- )
- {
- return omp_get_int_target(target_type, target_number,
- "omp_get_num_procs_target");
- }
- void omp_set_dynamic_target(
- TARGET_TYPE target_type,
- int target_number,
- int num_threads
- )
- {
- omp_set_int_target(target_type, target_number, num_threads,
- "omp_set_dynamic_target");
- }
- int omp_get_dynamic_target(
- TARGET_TYPE target_type,
- int target_number
- )
- {
- return omp_get_int_target(target_type, target_number,
- "omp_get_dynamic_target");
- }
- void omp_set_nested_target(
- TARGET_TYPE target_type,
- int target_number,
- int nested
- )
- {
- omp_set_int_target(target_type, target_number, nested,
- "omp_set_nested_target");
- }
- int omp_get_nested_target(
- TARGET_TYPE target_type,
- int target_number
- )
- {
- return omp_get_int_target(target_type, target_number,
- "omp_get_nested_target");
- }
- void omp_set_schedule_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_sched_t kind,
- int modifier
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[2] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(omp_sched_t);
- vars[0].count = 1;
- vars[0].ptr = &kind;
- vars[1].type.src = c_data;
- vars[1].type.dst = c_data;
- vars[1].direction.bits = c_parameter_in;
- vars[1].size = sizeof(int);
- vars[1].count = 1;
- vars[1].ptr = &modifier;
- OFFLOAD_OFFLOAD(ofld, "omp_set_schedule_target",
- 0, 2, vars, NULL, 0, 0, 0);
- }
- }
- void omp_get_schedule_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_sched_t *kind,
- int *modifier
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[2] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_out;
- vars[0].size = sizeof(omp_sched_t);
- vars[0].count = 1;
- vars[0].ptr = kind;
- vars[1].type.src = c_data;
- vars[1].type.dst = c_data;
- vars[1].direction.bits = c_parameter_out;
- vars[1].size = sizeof(int);
- vars[1].count = 1;
- vars[1].ptr = modifier;
- OFFLOAD_OFFLOAD(ofld, "omp_get_schedule_target",
- 0, 2, vars, NULL, 0, 0, 0);
- }
- }
- // lock API functions
- void omp_init_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_out;
- vars[0].size = sizeof(omp_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_init_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- void omp_destroy_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(omp_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_destroy_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- void omp_set_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_inout;
- vars[0].size = sizeof(omp_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_set_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- void omp_unset_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_inout;
- vars[0].size = sizeof(omp_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_unset_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- int omp_test_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_lock_target_t *lock
- )
- {
- int result = 0;
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[2] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_inout;
- vars[0].size = sizeof(omp_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- vars[1].type.src = c_data;
- vars[1].type.dst = c_data;
- vars[1].direction.bits = c_parameter_out;
- vars[1].size = sizeof(int);
- vars[1].count = 1;
- vars[1].ptr = &result;
- OFFLOAD_OFFLOAD(ofld, "omp_test_lock_target",
- 0, 2, vars, NULL, 0, 0, 0);
- }
- return result;
- }
- // nested lock API functions
- void omp_init_nest_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_nest_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_out;
- vars[0].size = sizeof(omp_nest_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_init_nest_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- void omp_destroy_nest_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_nest_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_in;
- vars[0].size = sizeof(omp_nest_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_destroy_nest_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- void omp_set_nest_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_nest_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_inout;
- vars[0].size = sizeof(omp_nest_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_set_nest_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- void omp_unset_nest_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_nest_lock_target_t *lock
- )
- {
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[1] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_inout;
- vars[0].size = sizeof(omp_nest_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- OFFLOAD_OFFLOAD(ofld, "omp_unset_nest_lock_target",
- 0, 1, vars, NULL, 0, 0, 0);
- }
- }
- int omp_test_nest_lock_target(
- TARGET_TYPE target_type,
- int target_number,
- omp_nest_lock_target_t *lock
- )
- {
- int result = 0;
- OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
- __func__, 0);
- if (ofld != 0) {
- VarDesc vars[2] = {0};
- vars[0].type.src = c_data;
- vars[0].type.dst = c_data;
- vars[0].direction.bits = c_parameter_inout;
- vars[0].size = sizeof(omp_nest_lock_target_t);
- vars[0].count = 1;
- vars[0].ptr = lock;
- vars[1].type.src = c_data;
- vars[1].type.dst = c_data;
- vars[1].direction.bits = c_parameter_out;
- vars[1].size = sizeof(int);
- vars[1].count = 1;
- vars[1].ptr = &result;
- OFFLOAD_OFFLOAD(ofld, "omp_test_nest_lock_target",
- 0, 2, vars, NULL, 0, 0, 0);
- }
- return result;
- }
|