12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653 |
- \input texinfo @c -*-texinfo-*-
- @c %**start of header
- @setfilename libgomp.info
- @settitle GNU libgomp
- @c %**end of header
- @copying
- Copyright @copyright{} 2006-2022 Free Software Foundation, Inc.
- Permission is granted to copy, distribute and/or modify this document
- under the terms of the GNU Free Documentation License, Version 1.3 or
- any later version published by the Free Software Foundation; with the
- Invariant Sections being ``Funding Free Software'', the Front-Cover
- texts being (a) (see below), and with the Back-Cover Texts being (b)
- (see below). A copy of the license is included in the section entitled
- ``GNU Free Documentation License''.
- (a) The FSF's Front-Cover Text is:
- A GNU Manual
- (b) The FSF's Back-Cover Text is:
- You have freedom to copy and modify this GNU Manual, like GNU
- software. Copies published by the Free Software Foundation raise
- funds for GNU development.
- @end copying
- @ifinfo
- @dircategory GNU Libraries
- @direntry
- * libgomp: (libgomp). GNU Offloading and Multi Processing Runtime Library.
- @end direntry
- This manual documents libgomp, the GNU Offloading and Multi Processing
- Runtime library. This is the GNU implementation of the OpenMP and
- OpenACC APIs for parallel and accelerator programming in C/C++ and
- Fortran.
- Published by the Free Software Foundation
- 51 Franklin Street, Fifth Floor
- Boston, MA 02110-1301 USA
- @insertcopying
- @end ifinfo
- @setchapternewpage odd
- @titlepage
- @title GNU Offloading and Multi Processing Runtime Library
- @subtitle The GNU OpenMP and OpenACC Implementation
- @page
- @vskip 0pt plus 1filll
- @comment For the @value{version-GCC} Version*
- @sp 1
- Published by the Free Software Foundation @*
- 51 Franklin Street, Fifth Floor@*
- Boston, MA 02110-1301, USA@*
- @sp 1
- @insertcopying
- @end titlepage
- @summarycontents
- @contents
- @page
- @node Top, Enabling OpenMP
- @top Introduction
- @cindex Introduction
- This manual documents the usage of libgomp, the GNU Offloading and
- Multi Processing Runtime Library. This includes the GNU
- implementation of the @uref{https://www.openmp.org, OpenMP} Application
- Programming Interface (API) for multi-platform shared-memory parallel
- programming in C/C++ and Fortran, and the GNU implementation of the
- @uref{https://www.openacc.org, OpenACC} Application Programming
- Interface (API) for offloading of code to accelerator devices in C/C++
- and Fortran.
- Originally, libgomp implemented the GNU OpenMP Runtime Library. Based
- on this, support for OpenACC and offloading (both OpenACC and OpenMP
- 4's target construct) was added later on, and the library's name was
- changed to GNU Offloading and Multi Processing Runtime Library.
- @comment
- @comment When you add a new menu item, please keep the right hand
- @comment aligned to the same column. Do not use tabs. This provides
- @comment better formatting.
- @comment
- @menu
- * Enabling OpenMP:: How to enable OpenMP for your applications.
- * OpenMP Implementation Status:: List of implemented features by OpenMP version
- * OpenMP Runtime Library Routines: Runtime Library Routines.
- The OpenMP runtime application programming
- interface.
- * OpenMP Environment Variables: Environment Variables.
- Influencing OpenMP runtime behavior with
- environment variables.
- * Enabling OpenACC:: How to enable OpenACC for your
- applications.
- * OpenACC Runtime Library Routines:: The OpenACC runtime application
- programming interface.
- * OpenACC Environment Variables:: Influencing OpenACC runtime behavior with
- environment variables.
- * CUDA Streams Usage:: Notes on the implementation of
- asynchronous operations.
- * OpenACC Library Interoperability:: OpenACC library interoperability with the
- NVIDIA CUBLAS library.
- * OpenACC Profiling Interface::
- * The libgomp ABI:: Notes on the external ABI presented by libgomp.
- * Reporting Bugs:: How to report bugs in the GNU Offloading and
- Multi Processing Runtime Library.
- * Copying:: GNU General Public License says
- how you can copy and share libgomp.
- * GNU Free Documentation License::
- How you can copy and share this manual.
- * Funding:: How to help assure continued work for free
- software.
- * Library Index:: Index of this documentation.
- @end menu
- @c ---------------------------------------------------------------------
- @c Enabling OpenMP
- @c ---------------------------------------------------------------------
- @node Enabling OpenMP
- @chapter Enabling OpenMP
- To activate the OpenMP extensions for C/C++ and Fortran, the compile-time
- flag @command{-fopenmp} must be specified. This enables the OpenMP directive
- @code{#pragma omp} in C/C++ and @code{!$omp} directives in free form,
- @code{c$omp}, @code{*$omp} and @code{!$omp} directives in fixed form,
- @code{!$} conditional compilation sentinels in free form and @code{c$},
- @code{*$} and @code{!$} sentinels in fixed form, for Fortran. The flag also
- arranges for automatic linking of the OpenMP runtime library
- (@ref{Runtime Library Routines}).
- A complete description of all OpenMP directives may be found in the
- @uref{https://www.openmp.org, OpenMP Application Program Interface} manuals.
- See also @ref{OpenMP Implementation Status}.
- @c ---------------------------------------------------------------------
- @c OpenMP Implementation Status
- @c ---------------------------------------------------------------------
- @node OpenMP Implementation Status
- @chapter OpenMP Implementation Status
- @menu
- * OpenMP 4.5:: Feature completion status to 4.5 specification
- * OpenMP 5.0:: Feature completion status to 5.0 specification
- * OpenMP 5.1:: Feature completion status to 5.1 specification
- @end menu
- The @code{_OPENMP} preprocessor macro and Fortran's @code{openmp_version}
- parameter, provided by @code{omp_lib.h} and the @code{omp_lib} module, have
- the value @code{201511} (i.e. OpenMP 4.5).
- @node OpenMP 4.5
- @section OpenMP 4.5
- The OpenMP 4.5 specification is fully supported.
- @node OpenMP 5.0
- @section OpenMP 5.0
- @unnumberedsubsec New features listed in Appendix B of the OpenMP specification
- @c This list is sorted as in OpenMP 5.1's B.3 not as in OpenMP 5.0's B.2
- @multitable @columnfractions .60 .10 .25
- @headitem Description @tab Status @tab Comments
- @item Array shaping @tab N @tab
- @item Array sections with non-unit strides in C and C++ @tab N @tab
- @item Iterators @tab Y @tab
- @item @code{metadirective} directive @tab N @tab
- @item @code{declare variant} directive
- @tab P @tab simd traits not handled correctly
- @item @emph{target-offload-var} ICV and @code{OMP_TARGET_OFFLOAD}
- env variable @tab Y @tab
- @item Nested-parallel changes to @emph{max-active-levels-var} ICV @tab Y @tab
- @item @code{requires} directive @tab P
- @tab Only fulfillable requirements are @code{atomic_default_mem_order}
- and @code{dynamic_allocators}
- @item @code{teams} construct outside an enclosing target region @tab Y @tab
- @item Non-rectangular loop nests @tab P @tab Only C/C++
- @item @code{!=} as relational-op in canonical loop form for C/C++ @tab Y @tab
- @item @code{nonmonotonic} as default loop schedule modifier for worksharing-loop
- constructs @tab Y @tab
- @item Collapse of associated loops that are imperfectly nested loops @tab N @tab
- @item Clauses @code{if}, @code{nontemporal} and @code{order(concurrent)} in
- @code{simd} construct @tab Y @tab
- @item @code{atomic} constructs in @code{simd} @tab Y @tab
- @item @code{loop} construct @tab Y @tab
- @item @code{order(concurrent)} clause @tab Y @tab
- @item @code{scan} directive and @code{in_scan} modifier for the
- @code{reduction} clause @tab Y @tab
- @item @code{in_reduction} clause on @code{task} constructs @tab Y @tab
- @item @code{in_reduction} clause on @code{target} constructs @tab P
- @tab @code{nowait} only stub
- @item @code{task_reduction} clause with @code{taskgroup} @tab Y @tab
- @item @code{task} modifier to @code{reduction} clause @tab Y @tab
- @item @code{affinity} clause to @code{task} construct @tab Y @tab Stub only
- @item @code{detach} clause to @code{task} construct @tab Y @tab
- @item @code{omp_fulfill_event} runtime routine @tab Y @tab
- @item @code{reduction} and @code{in_reduction} clauses on @code{taskloop}
- and @code{taskloop simd} constructs @tab Y @tab
- @item @code{taskloop} construct cancelable by @code{cancel} construct
- @tab Y @tab
- @item @code{mutexinoutset} @emph{dependence-type} for @code{depend} clause
- @tab Y @tab
- @item Predefined memory spaces, memory allocators, allocator traits
- @tab Y @tab Some are only stubs
- @item Memory management routines @tab Y @tab
- @item @code{allocate} directive @tab N @tab
- @item @code{allocate} clause @tab P @tab initial support
- @item @code{use_device_addr} clause on @code{target data} @tab Y @tab
- @item @code{ancestor} modifier on @code{device} clause
- @tab P @tab Reverse offload unsupported
- @item Implicit declare target directive @tab Y @tab
- @item Discontiguous array section with @code{target update} construct
- @tab N @tab
- @item C/C++'s lvalue expressions in @code{to}, @code{from}
- and @code{map} clauses @tab N @tab
- @item C/C++'s lvalue expressions in @code{depend} clauses @tab Y @tab
- @item Nested @code{declare target} directive @tab Y @tab
- @item Combined @code{master} constructs @tab Y @tab
- @item @code{depend} clause on @code{taskwait} @tab Y @tab
- @item Weak memory ordering clauses on @code{atomic} and @code{flush} constructs
- @tab Y @tab
- @item @code{hint} clause on the @code{atomic} construct @tab Y @tab Stub only
- @item @code{depobj} construct and depend objects @tab Y @tab
- @item Lock hints were renamed to synchronization hints @tab Y @tab
- @item @code{conditional} modifier to @code{lastprivate} clause @tab Y @tab
- @item Map-order clarifications @tab P @tab
- @item @code{close} @emph{map-type-modifier} @tab Y @tab
- @item Mapping C/C++ pointer variables and assigning the address of
- device memory mapped by an array section @tab P @tab
- @item Mapping of Fortran pointer and allocatable variables, including pointer
- and allocatable components of variables
- @tab P @tab Mapping of vars with allocatable components unsupported
- @item @code{defaultmap} extensions @tab Y @tab
- @item @code{declare mapper} directive @tab N @tab
- @item @code{omp_get_supported_active_levels} routine @tab Y @tab
- @item Runtime routines and environment variables to display runtime thread
- affinity information @tab Y @tab
- @item @code{omp_pause_resource} and @code{omp_pause_resource_all} runtime
- routines @tab Y @tab
- @item @code{omp_get_device_num} runtime routine @tab Y @tab
- @item OMPT interface @tab N @tab
- @item OMPD interface @tab N @tab
- @end multitable
- @unnumberedsubsec Other new OpenMP 5.0 features
- @multitable @columnfractions .60 .10 .25
- @headitem Description @tab Status @tab Comments
- @item Supporting C++'s range-based for loop @tab Y @tab
- @end multitable
- @node OpenMP 5.1
- @section OpenMP 5.1
- @unnumberedsubsec New features listed in Appendix B of the OpenMP specification
- @multitable @columnfractions .60 .10 .25
- @headitem Description @tab Status @tab Comments
- @item OpenMP directives as C++ attribute specifiers @tab Y @tab
- @item @code{omp_all_memory} reserved locator @tab N @tab
- @item @emph{target_device trait} in OpenMP Context @tab N @tab
- @item @code{target_device} selector set in context selectors @tab N @tab
- @item C/C++'s @code{declare variant} directive: elision support of
- preprocessed code @tab N @tab
- @item @code{declare variant}: new clauses @code{adjust_args} and
- @code{append_args} @tab N @tab
- @item @code{dispatch} construct @tab N @tab
- @item device-specific ICV settings with environment variables @tab N @tab
- @item assume directive @tab N @tab
- @item @code{nothing} directive @tab Y @tab
- @item @code{error} directive @tab Y @tab
- @item @code{masked} construct @tab Y @tab
- @item @code{scope} directive @tab Y @tab
- @item Loop transformation constructs @tab N @tab
- @item @code{strict} modifier in the @code{grainsize} and @code{num_tasks}
- clauses of the taskloop construct @tab Y @tab
- @item @code{align} clause/modifier in @code{allocate} directive/clause
- and @code{allocator} directive @tab P @tab C/C++ on clause only
- @item @code{thread_limit} clause to @code{target} construct @tab Y @tab
- @item @code{has_device_addr} clause to @code{target} construct @tab Y @tab
- @item iterators in @code{target update} motion clauses and @code{map}
- clauses @tab N @tab
- @item indirect calls to the device version of a procedure or function in
- @code{target} regions @tab N @tab
- @item @code{interop} directive @tab N @tab
- @item @code{omp_interop_t} object support in runtime routines @tab N @tab
- @item @code{nowait} clause in @code{taskwait} directive @tab N @tab
- @item Extensions to the @code{atomic} directive @tab Y @tab
- @item @code{seq_cst} clause on a @code{flush} construct @tab Y @tab
- @item @code{inoutset} argument to the @code{depend} clause @tab N @tab
- @item @code{private} and @code{firstprivate} argument to @code{default}
- clause in C and C++ @tab Y @tab
- @item @code{present} argument to @code{defaultmap} clause @tab N @tab
- @item @code{omp_set_num_teams}, @code{omp_set_teams_thread_limit},
- @code{omp_get_max_teams}, @code{omp_get_teams_thread_limit} runtime
- routines @tab Y @tab
- @item @code{omp_target_is_accessible} runtime routine @tab N @tab
- @item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
- runtime routines @tab N @tab
- @item @code{omp_get_mapped_ptr} runtime routine @tab N @tab
- @item @code{omp_calloc}, @code{omp_realloc}, @code{omp_aligned_alloc} and
- @code{omp_aligned_calloc} runtime routines @tab Y @tab
- @item @code{omp_alloctrait_key_t} enum: @code{omp_atv_serialized} added,
- @code{omp_atv_default} changed @tab Y @tab
- @item @code{omp_display_env} runtime routine @tab Y
- @tab Not inside @code{target} regions
- @item @code{ompt_scope_endpoint_t} enum: @code{ompt_scope_beginend} @tab N @tab
- @item @code{ompt_sync_region_t} enum additions @tab N @tab
- @item @code{ompt_state_t} enum: @code{ompt_state_wait_barrier_implementation}
- and @code{ompt_state_wait_barrier_teams} @tab N @tab
- @item @code{ompt_callback_target_data_op_emi_t},
- @code{ompt_callback_target_emi_t}, @code{ompt_callback_target_map_emi_t}
- and @code{ompt_callback_target_submit_emi_t} @tab N @tab
- @item @code{ompt_callback_error_t} type @tab N @tab
- @item @code{OMP_PLACES} syntax extensions @tab Y @tab
- @item @code{OMP_NUM_TEAMS} and @code{OMP_TEAMS_THREAD_LIMIT} environment
- variables @tab Y @tab
- @end multitable
- @unnumberedsubsec Other new OpenMP 5.1 features
- @multitable @columnfractions .60 .10 .25
- @headitem Description @tab Status @tab Comments
- @item Support of strictly structured blocks in Fortran @tab Y @tab
- @item Support of structured block sequences in C/C++ @tab Y @tab
- @item @code{unconstrained} and @code{reproducible} modifiers on @code{order}
- clause @tab Y @tab
- @end multitable
- @c ---------------------------------------------------------------------
- @c OpenMP Runtime Library Routines
- @c ---------------------------------------------------------------------
- @node Runtime Library Routines
- @chapter OpenMP Runtime Library Routines
- The runtime routines described here are defined by Section 3 of the OpenMP
- specification in version 4.5. The routines are structured in the following
- three parts:
- @menu
- Control threads, processors and the parallel environment. They have C
- linkage, and do not throw exceptions.
- * omp_get_active_level:: Number of active parallel regions
- * omp_get_ancestor_thread_num:: Ancestor thread ID
- * omp_get_cancellation:: Whether cancellation support is enabled
- * omp_get_default_device:: Get the default device for target regions
- * omp_get_device_num:: Get device that current thread is running on
- * omp_get_dynamic:: Dynamic teams setting
- * omp_get_initial_device:: Device number of host device
- * omp_get_level:: Number of parallel regions
- * omp_get_max_active_levels:: Current maximum number of active regions
- * omp_get_max_task_priority:: Maximum task priority value that can be set
- * omp_get_max_teams:: Maximum number of teams for teams region
- * omp_get_max_threads:: Maximum number of threads of parallel region
- * omp_get_nested:: Nested parallel regions
- * omp_get_num_devices:: Number of target devices
- * omp_get_num_procs:: Number of processors online
- * omp_get_num_teams:: Number of teams
- * omp_get_num_threads:: Size of the active team
- * omp_get_proc_bind:: Whether threads may be moved between CPUs
- * omp_get_schedule:: Obtain the runtime scheduling method
- * omp_get_supported_active_levels:: Maximum number of active regions supported
- * omp_get_team_num:: Get team number
- * omp_get_team_size:: Number of threads in a team
- * omp_get_teams_thread_limit:: Maximum number of threads imposed by teams
- * omp_get_thread_limit:: Maximum number of threads
- * omp_get_thread_num:: Current thread ID
- * omp_in_parallel:: Whether a parallel region is active
- * omp_in_final:: Whether in final or included task region
- * omp_is_initial_device:: Whether executing on the host device
- * omp_set_default_device:: Set the default device for target regions
- * omp_set_dynamic:: Enable/disable dynamic teams
- * omp_set_max_active_levels:: Limits the number of active parallel regions
- * omp_set_nested:: Enable/disable nested parallel regions
- * omp_set_num_teams:: Set upper teams limit for teams region
- * omp_set_num_threads:: Set upper team size limit
- * omp_set_schedule:: Set the runtime scheduling method
- * omp_set_teams_thread_limit:: Set upper thread limit for teams construct
- Initialize, set, test, unset and destroy simple and nested locks.
- * omp_init_lock:: Initialize simple lock
- * omp_set_lock:: Wait for and set simple lock
- * omp_test_lock:: Test and set simple lock if available
- * omp_unset_lock:: Unset simple lock
- * omp_destroy_lock:: Destroy simple lock
- * omp_init_nest_lock:: Initialize nested lock
- * omp_set_nest_lock:: Wait for and set nested lock
- * omp_test_nest_lock:: Test and set nested lock if available
- * omp_unset_nest_lock:: Unset nested lock
- * omp_destroy_nest_lock:: Destroy nested lock
- Portable, thread-based, wall clock timer.
- * omp_get_wtick:: Get timer precision.
- * omp_get_wtime:: Elapsed wall clock time.
- Support for event objects.
- * omp_fulfill_event:: Fulfill and destroy an OpenMP event.
- @end menu
- @node omp_get_active_level
- @section @code{omp_get_active_level} -- Number of active parallel regions
- @table @asis
- @item @emph{Description}:
- This function returns the nesting level for the active parallel blocks,
- which enclose the call.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_active_level(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_active_level()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_level}, @ref{omp_get_max_active_levels}, @ref{omp_set_max_active_levels}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.20.
- @end table
- @node omp_get_ancestor_thread_num
- @section @code{omp_get_ancestor_thread_num} -- Ancestor thread ID
- @table @asis
- @item @emph{Description}:
- This function returns the thread identification number for the given
- nesting level of the current thread. For values of @var{level} outside
- zero to @code{omp_get_level}, -1 is returned; if @var{level} is
- @code{omp_get_level} the result is identical to @code{omp_get_thread_num}.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_ancestor_thread_num(int level);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_ancestor_thread_num(level)}
- @item @tab @code{integer level}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_level}, @ref{omp_get_thread_num}, @ref{omp_get_team_size}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.18.
- @end table
- @node omp_get_cancellation
- @section @code{omp_get_cancellation} -- Whether cancellation support is enabled
- @table @asis
- @item @emph{Description}:
- This function returns @code{true} if cancellation is activated, @code{false}
- otherwise. Here, @code{true} and @code{false} represent their language-specific
- counterparts. Unless @env{OMP_CANCELLATION} is set true, cancellations are
- deactivated.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_cancellation(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_get_cancellation()}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_CANCELLATION}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.9.
- @end table
- @node omp_get_default_device
- @section @code{omp_get_default_device} -- Get the default device for target regions
- @table @asis
- @item @emph{Description}:
- Get the default device for target regions without device clause.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_default_device(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_default_device()}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_DEFAULT_DEVICE}, @ref{omp_set_default_device}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.30.
- @end table
- @node omp_get_device_num
- @section @code{omp_get_device_num} -- Return device number of current device
- @table @asis
- @item @emph{Description}:
- This function returns a device number that represents the device that the
- current thread is executing on. For OpenMP 5.0, this must be equal to the
- value returned by the @code{omp_get_initial_device} function when called
- from the host.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_device_num(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_device_num()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_initial_device}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.2.37.
- @end table
- @node omp_get_dynamic
- @section @code{omp_get_dynamic} -- Dynamic teams setting
- @table @asis
- @item @emph{Description}:
- This function returns @code{true} if enabled, @code{false} otherwise.
- Here, @code{true} and @code{false} represent their language-specific
- counterparts.
- The dynamic team setting may be initialized at startup by the
- @env{OMP_DYNAMIC} environment variable or at runtime using
- @code{omp_set_dynamic}. If undefined, dynamic adjustment is
- disabled by default.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_dynamic(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_get_dynamic()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_dynamic}, @ref{OMP_DYNAMIC}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.8.
- @end table
- @node omp_get_initial_device
- @section @code{omp_get_initial_device} -- Return device number of initial device
- @table @asis
- @item @emph{Description}:
- This function returns a device number that represents the host device.
- For OpenMP 5.1, this must be equal to the value returned by the
- @code{omp_get_num_devices} function.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_initial_device(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_initial_device()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_num_devices}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.35.
- @end table
- @node omp_get_level
- @section @code{omp_get_level} -- Obtain the current nesting level
- @table @asis
- @item @emph{Description}:
- This function returns the nesting level for the parallel blocks,
- which enclose the call.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_level(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_level()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_active_level}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.17.
- @end table
- @node omp_get_max_active_levels
- @section @code{omp_get_max_active_levels} -- Current maximum number of active regions
- @table @asis
- @item @emph{Description}:
- This function obtains the maximum allowed number of nested, active parallel regions.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_max_active_levels(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_max_active_levels()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_max_active_levels}, @ref{omp_get_active_level}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.16.
- @end table
- @node omp_get_max_task_priority
- @section @code{omp_get_max_task_priority} -- Maximum priority value that can be set for tasks
- @table @asis
- @item @emph{Description}:
- This function obtains the maximum allowed priority number for tasks.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_max_task_priority(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_max_task_priority()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.36.
- @end table
- @node omp_get_max_teams
- @section @code{omp_get_max_teams} -- Maximum number of teams of teams region
- @table @asis
- @item @emph{Description}:
- Return the maximum number of teams used for the teams region
- that does not use the clause @code{num_teams}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_max_teams(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_max_teams()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_num_teams}, @ref{omp_get_num_teams}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.4.4.
- @end table
- @node omp_get_max_threads
- @section @code{omp_get_max_threads} -- Maximum number of threads of parallel region
- @table @asis
- @item @emph{Description}:
- Return the maximum number of threads used for the current parallel region
- that does not use the clause @code{num_threads}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_max_threads(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_max_threads()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_num_threads}, @ref{omp_set_dynamic}, @ref{omp_get_thread_limit}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.3.
- @end table
- @node omp_get_nested
- @section @code{omp_get_nested} -- Nested parallel regions
- @table @asis
- @item @emph{Description}:
- This function returns @code{true} if nested parallel regions are
- enabled, @code{false} otherwise. Here, @code{true} and @code{false}
- represent their language-specific counterparts.
- The state of nested parallel regions at startup depends on several
- environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined
- and is set to greater than one, then nested parallel regions will be
- enabled. If not defined, then the value of the @env{OMP_NESTED}
- environment variable will be followed if defined. If neither are
- defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND}
- are defined with a list of more than one value, then nested parallel
- regions are enabled. If none of these are defined, then nested parallel
- regions are disabled by default.
- Nested parallel regions can be enabled or disabled at runtime using
- @code{omp_set_nested}, or by setting the maximum number of nested
- regions with @code{omp_set_max_active_levels} to one to disable, or
- above one to enable.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_nested(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_get_nested()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_max_active_levels}, @ref{omp_set_nested},
- @ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11.
- @end table
- @node omp_get_num_devices
- @section @code{omp_get_num_devices} -- Number of target devices
- @table @asis
- @item @emph{Description}:
- Returns the number of target devices.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_num_devices(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_num_devices()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.31.
- @end table
- @node omp_get_num_procs
- @section @code{omp_get_num_procs} -- Number of processors online
- @table @asis
- @item @emph{Description}:
- Returns the number of processors online on that device.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_num_procs(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_num_procs()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.5.
- @end table
- @node omp_get_num_teams
- @section @code{omp_get_num_teams} -- Number of teams
- @table @asis
- @item @emph{Description}:
- Returns the number of teams in the current teams region.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_num_teams(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_num_teams()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.32.
- @end table
- @node omp_get_num_threads
- @section @code{omp_get_num_threads} -- Size of the active team
- @table @asis
- @item @emph{Description}:
- Returns the number of threads in the current team. In a sequential section of
- the program @code{omp_get_num_threads} returns 1.
- The default team size may be initialized at startup by the
- @env{OMP_NUM_THREADS} environment variable. At runtime, the size
- of the current team may be set either by the @code{NUM_THREADS}
- clause or by @code{omp_set_num_threads}. If none of the above were
- used to define a specific value and @env{OMP_DYNAMIC} is disabled,
- one thread per CPU online is used.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_num_threads(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_num_threads()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_max_threads}, @ref{omp_set_num_threads}, @ref{OMP_NUM_THREADS}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.2.
- @end table
- @node omp_get_proc_bind
- @section @code{omp_get_proc_bind} -- Whether threads may be moved between CPUs
- @table @asis
- @item @emph{Description}:
- This function returns the currently active thread affinity policy, which is
- set via @env{OMP_PROC_BIND}. Possible values are @code{omp_proc_bind_false},
- @code{omp_proc_bind_true}, @code{omp_proc_bind_primary},
- @code{omp_proc_bind_master}, @code{omp_proc_bind_close} and @code{omp_proc_bind_spread},
- where @code{omp_proc_bind_master} is an alias for @code{omp_proc_bind_primary}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{omp_proc_bind_t omp_get_proc_bind(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer(kind=omp_proc_bind_kind) function omp_get_proc_bind()}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_PROC_BIND}, @ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.22.
- @end table
- @node omp_get_schedule
- @section @code{omp_get_schedule} -- Obtain the runtime scheduling method
- @table @asis
- @item @emph{Description}:
- Obtain the runtime scheduling method. The @var{kind} argument will be
- set to the value @code{omp_sched_static}, @code{omp_sched_dynamic},
- @code{omp_sched_guided} or @code{omp_sched_auto}. The second argument,
- @var{chunk_size}, is set to the chunk size.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_get_schedule(omp_sched_t *kind, int *chunk_size);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_get_schedule(kind, chunk_size)}
- @item @tab @code{integer(kind=omp_sched_kind) kind}
- @item @tab @code{integer chunk_size}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_schedule}, @ref{OMP_SCHEDULE}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.13.
- @end table
- @node omp_get_supported_active_levels
- @section @code{omp_get_supported_active_levels} -- Maximum number of active regions supported
- @table @asis
- @item @emph{Description}:
- This function returns the maximum number of nested, active parallel regions
- supported by this implementation.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_supported_active_levels(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_supported_active_levels()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_max_active_levels}, @ref{omp_set_max_active_levels}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.2.15.
- @end table
- @node omp_get_team_num
- @section @code{omp_get_team_num} -- Get team number
- @table @asis
- @item @emph{Description}:
- Returns the team number of the calling thread.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_team_num(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_team_num()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.33.
- @end table
- @node omp_get_team_size
- @section @code{omp_get_team_size} -- Number of threads in a team
- @table @asis
- @item @emph{Description}:
- This function returns the number of threads in a thread team to which
- either the current thread or its ancestor belongs. For values of @var{level}
- outside zero to @code{omp_get_level}, -1 is returned; if @var{level} is zero,
- 1 is returned, and for @code{omp_get_level}, the result is identical
- to @code{omp_get_num_threads}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_team_size(int level);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_team_size(level)}
- @item @tab @code{integer level}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_num_threads}, @ref{omp_get_level}, @ref{omp_get_ancestor_thread_num}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.19.
- @end table
- @node omp_get_teams_thread_limit
- @section @code{omp_get_teams_thread_limit} -- Maximum number of threads imposed by teams
- @table @asis
- @item @emph{Description}:
- Return the maximum number of threads that will be able to participate in
- each team created by a teams construct.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_teams_thread_limit(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_teams_thread_limit()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_teams_thread_limit}, @ref{OMP_TEAMS_THREAD_LIMIT}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.4.6.
- @end table
- @node omp_get_thread_limit
- @section @code{omp_get_thread_limit} -- Maximum number of threads
- @table @asis
- @item @emph{Description}:
- Return the maximum number of threads of the program.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_thread_limit(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_thread_limit()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_max_threads}, @ref{OMP_THREAD_LIMIT}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.14.
- @end table
- @node omp_get_thread_num
- @section @code{omp_get_thread_num} -- Current thread ID
- @table @asis
- @item @emph{Description}:
- Returns a unique thread identification number within the current team.
- In sequential parts of the program, @code{omp_get_thread_num}
- always returns 0. In parallel regions the return value varies
- from 0 to @code{omp_get_num_threads}-1 inclusive. The return
- value of the primary thread of a team is always 0.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_get_thread_num(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_get_thread_num()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_num_threads}, @ref{omp_get_ancestor_thread_num}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.4.
- @end table
- @node omp_in_parallel
- @section @code{omp_in_parallel} -- Whether a parallel region is active
- @table @asis
- @item @emph{Description}:
- This function returns @code{true} if currently running in parallel,
- @code{false} otherwise. Here, @code{true} and @code{false} represent
- their language-specific counterparts.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_in_parallel(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_in_parallel()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.6.
- @end table
- @node omp_in_final
- @section @code{omp_in_final} -- Whether in final or included task region
- @table @asis
- @item @emph{Description}:
- This function returns @code{true} if currently running in a final
- or included task region, @code{false} otherwise. Here, @code{true}
- and @code{false} represent their language-specific counterparts.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_in_final(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_in_final()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.21.
- @end table
- @node omp_is_initial_device
- @section @code{omp_is_initial_device} -- Whether executing on the host device
- @table @asis
- @item @emph{Description}:
- This function returns @code{true} if currently running on the host device,
- @code{false} otherwise. Here, @code{true} and @code{false} represent
- their language-specific counterparts.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_is_initial_device(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_is_initial_device()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.34.
- @end table
- @node omp_set_default_device
- @section @code{omp_set_default_device} -- Set the default device for target regions
- @table @asis
- @item @emph{Description}:
- Set the default device for target regions without device clause. The argument
- shall be a nonnegative device number.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_default_device(int device_num);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_default_device(device_num)}
- @item @tab @code{integer device_num}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_DEFAULT_DEVICE}, @ref{omp_get_default_device}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.29.
- @end table
- @node omp_set_dynamic
- @section @code{omp_set_dynamic} -- Enable/disable dynamic teams
- @table @asis
- @item @emph{Description}:
- Enable or disable the dynamic adjustment of the number of threads
- within a team. The function takes the language-specific equivalent
- of @code{true} and @code{false}, where @code{true} enables dynamic
- adjustment of team sizes and @code{false} disables it.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_dynamic(int dynamic_threads);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_dynamic(dynamic_threads)}
- @item @tab @code{logical, intent(in) :: dynamic_threads}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_DYNAMIC}, @ref{omp_get_dynamic}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.7.
- @end table
- @node omp_set_max_active_levels
- @section @code{omp_set_max_active_levels} -- Limits the number of active parallel regions
- @table @asis
- @item @emph{Description}:
- This function limits the maximum allowed number of nested, active
- parallel regions. @var{max_levels} must be less than or equal to
- the value returned by @code{omp_get_supported_active_levels}.
- @item @emph{C/C++}
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_max_active_levels(int max_levels);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_max_active_levels(max_levels)}
- @item @tab @code{integer max_levels}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_max_active_levels}, @ref{omp_get_active_level},
- @ref{omp_get_supported_active_levels}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.15.
- @end table
- @node omp_set_nested
- @section @code{omp_set_nested} -- Enable/disable nested parallel regions
- @table @asis
- @item @emph{Description}:
- Enable or disable nested parallel regions, i.e., whether team members
- are allowed to create new teams. The function takes the language-specific
- equivalent of @code{true} and @code{false}, where @code{true} enables
- nested parallel regions and @code{false} disables them.
- Enabling nested parallel regions will also set the maximum number of
- active nested regions to the maximum supported. Disabling nested parallel
- regions will set the maximum number of active nested regions to one.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_nested(nested)}
- @item @tab @code{logical, intent(in) :: nested}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_nested}, @ref{omp_set_max_active_levels},
- @ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10.
- @end table
- @node omp_set_num_teams
- @section @code{omp_set_num_teams} -- Set upper teams limit for teams construct
- @table @asis
- @item @emph{Description}:
- Specifies the upper bound for number of teams created by the teams construct
- which does not specify a @code{num_teams} clause. The
- argument of @code{omp_set_num_teams} shall be a positive integer.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_num_teams(int num_teams);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_num_teams(num_teams)}
- @item @tab @code{integer, intent(in) :: num_teams}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_NUM_TEAMS}, @ref{omp_get_num_teams}, @ref{omp_get_max_teams}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.4.3.
- @end table
- @node omp_set_num_threads
- @section @code{omp_set_num_threads} -- Set upper team size limit
- @table @asis
- @item @emph{Description}:
- Specifies the number of threads used by default in subsequent parallel
- regions, if those do not specify a @code{num_threads} clause. The
- argument of @code{omp_set_num_threads} shall be a positive integer.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_num_threads(int num_threads);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_num_threads(num_threads)}
- @item @tab @code{integer, intent(in) :: num_threads}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_NUM_THREADS}, @ref{omp_get_num_threads}, @ref{omp_get_max_threads}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.1.
- @end table
- @node omp_set_schedule
- @section @code{omp_set_schedule} -- Set the runtime scheduling method
- @table @asis
- @item @emph{Description}:
- Sets the runtime scheduling method. The @var{kind} argument can have the
- value @code{omp_sched_static}, @code{omp_sched_dynamic},
- @code{omp_sched_guided} or @code{omp_sched_auto}. Except for
- @code{omp_sched_auto}, the chunk size is set to the value of
- @var{chunk_size} if positive, or to the default value if zero or negative.
- For @code{omp_sched_auto} the @var{chunk_size} argument is ignored.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_schedule(omp_sched_t kind, int chunk_size);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_schedule(kind, chunk_size)}
- @item @tab @code{integer(kind=omp_sched_kind) kind}
- @item @tab @code{integer chunk_size}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_schedule},
- @ref{OMP_SCHEDULE}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.12.
- @end table
- @node omp_set_teams_thread_limit
- @section @code{omp_set_teams_thread_limit} -- Set upper thread limit for teams construct
- @table @asis
- @item @emph{Description}:
- Specifies the upper bound for number of threads that will be available
- for each team created by the teams construct which does not specify a
- @code{thread_limit} clause. The argument of
- @code{omp_set_teams_thread_limit} shall be a positive integer.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_teams_thread_limit(int thread_limit);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_teams_thread_limit(thread_limit)}
- @item @tab @code{integer, intent(in) :: thread_limit}
- @end multitable
- @item @emph{See also}:
- @ref{OMP_TEAMS_THREAD_LIMIT}, @ref{omp_get_teams_thread_limit}, @ref{omp_get_thread_limit}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.4.5.
- @end table
- @node omp_init_lock
- @section @code{omp_init_lock} -- Initialize simple lock
- @table @asis
- @item @emph{Description}:
- Initialize a simple lock. After initialization, the lock is in
- an unlocked state.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_init_lock(omp_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_init_lock(svar)}
- @item @tab @code{integer(omp_lock_kind), intent(out) :: svar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_destroy_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.1.
- @end table
- @node omp_set_lock
- @section @code{omp_set_lock} -- Wait for and set simple lock
- @table @asis
- @item @emph{Description}:
- Before setting a simple lock, the lock variable must be initialized by
- @code{omp_init_lock}. The calling thread is blocked until the lock
- is available. If the lock is already held by the current thread,
- a deadlock occurs.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_lock(omp_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_lock(svar)}
- @item @tab @code{integer(omp_lock_kind), intent(inout) :: svar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_init_lock}, @ref{omp_test_lock}, @ref{omp_unset_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.4.
- @end table
- @node omp_test_lock
- @section @code{omp_test_lock} -- Test and set simple lock if available
- @table @asis
- @item @emph{Description}:
- Before setting a simple lock, the lock variable must be initialized by
- @code{omp_init_lock}. Contrary to @code{omp_set_lock}, @code{omp_test_lock}
- does not block if the lock is not available. This function returns
- @code{true} upon success, @code{false} otherwise. Here, @code{true} and
- @code{false} represent their language-specific counterparts.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_test_lock(omp_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{logical function omp_test_lock(svar)}
- @item @tab @code{integer(omp_lock_kind), intent(inout) :: svar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_init_lock}, @ref{omp_set_lock}, @ref{omp_unset_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.6.
- @end table
- @node omp_unset_lock
- @section @code{omp_unset_lock} -- Unset simple lock
- @table @asis
- @item @emph{Description}:
- A simple lock about to be unset must have been locked by @code{omp_set_lock}
- or @code{omp_test_lock} before. In addition, the lock must be held by the
- thread calling @code{omp_unset_lock}. Then, the lock becomes unlocked. If one
- or more threads attempted to set the lock before, one of them is chosen to,
- again, set the lock to itself.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_unset_lock(omp_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_unset_lock(svar)}
- @item @tab @code{integer(omp_lock_kind), intent(inout) :: svar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_lock}, @ref{omp_test_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.5.
- @end table
- @node omp_destroy_lock
- @section @code{omp_destroy_lock} -- Destroy simple lock
- @table @asis
- @item @emph{Description}:
- Destroy a simple lock. In order to be destroyed, a simple lock must be
- in the unlocked state.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_destroy_lock(omp_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_destroy_lock(svar)}
- @item @tab @code{integer(omp_lock_kind), intent(inout) :: svar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_init_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.3.
- @end table
- @node omp_init_nest_lock
- @section @code{omp_init_nest_lock} -- Initialize nested lock
- @table @asis
- @item @emph{Description}:
- Initialize a nested lock. After initialization, the lock is in
- an unlocked state and the nesting count is set to zero.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_init_nest_lock(omp_nest_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_init_nest_lock(nvar)}
- @item @tab @code{integer(omp_nest_lock_kind), intent(out) :: nvar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_destroy_nest_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.1.
- @end table
- @node omp_set_nest_lock
- @section @code{omp_set_nest_lock} -- Wait for and set nested lock
- @table @asis
- @item @emph{Description}:
- Before setting a nested lock, the lock variable must be initialized by
- @code{omp_init_nest_lock}. The calling thread is blocked until the lock
- is available. If the lock is already held by the current thread, the
- nesting count for the lock is incremented.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_set_nest_lock(omp_nest_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_set_nest_lock(nvar)}
- @item @tab @code{integer(omp_nest_lock_kind), intent(inout) :: nvar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_init_nest_lock}, @ref{omp_unset_nest_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.4.
- @end table
- @node omp_test_nest_lock
- @section @code{omp_test_nest_lock} -- Test and set nested lock if available
- @table @asis
- @item @emph{Description}:
- Before setting a nested lock, the lock variable must be initialized by
- @code{omp_init_nest_lock}. Contrary to @code{omp_set_nest_lock},
- @code{omp_test_nest_lock} does not block if the lock is not available.
- If the lock is already held by the current thread, the new nesting count
- is returned. Otherwise, the return value equals zero.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int omp_test_nest_lock(omp_nest_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function omp_test_nest_lock(nvar)}
- @item @tab @code{integer(omp_nest_lock_kind), intent(inout) :: nvar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_init_nest_lock}, @ref{omp_set_nest_lock}, @ref{omp_unset_nest_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.6.
- @end table
- @node omp_unset_nest_lock
- @section @code{omp_unset_nest_lock} -- Unset nested lock
- @table @asis
- @item @emph{Description}:
- A nested lock about to be unset must have been locked by @code{omp_set_nest_lock}
- or @code{omp_test_nest_lock} before. In addition, the lock must be held by the
- thread calling @code{omp_unset_nest_lock}. If the nesting count drops to zero, the
- lock becomes unlocked. If one or more threads attempted to set the lock before,
- one of them is chosen to, again, set the lock to itself.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_unset_nest_lock(omp_nest_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_unset_nest_lock(nvar)}
- @item @tab @code{integer(omp_nest_lock_kind), intent(inout) :: nvar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_set_nest_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.5.
- @end table
- @node omp_destroy_nest_lock
- @section @code{omp_destroy_nest_lock} -- Destroy nested lock
- @table @asis
- @item @emph{Description}:
- Destroy a nested lock. In order to be destroyed, a nested lock must be
- in the unlocked state and its nesting count must equal zero.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_destroy_nest_lock(omp_nest_lock_t *lock);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_destroy_nest_lock(nvar)}
- @item @tab @code{integer(omp_nest_lock_kind), intent(inout) :: nvar}
- @end multitable
- @item @emph{See also}:
- @ref{omp_init_nest_lock}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.3.3.
- @end table
- @node omp_get_wtick
- @section @code{omp_get_wtick} -- Get timer precision
- @table @asis
- @item @emph{Description}:
- Gets the timer precision, i.e., the number of seconds between two
- successive clock ticks.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{double omp_get_wtick(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{double precision function omp_get_wtick()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_wtime}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.4.2.
- @end table
- @node omp_get_wtime
- @section @code{omp_get_wtime} -- Elapsed wall clock time
- @table @asis
- @item @emph{Description}:
- Elapsed wall clock time in seconds. The time is measured per thread, no
- guarantee can be made that two distinct threads measure the same time.
- Time is measured from some "time in the past", which is an arbitrary time
- guaranteed not to change during the execution of the program.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{double omp_get_wtime(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{double precision function omp_get_wtime()}
- @end multitable
- @item @emph{See also}:
- @ref{omp_get_wtick}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.4.1.
- @end table
- @node omp_fulfill_event
- @section @code{omp_fulfill_event} -- Fulfill and destroy an OpenMP event
- @table @asis
- @item @emph{Description}:
- Fulfill the event associated with the event handle argument. Currently, it
- is only used to fulfill events generated by detach clauses on task
- constructs - the effect of fulfilling the event is to allow the task to
- complete.
- The result of calling @code{omp_fulfill_event} with an event handle other
- than that generated by a detach clause is undefined. Calling it with an
- event handle that has already been fulfilled is also undefined.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void omp_fulfill_event(omp_event_handle_t event);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine omp_fulfill_event(event)}
- @item @tab @code{integer (kind=omp_event_handle_kind) :: event}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.5.1.
- @end table
- @c ---------------------------------------------------------------------
- @c OpenMP Environment Variables
- @c ---------------------------------------------------------------------
- @node Environment Variables
- @chapter OpenMP Environment Variables
- The environment variables beginning with @env{OMP_} are defined by
- section 4 of the OpenMP specification in version 4.5, while those
- beginning with @env{GOMP_} are GNU extensions.
- @menu
- * OMP_CANCELLATION:: Set whether cancellation is activated
- * OMP_DISPLAY_ENV:: Show OpenMP version and environment variables
- * OMP_DEFAULT_DEVICE:: Set the device used in target regions
- * OMP_DYNAMIC:: Dynamic adjustment of threads
- * OMP_MAX_ACTIVE_LEVELS:: Set the maximum number of nested parallel regions
- * OMP_MAX_TASK_PRIORITY:: Set the maximum task priority value
- * OMP_NESTED:: Nested parallel regions
- * OMP_NUM_TEAMS:: Specifies the number of teams to use by teams region
- * OMP_NUM_THREADS:: Specifies the number of threads to use
- * OMP_PROC_BIND:: Whether threads may be moved between CPUs
- * OMP_PLACES:: Specifies on which CPUs the threads should be placed
- * OMP_STACKSIZE:: Set default thread stack size
- * OMP_SCHEDULE:: How threads are scheduled
- * OMP_TARGET_OFFLOAD:: Controls offloading behaviour
- * OMP_TEAMS_THREAD_LIMIT:: Set the maximum number of threads imposed by teams
- * OMP_THREAD_LIMIT:: Set the maximum number of threads
- * OMP_WAIT_POLICY:: How waiting threads are handled
- * GOMP_CPU_AFFINITY:: Bind threads to specific CPUs
- * GOMP_DEBUG:: Enable debugging output
- * GOMP_STACKSIZE:: Set default thread stack size
- * GOMP_SPINCOUNT:: Set the busy-wait spin count
- * GOMP_RTEMS_THREAD_POOLS:: Set the RTEMS specific thread pools
- @end menu
- @node OMP_CANCELLATION
- @section @env{OMP_CANCELLATION} -- Set whether cancellation is activated
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- If set to @code{TRUE}, the cancellation is activated. If set to @code{FALSE} or
- if unset, cancellation is disabled and the @code{cancel} construct is ignored.
- @item @emph{See also}:
- @ref{omp_get_cancellation}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.11
- @end table
- @node OMP_DISPLAY_ENV
- @section @env{OMP_DISPLAY_ENV} -- Show OpenMP version and environment variables
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- If set to @code{TRUE}, the OpenMP version number and the values
- associated with the OpenMP environment variables are printed to @code{stderr}.
- If set to @code{VERBOSE}, it additionally shows the value of the environment
- variables which are GNU extensions. If undefined or set to @code{FALSE},
- this information will not be shown.
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.12
- @end table
- @node OMP_DEFAULT_DEVICE
- @section @env{OMP_DEFAULT_DEVICE} -- Set the device used in target regions
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Set to choose the device which is used in a @code{target} region, unless the
- value is overridden by @code{omp_set_default_device} or by a @code{device}
- clause. The value shall be the nonnegative device number. If no device with
- the given device number exists, the code is executed on the host. If unset,
- device number 0 will be used.
- @item @emph{See also}:
- @ref{omp_get_default_device}, @ref{omp_set_default_device}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.13
- @end table
- @node OMP_DYNAMIC
- @section @env{OMP_DYNAMIC} -- Dynamic adjustment of threads
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Enable or disable the dynamic adjustment of the number of threads
- within a team. The value of this environment variable shall be
- @code{TRUE} or @code{FALSE}. If undefined, dynamic adjustment is
- disabled by default.
- @item @emph{See also}:
- @ref{omp_set_dynamic}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.3
- @end table
- @node OMP_MAX_ACTIVE_LEVELS
- @section @env{OMP_MAX_ACTIVE_LEVELS} -- Set the maximum number of nested parallel regions
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies the initial value for the maximum number of nested parallel
- regions. The value of this variable shall be a positive integer.
- If undefined, then if @env{OMP_NESTED} is defined and set to true, or
- if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to
- a list with more than one item, the maximum number of nested parallel
- regions will be initialized to the largest number supported, otherwise
- it will be set to one.
- @item @emph{See also}:
- @ref{omp_set_max_active_levels}, @ref{OMP_NESTED}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9
- @end table
- @node OMP_MAX_TASK_PRIORITY
- @section @env{OMP_MAX_TASK_PRIORITY} -- Set the maximum priority number that can be set for a task
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies the initial value for the maximum priority value that can be
- set for a task. The value of this variable shall be a non-negative
- integer, and zero is allowed. If undefined, the default priority is
- 0.
- @item @emph{See also}:
- @ref{omp_get_max_task_priority}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.14
- @end table
- @node OMP_NESTED
- @section @env{OMP_NESTED} -- Nested parallel regions
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- Enable or disable nested parallel regions, i.e., whether team members
- are allowed to create new teams. The value of this environment variable
- shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the maximum
- number of active nested regions will by default be set to the
- maximum supported, otherwise it will be set to one. If
- @env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this
- setting. If both are undefined, nested parallel regions are enabled if
- @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined to a list with
- more than one item, otherwise they are disabled by default.
- @item @emph{See also}:
- @ref{omp_set_max_active_levels}, @ref{omp_set_nested}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6
- @end table
- @node OMP_NUM_TEAMS
- @section @env{OMP_NUM_TEAMS} -- Specifies the number of teams to use by teams region
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies the upper bound for number of teams to use in teams regions
- without explicit @code{num_teams} clause. The value of this variable shall
- be a positive integer. If undefined it defaults to 0 which means
- implementation defined upper bound.
- @item @emph{See also}:
- @ref{omp_set_num_teams}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 6.23
- @end table
- @node OMP_NUM_THREADS
- @section @env{OMP_NUM_THREADS} -- Specifies the number of threads to use
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- Specifies the default number of threads to use in parallel regions. The
- value of this variable shall be a comma-separated list of positive integers;
- the value specifies the number of threads to use for the corresponding nested
- level. Specifying more than one item in the list will automatically enable
- nesting by default. If undefined one thread per CPU is used.
- @item @emph{See also}:
- @ref{omp_set_num_threads}, @ref{OMP_NESTED}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2
- @end table
- @node OMP_PROC_BIND
- @section @env{OMP_PROC_BIND} -- Whether threads may be moved between CPUs
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies whether threads may be moved between processors. If set to
- @code{TRUE}, OpenMP threads should not be moved; if set to @code{FALSE}
- they may be moved. Alternatively, a comma separated list with the
- values @code{PRIMARY}, @code{MASTER}, @code{CLOSE} and @code{SPREAD} can
- be used to specify the thread affinity policy for the corresponding nesting
- level. With @code{PRIMARY} and @code{MASTER} the worker threads are in the
- same place partition as the primary thread. With @code{CLOSE} those are
- kept close to the primary thread in contiguous place partitions. And
- with @code{SPREAD} a sparse distribution
- across the place partitions is used. Specifying more than one item in the
- list will automatically enable nesting by default.
- When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when
- @env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise.
- @item @emph{See also}:
- @ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY},
- @ref{OMP_NESTED}, @ref{OMP_PLACES}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4
- @end table
- @node OMP_PLACES
- @section @env{OMP_PLACES} -- Specifies on which CPUs the threads should be placed
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- The thread placement can be either specified using an abstract name or by an
- explicit list of the places. The abstract names @code{threads}, @code{cores},
- @code{sockets}, @code{ll_caches} and @code{numa_domains} can be optionally
- followed by a positive number in parentheses, which denotes how many places
- shall be created. With @code{threads} each place corresponds to a single
- hardware thread; @code{cores} to a single core with the corresponding number of
- hardware threads; with @code{sockets} the place corresponds to a single
- socket; with @code{ll_caches} to a set of cores that shares the last level
- cache on the device; and @code{numa_domains} to a set of cores for which their
- closest memory on the device is the same memory and at a similar distance from
- the cores. The resulting placement can be shown by setting the
- @env{OMP_DISPLAY_ENV} environment variable.
- Alternatively, the placement can be specified explicitly as comma-separated
- list of places. A place is specified by set of nonnegative numbers in curly
- braces, denoting the hardware threads. The curly braces can be omitted
- when only a single number has been specified. The hardware threads
- belonging to a place can either be specified as comma-separated list of
- nonnegative thread numbers or using an interval. Multiple places can also be
- either specified by a comma-separated list of places or by an interval. To
- specify an interval, a colon followed by the count is placed after
- the hardware thread number or the place. Optionally, the length can be
- followed by a colon and the stride number -- otherwise a unit stride is
- assumed. Placing an exclamation mark (@code{!}) directly before a curly
- brace or numbers inside the curly braces (excluding intervals) will
- exclude those hardware threads.
- For instance, the following specifies the same places list:
- @code{"@{0,1,2@}, @{3,4,5@}, @{6,7,8@}, @{9,10,11@}"};
- @code{"@{0:3@}, @{3:3@}, @{6:3@}, @{9:3@}"}; and @code{"@{0:3@}:4:3"}.
- If @env{OMP_PLACES} and @env{GOMP_CPU_AFFINITY} are unset and
- @env{OMP_PROC_BIND} is either unset or @code{false}, threads may be moved
- between CPUs following no placement policy.
- @item @emph{See also}:
- @ref{OMP_PROC_BIND}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind},
- @ref{OMP_DISPLAY_ENV}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.5
- @end table
- @node OMP_STACKSIZE
- @section @env{OMP_STACKSIZE} -- Set default thread stack size
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Set the default thread stack size in kilobytes, unless the number
- is suffixed by @code{B}, @code{K}, @code{M} or @code{G}, in which
- case the size is, respectively, in bytes, kilobytes, megabytes
- or gigabytes. This is different from @code{pthread_attr_setstacksize}
- which gets the number of bytes as an argument. If the stack size cannot
- be set due to system constraints, an error is reported and the initial
- stack size is left unchanged. If undefined, the stack size is system
- dependent.
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.7
- @end table
- @node OMP_SCHEDULE
- @section @env{OMP_SCHEDULE} -- How threads are scheduled
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- Specifies the @code{schedule type} and @code{chunk size}.
- The value of the variable shall have the form: @code{type[,chunk]} where
- @code{type} is one of @code{static}, @code{dynamic}, @code{guided} or @code{auto}.
- The optional @code{chunk} size shall be a positive integer. If undefined,
- dynamic scheduling and a chunk size of 1 is used.
- @item @emph{See also}:
- @ref{omp_set_schedule}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Sections 2.7.1.1 and 4.1
- @end table
- @node OMP_TARGET_OFFLOAD
- @section @env{OMP_TARGET_OFFLOAD} -- Controls offloading behaviour
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- Specifies the behaviour with regard to offloading code to a device. This
- variable can be set to one of three values - @code{MANDATORY}, @code{DISABLED}
- or @code{DEFAULT}.
- If set to @code{MANDATORY}, the program will terminate with an error if
- the offload device is not present or is not supported. If set to
- @code{DISABLED}, then offloading is disabled and all code will run on the
- host. If set to @code{DEFAULT}, the program will try offloading to the
- device first, then fall back to running code on the host if it cannot.
- If undefined, then the program will behave as if @code{DEFAULT} was set.
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 6.17
- @end table
- @node OMP_TEAMS_THREAD_LIMIT
- @section @env{OMP_TEAMS_THREAD_LIMIT} -- Set the maximum number of threads imposed by teams
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies an upper bound for the number of threads to use by each contention
- group created by a teams construct without explicit @code{thread_limit}
- clause. The value of this variable shall be a positive integer. If undefined,
- the value of 0 is used which stands for an implementation defined upper
- limit.
- @item @emph{See also}:
- @ref{OMP_THREAD_LIMIT}, @ref{omp_set_teams_thread_limit}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 6.24
- @end table
- @node OMP_THREAD_LIMIT
- @section @env{OMP_THREAD_LIMIT} -- Set the maximum number of threads
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies the number of threads to use for the whole program. The
- value of this variable shall be a positive integer. If undefined,
- the number of threads is not limited.
- @item @emph{See also}:
- @ref{OMP_NUM_THREADS}, @ref{omp_get_thread_limit}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.10
- @end table
- @node OMP_WAIT_POLICY
- @section @env{OMP_WAIT_POLICY} -- How waiting threads are handled
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Specifies whether waiting threads should be active or passive. If
- the value is @code{PASSIVE}, waiting threads should not consume CPU
- power while waiting; the value @code{ACTIVE} specifies that
- they should. If undefined, threads wait actively for a short time
- before waiting passively.
- @item @emph{See also}:
- @ref{GOMP_SPINCOUNT}
- @item @emph{Reference}:
- @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.8
- @end table
- @node GOMP_CPU_AFFINITY
- @section @env{GOMP_CPU_AFFINITY} -- Bind threads to specific CPUs
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Binds threads to specific CPUs. The variable should contain a space-separated
- or comma-separated list of CPUs. This list may contain different kinds of
- entries: either single CPU numbers in any order, a range of CPUs (M-N)
- or a range with some stride (M-N:S). CPU numbers are zero based. For example,
- @code{GOMP_CPU_AFFINITY="0 3 1-2 4-15:2"} will bind the initial thread
- to CPU 0, the second to CPU 3, the third to CPU 1, the fourth to
- CPU 2, the fifth to CPU 4, the sixth through tenth to CPUs 6, 8, 10, 12,
- and 14 respectively and then start assigning back from the beginning of
- the list. @code{GOMP_CPU_AFFINITY=0} binds all threads to CPU 0.
- There is no libgomp library routine to determine whether a CPU affinity
- specification is in effect. As a workaround, language-specific library
- functions, e.g., @code{getenv} in C or @code{GET_ENVIRONMENT_VARIABLE} in
- Fortran, may be used to query the setting of the @code{GOMP_CPU_AFFINITY}
- environment variable. A defined CPU affinity on startup cannot be changed
- or disabled during the runtime of the application.
- If both @env{GOMP_CPU_AFFINITY} and @env{OMP_PROC_BIND} are set,
- @env{OMP_PROC_BIND} has a higher precedence. If neither has been set, or
- when @env{OMP_PROC_BIND} is set to
- @code{FALSE}, the host system will handle the assignment of threads to CPUs.
- @item @emph{See also}:
- @ref{OMP_PLACES}, @ref{OMP_PROC_BIND}
- @end table
- @node GOMP_DEBUG
- @section @env{GOMP_DEBUG} -- Enable debugging output
- @cindex Environment Variable
- @table @asis
- @item @emph{Description}:
- Enable debugging output. The variable should be set to @code{0}
- (disabled, also the default if not set), or @code{1} (enabled).
- If enabled, some debugging output will be printed during execution.
- This is currently not specified in more detail, and subject to change.
- @end table
- @node GOMP_STACKSIZE
- @section @env{GOMP_STACKSIZE} -- Set default thread stack size
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- Set the default thread stack size in kilobytes. This is different from
- @code{pthread_attr_setstacksize} which gets the number of bytes as an
- argument. If the stack size cannot be set due to system constraints, an
- error is reported and the initial stack size is left unchanged. If undefined,
- the stack size is system dependent.
- @item @emph{See also}:
- @ref{OMP_STACKSIZE}
- @item @emph{Reference}:
- @uref{https://gcc.gnu.org/ml/gcc-patches/2006-06/msg00493.html,
- GCC Patches Mailinglist},
- @uref{https://gcc.gnu.org/ml/gcc-patches/2006-06/msg00496.html,
- GCC Patches Mailinglist}
- @end table
- @node GOMP_SPINCOUNT
- @section @env{GOMP_SPINCOUNT} -- Set the busy-wait spin count
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- Determines how long a thread waits actively, consuming CPU power,
- before waiting passively without consuming CPU power. The value may be
- either @code{INFINITE}, @code{INFINITY} to always wait actively or an
- integer which gives the number of spins of the busy-wait loop. The
- integer may optionally be followed by the following suffixes acting
- as multiplication factors: @code{k} (kilo, thousand), @code{M} (mega,
- million), @code{G} (giga, billion), or @code{T} (tera, trillion).
- If undefined, 0 is used when @env{OMP_WAIT_POLICY} is @code{PASSIVE},
- 300,000 is used when @env{OMP_WAIT_POLICY} is undefined and
- 30 billion is used when @env{OMP_WAIT_POLICY} is @code{ACTIVE}.
- If there are more OpenMP threads than available CPUs, 1000 and 100
- spins are used for @env{OMP_WAIT_POLICY} being @code{ACTIVE} or
- undefined, respectively; unless the @env{GOMP_SPINCOUNT} is lower
- or @env{OMP_WAIT_POLICY} is @code{PASSIVE}.
- @item @emph{See also}:
- @ref{OMP_WAIT_POLICY}
- @end table
- @node GOMP_RTEMS_THREAD_POOLS
- @section @env{GOMP_RTEMS_THREAD_POOLS} -- Set the RTEMS specific thread pools
- @cindex Environment Variable
- @cindex Implementation specific setting
- @table @asis
- @item @emph{Description}:
- This environment variable is only used on the RTEMS real-time operating system.
- It determines the scheduler instance specific thread pools. The format for
- @env{GOMP_RTEMS_THREAD_POOLS} is a list of optional
- @code{<thread-pool-count>[$<priority>]@@<scheduler-name>} configurations
- separated by @code{:} where:
- @itemize @bullet
- @item @code{<thread-pool-count>} is the thread pool count for this scheduler
- instance.
- @item @code{$<priority>} is an optional priority for the worker threads of a
- thread pool according to @code{pthread_setschedparam}. In case a priority
- value is omitted, then a worker thread will inherit the priority of the OpenMP
- primary thread that created it. The priority of the worker thread is not
- changed after creation, even if a new OpenMP primary thread using the worker has
- a different priority.
- @item @code{@@<scheduler-name>} is the scheduler instance name according to the
- RTEMS application configuration.
- @end itemize
- In case no thread pool configuration is specified for a scheduler instance,
- then each OpenMP primary thread of this scheduler instance will use its own
- dynamically allocated thread pool. To limit the worker thread count of the
- thread pools, each OpenMP primary thread must call @code{omp_set_num_threads}.
- @item @emph{Example}:
- Let's suppose we have three scheduler instances @code{IO}, @code{WRK0}, and
- @code{WRK1} with @env{GOMP_RTEMS_THREAD_POOLS} set to
- @code{"1@@WRK0:3$4@@WRK1"}. Then there are no thread pool restrictions for
- scheduler instance @code{IO}. In the scheduler instance @code{WRK0} there is
- one thread pool available. Since no priority is specified for this scheduler
- instance, the worker thread inherits the priority of the OpenMP primary thread
- that created it. In the scheduler instance @code{WRK1} there are three thread
- pools available and their worker threads run at priority four.
- @end table
- @c ---------------------------------------------------------------------
- @c Enabling OpenACC
- @c ---------------------------------------------------------------------
- @node Enabling OpenACC
- @chapter Enabling OpenACC
- To activate the OpenACC extensions for C/C++ and Fortran, the compile-time
- flag @option{-fopenacc} must be specified. This enables the OpenACC directive
- @code{#pragma acc} in C/C++ and @code{!$acc} directives in free form,
- @code{c$acc}, @code{*$acc} and @code{!$acc} directives in fixed form,
- @code{!$} conditional compilation sentinels in free form and @code{c$},
- @code{*$} and @code{!$} sentinels in fixed form, for Fortran. The flag also
- arranges for automatic linking of the OpenACC runtime library
- (@ref{OpenACC Runtime Library Routines}).
- See @uref{https://gcc.gnu.org/wiki/OpenACC} for more information.
- A complete description of all OpenACC directives accepted may be found in
- the @uref{https://www.openacc.org, OpenACC} Application Programming
- Interface manual, version 2.6.
- @c ---------------------------------------------------------------------
- @c OpenACC Runtime Library Routines
- @c ---------------------------------------------------------------------
- @node OpenACC Runtime Library Routines
- @chapter OpenACC Runtime Library Routines
- The runtime routines described here are defined by section 3 of the OpenACC
- specifications in version 2.6.
- They have C linkage, and do not throw exceptions.
- Generally, they are available only for the host, with the exception of
- @code{acc_on_device}, which is available for both the host and the
- acceleration device.
- @menu
- * acc_get_num_devices:: Get number of devices for the given device
- type.
- * acc_set_device_type:: Set type of device accelerator to use.
- * acc_get_device_type:: Get type of device accelerator to be used.
- * acc_set_device_num:: Set device number to use.
- * acc_get_device_num:: Get device number to be used.
- * acc_get_property:: Get device property.
- * acc_async_test:: Tests for completion of a specific asynchronous
- operation.
- * acc_async_test_all:: Tests for completion of all asynchronous
- operations.
- * acc_wait:: Wait for completion of a specific asynchronous
- operation.
- * acc_wait_all:: Waits for completion of all asynchronous
- operations.
- * acc_wait_all_async:: Wait for completion of all asynchronous
- operations.
- * acc_wait_async:: Wait for completion of asynchronous operations.
- * acc_init:: Initialize runtime for a specific device type.
- * acc_shutdown:: Shuts down the runtime for a specific device
- type.
- * acc_on_device:: Whether executing on a particular device
- * acc_malloc:: Allocate device memory.
- * acc_free:: Free device memory.
- * acc_copyin:: Allocate device memory and copy host memory to
- it.
- * acc_present_or_copyin:: If the data is not present on the device,
- allocate device memory and copy from host
- memory.
- * acc_create:: Allocate device memory and map it to host
- memory.
- * acc_present_or_create:: If the data is not present on the device,
- allocate device memory and map it to host
- memory.
- * acc_copyout:: Copy device memory to host memory.
- * acc_delete:: Free device memory.
- * acc_update_device:: Update device memory from mapped host memory.
- * acc_update_self:: Update host memory from mapped device memory.
- * acc_map_data:: Map previously allocated device memory to host
- memory.
- * acc_unmap_data:: Unmap device memory from host memory.
- * acc_deviceptr:: Get device pointer associated with specific
- host address.
- * acc_hostptr:: Get host pointer associated with specific
- device address.
- * acc_is_present:: Indicate whether host variable / array is
- present on device.
- * acc_memcpy_to_device:: Copy host memory to device memory.
- * acc_memcpy_from_device:: Copy device memory to host memory.
- * acc_attach:: Let device pointer point to device-pointer target.
- * acc_detach:: Let device pointer point to host-pointer target.
- API routines for target platforms.
- * acc_get_current_cuda_device:: Get CUDA device handle.
- * acc_get_current_cuda_context:: Get CUDA context handle.
- * acc_get_cuda_stream:: Get CUDA stream handle.
- * acc_set_cuda_stream:: Set CUDA stream handle.
- API routines for the OpenACC Profiling Interface.
- * acc_prof_register:: Register callbacks.
- * acc_prof_unregister:: Unregister callbacks.
- * acc_prof_lookup:: Obtain inquiry functions.
- * acc_register_library:: Library registration.
- @end menu
- @node acc_get_num_devices
- @section @code{acc_get_num_devices} -- Get number of devices for given device type
- @table @asis
- @item @emph{Description}
- This function returns a value indicating the number of devices available
- for the device type specified in @var{devicetype}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_get_num_devices(acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{integer function acc_get_num_devices(devicetype)}
- @item @tab @code{integer(kind=acc_device_kind) devicetype}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.1.
- @end table
- @node acc_set_device_type
- @section @code{acc_set_device_type} -- Set type of device accelerator to use.
- @table @asis
- @item @emph{Description}
- This function indicates to the runtime library which device type, specified
- in @var{devicetype}, to use when executing a parallel or kernels region.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_set_device_type(acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_set_device_type(devicetype)}
- @item @tab @code{integer(kind=acc_device_kind) devicetype}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.2.
- @end table
- @node acc_get_device_type
- @section @code{acc_get_device_type} -- Get type of device accelerator to be used.
- @table @asis
- @item @emph{Description}
- This function returns what device type will be used when executing a
- parallel or kernels region.
- This function returns @code{acc_device_none} if
- @code{acc_get_device_type} is called from
- @code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
- callbacks of the OpenACC Profiling Interface (@ref{OpenACC Profiling
- Interface}), that is, if the device is currently being initialized.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_device_t acc_get_device_type(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_get_device_type()}
- @item @tab @code{integer(kind=acc_device_kind) acc_get_device_type}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.3.
- @end table
- @node acc_set_device_num
- @section @code{acc_set_device_num} -- Set device number to use.
- @table @asis
- @item @emph{Description}
- This function indicates to the runtime which device number,
- specified by @var{devicenum}, to use for the specified device
- type @var{devicetype}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_set_device_num(int devicenum, acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_set_device_num(devicenum, devicetype)}
- @item @tab @code{integer devicenum}
- @item @tab @code{integer(kind=acc_device_kind) devicetype}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.4.
- @end table
- @node acc_get_device_num
- @section @code{acc_get_device_num} -- Get device number to be used.
- @table @asis
- @item @emph{Description}
- This function returns which device number, associated with the specified device
- type @var{devicetype}, will be used when executing a parallel or kernels
- region.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_get_device_num(acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_get_device_num(devicetype)}
- @item @tab @code{integer(kind=acc_device_kind) devicetype}
- @item @tab @code{integer acc_get_device_num}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.5.
- @end table
- @node acc_get_property
- @section @code{acc_get_property} -- Get device property.
- @cindex acc_get_property
- @cindex acc_get_property_string
- @table @asis
- @item @emph{Description}
- These routines return the value of the specified @var{property} for the
- device being queried according to @var{devicenum} and @var{devicetype}.
- Integer-valued and string-valued properties are returned by
- @code{acc_get_property} and @code{acc_get_property_string} respectively.
- The Fortran @code{acc_get_property_string} subroutine returns the string
- retrieved in its fourth argument while the remaining entry points are
- functions, which pass the return value as their result.
- Note for Fortran, only: the OpenACC technical committee corrected and, hence,
- modified the interface introduced in OpenACC 2.6. The kind-value parameter
- @code{acc_device_property} has been renamed to @code{acc_device_property_kind}
- for consistency and the return type of the @code{acc_get_property} function is
- now a @code{c_size_t} integer instead of a @code{acc_device_property} integer.
- The parameter @code{acc_device_property} will continue to be provided,
- but might be removed in a future version of GCC.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{size_t acc_get_property(int devicenum, acc_device_t devicetype, acc_device_property_t property);}
- @item @emph{Prototype}: @tab @code{const char *acc_get_property_string(int devicenum, acc_device_t devicetype, acc_device_property_t property);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_get_property(devicenum, devicetype, property)}
- @item @emph{Interface}: @tab @code{subroutine acc_get_property_string(devicenum, devicetype, property, string)}
- @item @tab @code{use ISO_C_Binding, only: c_size_t}
- @item @tab @code{integer devicenum}
- @item @tab @code{integer(kind=acc_device_kind) devicetype}
- @item @tab @code{integer(kind=acc_device_property_kind) property}
- @item @tab @code{integer(kind=c_size_t) acc_get_property}
- @item @tab @code{character(*) string}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.6.
- @end table
- @node acc_async_test
- @section @code{acc_async_test} -- Test for completion of a specific asynchronous operation.
- @table @asis
- @item @emph{Description}
- This function tests for completion of the asynchronous operation specified
- in @var{arg}. In C/C++, a non-zero value is returned to indicate that
- the specified asynchronous operation has completed, while Fortran returns
- @code{true}. If the asynchronous operation has not completed, C/C++ returns
- zero and Fortran returns @code{false}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_async_test(int arg);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_async_test(arg)}
- @item @tab @code{integer(kind=acc_handle_kind) arg}
- @item @tab @code{logical acc_async_test}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.9.
- @end table
- @node acc_async_test_all
- @section @code{acc_async_test_all} -- Tests for completion of all asynchronous operations.
- @table @asis
- @item @emph{Description}
- This function tests for completion of all asynchronous operations.
- In C/C++, a non-zero value is returned to indicate that all asynchronous
- operations have completed, while Fortran returns @code{true}. If
- any asynchronous operation has not completed, C/C++ returns zero and
- Fortran returns @code{false}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_async_test_all(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_async_test_all()}
- @item @tab @code{logical acc_async_test_all}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.10.
- @end table
- @node acc_wait
- @section @code{acc_wait} -- Wait for completion of a specific asynchronous operation.
- @table @asis
- @item @emph{Description}
- This function waits for completion of the asynchronous operation
- specified in @var{arg}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_wait(arg);}
- @item @emph{Prototype (OpenACC 1.0 compatibility)}: @tab @code{acc_async_wait(arg);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_wait(arg)}
- @item @tab @code{integer(acc_handle_kind) arg}
- @item @emph{Interface (OpenACC 1.0 compatibility)}: @tab @code{subroutine acc_async_wait(arg)}
- @item @tab @code{integer(acc_handle_kind) arg}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.11.
- @end table
- @node acc_wait_all
- @section @code{acc_wait_all} -- Waits for completion of all asynchronous operations.
- @table @asis
- @item @emph{Description}
- This function waits for the completion of all asynchronous operations.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_wait_all(void);}
- @item @emph{Prototype (OpenACC 1.0 compatibility)}: @tab @code{acc_async_wait_all(void);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_wait_all()}
- @item @emph{Interface (OpenACC 1.0 compatibility)}: @tab @code{subroutine acc_async_wait_all()}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.13.
- @end table
- @node acc_wait_all_async
- @section @code{acc_wait_all_async} -- Wait for completion of all asynchronous operations.
- @table @asis
- @item @emph{Description}
- This function enqueues a wait operation on the queue @var{async} for any
- and all asynchronous operations that have been previously enqueued on
- any queue.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_wait_all_async(int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_wait_all_async(async)}
- @item @tab @code{integer(acc_handle_kind) async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.14.
- @end table
- @node acc_wait_async
- @section @code{acc_wait_async} -- Wait for completion of asynchronous operations.
- @table @asis
- @item @emph{Description}
- This function enqueues a wait operation on queue @var{async} for any and all
- asynchronous operations enqueued on queue @var{arg}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_wait_async(int arg, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_wait_async(arg, async)}
- @item @tab @code{integer(acc_handle_kind) arg, async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.12.
- @end table
- @node acc_init
- @section @code{acc_init} -- Initialize runtime for a specific device type.
- @table @asis
- @item @emph{Description}
- This function initializes the runtime for the device type specified in
- @var{devicetype}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_init(acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_init(devicetype)}
- @item @tab @code{integer(acc_device_kind) devicetype}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.7.
- @end table
- @node acc_shutdown
- @section @code{acc_shutdown} -- Shuts down the runtime for a specific device type.
- @table @asis
- @item @emph{Description}
- This function shuts down the runtime for the device type specified in
- @var{devicetype}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_shutdown(acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_shutdown(devicetype)}
- @item @tab @code{integer(acc_device_kind) devicetype}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.8.
- @end table
- @node acc_on_device
- @section @code{acc_on_device} -- Whether executing on a particular device
- @table @asis
- @item @emph{Description}:
- This function returns whether the program is executing on a particular
- device specified in @var{devicetype}. In C/C++ a non-zero value is
- returned to indicate the program is executing on the specified device type.
- In Fortran, @code{true} will be returned. If the program is not executing
- on the specified device type C/C++ will return a zero, while Fortran will
- return @code{false}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_on_device(acc_device_t devicetype);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_on_device(devicetype)}
- @item @tab @code{integer(acc_device_kind) devicetype}
- @item @tab @code{logical acc_on_device}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.17.
- @end table
- @node acc_malloc
- @section @code{acc_malloc} -- Allocate device memory.
- @table @asis
- @item @emph{Description}
- This function allocates @var{len} bytes of device memory. It returns
- the device address of the allocated memory.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{d_void* acc_malloc(size_t len);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.18.
- @end table
- @node acc_free
- @section @code{acc_free} -- Free device memory.
- @table @asis
- @item @emph{Description}
- Free previously allocated device memory at the device address @code{a}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_free(d_void *a);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.19.
- @end table
- @node acc_copyin
- @section @code{acc_copyin} -- Allocate device memory and copy host memory to it.
- @table @asis
- @item @emph{Description}
- In C/C++, this function allocates @var{len} bytes of device memory
- and maps it to the specified host address in @var{a}. The device
- address of the newly allocated device memory is returned.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. In the second form, @var{a} specifies a
- variable or array element and @var{len} specifies the length in bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_copyin(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{void *acc_copyin_async(h_void *a, size_t len, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_copyin(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_copyin(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.20.
- @end table
- @node acc_present_or_copyin
- @section @code{acc_present_or_copyin} -- If the data is not present on the device, allocate device memory and copy from host memory.
- @table @asis
- @item @emph{Description}
- This function tests if the host data specified by @var{a} and of length
- @var{len} is present or not. If it is not present, then device memory
- will be allocated and the host memory copied. The device address of
- the newly allocated device memory is returned.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. In the second form, @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- Note that @code{acc_present_or_copyin} and @code{acc_pcopyin} exist for
- backward compatibility with OpenACC 2.0; use @ref{acc_copyin} instead.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_present_or_copyin(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{void *acc_pcopyin(h_void *a, size_t len);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_present_or_copyin(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_present_or_copyin(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_pcopyin(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_pcopyin(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.20.
- @end table
- @node acc_create
- @section @code{acc_create} -- Allocate device memory and map it to host memory.
- @table @asis
- @item @emph{Description}
- This function allocates device memory and maps it to host memory specified
- by the host address @var{a} with a length of @var{len} bytes. In C/C++,
- the function returns the device address of the allocated device memory.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. In the second form, @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_create(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{void *acc_create_async(h_void *a, size_t len, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_create(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_create(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_create_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_create_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.21.
- @end table
- @node acc_present_or_create
- @section @code{acc_present_or_create} -- If the data is not present on the device, allocate device memory and map it to host memory.
- @table @asis
- @item @emph{Description}
- This function tests if the host data specified by @var{a} and of length
- @var{len} is present or not. If it is not present, then device memory
- will be allocated and mapped to host memory. In C/C++, the device address
- of the newly allocated device memory is returned.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. In the second form, @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- Note that @code{acc_present_or_create} and @code{acc_pcreate} exist for
- backward compatibility with OpenACC 2.0; use @ref{acc_create} instead.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_present_or_create(h_void *a, size_t len)}
- @item @emph{Prototype}: @tab @code{void *acc_pcreate(h_void *a, size_t len)}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_present_or_create(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_present_or_create(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_pcreate(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_pcreate(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.21.
- @end table
- @node acc_copyout
- @section @code{acc_copyout} -- Copy device memory to host memory.
- @table @asis
- @item @emph{Description}
- This function copies mapped device memory to host memory which is specified
- by host address @var{a} for a length @var{len} bytes in C/C++.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. The second form @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_copyout(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{acc_copyout_async(h_void *a, size_t len, int async);}
- @item @emph{Prototype}: @tab @code{acc_copyout_finalize(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{acc_copyout_finalize_async(h_void *a, size_t len, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_copyout(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.22.
- @end table
- @node acc_delete
- @section @code{acc_delete} -- Free device memory.
- @table @asis
- @item @emph{Description}
- This function frees the previously allocated device memory that is mapped to
- the host address @var{a} with a length of @var{len} bytes.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. The second form @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_delete(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{acc_delete_async(h_void *a, size_t len, int async);}
- @item @emph{Prototype}: @tab @code{acc_delete_finalize(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{acc_delete_finalize_async(h_void *a, size_t len, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_delete(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_delete(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.23.
- @end table
- @node acc_update_device
- @section @code{acc_update_device} -- Update device memory from mapped host memory.
- @table @asis
- @item @emph{Description}
- This function updates the device copy from the previously mapped host memory.
- The host memory is specified with the host address @var{a} and a length of
- @var{len} bytes.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. The second form @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_update_device(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{acc_update_device_async(h_void *a, size_t len, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_update_device(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_update_device(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.24.
- @end table
- @node acc_update_self
- @section @code{acc_update_self} -- Update host memory from mapped device memory.
- @table @asis
- @item @emph{Description}
- This function updates the host copy from the previously mapped device memory.
- The host memory is specified with the host address @var{a} and a length of
- @var{len} bytes.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. The second form @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_update_self(h_void *a, size_t len);}
- @item @emph{Prototype}: @tab @code{acc_update_self_async(h_void *a, size_t len, int async);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{subroutine acc_update_self(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @emph{Interface}: @tab @code{subroutine acc_update_self(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, len, async)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{integer(acc_handle_kind) :: async}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.25.
- @end table
- @node acc_map_data
- @section @code{acc_map_data} -- Map previously allocated device memory to host memory.
- @table @asis
- @item @emph{Description}
- This function maps previously allocated device and host memory. The device
- memory is specified with the device address @var{d}. The host memory is
- specified with the host address @var{h} and a length of @var{len}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_map_data(h_void *h, d_void *d, size_t len);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.26.
- @end table
- @node acc_unmap_data
- @section @code{acc_unmap_data} -- Unmap device memory from host memory.
- @table @asis
- @item @emph{Description}
- This function unmaps previously mapped device and host memory. The latter
- is specified by @var{h}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_unmap_data(h_void *h);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.27.
- @end table
- @node acc_deviceptr
- @section @code{acc_deviceptr} -- Get device pointer associated with specific host address.
- @table @asis
- @item @emph{Description}
- This function returns the device address that has been mapped to the
- host address specified by @var{h}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_deviceptr(h_void *h);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.28.
- @end table
- @node acc_hostptr
- @section @code{acc_hostptr} -- Get host pointer associated with specific device address.
- @table @asis
- @item @emph{Description}
- This function returns the host address that has been mapped to the
- device address specified by @var{d}.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_hostptr(d_void *d);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.29.
- @end table
- @node acc_is_present
- @section @code{acc_is_present} -- Indicate whether host variable / array is present on device.
- @table @asis
- @item @emph{Description}
- This function indicates whether the specified host address in @var{a} and a
- length of @var{len} bytes is present on the device. In C/C++, a non-zero
- value is returned to indicate the presence of the mapped memory on the
- device. A zero is returned to indicate the memory is not mapped on the
- device.
- In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
- a contiguous array section. In the second form, @var{a} specifies a variable or
- array element and @var{len} specifies the length in bytes. If the host
- memory is mapped to device memory, then a @code{true} is returned. Otherwise,
- a @code{false} is returned to indicate the mapped memory is not present.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_is_present(h_void *a, size_t len);}
- @end multitable
- @item @emph{Fortran}:
- @multitable @columnfractions .20 .80
- @item @emph{Interface}: @tab @code{function acc_is_present(a)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{logical acc_is_present}
- @item @emph{Interface}: @tab @code{function acc_is_present(a, len)}
- @item @tab @code{type, dimension(:[,:]...) :: a}
- @item @tab @code{integer len}
- @item @tab @code{logical acc_is_present}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.30.
- @end table
- @node acc_memcpy_to_device
- @section @code{acc_memcpy_to_device} -- Copy host memory to device memory.
- @table @asis
- @item @emph{Description}
- This function copies host memory specified by host address of @var{src} to
- device memory specified by the device address @var{dest} for a length of
- @var{bytes} bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_memcpy_to_device(d_void *dest, h_void *src, size_t bytes);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.31.
- @end table
- @node acc_memcpy_from_device
- @section @code{acc_memcpy_from_device} -- Copy device memory to host memory.
- @table @asis
- @item @emph{Description}
- This function copies device memory specified by the device address @var{src}
- to host memory specified by the host address @var{dest} for a length of
- @var{bytes} bytes.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_memcpy_from_device(h_void *dest, d_void *src, size_t bytes);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.32.
- @end table
- @node acc_attach
- @section @code{acc_attach} -- Let device pointer point to device-pointer target.
- @table @asis
- @item @emph{Description}
- This function updates a pointer on the device from pointing to a host-pointer
- address to pointing to the corresponding device data.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_attach(h_void **ptr);}
- @item @emph{Prototype}: @tab @code{acc_attach_async(h_void **ptr, int async);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.34.
- @end table
- @node acc_detach
- @section @code{acc_detach} -- Let device pointer point to host-pointer target.
- @table @asis
- @item @emph{Description}
- This function updates a pointer on the device from pointing to a device-pointer
- address to pointing to the corresponding host data.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_detach(h_void **ptr);}
- @item @emph{Prototype}: @tab @code{acc_detach_async(h_void **ptr, int async);}
- @item @emph{Prototype}: @tab @code{acc_detach_finalize(h_void **ptr);}
- @item @emph{Prototype}: @tab @code{acc_detach_finalize_async(h_void **ptr, int async);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 3.2.35.
- @end table
- @node acc_get_current_cuda_device
- @section @code{acc_get_current_cuda_device} -- Get CUDA device handle.
- @table @asis
- @item @emph{Description}
- This function returns the CUDA device handle. This handle is the same
- as used by the CUDA Runtime or Driver APIs.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_get_current_cuda_device(void);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- A.2.1.1.
- @end table
- @node acc_get_current_cuda_context
- @section @code{acc_get_current_cuda_context} -- Get CUDA context handle.
- @table @asis
- @item @emph{Description}
- This function returns the CUDA context handle. This handle is the same
- as used by the CUDA Runtime or Driver APIs.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_get_current_cuda_context(void);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- A.2.1.2.
- @end table
- @node acc_get_cuda_stream
- @section @code{acc_get_cuda_stream} -- Get CUDA stream handle.
- @table @asis
- @item @emph{Description}
- This function returns the CUDA stream handle for the queue @var{async}.
- This handle is the same as used by the CUDA Runtime or Driver APIs.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void *acc_get_cuda_stream(int async);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- A.2.1.3.
- @end table
- @node acc_set_cuda_stream
- @section @code{acc_set_cuda_stream} -- Set CUDA stream handle.
- @table @asis
- @item @emph{Description}
- This function associates the stream handle specified by @var{stream} with
- the queue @var{async}.
- This cannot be used to change the stream handle associated with
- @code{acc_async_sync}.
- The return value is not specified.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{int acc_set_cuda_stream(int async, void *stream);}
- @end multitable
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- A.2.1.4.
- @end table
- @node acc_prof_register
- @section @code{acc_prof_register} -- Register callbacks.
- @table @asis
- @item @emph{Description}:
- This function registers callbacks.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void acc_prof_register (acc_event_t, acc_prof_callback, acc_register_t);}
- @end multitable
- @item @emph{See also}:
- @ref{OpenACC Profiling Interface}
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 5.3.
- @end table
- @node acc_prof_unregister
- @section @code{acc_prof_unregister} -- Unregister callbacks.
- @table @asis
- @item @emph{Description}:
- This function unregisters callbacks.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void acc_prof_unregister (acc_event_t, acc_prof_callback, acc_register_t);}
- @end multitable
- @item @emph{See also}:
- @ref{OpenACC Profiling Interface}
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 5.3.
- @end table
- @node acc_prof_lookup
- @section @code{acc_prof_lookup} -- Obtain inquiry functions.
- @table @asis
- @item @emph{Description}:
- Function to obtain inquiry functions.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{acc_query_fn acc_prof_lookup (const char *);}
- @end multitable
- @item @emph{See also}:
- @ref{OpenACC Profiling Interface}
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 5.3.
- @end table
- @node acc_register_library
- @section @code{acc_register_library} -- Library registration.
- @table @asis
- @item @emph{Description}:
- Function for library registration.
- @item @emph{C/C++}:
- @multitable @columnfractions .20 .80
- @item @emph{Prototype}: @tab @code{void acc_register_library (acc_prof_reg, acc_prof_reg, acc_prof_lookup_func);}
- @end multitable
- @item @emph{See also}:
- @ref{OpenACC Profiling Interface}, @ref{ACC_PROFLIB}
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 5.3.
- @end table
- @c ---------------------------------------------------------------------
- @c OpenACC Environment Variables
- @c ---------------------------------------------------------------------
- @node OpenACC Environment Variables
- @chapter OpenACC Environment Variables
- The variables @env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM}
- are defined by section 4 of the OpenACC specification in version 2.0.
- The variable @env{ACC_PROFLIB}
- is defined by section 4 of the OpenACC specification in version 2.6.
- The variable @env{GCC_ACC_NOTIFY} is used for diagnostic purposes.
- @menu
- * ACC_DEVICE_TYPE::
- * ACC_DEVICE_NUM::
- * ACC_PROFLIB::
- * GCC_ACC_NOTIFY::
- @end menu
- @node ACC_DEVICE_TYPE
- @section @code{ACC_DEVICE_TYPE}
- @table @asis
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 4.1.
- @end table
- @node ACC_DEVICE_NUM
- @section @code{ACC_DEVICE_NUM}
- @table @asis
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 4.2.
- @end table
- @node ACC_PROFLIB
- @section @code{ACC_PROFLIB}
- @table @asis
- @item @emph{See also}:
- @ref{acc_register_library}, @ref{OpenACC Profiling Interface}
- @item @emph{Reference}:
- @uref{https://www.openacc.org, OpenACC specification v2.6}, section
- 4.3.
- @end table
- @node GCC_ACC_NOTIFY
- @section @code{GCC_ACC_NOTIFY}
- @table @asis
- @item @emph{Description}:
- Print debug information pertaining to the accelerator.
- @end table
- @c ---------------------------------------------------------------------
- @c CUDA Streams Usage
- @c ---------------------------------------------------------------------
- @node CUDA Streams Usage
- @chapter CUDA Streams Usage
- This applies to the @code{nvptx} plugin only.
- The library provides elements that perform asynchronous movement of
- data and asynchronous operation of computing constructs. This
- asynchronous functionality is implemented by making use of CUDA
- streams@footnote{See "Stream Management" in "CUDA Driver API",
- TRM-06703-001, Version 5.5, for additional information}.
- The primary means by which the asynchronous functionality is accessed
- is through the use of those OpenACC directives which make use of the
- @code{async} and @code{wait} clauses. When the @code{async} clause is
- first used with a directive, it creates a CUDA stream. If an
- @code{async-argument} is used with the @code{async} clause, then the
- stream is associated with the specified @code{async-argument}.
- Following the creation of an association between a CUDA stream and the
- @code{async-argument} of an @code{async} clause, both the @code{wait}
- clause and the @code{wait} directive can be used. When either the
- clause or directive is used after stream creation, it creates a
- rendezvous point whereby execution waits until all operations
- associated with the @code{async-argument}, that is, stream, have
- completed.
- Normally, the management of the streams that are created as a result of
- using the @code{async} clause, is done without any intervention by the
- caller. This implies the association between the @code{async-argument}
- and the CUDA stream will be maintained for the lifetime of the program.
- However, this association can be changed through the use of the library
- function @code{acc_set_cuda_stream}. When the function
- @code{acc_set_cuda_stream} is called, the CUDA stream that was
- originally associated with the @code{async} clause will be destroyed.
- Caution should be taken when changing the association as subsequent
- references to the @code{async-argument} refer to a different
- CUDA stream.
- @c ---------------------------------------------------------------------
- @c OpenACC Library Interoperability
- @c ---------------------------------------------------------------------
- @node OpenACC Library Interoperability
- @chapter OpenACC Library Interoperability
- @section Introduction
- The OpenACC library uses the CUDA Driver API, and may interact with
- programs that use the Runtime library directly, or another library
- based on the Runtime library, e.g., CUBLAS@footnote{See section 2.26,
- "Interactions with the CUDA Driver API" in
- "CUDA Runtime API", Version 5.5, and section 2.27, "VDPAU
- Interoperability", in "CUDA Driver API", TRM-06703-001, Version 5.5,
- for additional information on library interoperability.}.
- This chapter describes the use cases and what changes are
- required in order to use both the OpenACC library and the CUBLAS and Runtime
- libraries within a program.
- @section First invocation: NVIDIA CUBLAS library API
- In this first use case (see below), a function in the CUBLAS library is called
- prior to any of the functions in the OpenACC library. More specifically, the
- function @code{cublasCreate()}.
- When invoked, the function initializes the library and allocates the
- hardware resources on the host and the device on behalf of the caller. Once
- the initialization and allocation have completed, a handle is returned to the
- caller. The OpenACC library also requires initialization and allocation of
- hardware resources. Since the CUBLAS library has already allocated the
- hardware resources for the device, all that is left to do is to initialize
- the OpenACC library and acquire the hardware resources on the host.
- Prior to calling the OpenACC function that initializes the library and
- allocates the host hardware resources, you need to acquire the device number
- that was allocated during the call to @code{cublasCreate()}. The invoking of the
- runtime library function @code{cudaGetDevice()} accomplishes this. Once
- acquired, the device number is passed along with the device type as
- parameters to the OpenACC library function @code{acc_set_device_num()}.
- Once the call to @code{acc_set_device_num()} has completed, the OpenACC
- library uses the context that was created during the call to
- @code{cublasCreate()}. In other words, both libraries will be sharing the
- same context.
- @smallexample
- /* Create the handle */
- s = cublasCreate(&h);
- if (s != CUBLAS_STATUS_SUCCESS)
- @{
- fprintf(stderr, "cublasCreate failed %d\n", s);
- exit(EXIT_FAILURE);
- @}
- /* Get the device number */
- e = cudaGetDevice(&dev);
- if (e != cudaSuccess)
- @{
- fprintf(stderr, "cudaGetDevice failed %d\n", e);
- exit(EXIT_FAILURE);
- @}
- /* Initialize OpenACC library and use device 'dev' */
- acc_set_device_num(dev, acc_device_nvidia);
- @end smallexample
- @center Use Case 1
- @section First invocation: OpenACC library API
- In this second use case (see below), a function in the OpenACC library is
- called prior to any of the functions in the CUBLAS library. More specifically,
- the function @code{acc_set_device_num()}.
- In the use case presented here, the function @code{acc_set_device_num()}
- is used to both initialize the OpenACC library and allocate the hardware
- resources on the host and the device. In the call to the function, the
- call parameters specify which device to use and what device
- type to use, i.e., @code{acc_device_nvidia}. It should be noted that this
- is but one method to initialize the OpenACC library and allocate the
- appropriate hardware resources. Other methods are available through the
- use of environment variables and these will be discussed in the next section.
- Once the call to @code{acc_set_device_num()} has completed, other OpenACC
- functions can be called as seen with multiple calls being made to
- @code{acc_copyin()}. In addition, calls can be made to functions in the
- CUBLAS library. In the use case a call to @code{cublasCreate()} is made
- subsequent to the calls to @code{acc_copyin()}.
- As seen in the previous use case, a call to @code{cublasCreate()}
- initializes the CUBLAS library and allocates the hardware resources on the
- host and the device. However, since the device has already been allocated,
- @code{cublasCreate()} will only initialize the CUBLAS library and allocate
- the appropriate hardware resources on the host. The context that was created
- as part of the OpenACC initialization is shared with the CUBLAS library,
- similarly to the first use case.
- @smallexample
- dev = 0;
- acc_set_device_num(dev, acc_device_nvidia);
- /* Copy the first set to the device */
- d_X = acc_copyin(&h_X[0], N * sizeof (float));
- if (d_X == NULL)
- @{
- fprintf(stderr, "copyin error h_X\n");
- exit(EXIT_FAILURE);
- @}
- /* Copy the second set to the device */
- d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
- if (d_Y == NULL)
- @{
- fprintf(stderr, "copyin error h_Y1\n");
- exit(EXIT_FAILURE);
- @}
- /* Create the handle */
- s = cublasCreate(&h);
- if (s != CUBLAS_STATUS_SUCCESS)
- @{
- fprintf(stderr, "cublasCreate failed %d\n", s);
- exit(EXIT_FAILURE);
- @}
- /* Perform saxpy using CUBLAS library function */
- s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
- if (s != CUBLAS_STATUS_SUCCESS)
- @{
- fprintf(stderr, "cublasSaxpy failed %d\n", s);
- exit(EXIT_FAILURE);
- @}
- /* Copy the results from the device */
- acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
- @end smallexample
- @center Use Case 2
- @section OpenACC library and environment variables
- There are two environment variables associated with the OpenACC library
- that may be used to control the device type and device number:
- @env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM}, respectively. These two
- environment variables can be used as an alternative to calling
- @code{acc_set_device_num()}. As seen in the second use case, the device
- type and device number were specified using @code{acc_set_device_num()}.
- If, however, the aforementioned environment variables were set, then the
- call to @code{acc_set_device_num()} would not be required.
- The use of the environment variables is only relevant when an OpenACC function
- is called prior to a call to @code{cublasCreate()}. If @code{cublasCreate()}
- is called prior to a call to an OpenACC function, then you must call
- @code{acc_set_device_num()}@footnote{More complete information
- about @env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM} can be found in
- sections 4.1 and 4.2 of the "@uref{https://www.openacc.org, OpenACC}
- Application Programming Interface", Version 2.6.}.
- @c ---------------------------------------------------------------------
- @c OpenACC Profiling Interface
- @c ---------------------------------------------------------------------
- @node OpenACC Profiling Interface
- @chapter OpenACC Profiling Interface
- @section Implementation Status and Implementation-Defined Behavior
- We're implementing the OpenACC Profiling Interface as defined by the
- OpenACC 2.6 specification. We're clarifying some aspects here as
- @emph{implementation-defined behavior}, while they're still under
- discussion within the OpenACC Technical Committee.
- This implementation is tuned to keep the performance impact as low as
- possible for the (very common) case that the Profiling Interface is
- not enabled. This is relevant, as the Profiling Interface affects all
- the @emph{hot} code paths (in the target code, not in the offloaded
- code). Users of the OpenACC Profiling Interface can be expected to
- understand that performance will be impacted to some degree once the
- Profiling Interface has gotten enabled: for example, because of the
- @emph{runtime} (libgomp) calling into a third-party @emph{library} for
- every event that has been registered.
- We're not yet accounting for the fact that @cite{OpenACC events may
- occur during event processing}.
- We just handle one case specially, as required by CUDA 9.0
- @command{nvprof}, that @code{acc_get_device_type}
- (@ref{acc_get_device_type}) may be called from
- @code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
- callbacks.
- We're not yet implementing initialization via a
- @code{acc_register_library} function that is either statically linked
- in, or dynamically via @env{LD_PRELOAD}.
- Initialization via @code{acc_register_library} functions dynamically
- loaded via the @env{ACC_PROFLIB} environment variable does work, as
- does directly calling @code{acc_prof_register},
- @code{acc_prof_unregister}, @code{acc_prof_lookup}.
- As currently there are no inquiry functions defined, calls to
- @code{acc_prof_lookup} will always return @code{NULL}.
- There aren't separate @emph{start}, @emph{stop} events defined for the
- event types @code{acc_ev_create}, @code{acc_ev_delete},
- @code{acc_ev_alloc}, @code{acc_ev_free}. It's not clear if these
- should be triggered before or after the actual device-specific call is
- made. We trigger them after.
- Remarks about data provided to callbacks:
- @table @asis
- @item @code{acc_prof_info.event_type}
- It's not clear if for @emph{nested} event callbacks (for example,
- @code{acc_ev_enqueue_launch_start} as part of a parent compute
- construct), this should be set for the nested event
- (@code{acc_ev_enqueue_launch_start}), or if the value of the parent
- construct should remain (@code{acc_ev_compute_construct_start}). In
- this implementation, the value will generally correspond to the
- innermost nested event type.
- @item @code{acc_prof_info.device_type}
- @itemize
- @item
- For @code{acc_ev_compute_construct_start}, and in presence of an
- @code{if} clause with @emph{false} argument, this will still refer to
- the offloading device type.
- It's not clear if that's the expected behavior.
- @item
- Complementary to the item before, for
- @code{acc_ev_compute_construct_end}, this is set to
- @code{acc_device_host} in presence of an @code{if} clause with
- @emph{false} argument.
- It's not clear if that's the expected behavior.
- @end itemize
- @item @code{acc_prof_info.thread_id}
- Always @code{-1}; not yet implemented.
- @item @code{acc_prof_info.async}
- @itemize
- @item
- Not yet implemented correctly for
- @code{acc_ev_compute_construct_start}.
- @item
- In a compute construct, for host-fallback
- execution/@code{acc_device_host} it will always be
- @code{acc_async_sync}.
- It's not clear if that's the expected behavior.
- @item
- For @code{acc_ev_device_init_start} and @code{acc_ev_device_init_end},
- it will always be @code{acc_async_sync}.
- It's not clear if that's the expected behavior.
- @end itemize
- @item @code{acc_prof_info.async_queue}
- There is no @cite{limited number of asynchronous queues} in libgomp.
- This will always have the same value as @code{acc_prof_info.async}.
- @item @code{acc_prof_info.src_file}
- Always @code{NULL}; not yet implemented.
- @item @code{acc_prof_info.func_name}
- Always @code{NULL}; not yet implemented.
- @item @code{acc_prof_info.line_no}
- Always @code{-1}; not yet implemented.
- @item @code{acc_prof_info.end_line_no}
- Always @code{-1}; not yet implemented.
- @item @code{acc_prof_info.func_line_no}
- Always @code{-1}; not yet implemented.
- @item @code{acc_prof_info.func_end_line_no}
- Always @code{-1}; not yet implemented.
- @item @code{acc_event_info.event_type}, @code{acc_event_info.*.event_type}
- Relating to @code{acc_prof_info.event_type} discussed above, in this
- implementation, this will always be the same value as
- @code{acc_prof_info.event_type}.
- @item @code{acc_event_info.*.parent_construct}
- @itemize
- @item
- Will be @code{acc_construct_parallel} for all OpenACC compute
- constructs as well as many OpenACC Runtime API calls; should be the
- one matching the actual construct, or
- @code{acc_construct_runtime_api}, respectively.
- @item
- Will be @code{acc_construct_enter_data} or
- @code{acc_construct_exit_data} when processing variable mappings
- specified in OpenACC @emph{declare} directives; should be
- @code{acc_construct_declare}.
- @item
- For implicit @code{acc_ev_device_init_start},
- @code{acc_ev_device_init_end}, and explicit as well as implicit
- @code{acc_ev_alloc}, @code{acc_ev_free},
- @code{acc_ev_enqueue_upload_start}, @code{acc_ev_enqueue_upload_end},
- @code{acc_ev_enqueue_download_start}, and
- @code{acc_ev_enqueue_download_end}, will be
- @code{acc_construct_parallel}; should reflect the real parent
- construct.
- @end itemize
- @item @code{acc_event_info.*.implicit}
- For @code{acc_ev_alloc}, @code{acc_ev_free},
- @code{acc_ev_enqueue_upload_start}, @code{acc_ev_enqueue_upload_end},
- @code{acc_ev_enqueue_download_start}, and
- @code{acc_ev_enqueue_download_end}, this currently will be @code{1}
- also for explicit usage.
- @item @code{acc_event_info.data_event.var_name}
- Always @code{NULL}; not yet implemented.
- @item @code{acc_event_info.data_event.host_ptr}
- For @code{acc_ev_alloc}, and @code{acc_ev_free}, this is always
- @code{NULL}.
- @item @code{typedef union acc_api_info}
- @dots{} as printed in @cite{5.2.3. Third Argument: API-Specific
- Information}. This should obviously be @code{typedef @emph{struct}
- acc_api_info}.
- @item @code{acc_api_info.device_api}
- Possibly not yet implemented correctly for
- @code{acc_ev_compute_construct_start},
- @code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}:
- will always be @code{acc_device_api_none} for these event types.
- For @code{acc_ev_enter_data_start}, it will be
- @code{acc_device_api_none} in some cases.
- @item @code{acc_api_info.device_type}
- Always the same as @code{acc_prof_info.device_type}.
- @item @code{acc_api_info.vendor}
- Always @code{-1}; not yet implemented.
- @item @code{acc_api_info.device_handle}
- Always @code{NULL}; not yet implemented.
- @item @code{acc_api_info.context_handle}
- Always @code{NULL}; not yet implemented.
- @item @code{acc_api_info.async_handle}
- Always @code{NULL}; not yet implemented.
- @end table
- Remarks about certain event types:
- @table @asis
- @item @code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
- @itemize
- @item
- @c See 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' in
- @c 'libgomp.oacc-c-c++-common/acc_prof-kernels-1.c',
- @c 'libgomp.oacc-c-c++-common/acc_prof-parallel-1.c'.
- When a compute construct triggers implicit
- @code{acc_ev_device_init_start} and @code{acc_ev_device_init_end}
- events, they currently aren't @emph{nested within} the corresponding
- @code{acc_ev_compute_construct_start} and
- @code{acc_ev_compute_construct_end}, but they're currently observed
- @emph{before} @code{acc_ev_compute_construct_start}.
- It's not clear what to do: the standard asks us to provide a lot of
- details to the @code{acc_ev_compute_construct_start} callback, without
- (implicitly) initializing a device before?
- @item
- Callbacks for these event types will not be invoked for calls to the
- @code{acc_set_device_type} and @code{acc_set_device_num} functions.
- It's not clear if they should be.
- @end itemize
- @item @code{acc_ev_enter_data_start}, @code{acc_ev_enter_data_end}, @code{acc_ev_exit_data_start}, @code{acc_ev_exit_data_end}
- @itemize
- @item
- Callbacks for these event types will also be invoked for OpenACC
- @emph{host_data} constructs.
- It's not clear if they should be.
- @item
- Callbacks for these event types will also be invoked when processing
- variable mappings specified in OpenACC @emph{declare} directives.
- It's not clear if they should be.
- @end itemize
- @end table
- Callbacks for the following event types will be invoked, but dispatch
- and information provided therein has not yet been thoroughly reviewed:
- @itemize
- @item @code{acc_ev_alloc}
- @item @code{acc_ev_free}
- @item @code{acc_ev_update_start}, @code{acc_ev_update_end}
- @item @code{acc_ev_enqueue_upload_start}, @code{acc_ev_enqueue_upload_end}
- @item @code{acc_ev_enqueue_download_start}, @code{acc_ev_enqueue_download_end}
- @end itemize
- During device initialization, and finalization, respectively,
- callbacks for the following event types will not yet be invoked:
- @itemize
- @item @code{acc_ev_alloc}
- @item @code{acc_ev_free}
- @end itemize
- Callbacks for the following event types have not yet been implemented,
- so currently won't be invoked:
- @itemize
- @item @code{acc_ev_device_shutdown_start}, @code{acc_ev_device_shutdown_end}
- @item @code{acc_ev_runtime_shutdown}
- @item @code{acc_ev_create}, @code{acc_ev_delete}
- @item @code{acc_ev_wait_start}, @code{acc_ev_wait_end}
- @end itemize
- For the following runtime library functions, not all expected
- callbacks will be invoked (mostly concerning implicit device
- initialization):
- @itemize
- @item @code{acc_get_num_devices}
- @item @code{acc_set_device_type}
- @item @code{acc_get_device_type}
- @item @code{acc_set_device_num}
- @item @code{acc_get_device_num}
- @item @code{acc_init}
- @item @code{acc_shutdown}
- @end itemize
- Aside from implicit device initialization, for the following runtime
- library functions, no callbacks will be invoked for shared-memory
- offloading devices (it's not clear if they should be):
- @itemize
- @item @code{acc_malloc}
- @item @code{acc_free}
- @item @code{acc_copyin}, @code{acc_present_or_copyin}, @code{acc_copyin_async}
- @item @code{acc_create}, @code{acc_present_or_create}, @code{acc_create_async}
- @item @code{acc_copyout}, @code{acc_copyout_async}, @code{acc_copyout_finalize}, @code{acc_copyout_finalize_async}
- @item @code{acc_delete}, @code{acc_delete_async}, @code{acc_delete_finalize}, @code{acc_delete_finalize_async}
- @item @code{acc_update_device}, @code{acc_update_device_async}
- @item @code{acc_update_self}, @code{acc_update_self_async}
- @item @code{acc_map_data}, @code{acc_unmap_data}
- @item @code{acc_memcpy_to_device}, @code{acc_memcpy_to_device_async}
- @item @code{acc_memcpy_from_device}, @code{acc_memcpy_from_device_async}
- @end itemize
- @c ---------------------------------------------------------------------
- @c The libgomp ABI
- @c ---------------------------------------------------------------------
- @node The libgomp ABI
- @chapter The libgomp ABI
- The following sections present notes on the external ABI as
- presented by libgomp. Only maintainers should need them.
- @menu
- * Implementing MASTER construct::
- * Implementing CRITICAL construct::
- * Implementing ATOMIC construct::
- * Implementing FLUSH construct::
- * Implementing BARRIER construct::
- * Implementing THREADPRIVATE construct::
- * Implementing PRIVATE clause::
- * Implementing FIRSTPRIVATE LASTPRIVATE COPYIN and COPYPRIVATE clauses::
- * Implementing REDUCTION clause::
- * Implementing PARALLEL construct::
- * Implementing FOR construct::
- * Implementing ORDERED construct::
- * Implementing SECTIONS construct::
- * Implementing SINGLE construct::
- * Implementing OpenACC's PARALLEL construct::
- @end menu
- @node Implementing MASTER construct
- @section Implementing MASTER construct
- @smallexample
- if (omp_get_thread_num () == 0)
- block
- @end smallexample
- Alternately, we generate two copies of the parallel subfunction
- and only include this in the version run by the primary thread.
- Surely this is not worthwhile though...
- @node Implementing CRITICAL construct
- @section Implementing CRITICAL construct
- Without a specified name,
- @smallexample
- void GOMP_critical_start (void);
- void GOMP_critical_end (void);
- @end smallexample
- so that we don't get COPY relocations from libgomp to the main
- application.
- With a specified name, use omp_set_lock and omp_unset_lock with
- name being transformed into a variable declared like
- @smallexample
- omp_lock_t gomp_critical_user_<name> __attribute__((common))
- @end smallexample
- Ideally the ABI would specify that all zero is a valid unlocked
- state, and so we wouldn't need to initialize this at
- startup.
- @node Implementing ATOMIC construct
- @section Implementing ATOMIC construct
- The target should implement the @code{__sync} builtins.
- Failing that we could add
- @smallexample
- void GOMP_atomic_enter (void)
- void GOMP_atomic_exit (void)
- @end smallexample
- which reuses the regular lock code, but with yet another lock
- object private to the library.
- @node Implementing FLUSH construct
- @section Implementing FLUSH construct
- Expands to the @code{__sync_synchronize} builtin.
- @node Implementing BARRIER construct
- @section Implementing BARRIER construct
- @smallexample
- void GOMP_barrier (void)
- @end smallexample
- @node Implementing THREADPRIVATE construct
- @section Implementing THREADPRIVATE construct
- In @emph{most} cases we can map this directly to @code{__thread}. Except
- that OMP allows constructors for C++ objects. We can either
- refuse to support this (how often is it used?) or we can
- implement something akin to .ctors.
- Even more ideally, this ctor feature is handled by extensions
- to the main pthreads library. Failing that, we can have a set
- of entry points to register ctor functions to be called.
- @node Implementing PRIVATE clause
- @section Implementing PRIVATE clause
- In association with a PARALLEL, or within the lexical extent
- of a PARALLEL block, the variable becomes a local variable in
- the parallel subfunction.
- In association with FOR or SECTIONS blocks, create a new
- automatic variable within the current function. This preserves
- the semantics of new variable creation.
- @node Implementing FIRSTPRIVATE LASTPRIVATE COPYIN and COPYPRIVATE clauses
- @section Implementing FIRSTPRIVATE LASTPRIVATE COPYIN and COPYPRIVATE clauses
- This seems simple enough for PARALLEL blocks. Create a private
- struct for communicating between the parent and subfunction.
- In the parent, copy in values for scalar and "small" structs;
- copy in addresses for other TREE_ADDRESSABLE types. In the
- subfunction, copy the value into the local variable.
- It is not clear what to do with bare FOR or SECTION blocks.
- The only thing I can figure is that we do something like:
- @smallexample
- #pragma omp for firstprivate(x) lastprivate(y)
- for (int i = 0; i < n; ++i)
- body;
- @end smallexample
- which becomes
- @smallexample
- @{
- int x = x, y;
- // for stuff
- if (i == n)
- y = y;
- @}
- @end smallexample
- where the "x=x" and "y=y" assignments actually have different
- uids for the two variables, i.e. not something you could write
- directly in C. Presumably this only makes sense if the "outer"
- x and y are global variables.
- COPYPRIVATE would work the same way, except the structure
- broadcast would have to happen via SINGLE machinery instead.
- @node Implementing REDUCTION clause
- @section Implementing REDUCTION clause
- The private struct mentioned in the previous section should have
- a pointer to an array of the type of the variable, indexed by the
- thread's @var{team_id}. The thread stores its final value into the
- array, and after the barrier, the primary thread iterates over the
- array to collect the values.
- @node Implementing PARALLEL construct
- @section Implementing PARALLEL construct
- @smallexample
- #pragma omp parallel
- @{
- body;
- @}
- @end smallexample
- becomes
- @smallexample
- void subfunction (void *data)
- @{
- use data;
- body;
- @}
- setup data;
- GOMP_parallel_start (subfunction, &data, num_threads);
- subfunction (&data);
- GOMP_parallel_end ();
- @end smallexample
- @smallexample
- void GOMP_parallel_start (void (*fn)(void *), void *data, unsigned num_threads)
- @end smallexample
- The @var{FN} argument is the subfunction to be run in parallel.
- The @var{DATA} argument is a pointer to a structure used to
- communicate data in and out of the subfunction, as discussed
- above with respect to FIRSTPRIVATE et al.
- The @var{NUM_THREADS} argument is 1 if an IF clause is present
- and false, or the value of the NUM_THREADS clause, if
- present, or 0.
- The function needs to create the appropriate number of
- threads and/or launch them from the dock. It needs to
- create the team structure and assign team ids.
- @smallexample
- void GOMP_parallel_end (void)
- @end smallexample
- Tears down the team and returns us to the previous @code{omp_in_parallel()} state.
- @node Implementing FOR construct
- @section Implementing FOR construct
- @smallexample
- #pragma omp parallel for
- for (i = lb; i <= ub; i++)
- body;
- @end smallexample
- becomes
- @smallexample
- void subfunction (void *data)
- @{
- long _s0, _e0;
- while (GOMP_loop_static_next (&_s0, &_e0))
- @{
- long _e1 = _e0, i;
- for (i = _s0; i < _e1; i++)
- body;
- @}
- GOMP_loop_end_nowait ();
- @}
- GOMP_parallel_loop_static (subfunction, NULL, 0, lb, ub+1, 1, 0);
- subfunction (NULL);
- GOMP_parallel_end ();
- @end smallexample
- @smallexample
- #pragma omp for schedule(runtime)
- for (i = 0; i < n; i++)
- body;
- @end smallexample
- becomes
- @smallexample
- @{
- long i, _s0, _e0;
- if (GOMP_loop_runtime_start (0, n, 1, &_s0, &_e0))
- do @{
- long _e1 = _e0;
- for (i = _s0; i < _e1; i++)
- body;
- @} while (GOMP_loop_runtime_next (&_s0, &_e0));
- GOMP_loop_end ();
- @}
- @end smallexample
- Note that while it looks like there is trickiness to propagating
- a non-constant STEP, there isn't really. We're explicitly allowed
- to evaluate it as many times as we want, and any variables involved
- should automatically be handled as PRIVATE or SHARED like any other
- variables. So the expression should remain evaluable in the
- subfunction. We can also pull it into a local variable if we like,
- but since it's supposed to remain unchanged, we can also not if we like.
- If we have SCHEDULE(STATIC), and no ORDERED, then we ought to be
- able to get away with no work-sharing context at all, since we can
- simply perform the arithmetic directly in each thread to divide up
- the iterations. Which would mean that we wouldn't need to call any
- of these routines.
- There are separate routines for handling loops with an ORDERED
- clause. Bookkeeping for that is non-trivial...
- @node Implementing ORDERED construct
- @section Implementing ORDERED construct
- @smallexample
- void GOMP_ordered_start (void)
- void GOMP_ordered_end (void)
- @end smallexample
- @node Implementing SECTIONS construct
- @section Implementing SECTIONS construct
- A block like
- @smallexample
- #pragma omp sections
- @{
- #pragma omp section
- stmt1;
- #pragma omp section
- stmt2;
- #pragma omp section
- stmt3;
- @}
- @end smallexample
- becomes
- @smallexample
- for (i = GOMP_sections_start (3); i != 0; i = GOMP_sections_next ())
- switch (i)
- @{
- case 1:
- stmt1;
- break;
- case 2:
- stmt2;
- break;
- case 3:
- stmt3;
- break;
- @}
- GOMP_barrier ();
- @end smallexample
- @node Implementing SINGLE construct
- @section Implementing SINGLE construct
- A block like
- @smallexample
- #pragma omp single
- @{
- body;
- @}
- @end smallexample
- becomes
- @smallexample
- if (GOMP_single_start ())
- body;
- GOMP_barrier ();
- @end smallexample
- while
- @smallexample
- #pragma omp single copyprivate(x)
- body;
- @end smallexample
- becomes
- @smallexample
- datap = GOMP_single_copy_start ();
- if (datap == NULL)
- @{
- body;
- data.x = x;
- GOMP_single_copy_end (&data);
- @}
- else
- x = datap->x;
- GOMP_barrier ();
- @end smallexample
- @node Implementing OpenACC's PARALLEL construct
- @section Implementing OpenACC's PARALLEL construct
- @smallexample
- void GOACC_parallel ()
- @end smallexample
- @c ---------------------------------------------------------------------
- @c Reporting Bugs
- @c ---------------------------------------------------------------------
- @node Reporting Bugs
- @chapter Reporting Bugs
- Bugs in the GNU Offloading and Multi Processing Runtime Library should
- be reported via @uref{https://gcc.gnu.org/bugzilla/, Bugzilla}. Please add
- "openacc", or "openmp", or both to the keywords field in the bug
- report, as appropriate.
- @c ---------------------------------------------------------------------
- @c GNU General Public License
- @c ---------------------------------------------------------------------
- @include gpl_v3.texi
- @c ---------------------------------------------------------------------
- @c GNU Free Documentation License
- @c ---------------------------------------------------------------------
- @include fdl.texi
- @c ---------------------------------------------------------------------
- @c Funding Free Software
- @c ---------------------------------------------------------------------
- @include funding.texi
- @c ---------------------------------------------------------------------
- @c Index
- @c ---------------------------------------------------------------------
- @node Library Index
- @unnumbered Library Index
- @printindex cp
- @bye
|