- /* IEEE-754 double-precision functions for Xtensa
- Copyright (C) 2006-2022 Free Software Foundation, Inc.
- Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- #ifdef __XTENSA_EB__
- #define xh a2
- #define xl a3
- #define yh a4
- #define yl a5
- #else
- #define xh a3
- #define xl a2
- #define yh a5
- #define yl a4
- #endif
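- /* With this layout the first double operand arrives in a2/a3 and the
- second in a4/a5, so xh/yh always name the word holding the sign,
- exponent and high fraction bits regardless of endianness; the result
- is returned in the same a2/a3 pair. */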
- /* Warning! The branch displacements for some Xtensa branch instructions
- are quite small, and this code has been carefully laid out to keep
- branch targets in range. If you change anything, be sure to check that
- the assembler is not relaxing anything to branch over a jump. */
- #ifdef L_negdf2
- .align 4
- .global __negdf2
- .type __negdf2, @function
- __negdf2:
- leaf_entry sp, 16
- movi a4, 0x80000000
- xor xh, xh, a4
- leaf_return
- #endif /* L_negdf2 */
- #ifdef L_addsubdf3
- .literal_position
- /* Addition */
- __adddf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
- .Ladd_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall yh, a6, .Ladd_return_nan_or_inf
- /* If x is a NaN, return it. Otherwise, return y. */
- slli a7, xh, 12
- or a7, a7, xl
- bnez a7, .Ladd_return_nan
- .Ladd_ynan_or_inf:
- /* Return y. */
- mov xh, yh
- mov xl, yl
- .Ladd_return_nan_or_inf:
- slli a7, xh, 12
- or a7, a7, xl
- bnez a7, .Ladd_return_nan
- leaf_return
- .Ladd_return_nan:
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
- leaf_return
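- /* The fraction of a double occupies bits 19:0 of xh plus all of xl, so
- 0x80000 is its most significant bit; setting that bit yields a quiet
- NaN without disturbing the sign or exponent. */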
- .Ladd_opposite_signs:
- /* Operand signs differ. Do a subtraction. */
- slli a7, a6, 11
- xor yh, yh, a7
- j .Lsub_same_sign
- .align 4
- .global __adddf3
- .type __adddf3, @function
- __adddf3:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- /* Check if the two operands have the same sign. */
- xor a7, xh, yh
- bltz a7, .Ladd_opposite_signs
- .Ladd_same_sign:
- /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
- ball xh, a6, .Ladd_xnan_or_inf
- ball yh, a6, .Ladd_ynan_or_inf
- /* Compare the exponents. The smaller operand will be shifted
- right by the exponent difference and added to the larger
- one. */
- extui a7, xh, 20, 12
- extui a8, yh, 20, 12
- bltu a7, a8, .Ladd_shiftx
- .Ladd_shifty:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone yh, a6, .Ladd_yexpzero
- /* Replace yh sign/exponent with 0x001. */
- or yh, yh, a6
- slli yh, yh, 11
- srli yh, yh, 11
- .Ladd_yexpdiff:
- /* Compute the exponent difference. Optimize for difference < 32. */
- sub a10, a7, a8
- bgeui a10, 32, .Ladd_bigshifty
-
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out of yl are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, yl, a9
- src yl, yh, yl
- srl yh, yh
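- /* With SAR set to the exponent difference d by the SSR above, SRC treats
- a register pair as a 64-bit value and shifts it right by d, while SRL
- and SLL shift right by d and left by (32 - d). This sequence is
- therefore a 64-bit right shift of yh/yl by d, with the d discarded bits
- of yl left in a9, left-aligned so that the msb of a9 is the rounding
- bit. */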
- .Ladd_addy:
- /* Do the 64-bit addition. */
- add xl, xl, yl
- add xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, 1
- 1:
- /* Check if the add overflowed into the exponent. */
- extui a10, xh, 20, 12
- beq a10, a7, .Ladd_round
- mov a8, a7
- j .Ladd_carry
- .Ladd_yexpzero:
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0", and increment the apparent exponent
- because subnormals behave as if they had the minimum (nonzero)
- exponent. Test for the case when both exponents are zero. */
- slli yh, yh, 12
- srli yh, yh, 12
- bnone xh, a6, .Ladd_bothexpzero
- addi a8, a8, 1
- j .Ladd_yexpdiff
- .Ladd_bothexpzero:
- /* Both exponents are zero. Handle this as a special case. There
- is no need to shift or round, and the normal code for handling
- a carry into the exponent field will not work because it
- assumes there is an implicit "1.0" that needs to be added. */
- add xl, xl, yl
- add xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, 1
- 1: leaf_return
- .Ladd_bigshifty:
- /* Exponent difference >= 64 -- just return the bigger value. */
- bgeui a10, 64, 1b
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out are saved in a9 for rounding the result. */
- ssr a10
- sll a11, yl /* lost bits shifted out of yl */
- src a9, yh, yl
- srl yl, yh
- movi yh, 0
- beqz a11, .Ladd_addy
- or a9, a9, a10 /* any positive, nonzero value will work */
- j .Ladd_addy
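- /* Rounding only needs to know whether the discarded bits are zero,
- exactly one half, or something else, so when the shift is 32 or more
- any nonzero bits lost from yl can be folded into the low (sticky) bits
- of a9; the shift count itself is a convenient nonzero value. */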
- .Ladd_xexpzero:
- /* Same as "yexpzero" except skip handling the case when both
- exponents are zero. */
- slli xh, xh, 12
- srli xh, xh, 12
- addi a7, a7, 1
- j .Ladd_xexpdiff
- .Ladd_shiftx:
- /* Same thing as the "shifty" code, but with x and y swapped. Also,
- because the exponent difference is always nonzero in this version,
- the shift sequence can use SLL and skip loading a constant zero. */
- bnone xh, a6, .Ladd_xexpzero
- or xh, xh, a6
- slli xh, xh, 11
- srli xh, xh, 11
- .Ladd_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Ladd_bigshiftx
-
- ssr a10
- sll a9, xl
- src xl, xh, xl
- srl xh, xh
- .Ladd_addx:
- add xl, xl, yl
- add xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, 1
- 1:
- /* Check if the add overflowed into the exponent. */
- extui a10, xh, 20, 12
- bne a10, a8, .Ladd_carry
- .Ladd_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi xl, xl, 1
- beqz xl, .Ladd_roundcarry
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Ladd_exactlyhalf
- 1: leaf_return
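- /* This implements round-to-nearest-even: the msb of a9 is the first
- discarded bit, so the increment happens when the leftover fraction is
- at least 1/2; if the remaining bits show it was exactly 1/2,
- .Ladd_exactlyhalf clears the lsb to pick the even neighbor, and a carry
- out of xl is finished in .Ladd_roundcarry. */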
- .Ladd_bigshiftx:
- /* Mostly the same thing as "bigshifty".... */
- bgeui a10, 64, .Ladd_returny
- ssr a10
- sll a11, xl
- src a9, xh, xl
- srl xl, xh
- movi xh, 0
- beqz a11, .Ladd_addx
- or a9, a9, a10
- j .Ladd_addx
- .Ladd_returny:
- mov xh, yh
- mov xl, yl
- leaf_return
- .Ladd_carry:
- /* The addition has overflowed into the exponent field, so the
- value needs to be renormalized. The mantissa of the result
- can be recovered by subtracting the original exponent and
- adding 0x100000 (which is the explicit "1.0" for the
- mantissa of the non-shifted operand -- the "1.0" for the
- shifted operand was already added). The mantissa can then
- be shifted right by one bit. The explicit "1.0" of the
- shifted mantissa then needs to be replaced by the exponent,
- incremented by one to account for the normalizing shift.
- It is faster to combine these operations: do the shift first
- and combine the additions and subtractions. If x is the
- original exponent, the result is:
- shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
- or:
- shifted mantissa + ((x + 1) << 19)
- Note that the exponent is incremented here by leaving the
- explicit "1.0" of the mantissa in the exponent field. */
- /* Shift xh/xl right by one bit. Save the lsb of xl. */
- mov a10, xl
- ssai 1
- src xl, xh, xl
- srl xh, xh
- /* See explanation above. The original exponent is in a8. */
- addi a8, a8, 1
- slli a8, a8, 19
- add xh, xh, a8
- /* Return an Infinity if the exponent overflowed. */
- ball xh, a6, .Ladd_infinity
-
- /* Same thing as the "round" code except the msb of the leftover
- fraction is bit 0 of a10, with the rest of the fraction in a9. */
- bbci.l a10, 0, 1f
- addi xl, xl, 1
- beqz xl, .Ladd_roundcarry
- beqz a9, .Ladd_exactlyhalf
- 1: leaf_return
- .Ladd_infinity:
- /* Clear the mantissa. */
- movi xl, 0
- srli xh, xh, 20
- slli xh, xh, 20
- /* The sign bit may have been lost in a carry-out. Put it back. */
- slli a8, a8, 1
- or xh, xh, a8
- leaf_return
- .Ladd_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- leaf_return
- .Ladd_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow to the exponent is OK. */
- leaf_return
- /* Subtraction */
- __subdf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
- .Lsub_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall yh, a6, .Lsub_return_nan_or_inf
- .Lsub_return_nan:
- /* Both x and y are either NaN or Inf, so the result is NaN. */
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
- leaf_return
- .Lsub_ynan_or_inf:
- /* Negate y and return it. */
- slli a7, a6, 11
- xor xh, yh, a7
- mov xl, yl
- .Lsub_return_nan_or_inf:
- slli a7, xh, 12
- or a7, a7, xl
- bnez a7, .Lsub_return_nan
- leaf_return
- .Lsub_opposite_signs:
- /* Operand signs differ. Do an addition. */
- slli a7, a6, 11
- xor yh, yh, a7
- j .Ladd_same_sign
- .align 4
- .global __subdf3
- .type __subdf3, @function
- __subdf3:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- /* Check if the two operands have the same sign. */
- xor a7, xh, yh
- bltz a7, .Lsub_opposite_signs
- .Lsub_same_sign:
- /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
- ball xh, a6, .Lsub_xnan_or_inf
- ball yh, a6, .Lsub_ynan_or_inf
- /* Compare the operands. In contrast to addition, the entire
- value matters here. */
- extui a7, xh, 20, 11
- extui a8, yh, 20, 11
- bltu xh, yh, .Lsub_xsmaller
- beq xh, yh, .Lsub_compare_low
- .Lsub_ysmaller:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone yh, a6, .Lsub_yexpzero
- /* Replace yh sign/exponent with 0x001. */
- or yh, yh, a6
- slli yh, yh, 11
- srli yh, yh, 11
- .Lsub_yexpdiff:
- /* Compute the exponent difference. Optimize for difference < 32. */
- sub a10, a7, a8
- bgeui a10, 32, .Lsub_bigshifty
-
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out of yl are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, yl, a9
- src yl, yh, yl
- srl yh, yh
- .Lsub_suby:
- /* Do the 64-bit subtraction. */
- sub xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, -1
- 1: sub xl, xl, yl
- /* Subtract the leftover bits in a9 from zero and propagate any
- borrow from xh/xl. */
- neg a9, a9
- beqz a9, 1f
- addi a5, xh, -1
- moveqz xh, a5, xl
- addi xl, xl, -1
- 1:
- /* Check if the subtract underflowed into the exponent. */
- extui a10, xh, 20, 11
- beq a10, a7, .Lsub_round
- j .Lsub_borrow
- .Lsub_compare_low:
- /* The high words are equal. Compare the low words. */
- bltu xl, yl, .Lsub_xsmaller
- bltu yl, xl, .Lsub_ysmaller
- /* The operands are equal. Return 0.0. */
- movi xh, 0
- movi xl, 0
- 1: leaf_return
- .Lsub_yexpzero:
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0". Unless x is also a subnormal, increment
- y's apparent exponent because subnormals behave as if they had
- the minimum (nonzero) exponent. */
- slli yh, yh, 12
- srli yh, yh, 12
- bnone xh, a6, .Lsub_yexpdiff
- addi a8, a8, 1
- j .Lsub_yexpdiff
- .Lsub_bigshifty:
- /* Exponent difference >= 64 -- just return the bigger value. */
- bgeui a10, 64, 1b
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out are saved in a9 for rounding the result. */
- ssr a10
- sll a11, yl /* lost bits shifted out of yl */
- src a9, yh, yl
- srl yl, yh
- movi yh, 0
- beqz a11, .Lsub_suby
- or a9, a9, a10 /* any positive, nonzero value will work */
- j .Lsub_suby
- .Lsub_xsmaller:
- /* Same thing as the "ysmaller" code, but with x and y swapped and
- with y negated. */
- bnone xh, a6, .Lsub_xexpzero
- or xh, xh, a6
- slli xh, xh, 11
- srli xh, xh, 11
- .Lsub_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Lsub_bigshiftx
-
- ssr a10
- movi a9, 0
- src a9, xl, a9
- src xl, xh, xl
- srl xh, xh
- /* Negate y. */
- slli a11, a6, 11
- xor yh, yh, a11
- .Lsub_subx:
- sub xl, yl, xl
- sub xh, yh, xh
- bgeu yl, xl, 1f
- addi xh, xh, -1
- 1:
- /* Subtract the leftover bits in a9 from zero and propagate any
- borrow from xh/xl. */
- neg a9, a9
- beqz a9, 1f
- addi a5, xh, -1
- moveqz xh, a5, xl
- addi xl, xl, -1
- 1:
- /* Check if the subtract underflowed into the exponent. */
- extui a10, xh, 20, 11
- bne a10, a8, .Lsub_borrow
- .Lsub_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi xl, xl, 1
- beqz xl, .Lsub_roundcarry
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Lsub_exactlyhalf
- 1: leaf_return
- .Lsub_xexpzero:
- /* Same as "yexpzero". */
- slli xh, xh, 12
- srli xh, xh, 12
- bnone yh, a6, .Lsub_xexpdiff
- addi a7, a7, 1
- j .Lsub_xexpdiff
- .Lsub_bigshiftx:
- /* Mostly the same thing as "bigshifty", but with the sign bit of the
- shifted value set so that the subsequent subtraction flips the
- sign of y. */
- bgeui a10, 64, .Lsub_returny
- ssr a10
- sll a11, xl
- src a9, xh, xl
- srl xl, xh
- slli xh, a6, 11 /* set sign bit of xh */
- beqz a11, .Lsub_subx
- or a9, a9, a10
- j .Lsub_subx
- .Lsub_returny:
- /* Negate and return y. */
- slli a7, a6, 11
- xor xh, yh, a7
- mov xl, yl
- leaf_return
- .Lsub_borrow:
- /* The subtraction has underflowed into the exponent field, so the
- value needs to be renormalized. Shift the mantissa left as
- needed to remove any leading zeros and adjust the exponent
- accordingly. If the exponent is not large enough to remove
- all the leading zeros, the result will be a subnormal value. */
- slli a8, xh, 12
- beqz a8, .Lsub_xhzero
- do_nsau a6, a8, a7, a11
- srli a8, a8, 12
- bge a6, a10, .Lsub_subnormal
- addi a6, a6, 1
- .Lsub_shift_lt32:
- /* Shift the mantissa (a8/xl/a9) left by a6. */
- ssl a6
- src a8, a8, xl
- src xl, xl, a9
- sll a9, a9
- /* Combine the shifted mantissa with the sign and exponent,
- decrementing the exponent by a6. (The exponent has already
- been decremented by one due to the borrow from the subtraction,
- but adding the mantissa will increment the exponent by one.) */
- srli xh, xh, 20
- sub xh, xh, a6
- slli xh, xh, 20
- add xh, xh, a8
- j .Lsub_round
- .Lsub_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- leaf_return
- .Lsub_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow to the exponent is OK. */
- leaf_return
- .Lsub_xhzero:
- /* When normalizing the result, all the mantissa bits in the high
- word are zero. Shift by "20 + (leading zero count of xl) + 1". */
- do_nsau a6, xl, a7, a11
- addi a6, a6, 21
- blt a10, a6, .Lsub_subnormal
- .Lsub_normalize_shift:
- bltui a6, 32, .Lsub_shift_lt32
- ssl a6
- src a8, xl, a9
- sll xl, a9
- movi a9, 0
- srli xh, xh, 20
- sub xh, xh, a6
- slli xh, xh, 20
- add xh, xh, a8
- j .Lsub_round
- .Lsub_subnormal:
- /* The exponent is too small to shift away all the leading zeros.
- Set a6 to the current exponent (which has already been
- decremented by the borrow) so that the exponent of the result
- will be zero. Do not add 1 to a6 in this case, because: (1)
- adding the mantissa will not increment the exponent, so there is
- no need to subtract anything extra from the exponent to
- compensate, and (2) the effective exponent of a subnormal is 1
- not 0 so the shift amount must be 1 smaller than normal. */
- mov a6, a10
- j .Lsub_normalize_shift
- #endif /* L_addsubdf3 */
- #ifdef L_muldf3
- /* Multiplication */
- #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
- #define XCHAL_NO_MUL 1
- #endif
- .literal_position
- __muldf3_aux:
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
- .Lmul_xexpzero:
- /* Clear the sign bit of x. */
- slli xh, xh, 1
- srli xh, xh, 1
- /* If x is zero, return zero. */
- or a10, xh, xl
- beqz a10, .Lmul_return_zero
- /* Normalize x. Adjust the exponent in a8. */
- beqz xh, .Lmul_xh_zero
- do_nsau a10, xh, a11, a12
- addi a10, a10, -11
- ssl a10
- src xh, xh, xl
- sll xl, xl
- movi a8, 1
- sub a8, a8, a10
- j .Lmul_xnormalized
- .Lmul_xh_zero:
- do_nsau a10, xl, a11, a12
- addi a10, a10, -11
- movi a8, -31
- sub a8, a8, a10
- ssl a10
- bltz a10, .Lmul_xl_srl
- sll xh, xl
- movi xl, 0
- j .Lmul_xnormalized
- .Lmul_xl_srl:
- srl xh, xl
- sll xl, xl
- j .Lmul_xnormalized
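- /* After normalization the fraction sits with its leading 1 at bit 20 of
- xh, the same position as a normal number's implicit "1.0", and a8 holds
- the adjusted exponent: a subnormal's effective exponent is 1, so a8 is
- 1 minus the number of bits shifted (the "-31" form covers the case
- where the shift crosses the word boundary because xh was zero). */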
-
- .Lmul_yexpzero:
- /* Clear the sign bit of y. */
- slli yh, yh, 1
- srli yh, yh, 1
- /* If y is zero, return zero. */
- or a10, yh, yl
- beqz a10, .Lmul_return_zero
- /* Normalize y. Adjust the exponent in a9. */
- beqz yh, .Lmul_yh_zero
- do_nsau a10, yh, a11, a12
- addi a10, a10, -11
- ssl a10
- src yh, yh, yl
- sll yl, yl
- movi a9, 1
- sub a9, a9, a10
- j .Lmul_ynormalized
- .Lmul_yh_zero:
- do_nsau a10, yl, a11, a12
- addi a10, a10, -11
- movi a9, -31
- sub a9, a9, a10
- ssl a10
- bltz a10, .Lmul_yl_srl
- sll yh, yl
- movi yl, 0
- j .Lmul_ynormalized
- .Lmul_yl_srl:
- srl yh, yl
- sll yl, yl
- j .Lmul_ynormalized
- .Lmul_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- j .Lmul_done
- .Lmul_xnan_or_inf:
- /* If y is zero, return NaN. */
- bnez yl, 1f
- slli a8, yh, 1
- beqz a8, .Lmul_return_nan
- 1:
- /* If y is NaN, return y. */
- bnall yh, a6, .Lmul_returnx
- slli a8, yh, 12
- or a8, a8, yl
- beqz a8, .Lmul_returnx
- .Lmul_returny:
- mov xh, yh
- mov xl, yl
- .Lmul_returnx:
- slli a8, xh, 12
- or a8, a8, xl
- bnez a8, .Lmul_return_nan
- /* Set the sign bit and return. */
- extui a7, a7, 31, 1
- slli xh, xh, 1
- ssai 1
- src xh, a7, xh
- j .Lmul_done
- .Lmul_ynan_or_inf:
- /* If x is zero, return NaN. */
- bnez xl, .Lmul_returny
- slli a8, xh, 1
- bnez a8, .Lmul_returny
- mov xh, yh
- .Lmul_return_nan:
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
- j .Lmul_done
- .align 4
- .global __muldf3
- .type __muldf3, @function
- __muldf3:
- #if __XTENSA_CALL0_ABI__
- leaf_entry sp, 32
- addi sp, sp, -32
- s32i a12, sp, 16
- s32i a13, sp, 20
- s32i a14, sp, 24
- s32i a15, sp, 28
- #elif XCHAL_NO_MUL
- /* This is not really a leaf function; allocate enough stack space
- to allow CALL12s to a helper function. */
- leaf_entry sp, 64
- #else
- leaf_entry sp, 32
- #endif
- movi a6, 0x7ff00000
- /* Get the sign of the result. */
- xor a7, xh, yh
- /* Check for NaN and infinity. */
- ball xh, a6, .Lmul_xnan_or_inf
- ball yh, a6, .Lmul_ynan_or_inf
- /* Extract the exponents. */
- extui a8, xh, 20, 11
- extui a9, yh, 20, 11
- beqz a8, .Lmul_xexpzero
- .Lmul_xnormalized:
- beqz a9, .Lmul_yexpzero
- .Lmul_ynormalized:
- /* Add the exponents. */
- add a8, a8, a9
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0x1fffff
- or xh, xh, a6
- and xh, xh, a10
- or yh, yh, a6
- and yh, yh, a10
- /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
- The least-significant word of the result is thrown away except
- that if it is nonzero, the lsb of a6 is set to 1. */
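- /* The decomposition used here is simply
- (xh*2^32 + xl) * (yh*2^32 + yl)
- = xh*yh*2^64 + (xh*yl + xl*yh)*2^32 + xl*yl,
- with the four 32-bit result words landing in xh, xl, a6 and the
- discarded low word. Both mantissas are below 2^53, so the product is
- below 2^106 and the top word stays small. */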
- #if XCHAL_HAVE_MUL32_HIGH
- /* Compute a6 with any carry-outs in a10. */
- movi a10, 0
- mull a6, xl, yh
- mull a11, xh, yl
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a10, a10, 1
- 1:
- muluh a11, xl, yl
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a10, a10, 1
- 1:
- /* If the low word of the result is nonzero, set the lsb of a6. */
- mull a11, xl, yl
- beqz a11, 1f
- movi a9, 1
- or a6, a6, a9
- 1:
- /* Compute xl with any carry-outs in a9. */
- movi a9, 0
- mull a11, xh, yh
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a9, a9, 1
- 1:
- muluh a11, xh, yl
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a9, a9, 1
- 1:
- muluh xl, xl, yh
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
- 1:
- /* Compute xh. */
- muluh xh, xh, yh
- add xh, xh, a9
- #else /* ! XCHAL_HAVE_MUL32_HIGH */
- /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
- products. These partial products are:
- 0 xll * yll
- 1 xll * ylh
- 2 xlh * yll
- 3 xll * yhl
- 4 xlh * ylh
- 5 xhl * yll
- 6 xll * yhh
- 7 xlh * yhl
- 8 xhl * ylh
- 9 xhh * yll
- 10 xlh * yhh
- 11 xhl * yhl
- 12 xhh * ylh
- 13 xhl * yhh
- 14 xhh * yhl
- 15 xhh * yhh
- where the input chunks are (hh, hl, lh, ll). If using the Mul16
- or Mul32 multiplier options, these input chunks must be stored in
- separate registers. For Mac16, the UMUL.AA.* opcodes can specify
- that the inputs come from either half of the registers, so there
- is no need to shift them out ahead of time. If there is no
- multiply hardware, the 16-bit chunks can be extracted when setting
- up the arguments to the separate multiply function. */
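- /* Illustrative grouping (not code): with xll = xl & 0xffff,
- xlh = xl >> 16, xhl = xh & 0xffff, xhh = xh >> 16 (and likewise for y),
- each partial product contributes at a fixed 16-bit column:
- 2^0: pp0; 2^16: pp1-2; 2^32: pp3-5; 2^48: pp6-9;
- 2^64: pp10-12; 2^80: pp13-14; 2^96: pp15.
- The code below accumulates the columns from least to most significant,
- propagating carries between 32-bit words. */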
- /* Save a7 since it is needed to hold a temporary value. */
- s32i a7, sp, 4
- #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Calling a separate multiply function will clobber a0 and requires
- use of a8 as a temporary, so save those values now. (The function
- uses a custom ABI so nothing else needs to be saved.) */
- s32i a0, sp, 0
- s32i a8, sp, 8
- #endif
- #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
- #define xlh a12
- #define ylh a13
- #define xhh a14
- #define yhh a15
- /* Get the high halves of the inputs into registers. */
- srli xlh, xl, 16
- srli ylh, yl, 16
- srli xhh, xh, 16
- srli yhh, yh, 16
- #define xll xl
- #define yll yl
- #define xhl xh
- #define yhl yh
- #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
- /* Clear the high halves of the inputs. This does not matter
- for MUL16 because the high bits are ignored. */
- extui xl, xl, 0, 16
- extui xh, xh, 0, 16
- extui yl, yl, 0, 16
- extui yh, yh, 0, 16
- #endif
- #endif /* MUL16 || MUL32 */
- #if XCHAL_HAVE_MUL16
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mul16u dst, xreg ## xhalf, yreg ## yhalf
- #elif XCHAL_HAVE_MUL32
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mull dst, xreg ## xhalf, yreg ## yhalf
- #elif XCHAL_HAVE_MAC16
- /* The preprocessor insists on inserting a space when concatenating after
- a period in the definition of do_mul below. These macros are a workaround
- using underscores instead of periods when doing the concatenation. */
- #define umul_aa_ll umul.aa.ll
- #define umul_aa_lh umul.aa.lh
- #define umul_aa_hl umul.aa.hl
- #define umul_aa_hh umul.aa.hh
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- umul_aa_ ## xhalf ## yhalf xreg, yreg; \
- rsr dst, ACCLO
- #else /* no multiply hardware */
-
- #define set_arg_l(dst, src) \
- extui dst, src, 0, 16
- #define set_arg_h(dst, src) \
- srli dst, src, 16
- #if __XTENSA_CALL0_ABI__
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a13, xreg); \
- set_arg_ ## yhalf (a14, yreg); \
- call0 .Lmul_mulsi3; \
- mov dst, a12
- #else
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a14, xreg); \
- set_arg_ ## yhalf (a15, yreg); \
- call12 .Lmul_mulsi3; \
- mov dst, a14
- #endif /* __XTENSA_CALL0_ABI__ */
- #endif /* no multiply hardware */
- /* Add pp1 and pp2 into a10 with carry-out in a9. */
- do_mul(a10, xl, l, yl, h) /* pp 1 */
- do_mul(a11, xl, h, yl, l) /* pp 2 */
- movi a9, 0
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a9, a9, 1
- 1:
- /* Initialize a6 with a9/a10 shifted into position. Note that
- this value can be safely incremented without any carry-outs. */
- ssai 16
- src a6, a9, a10
- /* Compute the low word into a10. */
- do_mul(a11, xl, l, yl, l) /* pp 0 */
- sll a10, a10
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a6, a6, 1
- 1:
- /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
- This is good enough to determine the low half of a6, so that any
- nonzero bits from the low word of the result can be collapsed
- into a6, freeing up a register. */
- movi a9, 0
- do_mul(a11, xl, l, yh, l) /* pp 3 */
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
- 1:
- do_mul(a11, xl, h, yl, h) /* pp 4 */
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
- 1:
- do_mul(a11, xh, l, yl, l) /* pp 5 */
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
- 1:
- /* Collapse any nonzero bits from the low word into a6. */
- beqz a10, 1f
- movi a11, 1
- or a6, a6, a11
- 1:
- /* Add pp6-9 into a11 with carry-outs in a10. */
- do_mul(a7, xl, l, yh, h) /* pp 6 */
- do_mul(a11, xh, h, yl, l) /* pp 9 */
- movi a10, 0
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
- 1:
- do_mul(a7, xl, h, yh, l) /* pp 7 */
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
- 1:
- do_mul(a7, xh, l, yl, h) /* pp 8 */
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
- 1:
- /* Shift a10/a11 into position, and add low half of a11 to a6. */
- src a10, a10, a11
- add a10, a10, a9
- sll a11, a11
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a10, a10, 1
- 1:
- /* Add pp10-12 into xl with carry-outs in a9. */
- movi a9, 0
- do_mul(xl, xl, h, yh, h) /* pp 10 */
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
- 1:
- do_mul(a10, xh, l, yh, l) /* pp 11 */
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
- 1:
- do_mul(a10, xh, h, yl, h) /* pp 12 */
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
- 1:
- /* Add pp13-14 into a11 with carry-outs in a10. */
- do_mul(a11, xh, l, yh, h) /* pp 13 */
- do_mul(a7, xh, h, yh, l) /* pp 14 */
- movi a10, 0
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
- 1:
- /* Shift a10/a11 into position, and add low half of a11 to a6. */
- src a10, a10, a11
- add a10, a10, a9
- sll a11, a11
- add xl, xl, a11
- bgeu xl, a11, 1f
- addi a10, a10, 1
- 1:
- /* Compute xh. */
- do_mul(xh, xh, h, yh, h) /* pp 15 */
- add xh, xh, a10
- /* Restore values saved on the stack during the multiplication. */
- l32i a7, sp, 4
- #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- l32i a0, sp, 0
- l32i a8, sp, 8
- #endif
- #endif /* ! XCHAL_HAVE_MUL32_HIGH */
- /* Shift left by 12 bits, unless there was a carry-out from the
- multiply, in which case, shift by 11 bits and increment the
- exponent. Note: It is convenient to use the constant 0x3ff
- instead of 0x400 when removing the extra exponent bias (so that
- it is easy to construct 0x7fe for the overflow check). Reverse
- the logic here to decrement the exponent sum by one unless there
- was a carry-out. */
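- /* Each mantissa lies in [2^52, 2^53), so the 128-bit product lies in
- [2^104, 2^106): its leading 1 is bit 8 or bit 9 of the xh result word.
- The "srli a5, xh, 21 - 12" below (a shift of 9) detects the carry-out
- case, and the left shift of 11 or 12 puts the leading 1 back at bit 20
- of xh, where the exponent field will be added on top of it. */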
- movi a4, 11
- srli a5, xh, 21 - 12
- bnez a5, 1f
- addi a4, a4, 1
- addi a8, a8, -1
- 1: ssl a4
- src xh, xh, xl
- src xl, xl, a6
- sll a6, a6
- /* Subtract the extra bias from the exponent sum (plus one to account
- for the explicit "1.0" of the mantissa that will be added to the
- exponent in the final result). */
- movi a4, 0x3ff
- sub a8, a8, a4
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..7fd are OK here. */
- slli a4, a4, 1 /* 0x7fe */
- bgeu a8, a4, .Lmul_overflow
-
- .Lmul_round:
- /* Round. */
- bgez a6, .Lmul_rounded
- addi xl, xl, 1
- beqz xl, .Lmul_roundcarry
- slli a6, a6, 1
- beqz a6, .Lmul_exactlyhalf
- .Lmul_rounded:
- /* Add the exponent to the mantissa. */
- slli a8, a8, 20
- add xh, xh, a8
- .Lmul_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or xh, xh, a7
- .Lmul_done:
- #if __XTENSA_CALL0_ABI__
- l32i a12, sp, 16
- l32i a13, sp, 20
- l32i a14, sp, 24
- l32i a15, sp, 28
- addi sp, sp, 32
- #endif
- leaf_return
- .Lmul_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- j .Lmul_rounded
- .Lmul_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow is OK -- it will be added to the exponent. */
- j .Lmul_rounded
- .Lmul_overflow:
- bltz a8, .Lmul_underflow
- /* Return +/- Infinity. */
- addi a8, a4, 1 /* 0x7ff */
- slli xh, a8, 20
- movi xl, 0
- j .Lmul_addsign
- .Lmul_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- mov a9, a6
- ssr a8
- bgeui a8, 32, .Lmul_bigshift
-
- /* Shift xh/xl right. Any bits that are shifted out of xl are saved
- in a6 (combined with the shifted-out bits currently in a6) for
- rounding the result. */
- sll a6, xl
- src xl, xh, xl
- srl xh, xh
- j 1f
- .Lmul_bigshift:
- bgeui a8, 64, .Lmul_flush_to_zero
- sll a10, xl /* lost bits shifted out of xl */
- src a6, xh, xl
- srl xl, xh
- movi xh, 0
- or a9, a9, a10
- /* Set the exponent to zero. */
- 1: movi a8, 0
- /* Pack any nonzero bits shifted out into a6. */
- beqz a9, .Lmul_round
- movi a9, 1
- or a6, a6, a9
- j .Lmul_round
-
- .Lmul_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- j .Lmul_done
- #if XCHAL_NO_MUL
-
- /* For Xtensa processors with no multiply hardware, this simplified
- version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. When using CALL0, this function
- uses a custom ABI: the inputs are passed in a13 and a14, the
- result is returned in a12, and a8 and a15 are clobbered. */
- .align 4
- .Lmul_mulsi3:
- leaf_entry sp, 16
- .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
- movi \dst, 0
- 1: add \tmp1, \src2, \dst
- extui \tmp2, \src1, 0, 1
- movnez \dst, \tmp1, \tmp2
- do_addx2 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 1, 1
- movnez \dst, \tmp1, \tmp2
- do_addx4 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 2, 1
- movnez \dst, \tmp1, \tmp2
- do_addx8 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 3, 1
- movnez \dst, \tmp1, \tmp2
- srli \src1, \src1, 4
- slli \src2, \src2, 4
- bnez \src1, 1b
- .endm
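- /* The macro is a shift-and-add multiply that consumes four bits of
- src1 per iteration. Roughly, in C (illustrative only):
- uint32_t dst = 0;
- do {
- if (src1 & 1) dst += src2;
- if (src1 & 2) dst += src2 << 1;
- if (src1 & 4) dst += src2 << 2;
- if (src1 & 8) dst += src2 << 3;
- src1 >>= 4;
- src2 <<= 4;
- } while (src1);
- Only the low 32 bits of the product are kept, which is sufficient here
- because the callers pass 16-bit chunks. */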
- #if __XTENSA_CALL0_ABI__
- mul_mulsi3_body a12, a13, a14, a15, a8
- #else
- /* The result will be written into a2, so save that argument in a4. */
- mov a4, a2
- mul_mulsi3_body a2, a4, a3, a5, a6
- #endif
- leaf_return
- #endif /* XCHAL_NO_MUL */
- #endif /* L_muldf3 */
- #ifdef L_divdf3
- /* Division */
- #if XCHAL_HAVE_DFP_DIV
- .text
- .align 4
- .global __divdf3
- .type __divdf3, @function
- __divdf3:
- leaf_entry sp, 16
- wfrd f1, xh, xl
- wfrd f2, yh, yl
- div0.d f3, f2
- nexp01.d f4, f2
- const.d f0, 1
- maddn.d f0, f4, f3
- const.d f5, 0
- mov.d f7, f2
- mkdadj.d f7, f1
- maddn.d f3, f0, f3
- maddn.d f5, f0, f0
- nexp01.d f1, f1
- div0.d f2, f2
- maddn.d f3, f5, f3
- const.d f5, 1
- const.d f0, 0
- neg.d f6, f1
- maddn.d f5, f4, f3
- maddn.d f0, f6, f2
- maddn.d f3, f5, f3
- maddn.d f6, f4, f0
- const.d f2, 1
- maddn.d f2, f4, f3
- maddn.d f0, f6, f3
- neg.d f1, f1
- maddn.d f3, f2, f3
- maddn.d f1, f4, f0
- addexpm.d f0, f7
- addexp.d f3, f7
- divn.d f0, f1, f3
- rfr xl, f0
- rfrd xh, f0
- leaf_return
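- /* Roughly: div0.d supplies an initial approximation of 1/y, the chain
- of maddn.d instructions refines it and forms the quotient, and the
- remaining instructions handle the exponent adjustment and special
- cases before the result is read back with rfr/rfrd. */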
- #else
- .literal_position
- __divdf3_aux:
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
- .Ldiv_yexpzero:
- /* Clear the sign bit of y. */
- slli yh, yh, 1
- srli yh, yh, 1
- /* Check for division by zero. */
- or a10, yh, yl
- beqz a10, .Ldiv_yzero
- /* Normalize y. Adjust the exponent in a9. */
- beqz yh, .Ldiv_yh_zero
- do_nsau a10, yh, a11, a9
- addi a10, a10, -11
- ssl a10
- src yh, yh, yl
- sll yl, yl
- movi a9, 1
- sub a9, a9, a10
- j .Ldiv_ynormalized
- .Ldiv_yh_zero:
- do_nsau a10, yl, a11, a9
- addi a10, a10, -11
- movi a9, -31
- sub a9, a9, a10
- ssl a10
- bltz a10, .Ldiv_yl_srl
- sll yh, yl
- movi yl, 0
- j .Ldiv_ynormalized
- .Ldiv_yl_srl:
- srl yh, yl
- sll yl, yl
- j .Ldiv_ynormalized
- .Ldiv_yzero:
- /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
- slli xh, xh, 1
- srli xh, xh, 1
- or xl, xl, xh
- srli xh, a7, 31
- slli xh, xh, 31
- or xh, xh, a6
- bnez xl, 1f
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
- 1: movi xl, 0
- leaf_return
- .Ldiv_xexpzero:
- /* Clear the sign bit of x. */
- slli xh, xh, 1
- srli xh, xh, 1
- /* If x is zero, return zero. */
- or a10, xh, xl
- beqz a10, .Ldiv_return_zero
- /* Normalize x. Adjust the exponent in a8. */
- beqz xh, .Ldiv_xh_zero
- do_nsau a10, xh, a11, a8
- addi a10, a10, -11
- ssl a10
- src xh, xh, xl
- sll xl, xl
- movi a8, 1
- sub a8, a8, a10
- j .Ldiv_xnormalized
- .Ldiv_xh_zero:
- do_nsau a10, xl, a11, a8
- addi a10, a10, -11
- movi a8, -31
- sub a8, a8, a10
- ssl a10
- bltz a10, .Ldiv_xl_srl
- sll xh, xl
- movi xl, 0
- j .Ldiv_xnormalized
- .Ldiv_xl_srl:
- srl xh, xl
- sll xl, xl
- j .Ldiv_xnormalized
-
- .Ldiv_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- leaf_return
- .Ldiv_xnan_or_inf:
- /* Set the sign bit of the result. */
- srli a7, yh, 31
- slli a7, a7, 31
- xor xh, xh, a7
- /* If y is NaN or Inf, return NaN. */
- ball yh, a6, .Ldiv_return_nan
- slli a8, xh, 12
- or a8, a8, xl
- bnez a8, .Ldiv_return_nan
- leaf_return
- .Ldiv_ynan_or_inf:
- /* If y is Infinity, return zero. */
- slli a8, yh, 12
- or a8, a8, yl
- beqz a8, .Ldiv_return_zero
- /* y is NaN; return it. */
- mov xh, yh
- mov xl, yl
- .Ldiv_return_nan:
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
- leaf_return
- .Ldiv_highequal1:
- bltu xl, yl, 2f
- j 3f
- .align 4
- .global __divdf3
- .type __divdf3, @function
- __divdf3:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- /* Get the sign of the result. */
- xor a7, xh, yh
- /* Check for NaN and infinity. */
- ball xh, a6, .Ldiv_xnan_or_inf
- ball yh, a6, .Ldiv_ynan_or_inf
- /* Extract the exponents. */
- extui a8, xh, 20, 11
- extui a9, yh, 20, 11
- beqz a9, .Ldiv_yexpzero
- .Ldiv_ynormalized:
- beqz a8, .Ldiv_xexpzero
- .Ldiv_xnormalized:
- /* Subtract the exponents. */
- sub a8, a8, a9
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0x1fffff
- or xh, xh, a6
- and xh, xh, a10
- or yh, yh, a6
- and yh, yh, a10
- /* Set SAR for left shift by one. */
- ssai (32 - 1)
- /* The first digit of the mantissa division must be a one.
- Shift x (and adjust the exponent) as needed to make this true. */
- bltu yh, xh, 3f
- beq yh, xh, .Ldiv_highequal1
- 2: src xh, xh, xl
- sll xl, xl
- addi a8, a8, -1
- 3:
- /* Do the first subtraction and shift. */
- sub xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, -1
- 1: sub xl, xl, yl
- src xh, xh, xl
- sll xl, xl
- /* Put the quotient into a10/a11. */
- movi a10, 0
- movi a11, 1
- /* Divide one bit at a time for 52 bits. */
- movi a9, 52
- #if XCHAL_HAVE_LOOPS
- loop a9, .Ldiv_loopend
- #endif
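- /* Each pass appends one quotient bit. Roughly, in C (illustrative
- only), with the partial remainder in xh/xl, the divisor mantissa in
- yh/yl and the quotient in a10/a11:
- q <<= 1;
- if (rem >= div) { q |= 1; rem -= div; }
- rem <<= 1;
- Fifty-two passes after the initial subtraction give a 53-bit quotient;
- what remains in xh/xl is twice the final remainder and is used for
- rounding below. */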
- .Ldiv_loop:
- /* Shift the quotient << 1. */
- src a10, a10, a11
- sll a11, a11
- /* Is this digit a 0 or 1? */
- bltu xh, yh, 3f
- beq xh, yh, .Ldiv_highequal2
- /* Output a 1 and subtract. */
- 2: addi a11, a11, 1
- sub xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, -1
- 1: sub xl, xl, yl
- /* Shift the dividend << 1. */
- 3: src xh, xh, xl
- sll xl, xl
- #if !XCHAL_HAVE_LOOPS
- addi a9, a9, -1
- bnez a9, .Ldiv_loop
- #endif
- .Ldiv_loopend:
- /* Add the exponent bias (less one to account for the explicit "1.0"
- of the mantissa that will be added to the exponent in the final
- result). */
- movi a9, 0x3fe
- add a8, a8, a9
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..7fd are OK here. */
- addmi a9, a9, 0x400 /* 0x7fe */
- bgeu a8, a9, .Ldiv_overflow
- .Ldiv_round:
- /* Round. The remainder (<< 1) is in xh/xl. */
- bltu xh, yh, .Ldiv_rounded
- beq xh, yh, .Ldiv_highequal3
- .Ldiv_roundup:
- addi a11, a11, 1
- beqz a11, .Ldiv_roundcarry
- .Ldiv_rounded:
- mov xl, a11
- /* Add the exponent to the mantissa. */
- slli a8, a8, 20
- add xh, a10, a8
- .Ldiv_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or xh, xh, a7
- leaf_return
- .Ldiv_highequal2:
- bgeu xl, yl, 2b
- j 3b
- .Ldiv_highequal3:
- bltu xl, yl, .Ldiv_rounded
- bne xl, yl, .Ldiv_roundup
- /* Remainder is exactly half the divisor. Round even. */
- addi a11, a11, 1
- beqz a11, .Ldiv_roundcarry
- srli a11, a11, 1
- slli a11, a11, 1
- j .Ldiv_rounded
- .Ldiv_overflow:
- bltz a8, .Ldiv_underflow
- /* Return +/- Infinity. */
- addi a8, a9, 1 /* 0x7ff */
- slli xh, a8, 20
- movi xl, 0
- j .Ldiv_addsign
- .Ldiv_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- ssr a8
- bgeui a8, 32, .Ldiv_bigshift
-
- /* Shift a10/a11 right. Any bits that are shifted out of a11 are
- saved in a6 for rounding the result. */
- sll a6, a11
- src a11, a10, a11
- srl a10, a10
- j 1f
- .Ldiv_bigshift:
- bgeui a8, 64, .Ldiv_flush_to_zero
- sll a9, a11 /* lost bits shifted out of a11 */
- src a6, a10, a11
- srl a11, a10
- movi a10, 0
- or xl, xl, a9
- /* Set the exponent to zero. */
- 1: movi a8, 0
- /* Pack any nonzero remainder (in xh/xl) into a6. */
- or xh, xh, xl
- beqz xh, 1f
- movi a9, 1
- or a6, a6, a9
-
- /* Round a10/a11 based on the bits shifted out into a6. */
- 1: bgez a6, .Ldiv_rounded
- addi a11, a11, 1
- beqz a11, .Ldiv_roundcarry
- slli a6, a6, 1
- bnez a6, .Ldiv_rounded
- srli a11, a11, 1
- slli a11, a11, 1
- j .Ldiv_rounded
- .Ldiv_roundcarry:
- /* a11 is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi a10, a10, 1
- /* Overflow to the exponent field is OK. */
- j .Ldiv_rounded
- .Ldiv_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- leaf_return
- #endif /* XCHAL_HAVE_DFP_DIV */
- #endif /* L_divdf3 */
- #ifdef L_cmpdf2
- /* Equal and Not Equal */
- .align 4
- .global __eqdf2
- .global __nedf2
- .set __nedf2, __eqdf2
- .type __eqdf2, @function
- __eqdf2:
- leaf_entry sp, 16
- bne xl, yl, 2f
- bne xh, yh, 4f
- /* The values are equal but NaN != NaN. Check the exponent. */
- movi a6, 0x7ff00000
- ball xh, a6, 3f
- /* Equal. */
- movi a2, 0
- leaf_return
- /* Not equal. */
- 2: movi a2, 1
- leaf_return
- /* Check if the mantissas are nonzero. */
- 3: slli a7, xh, 12
- or a7, a7, xl
- j 5f
- /* Check if x and y are zero with different signs. */
- 4: or a7, xh, yh
- slli a7, a7, 1
- or a7, a7, xl /* xl == yl here */
- /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
- of x when exponent(x) = 0x7ff and x == y. */
- 5: movi a2, 0
- movi a3, 1
- movnez a2, a3, a7
- leaf_return
- /* Greater Than */
- .align 4
- .global __gtdf2
- .type __gtdf2, @function
- __gtdf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
- 1: bnall yh, a6, .Lle_cmp
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Lle_cmp
- movi a2, 0
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 0
- leaf_return
- /* Less Than or Equal */
- .align 4
- .global __ledf2
- .type __ledf2, @function
- __ledf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
- 1: bnall yh, a6, .Lle_cmp
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Lle_cmp
- movi a2, 1
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 1
- leaf_return
- .Lle_cmp:
- /* Check if x and y have different signs. */
- xor a7, xh, yh
- bltz a7, .Lle_diff_signs
- /* Check if x is negative. */
- bltz xh, .Lle_xneg
- /* Check if x <= y. */
- bltu xh, yh, 4f
- bne xh, yh, 5f
- bltu yl, xl, 5f
- 4: movi a2, 0
- leaf_return
- .Lle_xneg:
- /* Check if y <= x. */
- bltu yh, xh, 4b
- bne yh, xh, 5f
- bgeu xl, yl, 4b
- 5: movi a2, 1
- leaf_return
- .Lle_diff_signs:
- bltz xh, 4b
- /* Check if both x and y are zero. */
- or a7, xh, yh
- slli a7, a7, 1
- or a7, a7, xl
- or a7, a7, yl
- movi a2, 1
- movi a3, 0
- moveqz a2, a3, a7
- leaf_return
- /* Greater Than or Equal */
- .align 4
- .global __gedf2
- .type __gedf2, @function
- __gedf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
- 1: bnall yh, a6, .Llt_cmp
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Llt_cmp
- movi a2, -1
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, -1
- leaf_return
- /* Less Than */
- .align 4
- .global __ltdf2
- .type __ltdf2, @function
- __ltdf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
- 1: bnall yh, a6, .Llt_cmp
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Llt_cmp
- movi a2, 0
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 0
- leaf_return
- .Llt_cmp:
- /* Check if x and y have different signs. */
- xor a7, xh, yh
- bltz a7, .Llt_diff_signs
- /* Check if x is negative. */
- bltz xh, .Llt_xneg
- /* Check if x < y. */
- bltu xh, yh, 4f
- bne xh, yh, 5f
- bgeu xl, yl, 5f
- 4: movi a2, -1
- leaf_return
- .Llt_xneg:
- /* Check if y < x. */
- bltu yh, xh, 4b
- bne yh, xh, 5f
- bltu yl, xl, 4b
- 5: movi a2, 0
- leaf_return
- .Llt_diff_signs:
- bgez xh, 5b
- /* Check if both x and y are nonzero. */
- or a7, xh, yh
- slli a7, a7, 1
- or a7, a7, xl
- or a7, a7, yl
- movi a2, 0
- movi a3, -1
- movnez a2, a3, a7
- leaf_return
- /* Unordered */
- .align 4
- .global __unorddf2
- .type __unorddf2, @function
- __unorddf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 3f
- 1: ball yh, a6, 4f
- 2: movi a2, 0
- leaf_return
- 3: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 1
- leaf_return
- 4: slli a7, yh, 12
- or a7, a7, yl
- beqz a7, 2b
- movi a2, 1
- leaf_return
- #endif /* L_cmpdf2 */
- #ifdef L_fixdfsi
- .align 4
- .global __fixdfsi
- .type __fixdfsi, @function
- __fixdfsi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixdfsi_nan_or_inf
- /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
- extui a4, xh, 20, 11
- extui a5, a6, 19, 10 /* 0x3fe */
- sub a4, a4, a5
- bgei a4, 32, .Lfixdfsi_maxint
- blti a4, 1, .Lfixdfsi_zero
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src a5, a7, xl
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
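- /* Example: for 5.0 the biased exponent is 0x401, so a4 = 3; the top 32
- mantissa bits (with the explicit "1.0") are 0xa0000000, and shifting
- them right by 32 - 3 = 29 gives 5. */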
- .Lfixdfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixdfsi_maxint
- /* Translate NaN to +maxint. */
- movi xh, 0
- .Lfixdfsi_maxint:
- slli a4, a6, 11 /* 0x80000000 */
- addi a5, a4, -1 /* 0x7fffffff */
- movgez a4, a5, xh
- mov a2, a4
- leaf_return
- .Lfixdfsi_zero:
- movi a2, 0
- leaf_return
- #endif /* L_fixdfsi */
- #ifdef L_fixdfdi
- .align 4
- .global __fixdfdi
- .type __fixdfdi, @function
- __fixdfdi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixdfdi_nan_or_inf
- /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
- extui a4, xh, 20, 11
- extui a5, a6, 19, 10 /* 0x3fe */
- sub a4, a4, a5
- bgei a4, 64, .Lfixdfdi_maxint
- blti a4, 1, .Lfixdfdi_zero
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src xh, a7, xl
- sll xl, xl
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixdfdi_smallshift
- srl xl, xh
- movi xh, 0
- .Lfixdfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
- 1: leaf_return
- .Lfixdfdi_smallshift:
- src xl, xh, xl
- srl xh, xh
- j .Lfixdfdi_shifted
- .Lfixdfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixdfdi_maxint
- /* Translate NaN to +maxint. */
- movi xh, 0
- .Lfixdfdi_maxint:
- slli a7, a6, 11 /* 0x80000000 */
- bgez xh, 1f
- mov xh, a7
- movi xl, 0
- leaf_return
- 1: addi xh, a7, -1 /* 0x7fffffff */
- movi xl, -1
- leaf_return
- .Lfixdfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
- #endif /* L_fixdfdi */
- #ifdef L_fixunsdfsi
- .align 4
- .global __fixunsdfsi
- .type __fixunsdfsi, @function
- __fixunsdfsi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixunsdfsi_nan_or_inf
- /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
- extui a4, xh, 20, 11
- extui a5, a6, 20, 10 /* 0x3ff */
- sub a4, a4, a5
- bgei a4, 32, .Lfixunsdfsi_maxint
- bltz a4, .Lfixunsdfsi_zero
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src a5, a7, xl
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 32, .Lfixunsdfsi_bigexp
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
- .Lfixunsdfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixunsdfsi_maxint
- /* Translate NaN to 0xffffffff. */
- movi a2, -1
- leaf_return
- .Lfixunsdfsi_maxint:
- slli a4, a6, 11 /* 0x80000000 */
- movi a5, -1 /* 0xffffffff */
- movgez a4, a5, xh
- mov a2, a4
- leaf_return
- .Lfixunsdfsi_zero:
- movi a2, 0
- leaf_return
- .Lfixunsdfsi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz xh, 1f
- mov a2, a5 /* no shift needed */
- leaf_return
- /* Return 0x80000000 if negative. */
- 1: slli a2, a6, 11
- leaf_return
- #endif /* L_fixunsdfsi */
- #ifdef L_fixunsdfdi
- .align 4
- .global __fixunsdfdi
- .type __fixunsdfdi, @function
- __fixunsdfdi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixunsdfdi_nan_or_inf
- /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
- extui a4, xh, 20, 11
- extui a5, a6, 20, 10 /* 0x3ff */
- sub a4, a4, a5
- bgei a4, 64, .Lfixunsdfdi_maxint
- bltz a4, .Lfixunsdfdi_zero
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src xh, a7, xl
- sll xl, xl
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 64, .Lfixunsdfdi_bigexp
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixunsdfdi_smallshift
- srl xl, xh
- movi xh, 0
- .Lfixunsdfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
- 1: leaf_return
- .Lfixunsdfdi_smallshift:
- src xl, xh, xl
- srl xh, xh
- j .Lfixunsdfdi_shifted
- .Lfixunsdfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixunsdfdi_maxint
- /* Translate NaN to 0xffffffff.... */
- 1: movi xh, -1
- movi xl, -1
- leaf_return
- .Lfixunsdfdi_maxint:
- bgez xh, 1b
- 2: slli xh, a6, 11 /* 0x80000000 */
- movi xl, 0
- leaf_return
- .Lfixunsdfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
- .Lfixunsdfdi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz a7, 2b
- leaf_return /* no shift needed */
- #endif /* L_fixunsdfdi */

#ifdef L_floatsidf

        .align  4
        .global __floatunsidf
        .type   __floatunsidf, @function
__floatunsidf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsidf_return_zero

        /* Set the sign to zero and jump to the floatsidf code.  */
        movi    a7, 0
        j       .Lfloatsidf_normalize

        .align  4
        .global __floatsidf
        .type   __floatsidf, @function
__floatsidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsidf_return_zero

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsidf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position.  */
        srli    xh, a5, 11
        slli    xl, a5, (32 - 11)

        /* Set the exponent.  */
        movi    a5, 0x41d       /* 0x3fe + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign and return.  */
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Lfloatsidf_return_zero:
        movi    a3, 0
        leaf_return

#endif /* L_floatsidf */
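/* Illustrative note (not in the original source): a rough C model of the
   signed int -> double conversion above, returning the raw bit pattern.
   The name is invented for this sketch.  The exponent constant 0x3fe + 31
   works because the explicit leading 1 of the mantissa lands in the
   exponent field and bumps it by one when added.

        #include <stdint.h>

        uint64_t floatsidf_model (int32_t i)
        {
          if (i == 0)
            return 0;
          uint64_t sign = (uint64_t)(i < 0) << 63;
          uint32_t a = (i < 0) ? -(uint32_t)i : (uint32_t)i;
          int nz = __builtin_clz (a);               // like do_nsau
          uint64_t mant = (uint64_t)(a << nz);      // leading 1 now in bit 31
          uint64_t exp  = (uint64_t)(0x3fe + 31 - nz) << 52;
          // The leading 1 of (mant << 21) sits in bit 52 and adds one to
          // the exponent field, giving the biased exponent 0x3ff + (31 - nz).
          return sign | (exp + (mant << 21));
        }
*/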

#ifdef L_floatdidf

        .align  4
        .global __floatundidf
        .type   __floatundidf, @function
__floatundidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdidf code.  */
        movi    a7, 0
        j       .Lfloatdidf_normalize

        .align  4
        .global __floatdidf
        .type   __floatdidf, @function
__floatdidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdidf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdidf_normalize
        addi    xh, xh, -1

.Lfloatdidf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdidf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdidf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    11
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh

        /* Set the exponent.  */
        movi    a5, 0x43d       /* 0x3fe + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      xh, xh, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    xl, xl, 1
        beqz    xl, .Lfloatdidf_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdidf_exactlyhalf
2:      leaf_return

.Lfloatdidf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lfloatdidf_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

#endif /* L_floatdidf */
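/* Illustrative note (not in the original source): the rounding step above
   implements round-to-nearest, ties-to-even.  A rough C model, with the
   64-bit result held in a single integer and the shifted-out bits
   msb-aligned in 'leftover' (names invented for this sketch):

        #include <stdint.h>

        uint64_t round_nearest_even (uint64_t result, uint32_t leftover)
        {
          if (leftover & 0x80000000u)       // leftover fraction >= 1/2
            {
              result += 1;                  // round up; a carry into the
                                            // exponent field is harmless
              if ((leftover << 1) == 0)     // exactly 1/2: a tie
                result &= ~(uint64_t)1;     // round down to the nearest even
            }
          return result;
        }
*/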

#ifdef L_truncdfsf2

        .align  4
        .global __truncdfsf2
        .type   __truncdfsf2, @function
__truncdfsf2:
        leaf_entry sp, 16

        /* Adjust the exponent bias.  */
        movi    a4, (0x3ff - 0x7f) << 20
        sub     a5, xh, a4

        /* Check for underflow.  */
        xor     a6, xh, a5
        bltz    a6, .Ltrunc_underflow
        extui   a6, a5, 20, 11
        beqz    a6, .Ltrunc_underflow

        /* Check for overflow.  */
        movi    a4, 255
        bge     a6, a4, .Ltrunc_overflow

        /* Shift a5/xl << 3 into a5/a4.  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

.Ltrunc_addsign:
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        slli    a6, a6, 31
        or      a2, a6, a5

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a4, 1f
        addi    a2, a2, 1
        /* Overflow to the exponent is OK.  The answer will be correct.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a4, a4, 1
        beqz    a4, .Ltrunc_exactlyhalf
1:      leaf_return

.Ltrunc_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Ltrunc_overflow:
        /* Check if exponent == 0x7ff.  */
        movi    a4, 0x7ff00000
        bnall   xh, a4, 1f

        /* Check if mantissa is nonzero.  */
        slli    a5, xh, 12
        or      a5, a5, xl
        beqz    a5, 1f

        /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
        srli    a4, a4, 1

1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */

        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        ssai    1
        src     a2, a6, a4
        leaf_return

.Ltrunc_underflow:
        /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
        extui   a6, xh, 20, 11
        movi    a5, 0x3ff - 0x7f
        sub     a6, a5, a6
        addi    a6, a6, 1
        bgeui   a6, 32, 1f

        /* Replace the exponent with an explicit "1.0".  */
        slli    a5, a5, 13      /* 0x700000 */
        or      a5, a5, xh
        slli    a5, a5, 11
        srli    a5, a5, 11

        /* Shift the mantissa left by 3 bits (into a5/a4).  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

        /* Shift right by a6.  */
        ssr     a6
        sll     a7, a4
        src     a4, a5, a4
        srl     a5, a5
        beqz    a7, .Ltrunc_addsign
        or      a4, a4, a6      /* any positive, nonzero value will work */
        j       .Ltrunc_addsign

        /* Return +/- zero.  */
1:      extui   a2, xh, 31, 1
        slli    a2, a2, 31
        leaf_return

#endif /* L_truncdfsf2 */
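/* Illustrative note (not in the original source): a rough C model of the
   normal-number path above, returning raw float bits.  Subnormal results
   and NaN quieting are handled by the extra paths in the code above; the
   name and raw-bit interface are invented for this sketch.

        #include <stdint.h>

        uint32_t truncdfsf2_model (uint64_t d)      // raw IEEE-754 double bits
        {
          uint32_t sign = (uint32_t)(d >> 63) << 31;
          int32_t  exp  = ((int32_t)(d >> 52) & 0x7ff) - (0x3ff - 0x7f);

          if (exp <= 0)
            return sign;                            // underflow (subnormal results omitted here)
          if (exp >= 255)
            return sign | 0x7f800000u;              // overflow -> +/-Inf (NaN quieting omitted)

          uint64_t leftover = d & ((1ULL << 29) - 1);             // 29 dropped fraction bits
          uint32_t f = sign | ((uint32_t)exp << 23)
                            | (uint32_t)((d >> 29) & 0x7fffff);   // top 23 fraction bits
          if (leftover > (1ULL << 28))
            f += 1;                                 // > 1/2: round up
          else if (leftover == (1ULL << 28))
            f += (f & 1);                           // tie: round to even
          return f;
        }
*/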

#ifdef L_extendsfdf2

        .align  4
        .global __extendsfdf2
        .type   __extendsfdf2, @function
__extendsfdf2:
        leaf_entry sp, 16

        /* Save the sign bit and then shift it off.  */
        extui   a5, a2, 31, 1
        slli    a5, a5, 31
        slli    a4, a2, 1

        /* Extract and check the exponent.  */
        extui   a6, a2, 23, 8
        beqz    a6, .Lextend_expzero
        addi    a6, a6, 1
        beqi    a6, 256, .Lextend_nan_or_inf

        /* Shift >> 3 into a4/xl.  */
        srli    a4, a4, 4
        slli    xl, a2, (32 - 3)

        /* Adjust the exponent bias.  */
        movi    a6, (0x3ff - 0x7f) << 20
        add     a4, a4, a6

        /* Add the sign bit.  */
        or      xh, a4, a5
        leaf_return

.Lextend_nan_or_inf:
        movi    a4, 0x7ff00000

        /* Check for NaN.  */
        slli    a7, a2, 9
        beqz    a7, 1f

        slli    a6, a6, 11      /* 0x80000 */
        or      a4, a4, a6

        /* Add the sign and return.  */
1:      or      xh, a4, a5
        movi    xl, 0
        leaf_return

.Lextend_expzero:
        beqz    a4, 1b

        /* Normalize it to have 8 zero bits before the first 1 bit.  */
        do_nsau a7, a4, a2, a3
        addi    a7, a7, -8
        ssl     a7
        sll     a4, a4

        /* Shift >> 3 into a4/xl.  */
        slli    xl, a4, (32 - 3)
        srli    a4, a4, 3

        /* Set the exponent.  */
        movi    a6, 0x3fe - 0x7f
        sub     a6, a6, a7
        slli    a6, a6, 20
        add     a4, a4, a6

        /* Add the sign and return.  */
        or      xh, a4, a5
        leaf_return

#endif /* L_extendsfdf2 */
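/* Illustrative note (not in the original source): a rough C model of the
   float -> double extension above, on raw bit patterns.  The name is
   invented for this sketch; as in the code above, a NaN comes back as the
   default quiet NaN of the same sign (the payload is not preserved).

        #include <stdint.h>

        uint64_t extendsfdf2_model (uint32_t f)     // raw IEEE-754 float bits
        {
          uint64_t sign = (uint64_t)(f >> 31) << 63;
          uint32_t exp  = (f >> 23) & 0xff;
          uint64_t frac = f & 0x7fffff;

          if (exp == 255)                           // Inf or NaN
            return sign | 0x7ff0000000000000ULL
                        | (frac ? 0x0008000000000000ULL : 0);
          if (exp == 0)
            {
              if (frac == 0)
                return sign;                        // +/- zero
              int nz = __builtin_clz ((uint32_t)frac) - 8;  // normalize the subnormal
              frac = (frac << nz) & 0x7fffff;       // drop the now-explicit leading 1
              return sign | ((uint64_t)(0x381 - nz) << 52) | (frac << 29);
            }
          return sign | ((uint64_t)(exp + 0x380) << 52) | (frac << 29);
        }
*/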

#if XCHAL_HAVE_DFP_SQRT
#ifdef L_sqrt

        .text
        .align  4
        .global __ieee754_sqrt
        .type   __ieee754_sqrt, @function
__ieee754_sqrt:
        leaf_entry sp, 16

        wfrd    f1, xh, xl

        sqrt0.d f2, f1
        const.d f4, 0
        maddn.d f4, f2, f2
        nexp01.d f3, f1
        const.d f0, 3
        addexp.d f3, f0
        maddn.d f0, f4, f3
        nexp01.d f4, f1
        maddn.d f2, f0, f2
        const.d f5, 0
        maddn.d f5, f2, f3
        const.d f0, 3
        maddn.d f0, f5, f2
        neg.d   f6, f4
        maddn.d f2, f0, f2
        const.d f0, 0
        const.d f5, 0
        const.d f7, 0
        maddn.d f0, f6, f2
        maddn.d f5, f2, f3
        const.d f3, 3
        maddn.d f7, f3, f2
        maddn.d f4, f0, f0
        maddn.d f3, f5, f2
        neg.d   f2, f7
        maddn.d f0, f4, f2
        maddn.d f7, f3, f7
        mksadj.d f2, f1
        nexp01.d f1, f1
        maddn.d f1, f0, f0
        neg.d   f3, f7
        addexpm.d f0, f2
        addexp.d f3, f2
        divn.d  f0, f1, f3

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#endif /* L_sqrt */
#endif /* XCHAL_HAVE_DFP_SQRT */
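/* Illustrative note (not in the original source): the instruction sequence
   above refines a low-precision seed from sqrt0.d into the square root.  A
   rough C model of this style of computation, where initial_rsqrt_estimate()
   stands in for the hardware seed and is purely hypothetical:

        double sqrt_model (double x)
        {
          double y = initial_rsqrt_estimate (x);    // hypothetical seed, like sqrt0.d
          for (int i = 0; i < 3; i++)
            y = y * (1.5 - 0.5 * x * y * y);        // Newton step for 1/sqrt(x)
          return x * y;                             // sqrt(x) = x * 1/sqrt(x)
        }

   The real sequence uses fused multiply-adds (maddn.d) together with the
   exponent-manipulation ops (nexp01.d, addexp.d, mksadj.d) to keep the
   intermediates in range, and finishes the rounding with divn.d.  */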

#if XCHAL_HAVE_DFP_RECIP
#ifdef L_recipdf2
        /* Reciprocal */

        .align  4
        .global __recipdf2
        .type   __recipdf2, @function
__recipdf2:
        leaf_entry sp, 16

        wfrd    f1, xh, xl

        recip0.d f0, f1
        const.d f2, 2
        msub.d  f2, f1, f0
        mul.d   f3, f1, f0
        const.d f4, 2
        mul.d   f5, f0, f2
        msub.d  f4, f3, f2
        const.d f2, 1
        mul.d   f0, f5, f4
        msub.d  f2, f1, f0
        maddn.d f0, f0, f2

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#endif /* L_recipdf2 */
#endif /* XCHAL_HAVE_DFP_RECIP */
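/* Illustrative note (not in the original source): the sequence above is a
   Newton-Raphson refinement of the recip0.d seed.  A rough C model, with
   initial_recip_estimate() standing in for the hardware seed (hypothetical):

        double recip_model (double x)
        {
          double y = initial_recip_estimate (x);    // hypothetical seed, like recip0.d
          for (int i = 0; i < 3; i++)
            y = y * (2.0 - x * y);                  // Newton step for 1/x
          return y;
        }

   Each step roughly doubles the number of correct bits.  The code above
   writes the last step as y + y*(1 - x*y), computing the residual with a
   fused msub.d/maddn.d pair so the correction is applied at full precision.  */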

#if XCHAL_HAVE_DFP_RSQRT
#ifdef L_rsqrtdf2
        /* Reciprocal square root */

        .align  4
        .global __rsqrtdf2
        .type   __rsqrtdf2, @function
__rsqrtdf2:
        leaf_entry sp, 16

        wfrd    f1, xh, xl

        rsqrt0.d f0, f1
        mul.d   f2, f1, f0
        const.d f3, 3
        mul.d   f4, f3, f0
        const.d f5, 1
        msub.d  f5, f2, f0
        maddn.d f0, f4, f5
        const.d f2, 1
        mul.d   f4, f1, f0
        mul.d   f5, f3, f0
        msub.d  f2, f4, f0
        maddn.d f0, f5, f2
        const.d f2, 1
        mul.d   f1, f1, f0
        mul.d   f3, f3, f0
        msub.d  f2, f1, f0
        maddn.d f0, f3, f2

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#endif /* L_rsqrtdf2 */
#endif /* XCHAL_HAVE_DFP_RSQRT */
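/* Illustrative note (not in the original source): as with __ieee754_sqrt
   above, the rsqrt0.d seed is refined with Newton-Raphson steps of the form
   y' = y + 0.5*y*(1 - x*y*y).  A rough C model (the seed helper is
   hypothetical):

        double rsqrt_model (double x)
        {
          double y = initial_rsqrt_estimate (x);    // hypothetical seed, like rsqrt0.d
          for (int i = 0; i < 3; i++)
            y = y + 0.5 * y * (1.0 - x * y * y);    // Newton step for 1/sqrt(x)
          return y;
        }
*/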