ieee754-df.S

  1. /* IEEE-754 double-precision functions for Xtensa
  2. Copyright (C) 2006-2022 Free Software Foundation, Inc.
  3. Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
  4. This file is part of GCC.
  5. GCC is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. GCC is distributed in the hope that it will be useful, but WITHOUT
  10. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
  12. License for more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. #ifdef __XTENSA_EB__
  21. #define xh a2
  22. #define xl a3
  23. #define yh a4
  24. #define yl a5
  25. #else
  26. #define xh a3
  27. #define xl a2
  28. #define yh a5
  29. #define yl a4
  30. #endif
  31. /* Warning! The branch displacements for some Xtensa branch instructions
  32. are quite small, and this code has been carefully laid out to keep
  33. branch targets in range. If you change anything, be sure to check that
  34. the assembler is not relaxing anything to branch over a jump. */
  35. #ifdef L_negdf2
  36. .align 4
  37. .global __negdf2
  38. .type __negdf2, @function
  39. __negdf2:
  40. leaf_entry sp, 16
  41. movi a4, 0x80000000
  42. xor xh, xh, a4
  43. leaf_return
  44. #endif /* L_negdf2 */
  45. #ifdef L_addsubdf3
  46. .literal_position
  47. /* Addition */
  48. __adddf3_aux:
  49. /* Handle NaNs and Infinities. (This code is placed before the
  50. start of the function just to keep it in range of the limited
  51. branch displacements.) */
  52. .Ladd_xnan_or_inf:
  53. /* If y is neither Infinity nor NaN, return x. */
  54. bnall yh, a6, .Ladd_return_nan_or_inf
  55. /* If x is a NaN, return it. Otherwise, return y. */
  56. slli a7, xh, 12
  57. or a7, a7, xl
  58. bnez a7, .Ladd_return_nan
  59. .Ladd_ynan_or_inf:
  60. /* Return y. */
  61. mov xh, yh
  62. mov xl, yl
  63. .Ladd_return_nan_or_inf:
  64. slli a7, xh, 12
  65. or a7, a7, xl
  66. bnez a7, .Ladd_return_nan
  67. leaf_return
  68. .Ladd_return_nan:
  69. movi a4, 0x80000 /* make it a quiet NaN */
  70. or xh, xh, a4
  71. leaf_return
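/* Illustrative C sketch (not part of the build) of the classification
   used by this NaN/Infinity code, assuming <stdint.h> types:

	// exponent field all ones <=> NaN or Infinity
	static int exp_all_ones(uint32_t hi) {
	    return (hi & 0x7ff00000) == 0x7ff00000;
	}
	// nonzero mantissa distinguishes NaN from Infinity
	static int mant_nonzero(uint32_t hi, uint32_t lo) {
	    return ((hi << 12) | lo) != 0;
	}
	// a NaN result is quieted by setting bit 19 of the high word:
	//     hi |= 0x80000;
*/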
  72. .Ladd_opposite_signs:
  73. /* Operand signs differ. Do a subtraction. */
  74. slli a7, a6, 11
  75. xor yh, yh, a7
  76. j .Lsub_same_sign
  77. .align 4
  78. .global __adddf3
  79. .type __adddf3, @function
  80. __adddf3:
  81. leaf_entry sp, 16
  82. movi a6, 0x7ff00000
  83. /* Check if the two operands have the same sign. */
  84. xor a7, xh, yh
  85. bltz a7, .Ladd_opposite_signs
  86. .Ladd_same_sign:
  87. /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
  88. ball xh, a6, .Ladd_xnan_or_inf
  89. ball yh, a6, .Ladd_ynan_or_inf
  90. /* Compare the exponents. The smaller operand will be shifted
  91. right by the exponent difference and added to the larger
  92. one. */
  93. extui a7, xh, 20, 12
  94. extui a8, yh, 20, 12
  95. bltu a7, a8, .Ladd_shiftx
  96. .Ladd_shifty:
  97. /* Check if the smaller (or equal) exponent is zero. */
  98. bnone yh, a6, .Ladd_yexpzero
  99. /* Replace yh sign/exponent with 0x001. */
  100. or yh, yh, a6
  101. slli yh, yh, 11
  102. srli yh, yh, 11
  103. .Ladd_yexpdiff:
  104. /* Compute the exponent difference. Optimize for difference < 32. */
  105. sub a10, a7, a8
  106. bgeui a10, 32, .Ladd_bigshifty
  107. /* Shift yh/yl right by the exponent difference. Any bits that are
  108. shifted out of yl are saved in a9 for rounding the result. */
  109. ssr a10
  110. movi a9, 0
  111. src a9, yl, a9
  112. src yl, yh, yl
  113. srl yh, yh
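/* A C sketch of the alignment shift just performed, assuming the
   exponent difference d is in 0..31 (illustrative only):

	unsigned __int128 y96 = ((unsigned __int128)yh << 64)
	                      | ((unsigned __int128)yl << 32);
	y96 >>= d;
	uint32_t new_yh = (uint32_t)(y96 >> 64);
	uint32_t new_yl = (uint32_t)(y96 >> 32);
	uint32_t a9     = (uint32_t)y96;  // guard bit in the msb, sticky below
*/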
  114. .Ladd_addy:
  115. /* Do the 64-bit addition. */
  116. add xl, xl, yl
  117. add xh, xh, yh
  118. bgeu xl, yl, 1f
  119. addi xh, xh, 1
  120. 1:
  121. /* Check if the add overflowed into the exponent. */
  122. extui a10, xh, 20, 12
  123. beq a10, a7, .Ladd_round
  124. mov a8, a7
  125. j .Ladd_carry
  126. .Ladd_yexpzero:
  127. /* y is a subnormal value. Replace its sign/exponent with zero,
  128. i.e., no implicit "1.0", and increment the apparent exponent
  129. because subnormals behave as if they had the minimum (nonzero)
  130. exponent. Test for the case when both exponents are zero. */
  131. slli yh, yh, 12
  132. srli yh, yh, 12
  133. bnone xh, a6, .Ladd_bothexpzero
  134. addi a8, a8, 1
  135. j .Ladd_yexpdiff
  136. .Ladd_bothexpzero:
  137. /* Both exponents are zero. Handle this as a special case. There
  138. is no need to shift or round, and the normal code for handling
  139. a carry into the exponent field will not work because it
  140. assumes there is an implicit "1.0" that needs to be added. */
  141. add xl, xl, yl
  142. add xh, xh, yh
  143. bgeu xl, yl, 1f
  144. addi xh, xh, 1
  145. 1: leaf_return
  146. .Ladd_bigshifty:
  147. /* Exponent difference > 64 -- just return the bigger value. */
  148. bgeui a10, 64, 1b
  149. /* Shift yh/yl right by the exponent difference. Any bits that are
  150. shifted out are saved in a9 for rounding the result. */
  151. ssr a10
  152. sll a11, yl /* lost bits shifted out of yl */
  153. src a9, yh, yl
  154. srl yl, yh
  155. movi yh, 0
  156. beqz a11, .Ladd_addy
  157. or a9, a9, a10 /* any positive, nonzero value will work */
  158. j .Ladd_addy
  159. .Ladd_xexpzero:
  160. /* Same as "yexpzero" except skip handling the case when both
  161. exponents are zero. */
  162. slli xh, xh, 12
  163. srli xh, xh, 12
  164. addi a7, a7, 1
  165. j .Ladd_xexpdiff
  166. .Ladd_shiftx:
  167. /* Same thing as the "shifty" code, but with x and y swapped. Also,
  168. because the exponent difference is always nonzero in this version,
  169. the shift sequence can use SLL and skip loading a constant zero. */
  170. bnone xh, a6, .Ladd_xexpzero
  171. or xh, xh, a6
  172. slli xh, xh, 11
  173. srli xh, xh, 11
  174. .Ladd_xexpdiff:
  175. sub a10, a8, a7
  176. bgeui a10, 32, .Ladd_bigshiftx
  177. ssr a10
  178. sll a9, xl
  179. src xl, xh, xl
  180. srl xh, xh
  181. .Ladd_addx:
  182. add xl, xl, yl
  183. add xh, xh, yh
  184. bgeu xl, yl, 1f
  185. addi xh, xh, 1
  186. 1:
  187. /* Check if the add overflowed into the exponent. */
  188. extui a10, xh, 20, 12
  189. bne a10, a8, .Ladd_carry
  190. .Ladd_round:
  191. /* Round up if the leftover fraction is >= 1/2. */
  192. bgez a9, 1f
  193. addi xl, xl, 1
  194. beqz xl, .Ladd_roundcarry
  195. /* Check if the leftover fraction is exactly 1/2. */
  196. slli a9, a9, 1
  197. beqz a9, .Ladd_exactlyhalf
  198. 1: leaf_return
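/* The rounding decision above, as a C sketch (a9 holds the bits shifted
   out, most significant first; illustrative only):

	if (a9 & 0x80000000u) {           // leftover fraction >= 1/2
	    if (++xl == 0) xh++;          // carry into the high word
	    if ((a9 << 1) == 0)           // exactly 1/2: round to even
	        xl &= ~1u;
	}
*/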
  199. .Ladd_bigshiftx:
  200. /* Mostly the same thing as "bigshifty".... */
  201. bgeui a10, 64, .Ladd_returny
  202. ssr a10
  203. sll a11, xl
  204. src a9, xh, xl
  205. srl xl, xh
  206. movi xh, 0
  207. beqz a11, .Ladd_addx
  208. or a9, a9, a10
  209. j .Ladd_addx
  210. .Ladd_returny:
  211. mov xh, yh
  212. mov xl, yl
  213. leaf_return
  214. .Ladd_carry:
  215. /* The addition has overflowed into the exponent field, so the
  216. value needs to be renormalized. The mantissa of the result
  217. can be recovered by subtracting the original exponent and
  218. adding 0x100000 (which is the explicit "1.0" for the
  219. mantissa of the non-shifted operand -- the "1.0" for the
  220. shifted operand was already added). The mantissa can then
  221. be shifted right by one bit. The explicit "1.0" of the
  222. shifted mantissa then needs to be replaced by the exponent,
  223. incremented by one to account for the normalizing shift.
  224. It is faster to combine these operations: do the shift first
  225. and combine the additions and subtractions. If x is the
  226. original exponent, the result is:
  227. shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
  228. or:
  229. shifted mantissa + ((x + 1) << 19)
  230. Note that the exponent is incremented here by leaving the
  231. explicit "1.0" of the mantissa in the exponent field. */
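/* Spelled out: (x << 20) - (x << 19) = x << 19, so
   shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
   = shifted mantissa + (x << 19) + (1 << 19)
   = shifted mantissa + ((x + 1) << 19). */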
  232. /* Shift xh/xl right by one bit. Save the lsb of xl. */
  233. mov a10, xl
  234. ssai 1
  235. src xl, xh, xl
  236. srl xh, xh
  237. /* See explanation above. The original exponent is in a8. */
  238. addi a8, a8, 1
  239. slli a8, a8, 19
  240. add xh, xh, a8
  241. /* Return an Infinity if the exponent overflowed. */
  242. ball xh, a6, .Ladd_infinity
  243. /* Same thing as the "round" code except the msb of the leftover
  244. fraction is bit 0 of a10, with the rest of the fraction in a9. */
  245. bbci.l a10, 0, 1f
  246. addi xl, xl, 1
  247. beqz xl, .Ladd_roundcarry
  248. beqz a9, .Ladd_exactlyhalf
  249. 1: leaf_return
  250. .Ladd_infinity:
  251. /* Clear the mantissa. */
  252. movi xl, 0
  253. srli xh, xh, 20
  254. slli xh, xh, 20
  255. /* The sign bit may have been lost in a carry-out. Put it back. */
  256. slli a8, a8, 1
  257. or xh, xh, a8
  258. leaf_return
  259. .Ladd_exactlyhalf:
  260. /* Round down to the nearest even value. */
  261. srli xl, xl, 1
  262. slli xl, xl, 1
  263. leaf_return
  264. .Ladd_roundcarry:
  265. /* xl is always zero when the rounding increment overflows, so
  266. there's no need to round it to an even value. */
  267. addi xh, xh, 1
  268. /* Overflow to the exponent is OK. */
  269. leaf_return
  270. /* Subtraction */
  271. __subdf3_aux:
  272. /* Handle NaNs and Infinities. (This code is placed before the
  273. start of the function just to keep it in range of the limited
  274. branch displacements.) */
  275. .Lsub_xnan_or_inf:
  276. /* If y is neither Infinity nor NaN, return x. */
  277. bnall yh, a6, .Lsub_return_nan_or_inf
  278. .Lsub_return_nan:
  279. /* Both x and y are either NaN or Inf, so the result is NaN. */
  280. movi a4, 0x80000 /* make it a quiet NaN */
  281. or xh, xh, a4
  282. leaf_return
  283. .Lsub_ynan_or_inf:
  284. /* Negate y and return it. */
  285. slli a7, a6, 11
  286. xor xh, yh, a7
  287. mov xl, yl
  288. .Lsub_return_nan_or_inf:
  289. slli a7, xh, 12
  290. or a7, a7, xl
  291. bnez a7, .Lsub_return_nan
  292. leaf_return
  293. .Lsub_opposite_signs:
  294. /* Operand signs differ. Do an addition. */
  295. slli a7, a6, 11
  296. xor yh, yh, a7
  297. j .Ladd_same_sign
  298. .align 4
  299. .global __subdf3
  300. .type __subdf3, @function
  301. __subdf3:
  302. leaf_entry sp, 16
  303. movi a6, 0x7ff00000
  304. /* Check if the two operands have the same sign. */
  305. xor a7, xh, yh
  306. bltz a7, .Lsub_opposite_signs
  307. .Lsub_same_sign:
  308. /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
  309. ball xh, a6, .Lsub_xnan_or_inf
  310. ball yh, a6, .Lsub_ynan_or_inf
  311. /* Compare the operands. In contrast to addition, the entire
  312. value matters here. */
  313. extui a7, xh, 20, 11
  314. extui a8, yh, 20, 11
  315. bltu xh, yh, .Lsub_xsmaller
  316. beq xh, yh, .Lsub_compare_low
  317. .Lsub_ysmaller:
  318. /* Check if the smaller (or equal) exponent is zero. */
  319. bnone yh, a6, .Lsub_yexpzero
  320. /* Replace yh sign/exponent with 0x001. */
  321. or yh, yh, a6
  322. slli yh, yh, 11
  323. srli yh, yh, 11
  324. .Lsub_yexpdiff:
  325. /* Compute the exponent difference. Optimize for difference < 32. */
  326. sub a10, a7, a8
  327. bgeui a10, 32, .Lsub_bigshifty
  328. /* Shift yh/yl right by the exponent difference. Any bits that are
  329. shifted out of yl are saved in a9 for rounding the result. */
  330. ssr a10
  331. movi a9, 0
  332. src a9, yl, a9
  333. src yl, yh, yl
  334. srl yh, yh
  335. .Lsub_suby:
  336. /* Do the 64-bit subtraction. */
  337. sub xh, xh, yh
  338. bgeu xl, yl, 1f
  339. addi xh, xh, -1
  340. 1: sub xl, xl, yl
  341. /* Subtract the leftover bits in a9 from zero and propagate any
  342. borrow from xh/xl. */
  343. neg a9, a9
  344. beqz a9, 1f
  345. addi a5, xh, -1
  346. moveqz xh, a5, xl
  347. addi xl, xl, -1
  348. 1:
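/* C sketch of the borrow handling above: the full result is the 96-bit
   difference (xh:xl:0) - (yh:yl:a9), so when a9 is nonzero the low word
   becomes -a9 and one is borrowed from xh:xl (illustrative only):

	if (a9 != 0) {
	    a9 = (uint32_t)-a9;           // leftover fraction of the result
	    if (xl == 0) xh--;            // propagate the borrow
	    xl--;
	}
*/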
  349. /* Check if the subtract underflowed into the exponent. */
  350. extui a10, xh, 20, 11
  351. beq a10, a7, .Lsub_round
  352. j .Lsub_borrow
  353. .Lsub_compare_low:
  354. /* The high words are equal. Compare the low words. */
  355. bltu xl, yl, .Lsub_xsmaller
  356. bltu yl, xl, .Lsub_ysmaller
  357. /* The operands are equal. Return 0.0. */
  358. movi xh, 0
  359. movi xl, 0
  360. 1: leaf_return
  361. .Lsub_yexpzero:
  362. /* y is a subnormal value. Replace its sign/exponent with zero,
  363. i.e., no implicit "1.0". Unless x is also a subnormal, increment
  364. y's apparent exponent because subnormals behave as if they had
  365. the minimum (nonzero) exponent. */
  366. slli yh, yh, 12
  367. srli yh, yh, 12
  368. bnone xh, a6, .Lsub_yexpdiff
  369. addi a8, a8, 1
  370. j .Lsub_yexpdiff
  371. .Lsub_bigshifty:
  372. /* Exponent difference > 64 -- just return the bigger value. */
  373. bgeui a10, 64, 1b
  374. /* Shift yh/yl right by the exponent difference. Any bits that are
  375. shifted out are saved in a9 for rounding the result. */
  376. ssr a10
  377. sll a11, yl /* lost bits shifted out of yl */
  378. src a9, yh, yl
  379. srl yl, yh
  380. movi yh, 0
  381. beqz a11, .Lsub_suby
  382. or a9, a9, a10 /* any positive, nonzero value will work */
  383. j .Lsub_suby
  384. .Lsub_xsmaller:
  385. /* Same thing as the "ysmaller" code, but with x and y swapped and
  386. with y negated. */
  387. bnone xh, a6, .Lsub_xexpzero
  388. or xh, xh, a6
  389. slli xh, xh, 11
  390. srli xh, xh, 11
  391. .Lsub_xexpdiff:
  392. sub a10, a8, a7
  393. bgeui a10, 32, .Lsub_bigshiftx
  394. ssr a10
  395. movi a9, 0
  396. src a9, xl, a9
  397. src xl, xh, xl
  398. srl xh, xh
  399. /* Negate y. */
  400. slli a11, a6, 11
  401. xor yh, yh, a11
  402. .Lsub_subx:
  403. sub xl, yl, xl
  404. sub xh, yh, xh
  405. bgeu yl, xl, 1f
  406. addi xh, xh, -1
  407. 1:
  408. /* Subtract the leftover bits in a9 from zero and propagate any
  409. borrow from xh/xl. */
  410. neg a9, a9
  411. beqz a9, 1f
  412. addi a5, xh, -1
  413. moveqz xh, a5, xl
  414. addi xl, xl, -1
  415. 1:
  416. /* Check if the subtract underflowed into the exponent. */
  417. extui a10, xh, 20, 11
  418. bne a10, a8, .Lsub_borrow
  419. .Lsub_round:
  420. /* Round up if the leftover fraction is >= 1/2. */
  421. bgez a9, 1f
  422. addi xl, xl, 1
  423. beqz xl, .Lsub_roundcarry
  424. /* Check if the leftover fraction is exactly 1/2. */
  425. slli a9, a9, 1
  426. beqz a9, .Lsub_exactlyhalf
  427. 1: leaf_return
  428. .Lsub_xexpzero:
  429. /* Same as "yexpzero". */
  430. slli xh, xh, 12
  431. srli xh, xh, 12
  432. bnone yh, a6, .Lsub_xexpdiff
  433. addi a7, a7, 1
  434. j .Lsub_xexpdiff
  435. .Lsub_bigshiftx:
  436. /* Mostly the same thing as "bigshifty", but with the sign bit of the
  437. shifted value set so that the subsequent subtraction flips the
  438. sign of y. */
  439. bgeui a10, 64, .Lsub_returny
  440. ssr a10
  441. sll a11, xl
  442. src a9, xh, xl
  443. srl xl, xh
  444. slli xh, a6, 11 /* set sign bit of xh */
  445. beqz a11, .Lsub_subx
  446. or a9, a9, a10
  447. j .Lsub_subx
  448. .Lsub_returny:
  449. /* Negate and return y. */
  450. slli a7, a6, 11
  451. xor xh, yh, a7
  452. mov xl, yl
  453. leaf_return
  454. .Lsub_borrow:
  455. /* The subtraction has underflowed into the exponent field, so the
  456. value needs to be renormalized. Shift the mantissa left as
  457. needed to remove any leading zeros and adjust the exponent
  458. accordingly. If the exponent is not large enough to remove
  459. all the leading zeros, the result will be a subnormal value. */
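/* In outline (a sketch of the steps below, ignoring the split of the
   mantissa across two words):

	shift = number of leading zeros above the mantissa's leading 1;
	if (shift < exponent) { mantissa <<= shift;    exponent -= shift; }
	else                  { mantissa <<= exponent; exponent  = 0;     }  // subnormal
*/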
  460. slli a8, xh, 12
  461. beqz a8, .Lsub_xhzero
  462. do_nsau a6, a8, a7, a11
  463. srli a8, a8, 12
  464. bge a6, a10, .Lsub_subnormal
  465. addi a6, a6, 1
  466. .Lsub_shift_lt32:
  467. /* Shift the mantissa (a8/xl/a9) left by a6. */
  468. ssl a6
  469. src a8, a8, xl
  470. src xl, xl, a9
  471. sll a9, a9
  472. /* Combine the shifted mantissa with the sign and exponent,
  473. decrementing the exponent by a6. (The exponent has already
  474. been decremented by one due to the borrow from the subtraction,
  475. but adding the mantissa will increment the exponent by one.) */
  476. srli xh, xh, 20
  477. sub xh, xh, a6
  478. slli xh, xh, 20
  479. add xh, xh, a8
  480. j .Lsub_round
  481. .Lsub_exactlyhalf:
  482. /* Round down to the nearest even value. */
  483. srli xl, xl, 1
  484. slli xl, xl, 1
  485. leaf_return
  486. .Lsub_roundcarry:
  487. /* xl is always zero when the rounding increment overflows, so
  488. there's no need to round it to an even value. */
  489. addi xh, xh, 1
  490. /* Overflow to the exponent is OK. */
  491. leaf_return
  492. .Lsub_xhzero:
  493. /* When normalizing the result, all the mantissa bits in the high
  494. word are zero. Shift by "20 + (leading zero count of xl) + 1". */
  495. do_nsau a6, xl, a7, a11
  496. addi a6, a6, 21
  497. blt a10, a6, .Lsub_subnormal
  498. .Lsub_normalize_shift:
  499. bltui a6, 32, .Lsub_shift_lt32
  500. ssl a6
  501. src a8, xl, a9
  502. sll xl, a9
  503. movi a9, 0
  504. srli xh, xh, 20
  505. sub xh, xh, a6
  506. slli xh, xh, 20
  507. add xh, xh, a8
  508. j .Lsub_round
  509. .Lsub_subnormal:
  510. /* The exponent is too small to shift away all the leading zeros.
  511. Set a6 to the current exponent (which has already been
  512. decremented by the borrow) so that the exponent of the result
  513. will be zero. Do not add 1 to a6 in this case, because: (1)
  514. adding the mantissa will not increment the exponent, so there is
  515. no need to subtract anything extra from the exponent to
  516. compensate, and (2) the effective exponent of a subnormal is 1
  517. not 0 so the shift amount must be 1 smaller than normal. */
  518. mov a6, a10
  519. j .Lsub_normalize_shift
  520. #endif /* L_addsubdf3 */
  521. #ifdef L_muldf3
  522. /* Multiplication */
  523. #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
  524. #define XCHAL_NO_MUL 1
  525. #endif
  526. .literal_position
  527. __muldf3_aux:
  528. /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
  529. (This code is placed before the start of the function just to
  530. keep it in range of the limited branch displacements.) */
  531. .Lmul_xexpzero:
  532. /* Clear the sign bit of x. */
  533. slli xh, xh, 1
  534. srli xh, xh, 1
  535. /* If x is zero, return zero. */
  536. or a10, xh, xl
  537. beqz a10, .Lmul_return_zero
  538. /* Normalize x. Adjust the exponent in a8. */
  539. beqz xh, .Lmul_xh_zero
  540. do_nsau a10, xh, a11, a12
  541. addi a10, a10, -11
  542. ssl a10
  543. src xh, xh, xl
  544. sll xl, xl
  545. movi a8, 1
  546. sub a8, a8, a10
  547. j .Lmul_xnormalized
  548. .Lmul_xh_zero:
  549. do_nsau a10, xl, a11, a12
  550. addi a10, a10, -11
  551. movi a8, -31
  552. sub a8, a8, a10
  553. ssl a10
  554. bltz a10, .Lmul_xl_srl
  555. sll xh, xl
  556. movi xl, 0
  557. j .Lmul_xnormalized
  558. .Lmul_xl_srl:
  559. srl xh, xl
  560. sll xl, xl
  561. j .Lmul_xnormalized
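/* C sketch of the normalization just performed for a subnormal x
   (illustrative; clz64 is a hypothetical count-leading-zeros helper,
   and "exp" is the adjusted exponent kept in a8):

	int shift = clz64(x_mantissa) - 11;  // move the leading 1 to bit 52
	x_mantissa <<= shift;
	exp = 1 - shift;                     // subnormals have exponent 1
*/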
  562. .Lmul_yexpzero:
  563. /* Clear the sign bit of y. */
  564. slli yh, yh, 1
  565. srli yh, yh, 1
  566. /* If y is zero, return zero. */
  567. or a10, yh, yl
  568. beqz a10, .Lmul_return_zero
  569. /* Normalize y. Adjust the exponent in a9. */
  570. beqz yh, .Lmul_yh_zero
  571. do_nsau a10, yh, a11, a12
  572. addi a10, a10, -11
  573. ssl a10
  574. src yh, yh, yl
  575. sll yl, yl
  576. movi a9, 1
  577. sub a9, a9, a10
  578. j .Lmul_ynormalized
  579. .Lmul_yh_zero:
  580. do_nsau a10, yl, a11, a12
  581. addi a10, a10, -11
  582. movi a9, -31
  583. sub a9, a9, a10
  584. ssl a10
  585. bltz a10, .Lmul_yl_srl
  586. sll yh, yl
  587. movi yl, 0
  588. j .Lmul_ynormalized
  589. .Lmul_yl_srl:
  590. srl yh, yl
  591. sll yl, yl
  592. j .Lmul_ynormalized
  593. .Lmul_return_zero:
  594. /* Return zero with the appropriate sign bit. */
  595. srli xh, a7, 31
  596. slli xh, xh, 31
  597. movi xl, 0
  598. j .Lmul_done
  599. .Lmul_xnan_or_inf:
  600. /* If y is zero, return NaN. */
  601. bnez yl, 1f
  602. slli a8, yh, 1
  603. beqz a8, .Lmul_return_nan
  604. 1:
  605. /* If y is NaN, return y. */
  606. bnall yh, a6, .Lmul_returnx
  607. slli a8, yh, 12
  608. or a8, a8, yl
  609. beqz a8, .Lmul_returnx
  610. .Lmul_returny:
  611. mov xh, yh
  612. mov xl, yl
  613. .Lmul_returnx:
  614. slli a8, xh, 12
  615. or a8, a8, xl
  616. bnez a8, .Lmul_return_nan
  617. /* Set the sign bit and return. */
  618. extui a7, a7, 31, 1
  619. slli xh, xh, 1
  620. ssai 1
  621. src xh, a7, xh
  622. j .Lmul_done
  623. .Lmul_ynan_or_inf:
  624. /* If x is zero, return NaN. */
  625. bnez xl, .Lmul_returny
  626. slli a8, xh, 1
  627. bnez a8, .Lmul_returny
  628. mov xh, yh
  629. .Lmul_return_nan:
  630. movi a4, 0x80000 /* make it a quiet NaN */
  631. or xh, xh, a4
  632. j .Lmul_done
  633. .align 4
  634. .global __muldf3
  635. .type __muldf3, @function
  636. __muldf3:
  637. #if __XTENSA_CALL0_ABI__
  638. leaf_entry sp, 32
  639. addi sp, sp, -32
  640. s32i a12, sp, 16
  641. s32i a13, sp, 20
  642. s32i a14, sp, 24
  643. s32i a15, sp, 28
  644. #elif XCHAL_NO_MUL
  645. /* This is not really a leaf function; allocate enough stack space
  646. to allow CALL12s to a helper function. */
  647. leaf_entry sp, 64
  648. #else
  649. leaf_entry sp, 32
  650. #endif
  651. movi a6, 0x7ff00000
  652. /* Get the sign of the result. */
  653. xor a7, xh, yh
  654. /* Check for NaN and infinity. */
  655. ball xh, a6, .Lmul_xnan_or_inf
  656. ball yh, a6, .Lmul_ynan_or_inf
  657. /* Extract the exponents. */
  658. extui a8, xh, 20, 11
  659. extui a9, yh, 20, 11
  660. beqz a8, .Lmul_xexpzero
  661. .Lmul_xnormalized:
  662. beqz a9, .Lmul_yexpzero
  663. .Lmul_ynormalized:
  664. /* Add the exponents. */
  665. add a8, a8, a9
  666. /* Replace sign/exponent fields with explicit "1.0". */
  667. movi a10, 0x1fffff
  668. or xh, xh, a6
  669. and xh, xh, a10
  670. or yh, yh, a6
  671. and yh, yh, a10
  672. /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
  673. The least-significant word of the result is thrown away except
  674. that if it is nonzero, the lsb of a6 is set to 1. */
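/* C sketch of the multiply described above, assuming <stdint.h> types
   and a 128-bit integer type (illustrative only):

	uint64_t x = ((uint64_t)xh << 32) | xl;
	uint64_t y = ((uint64_t)yh << 32) | yl;
	unsigned __int128 p = (unsigned __int128)x * y;
	uint32_t lo      = (uint32_t)p;                      // discarded, except...
	uint32_t new_a6  = (uint32_t)(p >> 32) | (lo != 0);  // ...as a sticky bit
	uint64_t new_xhl = (uint64_t)(p >> 64);              // becomes xh:xl
*/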
  675. #if XCHAL_HAVE_MUL32_HIGH
  676. /* Compute a6 with any carry-outs in a10. */
  677. movi a10, 0
  678. mull a6, xl, yh
  679. mull a11, xh, yl
  680. add a6, a6, a11
  681. bgeu a6, a11, 1f
  682. addi a10, a10, 1
  683. 1:
  684. muluh a11, xl, yl
  685. add a6, a6, a11
  686. bgeu a6, a11, 1f
  687. addi a10, a10, 1
  688. 1:
  689. /* If the low word of the result is nonzero, set the lsb of a6. */
  690. mull a11, xl, yl
  691. beqz a11, 1f
  692. movi a9, 1
  693. or a6, a6, a9
  694. 1:
  695. /* Compute xl with any carry-outs in a9. */
  696. movi a9, 0
  697. mull a11, xh, yh
  698. add a10, a10, a11
  699. bgeu a10, a11, 1f
  700. addi a9, a9, 1
  701. 1:
  702. muluh a11, xh, yl
  703. add a10, a10, a11
  704. bgeu a10, a11, 1f
  705. addi a9, a9, 1
  706. 1:
  707. muluh xl, xl, yh
  708. add xl, xl, a10
  709. bgeu xl, a10, 1f
  710. addi a9, a9, 1
  711. 1:
  712. /* Compute xh. */
  713. muluh xh, xh, yh
  714. add xh, xh, a9
  715. #else /* ! XCHAL_HAVE_MUL32_HIGH */
  716. /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
  717. products. These partial products are:
  718. 0 xll * yll
  719. 1 xll * ylh
  720. 2 xlh * yll
  721. 3 xll * yhl
  722. 4 xlh * ylh
  723. 5 xhl * yll
  724. 6 xll * yhh
  725. 7 xlh * yhl
  726. 8 xhl * ylh
  727. 9 xhh * yll
  728. 10 xlh * yhh
  729. 11 xhl * yhl
  730. 12 xhh * ylh
  731. 13 xhl * yhh
  732. 14 xhh * yhl
  733. 15 xhh * yhh
  734. where the input chunks are (hh, hl, lh, ll). If using the Mul16
  735. or Mul32 multiplier options, these input chunks must be stored in
  736. separate registers. For Mac16, the UMUL.AA.* opcodes can specify
  737. that the inputs come from either half of the registers, so there
  738. is no need to shift them out ahead of time. If there is no
  739. multiply hardware, the 16-bit chunks can be extracted when setting
  740. up the arguments to the separate multiply function. */
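/* Weights of the partial products listed above (a sketch of the
   bookkeeping): with x = (xhh:xhl:xlh:xll) and y = (yhh:yhl:ylh:yll)
   as 16-bit chunks,

	x * y = sum of chunk_i(x) * chunk_j(y) << (16 * (i + j))

   so pp0 has weight 2^0; pp1-2 weight 2^16; pp3-5 weight 2^32;
   pp6-9 weight 2^48; pp10-12 weight 2^64; pp13-14 weight 2^80;
   and pp15 weight 2^96. */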
  741. /* Save a7 since it is needed to hold a temporary value. */
  742. s32i a7, sp, 4
  743. #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
  744. /* Calling a separate multiply function will clobber a0 and requires
  745. use of a8 as a temporary, so save those values now. (The function
  746. uses a custom ABI so nothing else needs to be saved.) */
  747. s32i a0, sp, 0
  748. s32i a8, sp, 8
  749. #endif
  750. #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
  751. #define xlh a12
  752. #define ylh a13
  753. #define xhh a14
  754. #define yhh a15
  755. /* Get the high halves of the inputs into registers. */
  756. srli xlh, xl, 16
  757. srli ylh, yl, 16
  758. srli xhh, xh, 16
  759. srli yhh, yh, 16
  760. #define xll xl
  761. #define yll yl
  762. #define xhl xh
  763. #define yhl yh
  764. #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
  765. /* Clear the high halves of the inputs. This does not matter
  766. for MUL16 because the high bits are ignored. */
  767. extui xl, xl, 0, 16
  768. extui xh, xh, 0, 16
  769. extui yl, yl, 0, 16
  770. extui yh, yh, 0, 16
  771. #endif
  772. #endif /* MUL16 || MUL32 */
  773. #if XCHAL_HAVE_MUL16
  774. #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
  775. mul16u dst, xreg ## xhalf, yreg ## yhalf
  776. #elif XCHAL_HAVE_MUL32
  777. #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
  778. mull dst, xreg ## xhalf, yreg ## yhalf
  779. #elif XCHAL_HAVE_MAC16
  780. /* The preprocessor insists on inserting a space when concatenating after
  781. a period in the definition of do_mul below. These macros are a workaround
  782. using underscores instead of periods when doing the concatenation. */
  783. #define umul_aa_ll umul.aa.ll
  784. #define umul_aa_lh umul.aa.lh
  785. #define umul_aa_hl umul.aa.hl
  786. #define umul_aa_hh umul.aa.hh
  787. #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
  788. umul_aa_ ## xhalf ## yhalf xreg, yreg; \
  789. rsr dst, ACCLO
  790. #else /* no multiply hardware */
  791. #define set_arg_l(dst, src) \
  792. extui dst, src, 0, 16
  793. #define set_arg_h(dst, src) \
  794. srli dst, src, 16
  795. #if __XTENSA_CALL0_ABI__
  796. #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
  797. set_arg_ ## xhalf (a13, xreg); \
  798. set_arg_ ## yhalf (a14, yreg); \
  799. call0 .Lmul_mulsi3; \
  800. mov dst, a12
  801. #else
  802. #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
  803. set_arg_ ## xhalf (a14, xreg); \
  804. set_arg_ ## yhalf (a15, yreg); \
  805. call12 .Lmul_mulsi3; \
  806. mov dst, a14
  807. #endif /* __XTENSA_CALL0_ABI__ */
  808. #endif /* no multiply hardware */
  809. /* Add pp1 and pp2 into a10 with carry-out in a9. */
  810. do_mul(a10, xl, l, yl, h) /* pp 1 */
  811. do_mul(a11, xl, h, yl, l) /* pp 2 */
  812. movi a9, 0
  813. add a10, a10, a11
  814. bgeu a10, a11, 1f
  815. addi a9, a9, 1
  816. 1:
  817. /* Initialize a6 with a9/a10 shifted into position. Note that
  818. this value can be safely incremented without any carry-outs. */
  819. ssai 16
  820. src a6, a9, a10
  821. /* Compute the low word into a10. */
  822. do_mul(a11, xl, l, yl, l) /* pp 0 */
  823. sll a10, a10
  824. add a10, a10, a11
  825. bgeu a10, a11, 1f
  826. addi a6, a6, 1
  827. 1:
  828. /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
  829. This is good enough to determine the low half of a6, so that any
  830. nonzero bits from the low word of the result can be collapsed
  831. into a6, freeing up a register. */
  832. movi a9, 0
  833. do_mul(a11, xl, l, yh, l) /* pp 3 */
  834. add a6, a6, a11
  835. bgeu a6, a11, 1f
  836. addi a9, a9, 1
  837. 1:
  838. do_mul(a11, xl, h, yl, h) /* pp 4 */
  839. add a6, a6, a11
  840. bgeu a6, a11, 1f
  841. addi a9, a9, 1
  842. 1:
  843. do_mul(a11, xh, l, yl, l) /* pp 5 */
  844. add a6, a6, a11
  845. bgeu a6, a11, 1f
  846. addi a9, a9, 1
  847. 1:
  848. /* Collapse any nonzero bits from the low word into a6. */
  849. beqz a10, 1f
  850. movi a11, 1
  851. or a6, a6, a11
  852. 1:
  853. /* Add pp6-9 into a11 with carry-outs in a10. */
  854. do_mul(a7, xl, l, yh, h) /* pp 6 */
  855. do_mul(a11, xh, h, yl, l) /* pp 9 */
  856. movi a10, 0
  857. add a11, a11, a7
  858. bgeu a11, a7, 1f
  859. addi a10, a10, 1
  860. 1:
  861. do_mul(a7, xl, h, yh, l) /* pp 7 */
  862. add a11, a11, a7
  863. bgeu a11, a7, 1f
  864. addi a10, a10, 1
  865. 1:
  866. do_mul(a7, xh, l, yl, h) /* pp 8 */
  867. add a11, a11, a7
  868. bgeu a11, a7, 1f
  869. addi a10, a10, 1
  870. 1:
  871. /* Shift a10/a11 into position, and add low half of a11 to a6. */
  872. src a10, a10, a11
  873. add a10, a10, a9
  874. sll a11, a11
  875. add a6, a6, a11
  876. bgeu a6, a11, 1f
  877. addi a10, a10, 1
  878. 1:
  879. /* Add pp10-12 into xl with carry-outs in a9. */
  880. movi a9, 0
  881. do_mul(xl, xl, h, yh, h) /* pp 10 */
  882. add xl, xl, a10
  883. bgeu xl, a10, 1f
  884. addi a9, a9, 1
  885. 1:
  886. do_mul(a10, xh, l, yh, l) /* pp 11 */
  887. add xl, xl, a10
  888. bgeu xl, a10, 1f
  889. addi a9, a9, 1
  890. 1:
  891. do_mul(a10, xh, h, yl, h) /* pp 12 */
  892. add xl, xl, a10
  893. bgeu xl, a10, 1f
  894. addi a9, a9, 1
  895. 1:
  896. /* Add pp13-14 into a11 with carry-outs in a10. */
  897. do_mul(a11, xh, l, yh, h) /* pp 13 */
  898. do_mul(a7, xh, h, yh, l) /* pp 14 */
  899. movi a10, 0
  900. add a11, a11, a7
  901. bgeu a11, a7, 1f
  902. addi a10, a10, 1
  903. 1:
904. /* Shift a10/a11 into position, and add low half of a11 to xl. */
  905. src a10, a10, a11
  906. add a10, a10, a9
  907. sll a11, a11
  908. add xl, xl, a11
  909. bgeu xl, a11, 1f
  910. addi a10, a10, 1
  911. 1:
  912. /* Compute xh. */
  913. do_mul(xh, xh, h, yh, h) /* pp 15 */
  914. add xh, xh, a10
  915. /* Restore values saved on the stack during the multiplication. */
  916. l32i a7, sp, 4
  917. #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
  918. l32i a0, sp, 0
  919. l32i a8, sp, 8
  920. #endif
  921. #endif /* ! XCHAL_HAVE_MUL32_HIGH */
  922. /* Shift left by 12 bits, unless there was a carry-out from the
  923. multiply, in which case, shift by 11 bits and increment the
  924. exponent. Note: It is convenient to use the constant 0x3ff
  925. instead of 0x400 when removing the extra exponent bias (so that
  926. it is easy to construct 0x7fe for the overflow check). Reverse
  927. the logic here to decrement the exponent sum by one unless there
  928. was a carry-out. */
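/* Why 11 vs. 12 (reasoning sketch, not from the original source): the
   two 53-bit mantissas multiply to a 105- or 106-bit product, so its
   leading 1 lands at bit 104 or 105 of the 128-bit result, i.e. at
   bit 8 or 9 of xh.  Shifting left by 12 or 11, respectively, moves
   it to bit 20, the implicit-1 position of the result's high word. */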
  929. movi a4, 11
  930. srli a5, xh, 21 - 12
  931. bnez a5, 1f
  932. addi a4, a4, 1
  933. addi a8, a8, -1
  934. 1: ssl a4
  935. src xh, xh, xl
  936. src xl, xl, a6
  937. sll a6, a6
  938. /* Subtract the extra bias from the exponent sum (plus one to account
  939. for the explicit "1.0" of the mantissa that will be added to the
  940. exponent in the final result). */
  941. movi a4, 0x3ff
  942. sub a8, a8, a4
  943. /* Check for over/underflow. The value in a8 is one less than the
  944. final exponent, so values in the range 0..7fd are OK here. */
  945. slli a4, a4, 1 /* 0x7fe */
  946. bgeu a8, a4, .Lmul_overflow
  947. .Lmul_round:
  948. /* Round. */
  949. bgez a6, .Lmul_rounded
  950. addi xl, xl, 1
  951. beqz xl, .Lmul_roundcarry
  952. slli a6, a6, 1
  953. beqz a6, .Lmul_exactlyhalf
  954. .Lmul_rounded:
  955. /* Add the exponent to the mantissa. */
  956. slli a8, a8, 20
  957. add xh, xh, a8
  958. .Lmul_addsign:
  959. /* Add the sign bit. */
  960. srli a7, a7, 31
  961. slli a7, a7, 31
  962. or xh, xh, a7
  963. .Lmul_done:
  964. #if __XTENSA_CALL0_ABI__
  965. l32i a12, sp, 16
  966. l32i a13, sp, 20
  967. l32i a14, sp, 24
  968. l32i a15, sp, 28
  969. addi sp, sp, 32
  970. #endif
  971. leaf_return
  972. .Lmul_exactlyhalf:
  973. /* Round down to the nearest even value. */
  974. srli xl, xl, 1
  975. slli xl, xl, 1
  976. j .Lmul_rounded
  977. .Lmul_roundcarry:
  978. /* xl is always zero when the rounding increment overflows, so
  979. there's no need to round it to an even value. */
  980. addi xh, xh, 1
  981. /* Overflow is OK -- it will be added to the exponent. */
  982. j .Lmul_rounded
  983. .Lmul_overflow:
  984. bltz a8, .Lmul_underflow
  985. /* Return +/- Infinity. */
  986. addi a8, a4, 1 /* 0x7ff */
  987. slli xh, a8, 20
  988. movi xl, 0
  989. j .Lmul_addsign
  990. .Lmul_underflow:
  991. /* Create a subnormal value, where the exponent field contains zero,
  992. but the effective exponent is 1. The value of a8 is one less than
  993. the actual exponent, so just negate it to get the shift amount. */
  994. neg a8, a8
  995. mov a9, a6
  996. ssr a8
  997. bgeui a8, 32, .Lmul_bigshift
  998. /* Shift xh/xl right. Any bits that are shifted out of xl are saved
  999. in a6 (combined with the shifted-out bits currently in a6) for
  1000. rounding the result. */
  1001. sll a6, xl
  1002. src xl, xh, xl
  1003. srl xh, xh
  1004. j 1f
  1005. .Lmul_bigshift:
  1006. bgeui a8, 64, .Lmul_flush_to_zero
  1007. sll a10, xl /* lost bits shifted out of xl */
  1008. src a6, xh, xl
  1009. srl xl, xh
  1010. movi xh, 0
  1011. or a9, a9, a10
  1012. /* Set the exponent to zero. */
  1013. 1: movi a8, 0
  1014. /* Pack any nonzero bits shifted out into a6. */
  1015. beqz a9, .Lmul_round
  1016. movi a9, 1
  1017. or a6, a6, a9
  1018. j .Lmul_round
  1019. .Lmul_flush_to_zero:
  1020. /* Return zero with the appropriate sign bit. */
  1021. srli xh, a7, 31
  1022. slli xh, xh, 31
  1023. movi xl, 0
  1024. j .Lmul_done
  1025. #if XCHAL_NO_MUL
  1026. /* For Xtensa processors with no multiply hardware, this simplified
  1027. version of _mulsi3 is used for multiplying 16-bit chunks of
  1028. the floating-point mantissas. When using CALL0, this function
  1029. uses a custom ABI: the inputs are passed in a13 and a14, the
  1030. result is returned in a12, and a8 and a15 are clobbered. */
  1031. .align 4
  1032. .Lmul_mulsi3:
  1033. leaf_entry sp, 16
  1034. .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
  1035. movi \dst, 0
  1036. 1: add \tmp1, \src2, \dst
  1037. extui \tmp2, \src1, 0, 1
  1038. movnez \dst, \tmp1, \tmp2
  1039. do_addx2 \tmp1, \src2, \dst, \tmp1
  1040. extui \tmp2, \src1, 1, 1
  1041. movnez \dst, \tmp1, \tmp2
  1042. do_addx4 \tmp1, \src2, \dst, \tmp1
  1043. extui \tmp2, \src1, 2, 1
  1044. movnez \dst, \tmp1, \tmp2
  1045. do_addx8 \tmp1, \src2, \dst, \tmp1
  1046. extui \tmp2, \src1, 3, 1
  1047. movnez \dst, \tmp1, \tmp2
  1048. srli \src1, \src1, 4
  1049. slli \src2, \src2, 4
  1050. bnez \src1, 1b
  1051. .endm
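/* C equivalent of mul_mulsi3_body (illustrative sketch, assuming
   <stdint.h> types):

	static uint32_t mulsi3(uint32_t a, uint32_t b)
	{
	    uint32_t r = 0;
	    do {                              // four bits of 'a' per pass
	        if (a & 1) r += b;
	        if (a & 2) r += b << 1;
	        if (a & 4) r += b << 2;
	        if (a & 8) r += b << 3;
	        a >>= 4;
	        b <<= 4;
	    } while (a != 0);
	    return r;
	}
*/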
  1052. #if __XTENSA_CALL0_ABI__
  1053. mul_mulsi3_body a12, a13, a14, a15, a8
  1054. #else
  1055. /* The result will be written into a2, so save that argument in a4. */
  1056. mov a4, a2
  1057. mul_mulsi3_body a2, a4, a3, a5, a6
  1058. #endif
  1059. leaf_return
  1060. #endif /* XCHAL_NO_MUL */
  1061. #endif /* L_muldf3 */
  1062. #ifdef L_divdf3
  1063. /* Division */
  1064. #if XCHAL_HAVE_DFP_DIV
  1065. .text
  1066. .align 4
  1067. .global __divdf3
  1068. .type __divdf3, @function
  1069. __divdf3:
  1070. leaf_entry sp, 16
  1071. wfrd f1, xh, xl
  1072. wfrd f2, yh, yl
  1073. div0.d f3, f2
  1074. nexp01.d f4, f2
  1075. const.d f0, 1
  1076. maddn.d f0, f4, f3
  1077. const.d f5, 0
  1078. mov.d f7, f2
  1079. mkdadj.d f7, f1
  1080. maddn.d f3, f0, f3
  1081. maddn.d f5, f0, f0
  1082. nexp01.d f1, f1
  1083. div0.d f2, f2
  1084. maddn.d f3, f5, f3
  1085. const.d f5, 1
  1086. const.d f0, 0
  1087. neg.d f6, f1
  1088. maddn.d f5, f4, f3
  1089. maddn.d f0, f6, f2
  1090. maddn.d f3, f5, f3
  1091. maddn.d f6, f4, f0
  1092. const.d f2, 1
  1093. maddn.d f2, f4, f3
  1094. maddn.d f0, f6, f3
  1095. neg.d f1, f1
  1096. maddn.d f3, f2, f3
  1097. maddn.d f1, f4, f0
  1098. addexpm.d f0, f7
  1099. addexp.d f3, f7
  1100. divn.d f0, f1, f3
  1101. rfr xl, f0
  1102. rfrd xh, f0
  1103. leaf_return
  1104. #else
  1105. .literal_position
  1106. __divdf3_aux:
  1107. /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
  1108. (This code is placed before the start of the function just to
  1109. keep it in range of the limited branch displacements.) */
  1110. .Ldiv_yexpzero:
  1111. /* Clear the sign bit of y. */
  1112. slli yh, yh, 1
  1113. srli yh, yh, 1
  1114. /* Check for division by zero. */
  1115. or a10, yh, yl
  1116. beqz a10, .Ldiv_yzero
  1117. /* Normalize y. Adjust the exponent in a9. */
  1118. beqz yh, .Ldiv_yh_zero
  1119. do_nsau a10, yh, a11, a9
  1120. addi a10, a10, -11
  1121. ssl a10
  1122. src yh, yh, yl
  1123. sll yl, yl
  1124. movi a9, 1
  1125. sub a9, a9, a10
  1126. j .Ldiv_ynormalized
  1127. .Ldiv_yh_zero:
  1128. do_nsau a10, yl, a11, a9
  1129. addi a10, a10, -11
  1130. movi a9, -31
  1131. sub a9, a9, a10
  1132. ssl a10
  1133. bltz a10, .Ldiv_yl_srl
  1134. sll yh, yl
  1135. movi yl, 0
  1136. j .Ldiv_ynormalized
  1137. .Ldiv_yl_srl:
  1138. srl yh, yl
  1139. sll yl, yl
  1140. j .Ldiv_ynormalized
  1141. .Ldiv_yzero:
  1142. /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
  1143. slli xh, xh, 1
  1144. srli xh, xh, 1
  1145. or xl, xl, xh
  1146. srli xh, a7, 31
  1147. slli xh, xh, 31
  1148. or xh, xh, a6
  1149. bnez xl, 1f
  1150. movi a4, 0x80000 /* make it a quiet NaN */
  1151. or xh, xh, a4
  1152. 1: movi xl, 0
  1153. leaf_return
  1154. .Ldiv_xexpzero:
  1155. /* Clear the sign bit of x. */
  1156. slli xh, xh, 1
  1157. srli xh, xh, 1
  1158. /* If x is zero, return zero. */
  1159. or a10, xh, xl
  1160. beqz a10, .Ldiv_return_zero
  1161. /* Normalize x. Adjust the exponent in a8. */
  1162. beqz xh, .Ldiv_xh_zero
  1163. do_nsau a10, xh, a11, a8
  1164. addi a10, a10, -11
  1165. ssl a10
  1166. src xh, xh, xl
  1167. sll xl, xl
  1168. movi a8, 1
  1169. sub a8, a8, a10
  1170. j .Ldiv_xnormalized
  1171. .Ldiv_xh_zero:
  1172. do_nsau a10, xl, a11, a8
  1173. addi a10, a10, -11
  1174. movi a8, -31
  1175. sub a8, a8, a10
  1176. ssl a10
  1177. bltz a10, .Ldiv_xl_srl
  1178. sll xh, xl
  1179. movi xl, 0
  1180. j .Ldiv_xnormalized
  1181. .Ldiv_xl_srl:
  1182. srl xh, xl
  1183. sll xl, xl
  1184. j .Ldiv_xnormalized
  1185. .Ldiv_return_zero:
  1186. /* Return zero with the appropriate sign bit. */
  1187. srli xh, a7, 31
  1188. slli xh, xh, 31
  1189. movi xl, 0
  1190. leaf_return
  1191. .Ldiv_xnan_or_inf:
  1192. /* Set the sign bit of the result. */
  1193. srli a7, yh, 31
  1194. slli a7, a7, 31
  1195. xor xh, xh, a7
  1196. /* If y is NaN or Inf, return NaN. */
  1197. ball yh, a6, .Ldiv_return_nan
  1198. slli a8, xh, 12
  1199. or a8, a8, xl
  1200. bnez a8, .Ldiv_return_nan
  1201. leaf_return
  1202. .Ldiv_ynan_or_inf:
  1203. /* If y is Infinity, return zero. */
  1204. slli a8, yh, 12
  1205. or a8, a8, yl
  1206. beqz a8, .Ldiv_return_zero
  1207. /* y is NaN; return it. */
  1208. mov xh, yh
  1209. mov xl, yl
  1210. .Ldiv_return_nan:
  1211. movi a4, 0x80000 /* make it a quiet NaN */
  1212. or xh, xh, a4
  1213. leaf_return
  1214. .Ldiv_highequal1:
  1215. bltu xl, yl, 2f
  1216. j 3f
  1217. .align 4
  1218. .global __divdf3
  1219. .type __divdf3, @function
  1220. __divdf3:
  1221. leaf_entry sp, 16
  1222. movi a6, 0x7ff00000
  1223. /* Get the sign of the result. */
  1224. xor a7, xh, yh
  1225. /* Check for NaN and infinity. */
  1226. ball xh, a6, .Ldiv_xnan_or_inf
  1227. ball yh, a6, .Ldiv_ynan_or_inf
  1228. /* Extract the exponents. */
  1229. extui a8, xh, 20, 11
  1230. extui a9, yh, 20, 11
  1231. beqz a9, .Ldiv_yexpzero
  1232. .Ldiv_ynormalized:
  1233. beqz a8, .Ldiv_xexpzero
  1234. .Ldiv_xnormalized:
  1235. /* Subtract the exponents. */
  1236. sub a8, a8, a9
  1237. /* Replace sign/exponent fields with explicit "1.0". */
  1238. movi a10, 0x1fffff
  1239. or xh, xh, a6
  1240. and xh, xh, a10
  1241. or yh, yh, a6
  1242. and yh, yh, a10
  1243. /* Set SAR for left shift by one. */
  1244. ssai (32 - 1)
  1245. /* The first digit of the mantissa division must be a one.
  1246. Shift x (and adjust the exponent) as needed to make this true. */
  1247. bltu yh, xh, 3f
  1248. beq yh, xh, .Ldiv_highequal1
  1249. 2: src xh, xh, xl
  1250. sll xl, xl
  1251. addi a8, a8, -1
  1252. 3:
  1253. /* Do the first subtraction and shift. */
  1254. sub xh, xh, yh
  1255. bgeu xl, yl, 1f
  1256. addi xh, xh, -1
  1257. 1: sub xl, xl, yl
  1258. src xh, xh, xl
  1259. sll xl, xl
  1260. /* Put the quotient into a10/a11. */
  1261. movi a10, 0
  1262. movi a11, 1
  1263. /* Divide one bit at a time for 52 bits. */
  1264. movi a9, 52
  1265. #if XCHAL_HAVE_LOOPS
  1266. loop a9, .Ldiv_loopend
  1267. #endif
  1268. .Ldiv_loop:
  1269. /* Shift the quotient << 1. */
  1270. src a10, a10, a11
  1271. sll a11, a11
  1272. /* Is this digit a 0 or 1? */
  1273. bltu xh, yh, 3f
  1274. beq xh, yh, .Ldiv_highequal2
  1275. /* Output a 1 and subtract. */
  1276. 2: addi a11, a11, 1
  1277. sub xh, xh, yh
  1278. bgeu xl, yl, 1f
  1279. addi xh, xh, -1
  1280. 1: sub xl, xl, yl
  1281. /* Shift the dividend << 1. */
  1282. 3: src xh, xh, xl
  1283. sll xl, xl
  1284. #if !XCHAL_HAVE_LOOPS
  1285. addi a9, a9, -1
  1286. bnez a9, .Ldiv_loop
  1287. #endif
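/* C sketch of the quotient loop (illustrative; x and y are the aligned
   64-bit mantissas, q collects the 53 quotient bits, and the final x
   is twice the remainder used for rounding below):

	uint64_t q = 1;
	x = (x - y) << 1;                     // the first digit is known to be 1
	for (int i = 0; i < 52; i++) {
	    q <<= 1;
	    if (x >= y) { q |= 1; x -= y; }
	    x <<= 1;
	}
*/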
  1288. .Ldiv_loopend:
  1289. /* Add the exponent bias (less one to account for the explicit "1.0"
  1290. of the mantissa that will be added to the exponent in the final
  1291. result). */
  1292. movi a9, 0x3fe
  1293. add a8, a8, a9
  1294. /* Check for over/underflow. The value in a8 is one less than the
  1295. final exponent, so values in the range 0..7fd are OK here. */
  1296. addmi a9, a9, 0x400 /* 0x7fe */
  1297. bgeu a8, a9, .Ldiv_overflow
  1298. .Ldiv_round:
  1299. /* Round. The remainder (<< 1) is in xh/xl. */
  1300. bltu xh, yh, .Ldiv_rounded
  1301. beq xh, yh, .Ldiv_highequal3
  1302. .Ldiv_roundup:
  1303. addi a11, a11, 1
  1304. beqz a11, .Ldiv_roundcarry
  1305. .Ldiv_rounded:
  1306. mov xl, a11
  1307. /* Add the exponent to the mantissa. */
  1308. slli a8, a8, 20
  1309. add xh, a10, a8
  1310. .Ldiv_addsign:
  1311. /* Add the sign bit. */
  1312. srli a7, a7, 31
  1313. slli a7, a7, 31
  1314. or xh, xh, a7
  1315. leaf_return
  1316. .Ldiv_highequal2:
  1317. bgeu xl, yl, 2b
  1318. j 3b
  1319. .Ldiv_highequal3:
  1320. bltu xl, yl, .Ldiv_rounded
  1321. bne xl, yl, .Ldiv_roundup
  1322. /* Remainder is exactly half the divisor. Round even. */
  1323. addi a11, a11, 1
  1324. beqz a11, .Ldiv_roundcarry
  1325. srli a11, a11, 1
  1326. slli a11, a11, 1
  1327. j .Ldiv_rounded
  1328. .Ldiv_overflow:
  1329. bltz a8, .Ldiv_underflow
  1330. /* Return +/- Infinity. */
  1331. addi a8, a9, 1 /* 0x7ff */
  1332. slli xh, a8, 20
  1333. movi xl, 0
  1334. j .Ldiv_addsign
  1335. .Ldiv_underflow:
  1336. /* Create a subnormal value, where the exponent field contains zero,
  1337. but the effective exponent is 1. The value of a8 is one less than
  1338. the actual exponent, so just negate it to get the shift amount. */
  1339. neg a8, a8
  1340. ssr a8
  1341. bgeui a8, 32, .Ldiv_bigshift
  1342. /* Shift a10/a11 right. Any bits that are shifted out of a11 are
  1343. saved in a6 for rounding the result. */
  1344. sll a6, a11
  1345. src a11, a10, a11
  1346. srl a10, a10
  1347. j 1f
  1348. .Ldiv_bigshift:
  1349. bgeui a8, 64, .Ldiv_flush_to_zero
  1350. sll a9, a11 /* lost bits shifted out of a11 */
  1351. src a6, a10, a11
  1352. srl a11, a10
  1353. movi a10, 0
  1354. or xl, xl, a9
  1355. /* Set the exponent to zero. */
  1356. 1: movi a8, 0
  1357. /* Pack any nonzero remainder (in xh/xl) into a6. */
  1358. or xh, xh, xl
  1359. beqz xh, 1f
  1360. movi a9, 1
  1361. or a6, a6, a9
  1362. /* Round a10/a11 based on the bits shifted out into a6. */
  1363. 1: bgez a6, .Ldiv_rounded
  1364. addi a11, a11, 1
  1365. beqz a11, .Ldiv_roundcarry
  1366. slli a6, a6, 1
  1367. bnez a6, .Ldiv_rounded
  1368. srli a11, a11, 1
  1369. slli a11, a11, 1
  1370. j .Ldiv_rounded
  1371. .Ldiv_roundcarry:
  1372. /* a11 is always zero when the rounding increment overflows, so
  1373. there's no need to round it to an even value. */
  1374. addi a10, a10, 1
  1375. /* Overflow to the exponent field is OK. */
  1376. j .Ldiv_rounded
  1377. .Ldiv_flush_to_zero:
  1378. /* Return zero with the appropriate sign bit. */
  1379. srli xh, a7, 31
  1380. slli xh, xh, 31
  1381. movi xl, 0
  1382. leaf_return
  1383. #endif /* XCHAL_HAVE_DFP_DIV */
  1384. #endif /* L_divdf3 */
  1385. #ifdef L_cmpdf2
  1386. /* Equal and Not Equal */
  1387. .align 4
  1388. .global __eqdf2
  1389. .global __nedf2
  1390. .set __nedf2, __eqdf2
  1391. .type __eqdf2, @function
  1392. __eqdf2:
  1393. leaf_entry sp, 16
  1394. bne xl, yl, 2f
  1395. bne xh, yh, 4f
  1396. /* The values are equal but NaN != NaN. Check the exponent. */
  1397. movi a6, 0x7ff00000
  1398. ball xh, a6, 3f
  1399. /* Equal. */
  1400. movi a2, 0
  1401. leaf_return
  1402. /* Not equal. */
  1403. 2: movi a2, 1
  1404. leaf_return
  1405. /* Check if the mantissas are nonzero. */
  1406. 3: slli a7, xh, 12
  1407. or a7, a7, xl
  1408. j 5f
  1409. /* Check if x and y are zero with different signs. */
  1410. 4: or a7, xh, yh
  1411. slli a7, a7, 1
  1412. or a7, a7, xl /* xl == yl here */
  1413. /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1414. of x when exponent(x) = 0x7ff and x == y. */
  1415. 5: movi a2, 0
  1416. movi a3, 1
  1417. movnez a2, a3, a7
  1418. leaf_return
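/* C sketch of the predicate implemented by __eqdf2/__nedf2, operating
   on the raw 64-bit patterns (returns 0 iff equal; illustrative only):

	static int eq(uint64_t x, uint64_t y)
	{
	    int xnan = ((x >> 52) & 0x7ff) == 0x7ff && (x << 12) != 0;
	    int ynan = ((y >> 52) & 0x7ff) == 0x7ff && (y << 12) != 0;
	    if (xnan || ynan)        return 1;    // NaN is unequal to everything
	    if (((x | y) << 1) == 0) return 0;    // +0 and -0 are equal
	    return x != y;
	}
*/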
  1419. /* Greater Than */
  1420. .align 4
  1421. .global __gtdf2
  1422. .type __gtdf2, @function
  1423. __gtdf2:
  1424. leaf_entry sp, 16
  1425. movi a6, 0x7ff00000
  1426. ball xh, a6, 2f
  1427. 1: bnall yh, a6, .Lle_cmp
  1428. /* Check if y is a NaN. */
  1429. slli a7, yh, 12
  1430. or a7, a7, yl
  1431. beqz a7, .Lle_cmp
  1432. movi a2, 0
  1433. leaf_return
  1434. /* Check if x is a NaN. */
  1435. 2: slli a7, xh, 12
  1436. or a7, a7, xl
  1437. beqz a7, 1b
  1438. movi a2, 0
  1439. leaf_return
  1440. /* Less Than or Equal */
  1441. .align 4
  1442. .global __ledf2
  1443. .type __ledf2, @function
  1444. __ledf2:
  1445. leaf_entry sp, 16
  1446. movi a6, 0x7ff00000
  1447. ball xh, a6, 2f
  1448. 1: bnall yh, a6, .Lle_cmp
  1449. /* Check if y is a NaN. */
  1450. slli a7, yh, 12
  1451. or a7, a7, yl
  1452. beqz a7, .Lle_cmp
  1453. movi a2, 1
  1454. leaf_return
  1455. /* Check if x is a NaN. */
  1456. 2: slli a7, xh, 12
  1457. or a7, a7, xl
  1458. beqz a7, 1b
  1459. movi a2, 1
  1460. leaf_return
  1461. .Lle_cmp:
  1462. /* Check if x and y have different signs. */
  1463. xor a7, xh, yh
  1464. bltz a7, .Lle_diff_signs
  1465. /* Check if x is negative. */
  1466. bltz xh, .Lle_xneg
  1467. /* Check if x <= y. */
  1468. bltu xh, yh, 4f
  1469. bne xh, yh, 5f
  1470. bltu yl, xl, 5f
  1471. 4: movi a2, 0
  1472. leaf_return
  1473. .Lle_xneg:
  1474. /* Check if y <= x. */
  1475. bltu yh, xh, 4b
  1476. bne yh, xh, 5f
  1477. bgeu xl, yl, 4b
  1478. 5: movi a2, 1
  1479. leaf_return
  1480. .Lle_diff_signs:
  1481. bltz xh, 4b
  1482. /* Check if both x and y are zero. */
  1483. or a7, xh, yh
  1484. slli a7, a7, 1
  1485. or a7, a7, xl
  1486. or a7, a7, yl
  1487. movi a2, 1
  1488. movi a3, 0
  1489. moveqz a2, a3, a7
  1490. leaf_return
  1491. /* Greater Than or Equal */
  1492. .align 4
  1493. .global __gedf2
  1494. .type __gedf2, @function
  1495. __gedf2:
  1496. leaf_entry sp, 16
  1497. movi a6, 0x7ff00000
  1498. ball xh, a6, 2f
  1499. 1: bnall yh, a6, .Llt_cmp
  1500. /* Check if y is a NaN. */
  1501. slli a7, yh, 12
  1502. or a7, a7, yl
  1503. beqz a7, .Llt_cmp
  1504. movi a2, -1
  1505. leaf_return
  1506. /* Check if x is a NaN. */
  1507. 2: slli a7, xh, 12
  1508. or a7, a7, xl
  1509. beqz a7, 1b
  1510. movi a2, -1
  1511. leaf_return
  1512. /* Less Than */
  1513. .align 4
  1514. .global __ltdf2
  1515. .type __ltdf2, @function
  1516. __ltdf2:
  1517. leaf_entry sp, 16
  1518. movi a6, 0x7ff00000
  1519. ball xh, a6, 2f
  1520. 1: bnall yh, a6, .Llt_cmp
  1521. /* Check if y is a NaN. */
  1522. slli a7, yh, 12
  1523. or a7, a7, yl
  1524. beqz a7, .Llt_cmp
  1525. movi a2, 0
  1526. leaf_return
  1527. /* Check if x is a NaN. */
  1528. 2: slli a7, xh, 12
  1529. or a7, a7, xl
  1530. beqz a7, 1b
  1531. movi a2, 0
  1532. leaf_return
  1533. .Llt_cmp:
  1534. /* Check if x and y have different signs. */
  1535. xor a7, xh, yh
  1536. bltz a7, .Llt_diff_signs
  1537. /* Check if x is negative. */
  1538. bltz xh, .Llt_xneg
  1539. /* Check if x < y. */
  1540. bltu xh, yh, 4f
  1541. bne xh, yh, 5f
  1542. bgeu xl, yl, 5f
  1543. 4: movi a2, -1
  1544. leaf_return
  1545. .Llt_xneg:
  1546. /* Check if y < x. */
  1547. bltu yh, xh, 4b
  1548. bne yh, xh, 5f
  1549. bltu yl, xl, 4b
  1550. 5: movi a2, 0
  1551. leaf_return
  1552. .Llt_diff_signs:
  1553. bgez xh, 5b
        /* Check if both x and y are zero. */
        or a7, xh, yh
        slli a7, a7, 1
        or a7, a7, xl
        or a7, a7, yl
        movi a2, 0
        movi a3, -1
        movnez a2, a3, a7
        leaf_return

        /* Unordered */

        .align 4
        .global __unorddf2
        .type __unorddf2, @function
__unorddf2:
        leaf_entry sp, 16
        movi a6, 0x7ff00000
        ball xh, a6, 3f
1:      ball yh, a6, 4f
2:      movi a2, 0
        leaf_return

3:      slli a7, xh, 12
        or a7, a7, xl
        beqz a7, 1b
        movi a2, 1
        leaf_return

4:      slli a7, yh, 12
        or a7, a7, yl
        beqz a7, 2b
        movi a2, 1
        leaf_return

#endif /* L_cmpdf2 */

#ifdef L_fixdfsi
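
        /* Convert a double to a signed 32-bit integer, truncating toward
           zero: reject NaN/Infinity, compute the shift count from the
           exponent, OR in the implicit "1" bit and shift the significand
           into place, then negate the result if the sign bit was set.
           Worked example: 5.0 = 0x40140000:00000000 has biased exponent
           0x401, so a4 = 0x401 - 0x3fe = 3; the significand (with the
           hidden bit) is left-justified and shifted right by 32 - 3,
           leaving 0b101 = 5. */
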
        .align 4
        .global __fixdfsi
        .type __fixdfsi, @function
__fixdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi a6, 0x7ff00000
        ball xh, a6, .Lfixdfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
        extui a4, xh, 20, 11
        extui a5, a6, 19, 10 /* 0x3fe */
        sub a4, a4, a5
        bgei a4, 32, .Lfixdfsi_maxint
        blti a4, 1, .Lfixdfsi_zero

        /* Add explicit "1.0" and shift << 11. */
        or a7, xh, a6
        ssai (32 - 11)
        src a5, a7, xl

        /* Shift back to the right, based on the exponent. */
        ssl a4 /* shift by 32 - a4 */
        srl a5, a5

        /* Negate the result if sign != 0. */
        neg a2, a5
        movgez a2, a5, a7
        leaf_return

.Lfixdfsi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli a4, xh, 12
        or a4, a4, xl
        beqz a4, .Lfixdfsi_maxint

        /* Translate NaN to +maxint. */
        movi xh, 0

.Lfixdfsi_maxint:
        slli a4, a6, 11 /* 0x80000000 */
        addi a5, a4, -1 /* 0x7fffffff */
        movgez a4, a5, xh
        mov a2, a4
        leaf_return

.Lfixdfsi_zero:
        movi a2, 0
        leaf_return

#endif /* L_fixdfsi */

#ifdef L_fixdfdi
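
        /* 64-bit variant of __fixdfsi.  The 64-bit right shift is split
           into two cases (see .Lfixdfdi_smallshift) depending on whether
           the shift amount crosses a word boundary, and a negative result
           is formed as the two's complement of the xh/xl pair: negate
           both words and subtract one from the high word unless the low
           word was zero. */
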
        .align 4
        .global __fixdfdi
        .type __fixdfdi, @function
__fixdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi a6, 0x7ff00000
        ball xh, a6, .Lfixdfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
        extui a4, xh, 20, 11
        extui a5, a6, 19, 10 /* 0x3fe */
        sub a4, a4, a5
        bgei a4, 64, .Lfixdfdi_maxint
        blti a4, 1, .Lfixdfdi_zero

        /* Add explicit "1.0" and shift << 11. */
        or a7, xh, a6
        ssai (32 - 11)
        src xh, a7, xl
        sll xl, xl

        /* Shift back to the right, based on the exponent. */
        ssl a4 /* shift by 64 - a4 */
        bgei a4, 32, .Lfixdfdi_smallshift
        srl xl, xh
        movi xh, 0

.Lfixdfdi_shifted:
        /* Negate the result if sign != 0. */
        bgez a7, 1f
        neg xl, xl
        neg xh, xh
        beqz xl, 1f
        addi xh, xh, -1
1:      leaf_return

.Lfixdfdi_smallshift:
        src xl, xh, xl
        srl xh, xh
        j .Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli a4, xh, 12
        or a4, a4, xl
        beqz a4, .Lfixdfdi_maxint

        /* Translate NaN to +maxint. */
        movi xh, 0

.Lfixdfdi_maxint:
        slli a7, a6, 11 /* 0x80000000 */
        bgez xh, 1f
        mov xh, a7
        movi xl, 0
        leaf_return

1:      addi xh, a7, -1 /* 0x7fffffff */
        movi xl, -1
        leaf_return

.Lfixdfdi_zero:
        movi xh, 0
        movi xl, 0
        leaf_return

#endif /* L_fixdfdi */

#ifdef L_fixunsdfsi
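
        /* Unsigned conversion differs from __fixdfsi mainly in the
           exponent range: the bias subtracted is 0x3ff rather than 0x3fe,
           so magnitudes up to 2^32 - 1 fit, and an exponent of exactly 31
           (a4 + 1 == 32) is handled separately at .Lfixunsdfsi_bigexp,
           where the value needs no shift at all. */
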
        .align 4
        .global __fixunsdfsi
        .type __fixunsdfsi, @function
__fixunsdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi a6, 0x7ff00000
        ball xh, a6, .Lfixunsdfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
        extui a4, xh, 20, 11
        extui a5, a6, 20, 10 /* 0x3ff */
        sub a4, a4, a5
        bgei a4, 32, .Lfixunsdfsi_maxint
        bltz a4, .Lfixunsdfsi_zero

        /* Add explicit "1.0" and shift << 11. */
        or a7, xh, a6
        ssai (32 - 11)
        src a5, a7, xl

        /* Shift back to the right, based on the exponent. */
        addi a4, a4, 1
        beqi a4, 32, .Lfixunsdfsi_bigexp
        ssl a4 /* shift by 32 - a4 */
        srl a5, a5

        /* Negate the result if sign != 0. */
        neg a2, a5
        movgez a2, a5, a7
        leaf_return

.Lfixunsdfsi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli a4, xh, 12
        or a4, a4, xl
        beqz a4, .Lfixunsdfsi_maxint

        /* Translate NaN to 0xffffffff. */
        movi a2, -1
        leaf_return

.Lfixunsdfsi_maxint:
        slli a4, a6, 11 /* 0x80000000 */
        movi a5, -1 /* 0xffffffff */
        movgez a4, a5, xh
        mov a2, a4
        leaf_return

.Lfixunsdfsi_zero:
        movi a2, 0
        leaf_return

.Lfixunsdfsi_bigexp:
        /* Handle unsigned maximum exponent case. */
        bltz xh, 1f
        mov a2, a5 /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative. */
1:      slli a2, a6, 11
        leaf_return

#endif /* L_fixunsdfsi */

#ifdef L_fixunsdfdi
        .align 4
        .global __fixunsdfdi
        .type __fixunsdfdi, @function
__fixunsdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi a6, 0x7ff00000
        ball xh, a6, .Lfixunsdfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
        extui a4, xh, 20, 11
        extui a5, a6, 20, 10 /* 0x3ff */
        sub a4, a4, a5
        bgei a4, 64, .Lfixunsdfdi_maxint
        bltz a4, .Lfixunsdfdi_zero

        /* Add explicit "1.0" and shift << 11. */
        or a7, xh, a6
        ssai (32 - 11)
        src xh, a7, xl
        sll xl, xl

        /* Shift back to the right, based on the exponent. */
        addi a4, a4, 1
        beqi a4, 64, .Lfixunsdfdi_bigexp
        ssl a4 /* shift by 64 - a4 */
        bgei a4, 32, .Lfixunsdfdi_smallshift
        srl xl, xh
        movi xh, 0

.Lfixunsdfdi_shifted:
        /* Negate the result if sign != 0. */
        bgez a7, 1f
        neg xl, xl
        neg xh, xh
        beqz xl, 1f
        addi xh, xh, -1
1:      leaf_return

.Lfixunsdfdi_smallshift:
        src xl, xh, xl
        srl xh, xh
        j .Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli a4, xh, 12
        or a4, a4, xl
        beqz a4, .Lfixunsdfdi_maxint

        /* Translate NaN to 0xffffffff.... */
1:      movi xh, -1
        movi xl, -1
        leaf_return

.Lfixunsdfdi_maxint:
        bgez xh, 1b
2:      slli xh, a6, 11 /* 0x80000000 */
        movi xl, 0
        leaf_return

.Lfixunsdfdi_zero:
        movi xh, 0
        movi xl, 0
        leaf_return

.Lfixunsdfdi_bigexp:
        /* Handle unsigned maximum exponent case. */
        bltz a7, 2b
        leaf_return /* no shift needed */

#endif /* L_fixunsdfdi */

#ifdef L_floatsidf
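
        /* Convert a 32-bit integer to a double.  The value is normalized
           so that its leading 1 ends up in the msb, split across xh/xl
           with an 11-bit right shift, and the exponent 0x41d minus the
           leading-zero count is added; the hidden leading 1 merges into
           the exponent field by that addition.  Worked example: 1 has 31
           leading zeros, so the exponent becomes 0x41d - 31 = 0x3fe, and
           adding the normalized bit in xh bit 20 yields
           0x3ff00000:00000000, i.e. 1.0.  The conversion is always exact,
           so no rounding is needed. */
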
        .align 4
        .global __floatunsidf
        .type __floatunsidf, @function
__floatunsidf:
        leaf_entry sp, 16
        beqz a2, .Lfloatsidf_return_zero

        /* Set the sign to zero and jump to the floatsidf code. */
        movi a7, 0
        j .Lfloatsidf_normalize

        .align 4
        .global __floatsidf
        .type __floatsidf, @function
__floatsidf:
        leaf_entry sp, 16

        /* Check for zero. */
        beqz a2, .Lfloatsidf_return_zero

        /* Save the sign. */
        extui a7, a2, 31, 1

        /* Get the absolute value. */
#if XCHAL_HAVE_ABS
        abs a2, a2
#else
        neg a4, a2
        movltz a2, a4, a2
#endif

.Lfloatsidf_normalize:
        /* Normalize with the first 1 bit in the msb. */
        do_nsau a4, a2, a5, a6
        ssl a4
        sll a5, a2

        /* Shift the mantissa into position. */
        srli xh, a5, 11
        slli xl, a5, (32 - 11)

        /* Set the exponent. */
        movi a5, 0x41d /* 0x3fe + 31 */
        sub a5, a5, a4
        slli a5, a5, 20
        add xh, xh, a5

        /* Add the sign and return. */
        slli a7, a7, 31
        or xh, xh, a7
        leaf_return

.Lfloatsidf_return_zero:
        movi a3, 0
        leaf_return

#endif /* L_floatsidf */

#ifdef L_floatdidf
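
        /* Convert a 64-bit integer to a double.  Unlike the 32-bit case
           this can be inexact, since only 53 significand bits are
           available: the bits shifted out into a6 serve as round and
           sticky bits for round-to-nearest, ties-to-even, and a carry
           out of the significand is allowed to bump the exponent. */
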
        .align 4
        .global __floatundidf
        .type __floatundidf, @function
__floatundidf:
        leaf_entry sp, 16

        /* Check for zero. */
        or a4, xh, xl
        beqz a4, 2f

        /* Set the sign to zero and jump to the floatdidf code. */
        movi a7, 0
        j .Lfloatdidf_normalize

        .align 4
        .global __floatdidf
        .type __floatdidf, @function
__floatdidf:
        leaf_entry sp, 16

        /* Check for zero. */
        or a4, xh, xl
        beqz a4, 2f

        /* Save the sign. */
        extui a7, xh, 31, 1

        /* Get the absolute value. */
        bgez xh, .Lfloatdidf_normalize
        neg xl, xl
        neg xh, xh
        beqz xl, .Lfloatdidf_normalize
        addi xh, xh, -1

.Lfloatdidf_normalize:
        /* Normalize with the first 1 bit in the msb of xh. */
        beqz xh, .Lfloatdidf_bigshift
        do_nsau a4, xh, a5, a6
        ssl a4
        src xh, xh, xl
        sll xl, xl

.Lfloatdidf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6. */
        ssai 11
        sll a6, xl
        src xl, xh, xl
        srl xh, xh

        /* Set the exponent. */
        movi a5, 0x43d /* 0x3fe + 63 */
        sub a5, a5, a4
        slli a5, a5, 20
        add xh, xh, a5

        /* Add the sign. */
        slli a7, a7, 31
        or xh, xh, a7

        /* Round up if the leftover fraction is >= 1/2. */
        bgez a6, 2f
        addi xl, xl, 1
        beqz xl, .Lfloatdidf_roundcarry

        /* Check if the leftover fraction is exactly 1/2. */
        slli a6, a6, 1
        beqz a6, .Lfloatdidf_exactlyhalf
2:      leaf_return

.Lfloatdidf_bigshift:
        /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
        do_nsau a4, xl, a5, a6
        ssl a4
        sll xh, xl
        movi xl, 0
        addi a4, a4, 32
        j .Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
        /* Round down to the nearest even value. */
        srli xl, xl, 1
        slli xl, xl, 1
        leaf_return

.Lfloatdidf_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value. */
        addi xh, xh, 1
        /* Overflow to the exponent is OK. */
        leaf_return

#endif /* L_floatdidf */

#ifdef L_truncdfsf2
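
        /* Truncate a double to a single.  The exponent is rebiased by
           0x3ff - 0x7f, the 29 discarded significand bits are used for
           round-to-nearest, ties-to-even, overflow produces an infinity
           (0x7f800000 plus sign), a NaN becomes a quiet NaN (0x7fc00000
           plus sign), and results that fall into the single-precision
           subnormal range are reassembled with an explicit leading 1 and
           a sticky bit so that rounding still works. */
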
        .align 4
        .global __truncdfsf2
        .type __truncdfsf2, @function
__truncdfsf2:
        leaf_entry sp, 16

        /* Adjust the exponent bias. */
        movi a4, (0x3ff - 0x7f) << 20
        sub a5, xh, a4

        /* Check for underflow. */
        xor a6, xh, a5
        bltz a6, .Ltrunc_underflow
        extui a6, a5, 20, 11
        beqz a6, .Ltrunc_underflow

        /* Check for overflow. */
        movi a4, 255
        bge a6, a4, .Ltrunc_overflow

        /* Shift a5/xl << 3 into a5/a4. */
        ssai (32 - 3)
        src a5, a5, xl
        sll a4, xl

.Ltrunc_addsign:
        /* Add the sign bit. */
        extui a6, xh, 31, 1
        slli a6, a6, 31
        or a2, a6, a5

        /* Round up if the leftover fraction is >= 1/2. */
        bgez a4, 1f
        addi a2, a2, 1
        /* Overflow to the exponent is OK. The answer will be correct. */

        /* Check if the leftover fraction is exactly 1/2. */
        slli a4, a4, 1
        beqz a4, .Ltrunc_exactlyhalf
1:      leaf_return

.Ltrunc_exactlyhalf:
        /* Round down to the nearest even value. */
        srli a2, a2, 1
        slli a2, a2, 1
        leaf_return

.Ltrunc_overflow:
        /* Check if exponent == 0x7ff. */
        movi a4, 0x7ff00000
        bnall xh, a4, 1f

        /* Check if mantissa is nonzero. */
        slli a5, xh, 12
        or a5, a5, xl
        beqz a5, 1f

        /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
        srli a4, a4, 1
1:      slli a4, a4, 4 /* 0xff000000 or 0xff800000 */

        /* Add the sign bit. */
        extui a6, xh, 31, 1
        ssai 1
        src a2, a6, a4
        leaf_return

.Ltrunc_underflow:
        /* Find shift count for a subnormal. Flush to zero if >= 32. */
        extui a6, xh, 20, 11
        movi a5, 0x3ff - 0x7f
        sub a6, a5, a6
        addi a6, a6, 1
        bgeui a6, 32, 1f

        /* Replace the exponent with an explicit "1.0". */
        slli a5, a5, 13 /* 0x700000 */
        or a5, a5, xh
        slli a5, a5, 11
        srli a5, a5, 11

        /* Shift the mantissa left by 3 bits (into a5/a4). */
        ssai (32 - 3)
        src a5, a5, xl
        sll a4, xl

        /* Shift right by a6. */
        ssr a6
        sll a7, a4
        src a4, a5, a4
        srl a5, a5
        beqz a7, .Ltrunc_addsign
        or a4, a4, a6 /* any positive, nonzero value will work */
        j .Ltrunc_addsign

        /* Return +/- zero. */
1:      extui a2, xh, 31, 1
        slli a2, a2, 31
        leaf_return

#endif /* L_truncdfsf2 */

#ifdef L_extendsfdf2
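
        /* Extend a single to a double.  The conversion is always exact:
           the exponent is rebiased by 0x3ff - 0x7f, single-precision
           subnormals are normalized with do_nsau, infinities are passed
           through, and any NaN input is converted to a quiet NaN with a
           zero payload (the original payload bits are not preserved). */
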
        .align 4
        .global __extendsfdf2
        .type __extendsfdf2, @function
__extendsfdf2:
        leaf_entry sp, 16

        /* Save the sign bit and then shift it off. */
        extui a5, a2, 31, 1
        slli a5, a5, 31
        slli a4, a2, 1

        /* Extract and check the exponent. */
        extui a6, a2, 23, 8
        beqz a6, .Lextend_expzero
        addi a6, a6, 1
        beqi a6, 256, .Lextend_nan_or_inf

        /* Shift >> 3 into a4/xl. */
        srli a4, a4, 4
        slli xl, a2, (32 - 3)

        /* Adjust the exponent bias. */
        movi a6, (0x3ff - 0x7f) << 20
        add a4, a4, a6

        /* Add the sign bit. */
        or xh, a4, a5
        leaf_return

.Lextend_nan_or_inf:
        movi a4, 0x7ff00000

        /* Check for NaN. */
        slli a7, a2, 9
        beqz a7, 1f

        slli a6, a6, 11 /* 0x80000 */
        or a4, a4, a6

        /* Add the sign and return. */
1:      or xh, a4, a5
        movi xl, 0
        leaf_return

.Lextend_expzero:
        beqz a4, 1b

        /* Normalize it to have 8 zero bits before the first 1 bit. */
        do_nsau a7, a4, a2, a3
        addi a7, a7, -8
        ssl a7
        sll a4, a4

        /* Shift >> 3 into a4/xl. */
        slli xl, a4, (32 - 3)
        srli a4, a4, 3

        /* Set the exponent. */
        movi a6, 0x3fe - 0x7f
        sub a6, a6, a7
        slli a6, a6, 20
        add a4, a4, a6

        /* Add the sign and return. */
        or xh, a4, a5
        leaf_return

#endif /* L_extendsfdf2 */

#if XCHAL_HAVE_DFP_SQRT
#ifdef L_sqrt
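
        /* Square root using the DFP accelerator.  sqrt0.d supplies the
           initial approximation for the iteration; the maddn.d/const.d
           steps refine it in Newton-Raphson fashion, and the
           nexp01.d/mksadj.d/addexp.d/divn.d steps presumably take care
           of the exponent, the sign and the IEEE special cases before
           producing the final result. */
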
        .text
        .align 4
        .global __ieee754_sqrt
        .type __ieee754_sqrt, @function
__ieee754_sqrt:
        leaf_entry sp, 16
        wfrd f1, xh, xl
        sqrt0.d f2, f1
        const.d f4, 0
        maddn.d f4, f2, f2
        nexp01.d f3, f1
        const.d f0, 3
        addexp.d f3, f0
        maddn.d f0, f4, f3
        nexp01.d f4, f1
        maddn.d f2, f0, f2
        const.d f5, 0
        maddn.d f5, f2, f3
        const.d f0, 3
        maddn.d f0, f5, f2
        neg.d f6, f4
        maddn.d f2, f0, f2
        const.d f0, 0
        const.d f5, 0
        const.d f7, 0
        maddn.d f0, f6, f2
        maddn.d f5, f2, f3
        const.d f3, 3
        maddn.d f7, f3, f2
        maddn.d f4, f0, f0
        maddn.d f3, f5, f2
        neg.d f2, f7
        maddn.d f0, f4, f2
        maddn.d f7, f3, f7
        mksadj.d f2, f1
        nexp01.d f1, f1
        maddn.d f1, f0, f0
        neg.d f3, f7
        addexpm.d f0, f2
        addexp.d f3, f2
        divn.d f0, f1, f3
        rfr xl, f0
        rfrd xh, f0
        leaf_return

#endif /* L_sqrt */
#endif /* XCHAL_HAVE_DFP_SQRT */

#if XCHAL_HAVE_DFP_RECIP
#ifdef L_recipdf2

        /* Reciprocal */
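
        /* recip0.d supplies an initial approximation x0 of 1/d, which the
           multiply/multiply-subtract steps below refine with essentially
           Newton-Raphson iterations of the form x = x * (2 - d * x); the
           final maddn.d applies a last correction x + x * (1 - d * x) to
           sharpen the result. */
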
        .align 4
        .global __recipdf2
        .type __recipdf2, @function
__recipdf2:
        leaf_entry sp, 16
        wfrd f1, xh, xl
        recip0.d f0, f1
        const.d f2, 2
        msub.d f2, f1, f0
        mul.d f3, f1, f0
        const.d f4, 2
        mul.d f5, f0, f2
        msub.d f4, f3, f2
        const.d f2, 1
        mul.d f0, f5, f4
        msub.d f2, f1, f0
        maddn.d f0, f0, f2
        rfr xl, f0
        rfrd xh, f0
        leaf_return

#endif /* L_recipdf2 */
#endif /* XCHAL_HAVE_DFP_RECIP */

#if XCHAL_HAVE_DFP_RSQRT
#ifdef L_rsqrtdf2

        /* Reciprocal square root */
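
        /* rsqrt0.d supplies an initial approximation y0 of 1/sqrt(d);
           each of the three blocks below then performs a Newton-Raphson
           style update y = y + c * y * (1 - d * y^2), where the constant
           loaded by "const.d fr, 3" is presumably 1/2. */
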
        .align 4
        .global __rsqrtdf2
        .type __rsqrtdf2, @function
__rsqrtdf2:
        leaf_entry sp, 16
        wfrd f1, xh, xl
        rsqrt0.d f0, f1
        mul.d f2, f1, f0
        const.d f3, 3
        mul.d f4, f3, f0
        const.d f5, 1
        msub.d f5, f2, f0
        maddn.d f0, f4, f5
        const.d f2, 1
        mul.d f4, f1, f0
        mul.d f5, f3, f0
        msub.d f2, f4, f0
        maddn.d f0, f5, f2
        const.d f2, 1
        mul.d f1, f1, f0
        mul.d f3, f3, f0
        msub.d f2, f1, f0
        maddn.d f0, f3, f2
        rfr xl, f0
        rfrd xh, f0
        leaf_return

#endif /* L_rsqrtdf2 */
#endif /* XCHAL_HAVE_DFP_RSQRT */