sha2.lua 150 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956
  1. --------------------------------------------------------------------------------------------------------------------------
  2. -- sha2.lua
  3. --------------------------------------------------------------------------------------------------------------------------
  4. -- VERSION: 8 (2019-09-03)
  5. -- AUTHOR: Egor Skriptunoff
  6. -- LICENSE: MIT (the same license as Lua itself)
  7. --
  8. --
  9. -- DESCRIPTION:
  10. -- This module contains functions to calculate SHA digest:
  11. -- MD5, SHA-1,
  12. -- SHA-224, SHA-256, SHA-512/224, SHA-512/256, SHA-384, SHA-512,
  13. -- SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256,
  14. -- HMAC
  15. -- Written in pure Lua.
  16. -- Compatible with:
  17. -- Lua 5.1, Lua 5.2, Lua 5.3, Lua 5.4, Fengari, LuaJIT 2.0/2.1 (any CPU endianness).
  18. -- Main feature of this module: it was heavily optimized for speed.
  19. -- For every Lua version the module contains particular implementation branch to get benefits from version-specific features.
  20. -- - branch for Lua 5.1 (emulating bitwise operators using look-up table)
  21. -- - branch for Lua 5.2 (using bit32/bit library), suitable for both Lua 5.2 with native "bit32" and Lua 5.1 with external library "bit"
  22. -- - branch for Lua 5.3/5.4 (using native 64-bit bitwise operators)
  23. -- - branch for Lua 5.3/5.4 (using native 32-bit bitwise operators) for Lua built with LUA_INT_TYPE=LUA_INT_INT
  24. -- - branch for LuaJIT without FFI library (useful in a sandboxed environment)
  25. -- - branch for LuaJIT x86 without FFI library (LuaJIT x86 has oddity because of lack of CPU registers)
  26. -- - branch for LuaJIT 2.0 with FFI library (bit.* functions work only with Lua numbers)
  27. -- - branch for LuaJIT 2.1 with FFI library (bit.* functions can work with "int64_t" arguments)
  28. --
  29. --
  30. -- USAGE:
  31. -- Input data should be provided as a binary string: either as a whole string or as a sequence of substrings (chunk-by-chunk loading, total length < 9*10^15 bytes).
  32. -- Result (SHA digest) is returned in hexadecimal representation as a string of lowercase hex digits.
  33. -- Simplest usage example:
  34. -- local sha = require("sha2")
  35. -- local your_hash = sha.sha256("your string")
  36. -- See file "sha2_test.lua" for more examples.
  37. --
  38. --
  39. -- CHANGELOG:
  40. -- version date description
  41. -- ------- ---------- -----------
  42. -- 8 2019-09-03 SHA3 functions added
  43. -- 7 2019-03-17 Added functions to convert to/from base64
  44. -- 6 2018-11-12 HMAC added
  45. -- 5 2018-11-10 SHA-1 added
  46. -- 4 2018-11-03 MD5 added
  47. -- 3 2018-11-02 Bug fixed: incorrect hashing of long (2 GByte) data streams on Lua 5.3/5.4 built with "int32" integers
  48. -- 2 2018-10-07 Decreased module loading time in Lua 5.1 implementation branch (thanks to Peter Melnichenko for giving a hint)
  49. -- 1 2018-10-06 First release (only SHA-2 functions)
  50. -----------------------------------------------------------------------------
  -- Set to true to view some messages about your system's abilities
  -- and the implementation branch chosen for your system.
  local print_debug_messages = false

  -- Standard library functions cached in locals (the module refers to them by these short names).
  -- "table.unpack or unpack" picks whichever of the two this Lua version provides.
  local unpack, table_concat, byte, char, string_rep, sub, gsub, gmatch, string_format, floor, ceil, math_min, math_max, tonumber, type =
    table.unpack or unpack, table.concat, string.byte, string.char, string.rep, string.sub, string.gsub, string.gmatch, string.format, math.floor, math.ceil, math.min, math.max, tonumber, type
  54. --------------------------------------------------------------------------------
  55. -- EXAMINING YOUR SYSTEM
  56. --------------------------------------------------------------------------------
  -- Measures how many bits of precision the numeric type of "one" provides.
  --    one:  must be either float 1.0 or integer 1
  -- Returns:  bits_precision, is_integer
  -- This function works correctly with all floating point datatypes (including non-IEEE-754)
  local function get_precision(one)
    local bits = 0
    local all_ones = one      -- after k iterations holds 2^(k+1)-1 for as long as that value is exact
    local alternating = one   -- second probe: 2, 5, 10, 21, ... (alternating bit pattern)
    while true do
      local prev_all_ones = all_ones
      -- "bits % 2" intentionally uses the counter value from BEFORE the increment below
      alternating = alternating + alternating + bits % 2
      all_ones = all_ones + all_ones + 1
      bits = bits + 1
      if bits > 256
        or all_ones - (all_ones - 1) ~= 1
        or alternating - (alternating - 1) ~= 1
        or all_ones == alternating
      then
        -- a probe can no longer be told apart from its predecessor (or the two probes
        -- collapsed into the same value): precision was lost to rounding
        return bits, false -- floating point datatype
      elseif all_ones == prev_all_ones then
        -- the probe stopped changing while "x - (x - 1) == 1" still holds
        return bits, true -- integer datatype
      end
    end
  end
  -- Make sure Lua has "double" numbers:
  -- 2/3 must be a fraction strictly between 3/5 and 3/4, and floats must carry at least 53 bits.
  local x = 2/3
  local Lua_has_double = x * 5 > 3 and x * 4 < 3 and get_precision(1.0) >= 53
  assert(Lua_has_double, "at least 53-bit floating point numbers are required")

  -- Q:
  --    SHA2 was designed for FPU-less machines.
  --    So, why are floating point numbers needed for this module?
  -- A:
  --    53-bit "double" numbers are useful to calculate "magic numbers" used in SHA.
  --    I prefer to write 50 LOC "magic numbers calculator" instead of storing more than 200 constants explicitly in this source file.

  -- Probe the integer subtype (if any): get_precision(1) reports its width and whether it is an integer type.
  local int_prec, Lua_has_integers = get_precision(1)
  local Lua_has_int64 = Lua_has_integers and int_prec == 64
  local Lua_has_int32 = Lua_has_integers and int_prec == 32
  assert(Lua_has_int64 or Lua_has_int32 or not Lua_has_integers, "Lua integers must be either 32-bit or 64-bit")

  -- Q:
  --    Does it mean that almost all non-standard configurations are not supported?
  -- A:
  --    Yes.  Sorry, too many problems to support all possible Lua numbers configurations.
  --       Lua 5.1/5.2    with "int32"               will not work.
  --       Lua 5.1/5.2    with "int64"               will not work.
  --       Lua 5.1/5.2    with "int128"              will not work.
  --       Lua 5.1/5.2    with "float"               will not work.
  --       Lua 5.1/5.2    with "double"              is OK.          (default config for Lua 5.1, Lua 5.2, LuaJIT)
  --       Lua 5.3/5.4    with "int32"  + "float"    will not work.
  --       Lua 5.3/5.4    with "int64"  + "float"    will not work.
  --       Lua 5.3/5.4    with "int128" + "float"    will not work.
  --       Lua 5.3/5.4    with "int32"  + "double"   is OK.          (config used by Fengari)
  --       Lua 5.3/5.4    with "int64"  + "double"   is OK.          (default config for Lua 5.3, Lua 5.4)
  --       Lua 5.3/5.4    with "int128" + "double"   will not work.
  --    Using a floating point type more precise than "double" in place of "double" is OK (non-IEEE-754 floating point implementations are allowed).
  --    Using "int128" instead of "int64" is not OK: "int128" would require a different implementation branch for optimized SHA512.
  -- Check for LuaJIT and 32-bit bitwise libraries
  -- NOTE(review): the table-constructor expression is an implementation-behaviour probe; it is presumably
  -- true only on LuaJIT-family VMs -- confirm before relying on it for other interpreters.
  local is_LuaJIT = ({false, [1] = true})[1] and (type(jit) ~= "table" or jit.version_num >= 20000) -- LuaJIT 1.x.x is treated as vanilla Lua 5.1
  local is_LuaJIT_21 -- LuaJIT 2.1+
  local LuaJIT_arch
  local ffi          -- LuaJIT FFI library (as a table)
  local b            -- 32-bit bitwise library (as a table)
  local library_name
  if is_LuaJIT then
    -- Assuming "bit" library is always available on LuaJIT
    b = require"bit"
    library_name = "bit"
    -- "ffi" is intentionally disabled on some systems for safety reason,
    -- so a failed require is tolerated and simply leaves "ffi" nil
    local LuaJIT_has_FFI, result = pcall(require, "ffi")
    if LuaJIT_has_FFI then
      ffi = result
    end
    -- loadstring only compiles the chunk: the result is non-nil when the parser accepts the "0b0" literal
    is_LuaJIT_21 = not not loadstring"b=0b0"
    LuaJIT_arch = type(jit) == "table" and jit.arch or ffi and ffi.arch or nil
  else
    -- For vanilla Lua, "bit"/"bit32" libraries are searched in global namespace only.
    -- No attempt is made to load a library if it's not loaded yet.
    -- On Lua 5.2 "bit32" is tried first, otherwise "bit" is tried first.
    for _, libname in ipairs(_VERSION == "Lua 5.2" and {"bit32", "bit"} or {"bit", "bit32"}) do
      if type(_G[libname]) == "table" and _G[libname].bxor then
        b = _G[libname]
        library_name = libname
        break
      end
    end
  end
  --------------------------------------------------------------------------------
  -- You can disable here some of your system's abilities (for testing purposes)
  --------------------------------------------------------------------------------
  -- is_LuaJIT = nil
  -- is_LuaJIT_21 = nil
  -- ffi = nil
  -- Lua_has_int32 = nil
  -- Lua_has_int64 = nil
  -- b, library_name = nil
  --------------------------------------------------------------------------------

  if print_debug_messages then
    -- Printing list of abilities of your system
    print("Abilities:")
    print(" Lua version: "..(is_LuaJIT and "LuaJIT "..(is_LuaJIT_21 and "2.1 " or "2.0 ")..(LuaJIT_arch or "")..(ffi and " with FFI" or " without FFI") or _VERSION))
    print(" Integer bitwise operators: "..(Lua_has_int64 and "int64" or Lua_has_int32 and "int32" or "no"))
    print(" 32-bit bitwise library: "..(library_name or "not found"))
  end

  -- Selecting the most suitable implementation for given set of abilities.
  -- The checks are ordered by priority: the first ability that is present wins.
  --    method:  human-readable description (only used for the debug message below)
  --    branch:  short tag tested by the rest of the module to pick an implementation
  local method, branch
  if is_LuaJIT and ffi then
    method = "Using 'ffi' library of LuaJIT"
    branch = "FFI"
  elseif is_LuaJIT then
    method = "Using special code for FFI-less LuaJIT"
    branch = "LJ"
  elseif Lua_has_int64 then
    method = "Using native int64 bitwise operators"
    branch = "INT64"
  elseif Lua_has_int32 then
    method = "Using native int32 bitwise operators"
    branch = "INT32"
  elseif library_name then -- when bitwise library is available (Lua 5.2 with native library "bit32" or Lua 5.1 with external library "bit")
    method = "Using '"..library_name.."' library"
    branch = "LIB32"
  else
    method = "Emulating bitwise operators using look-up table"
    branch = "EMUL"
  end

  if print_debug_messages then
    -- Printing the implementation selected to be used on your system
    print("Implementation selected:")
    print(" "..method)
  end
  --------------------------------------------------------------------------------
  -- BASIC 32-BIT BITWISE FUNCTIONS
  --------------------------------------------------------------------------------
  local AND, OR, XOR, SHL, SHR, ROL, ROR, NOT, NORM, HEX, XOR_BYTE
  -- Only low 32 bits of function arguments matter, high bits are ignored
  -- The result of all functions (except HEX) is an integer inside "correct range":
  --    for "bit" library:    (-2^31)..(2^31-1)
  --    for "bit32" library:  0..(2^32-1)
  -- Branches other than FFI/LJ/LIB32/EMUL leave these names unassigned here.
  if branch == "FFI" or branch == "LJ" or branch == "LIB32" then
    -- Your system has 32-bit bitwise library (either "bit" or "bit32")
    AND = b.band               -- 2 arguments
    OR = b.bor                 -- 2 arguments
    XOR = b.bxor               -- 2..5 arguments
    SHL = b.lshift             -- second argument is integer 0..31
    SHR = b.rshift             -- second argument is integer 0..31
    ROL = b.rol or b.lrotate   -- second argument is integer 0..31
    ROR = b.ror or b.rrotate   -- second argument is integer 0..31
    NOT = b.bnot               -- only for LuaJIT
    NORM = b.tobit             -- only for LuaJIT
    HEX = b.tohex              -- returns string of 8 lowercase hexadecimal digits
    assert(AND and OR and XOR and SHL and SHR and ROL and ROR and NOT, "Library '"..library_name.."' is incomplete")
    XOR_BYTE = XOR             -- XOR of two bytes (0..255)
  elseif branch == "EMUL" then
    -- Emulating 32-bit bitwise operations using 53-bit floating point arithmetic

    -- Shift left: multiply by 2^n, keep the low 32 bits
    function SHL(x, n)
      return (x * 2^n) % 2^32
    end

    -- Logical shift right: divide by 2^n, drop the fractional part
    function SHR(x, n)
      -- return (x % 2^32 - x % 2^n) / 2^n
      x = x % 2^32 / 2^n
      return x - x % 1
    end

    -- Rotate left: the part that overflows above bit 31 is added back at the bottom
    function ROL(x, n)
      x = x % 2^32 * 2^n
      local r = x % 2^32
      return r + (x - r) / 2^32
    end

    -- Rotate right: the fractional part that fell below bit 0 is moved to the top
    function ROR(x, n)
      x = x % 2^32 / 2^n
      local r = x % 1
      return r * 2^32 + (x - r)
    end

    -- look-up table (256*256 entries); the entry at index (p + q*256) is the bitwise AND of bytes p and q
    local AND_of_two_bytes = {[0] = 0}
    -- The table is grown from itself: every entry already present (read at index "x") is doubled
    -- and written to the four indices obtained by appending one low bit to each operand;
    -- only the combination where both appended bits are 1 gets "+ 1".
    local idx = 0
    for y = 0, 127 * 256, 256 do
      for x = y, y + 127 do
        x = AND_of_two_bytes[x] * 2
        AND_of_two_bytes[idx] = x
        AND_of_two_bytes[idx + 1] = x
        AND_of_two_bytes[idx + 256] = x
        AND_of_two_bytes[idx + 257] = x + 1
        idx = idx + 2
      end
      idx = idx + 256
    end

    -- Computes AND / OR / XOR of two 32-bit values, one byte pair per table look-up.
    local function and_or_xor(x, y, operation)
      -- operation: nil = AND, 1 = OR, 2 = XOR
      local x0 = x % 2^32
      local y0 = y % 2^32
      -- byte 0
      local rx = x0 % 256
      local ry = y0 % 256
      local res = AND_of_two_bytes[rx + ry * 256]
      -- byte 1: "x" stays pre-multiplied by 256 while "y" is shifted down,
      -- so "rx + ry" directly forms a (byte*256 + byte) table index
      x = x0 - rx
      y = (y0 - ry) / 256
      rx = x % 65536
      ry = y % 256
      res = res + AND_of_two_bytes[rx + ry] * 256
      -- byte 2
      x = (x - rx) / 256
      y = (y - ry) / 256
      rx = x % 65536 + y % 256
      res = res + AND_of_two_bytes[rx] * 65536
      -- byte 3: what remains of x and y after removing byte 2
      res = res + AND_of_two_bytes[(x + y - rx) / 256] * 16777216
      if operation then
        -- x + y - (x AND y) is OR;  x + y - 2*(x AND y) is XOR
        res = x0 + y0 - operation * res
      end
      return res
    end

    function AND(x, y)
      return and_or_xor(x, y)
    end

    function OR(x, y)
      return and_or_xor(x, y, 1)
    end

    function XOR(x, y, z, t, u) -- 2..5 arguments
      -- extra arguments are folded from the right: u into t, t into z, z into y
      if z then
        if t then
          if u then
            t = and_or_xor(t, u, 2)
          end
          z = and_or_xor(z, t, 2)
        end
        y = and_or_xor(y, z, 2)
      end
      return and_or_xor(x, y, 2)
    end

    -- XOR of two bytes (0..255) using a single table look-up
    function XOR_BYTE(x, y)
      return x + y - 2 * AND_of_two_bytes[x + y * 256]
    end
  end
  -- Keep the HEX already assigned (if any); otherwise fall back to a string.format-based version.
  -- "x % 4294967296" maps negative values into 0..2^32-1 before formatting.
  HEX = HEX or
    function (x) -- returns string of 8 lowercase hexadecimal digits
      return string_format("%08x", x % 4294967296)
    end
  -- XORs x with the 32-bit constant 0xA5A5A5A5 and reduces the result to the range 0..2^32-1
  -- (the modulo makes the result non-negative even when XOR returns a signed value).
  local function XOR32A5(x)
    return XOR(x, 0xA5A5A5A5) % 4294967296
  end
  -- Returns a new Lua table holding 25 zeros (indices 1..25).
  -- A fresh table is created on every call, so callers never share state.
  local function create_array_of_lanes()
    return {
      0, 0, 0, 0, 0,
      0, 0, 0, 0, 0,
      0, 0, 0, 0, 0,
      0, 0, 0, 0, 0,
      0, 0, 0, 0, 0,
    }
  end
  --------------------------------------------------------------------------------
  -- CREATING OPTIMIZED INNER LOOP
  --------------------------------------------------------------------------------

  -- Inner loop functions (declared here, assigned later by the selected implementation branch)
  local sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed

  -- Arrays of SHA2 "magic numbers" (in "INT64" and "FFI" branches "*_lo" arrays contain 64-bit values)
  local sha2_K_lo, sha2_K_hi, sha2_H_lo, sha2_H_hi, sha3_RC_lo, sha3_RC_hi = {}, {}, {}, {}, {}, {}
  -- Tables keyed by a number (224/256 and 384/512); the 256 and 512 entries alias the arrays above
  local sha2_H_ext256 = {[224] = {}, [256] = sha2_H_hi}
  local sha2_H_ext512_lo, sha2_H_ext512_hi = {[384] = {}, [512] = sha2_H_lo}, {[384] = {}, [512] = sha2_H_hi}
  local md5_K, md5_sha1_H = {}, {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0}
  local md5_next_shift = {0, 0, 0, 0, 0, 0, 0, 0, 28, 25, 26, 27, 0, 0, 10, 9, 11, 12, 0, 15, 16, 17, 18, 0, 20, 22, 23, 21}
  local HEX64, XOR64A5, lanes_index_base -- defined only for branches that internally use 64-bit integers: "INT64" and "FFI"
  local common_W = {} -- temporary table shared between all calculations (to avoid creating new temporary table every time)
  local K_lo_modulo, hi_factor, hi_factor_keccak = 4294967296, 0, 0
  -- Builds a table of format strings, one for each of the sizes 1, 9, 13, 17, 18, 21.
  --    elem:  string to be repeated
  -- Returns:  table where result[size] == "<" followed by "elem" repeated "size" times
  -- NOTE(review): the "<" prefix looks like the little-endian marker of string.pack/string.unpack
  -- formats -- confirm against the call sites.
  local function build_keccak_format(elem)
    local keccak_format = {}
    for _, size in ipairs{1, 9, 13, 17, 18, 21} do
      keccak_format[size] = "<"..string_rep(elem, size)
    end
    return keccak_format
  end
  303. if branch == "FFI" then
  304. -- SHA256 implementation for "LuaJIT with FFI" branch
  305. local common_W_FFI_int32 = ffi.new"int32_t[80]" -- 64 is enough for SHA256, but 80 is needed for SHA-1
  -- Feeds whole 64-byte blocks of "str" into a SHA-256 state ("LuaJIT with FFI" branch).
  --    H:     array H[1..8] of 32-bit state words; updated in place
  --    str:   string holding the data
  --    offs:  number of bytes to skip at the start of "str" (offs >= 0)
  --    size:  number of bytes to process (size >= 0, multiple of 64)
  -- Uses the shared FFI buffer "common_W_FFI_int32" as the message schedule W[0..63]
  -- and "sha2_K_hi" (indexed 1..64) as the round constants.
  function sha256_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W, K = common_W_FFI_int32, sha2_K_hi
    for pos = offs, offs + size - 1, 64 do
      -- Load 16 big-endian 32-bit words (the local copy of "pos" is advanced inside the block only)
      for j = 0, 15 do
        pos = pos + 4
        local a, b, c, d = byte(str, pos - 3, pos) -- slow, but doesn't depend on endianness
        W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
      end
      -- Extend the schedule to 64 words
      for j = 16, 63 do
        local a, b = W[j-15], W[j-2]
        W[j] = NORM( XOR(ROR(a, 7), ROL(a, 14), SHR(a, 3)) + XOR(ROL(b, 15), ROL(b, 13), SHR(b, 10)) + W[j-7] + W[j-16] )
      end
      local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
      -- 64 rounds, eight per iteration; the eight copies differ only in the W/K indices
      for j = 0, 63, 8 do -- Thanks to Peter Cawley for this workaround (unroll the loop to avoid "PHI shuffling too complex" due to PHIs overlap)
        local z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j] + K[j+1] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+1] + K[j+2] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+2] + K[j+3] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+3] + K[j+4] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+4] + K[j+5] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+5] + K[j+6] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+6] + K[j+7] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
        z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+7] + K[j+8] + h) )
        h, g, f, e = g, f, e, NORM( d + z )
        d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
      end
      -- Add the working variables back into the state (NORM brings each sum back to 32-bit range)
      H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])
      H[5], H[6], H[7], H[8] = NORM(e + H[5]), NORM(f + H[6]), NORM(g + H[7]), NORM(h + H[8])
    end
  end
  350. local common_W_FFI_int64 = ffi.new"int64_t[80]"
  351. local int64 = ffi.typeof"int64_t"
  352. local int32 = ffi.typeof"int32_t"
  353. local uint32 = ffi.typeof"uint32_t"
  354. hi_factor = int64(2^32)
  355. if is_LuaJIT_21 then -- LuaJIT 2.1 supports bitwise 64-bit operations
  356. local AND64, OR64, XOR64, NOT64, SHL64, SHR64, ROL64, ROR64 -- introducing synonyms for better code readability
  357. = AND, OR, XOR, NOT, SHL, SHR, ROL, ROR
  358. HEX64 = HEX
  359. -- SHA3 implementation for "LuaJIT 2.1 + FFI" branch
  360. local lanes_arr64 = ffi.typeof"int64_t[30]" -- 25 + 5 for temporary usage
  361. -- lanes array is indexed from 0
  362. lanes_index_base = 0
  363. hi_factor_keccak = int64(2^32)
  -- "LuaJIT 2.1 + FFI" replacement for create_array_of_lanes:
  -- returns a new cdata array created from the "lanes_arr64" ctype
  -- (declared as int64_t[30]: 25 lanes + 5 for temporary usage), indexed from 0.
  function create_array_of_lanes()
    return lanes_arr64()
  end
  -- Absorbs whole blocks of "str" into a Keccak state ("LuaJIT 2.1 + FFI" branch).
  --    lanes:                array of 64-bit lanes indexed from 0; lanes[0..24] is the state,
  --                          lanes[25..29] is scratch space for the column parities; updated in place
  --    _:                    unused in this branch
  --    str:                  string holding the data
  --    offs:                 number of bytes to skip at the start of "str" (offs >= 0)
  --    size:                 number of bytes to process (size >= 0, multiple of block_size_in_bytes)
  --    block_size_in_bytes:  positive multiple of 8
  -- Round constants are read from "sha3_RC_lo" (indexed 1..24).
  function keccak_feed(lanes, _, str, offs, size, block_size_in_bytes)
    -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
    local RC = sha3_RC_lo
    local qwords_qty = SHR(block_size_in_bytes, 3)
    for pos = offs, offs + size - 1, block_size_in_bytes do
      -- XOR the block into the state, 8 bytes per lane, little-endian ("h" is the first byte, "a" the last).
      -- The low word goes through uint32(int32(...)) so that it is not sign-extended into the high half.
      for j = 0, qwords_qty - 1 do
        pos = pos + 8
        local h, g, f, e, d, c, b, a = byte(str, pos - 7, pos) -- slow, but doesn't depend on endianness
        lanes[j] = XOR64(lanes[j], OR64(OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d) * int64(2^32), uint32(int32(OR(SHL(e, 24), SHL(f, 16), SHL(g, 8), h)))))
      end
      for round_idx = 1, 24 do
        -- Column parities go into lanes[25..29]
        for j = 0, 4 do
          lanes[25 + j] = XOR64(lanes[j], lanes[j+5], lanes[j+10], lanes[j+15], lanes[j+20])
        end
        -- For each column: mix in D (built from two neighbouring parities), rotate, and move lanes.
        -- Each multiple assignment evaluates all its right-hand sides before storing anything.
        local D = XOR64(lanes[25], ROL64(lanes[27], 1))
        lanes[1], lanes[6], lanes[11], lanes[16] = ROL64(XOR64(D, lanes[6]), 44), ROL64(XOR64(D, lanes[16]), 45), ROL64(XOR64(D, lanes[1]), 1), ROL64(XOR64(D, lanes[11]), 10)
        lanes[21] = ROL64(XOR64(D, lanes[21]), 2)
        D = XOR64(lanes[26], ROL64(lanes[28], 1))
        lanes[2], lanes[7], lanes[12], lanes[22] = ROL64(XOR64(D, lanes[12]), 43), ROL64(XOR64(D, lanes[22]), 61), ROL64(XOR64(D, lanes[7]), 6), ROL64(XOR64(D, lanes[2]), 62)
        lanes[17] = ROL64(XOR64(D, lanes[17]), 15)
        D = XOR64(lanes[27], ROL64(lanes[29], 1))
        lanes[3], lanes[8], lanes[18], lanes[23] = ROL64(XOR64(D, lanes[18]), 21), ROL64(XOR64(D, lanes[3]), 28), ROL64(XOR64(D, lanes[23]), 56), ROL64(XOR64(D, lanes[8]), 55)
        lanes[13] = ROL64(XOR64(D, lanes[13]), 25)
        D = XOR64(lanes[28], ROL64(lanes[25], 1))
        lanes[4], lanes[14], lanes[19], lanes[24] = ROL64(XOR64(D, lanes[24]), 14), ROL64(XOR64(D, lanes[19]), 8), ROL64(XOR64(D, lanes[4]), 27), ROL64(XOR64(D, lanes[14]), 39)
        lanes[9] = ROL64(XOR64(D, lanes[9]), 20)
        D = XOR64(lanes[29], ROL64(lanes[26], 1))
        lanes[5], lanes[10], lanes[15], lanes[20] = ROL64(XOR64(D, lanes[10]), 3), ROL64(XOR64(D, lanes[20]), 18), ROL64(XOR64(D, lanes[5]), 36), ROL64(XOR64(D, lanes[15]), 41)
        lanes[0] = XOR64(D, lanes[0])
        -- Non-linear step, one group of five lanes per statement (x XOR (NOT y AND z));
        -- the round constant is XORed into lanes[0] only
        lanes[0], lanes[1], lanes[2], lanes[3], lanes[4] = XOR64(lanes[0], AND64(NOT64(lanes[1]), lanes[2]), RC[round_idx]), XOR64(lanes[1], AND64(NOT64(lanes[2]), lanes[3])), XOR64(lanes[2], AND64(NOT64(lanes[3]), lanes[4])), XOR64(lanes[3], AND64(NOT64(lanes[4]), lanes[0])), XOR64(lanes[4], AND64(NOT64(lanes[0]), lanes[1]))
        lanes[5], lanes[6], lanes[7], lanes[8], lanes[9] = XOR64(lanes[8], AND64(NOT64(lanes[9]), lanes[5])), XOR64(lanes[9], AND64(NOT64(lanes[5]), lanes[6])), XOR64(lanes[5], AND64(NOT64(lanes[6]), lanes[7])), XOR64(lanes[6], AND64(NOT64(lanes[7]), lanes[8])), XOR64(lanes[7], AND64(NOT64(lanes[8]), lanes[9]))
        lanes[10], lanes[11], lanes[12], lanes[13], lanes[14] = XOR64(lanes[11], AND64(NOT64(lanes[12]), lanes[13])), XOR64(lanes[12], AND64(NOT64(lanes[13]), lanes[14])), XOR64(lanes[13], AND64(NOT64(lanes[14]), lanes[10])), XOR64(lanes[14], AND64(NOT64(lanes[10]), lanes[11])), XOR64(lanes[10], AND64(NOT64(lanes[11]), lanes[12]))
        lanes[15], lanes[16], lanes[17], lanes[18], lanes[19] = XOR64(lanes[19], AND64(NOT64(lanes[15]), lanes[16])), XOR64(lanes[15], AND64(NOT64(lanes[16]), lanes[17])), XOR64(lanes[16], AND64(NOT64(lanes[17]), lanes[18])), XOR64(lanes[17], AND64(NOT64(lanes[18]), lanes[19])), XOR64(lanes[18], AND64(NOT64(lanes[19]), lanes[15]))
        lanes[20], lanes[21], lanes[22], lanes[23], lanes[24] = XOR64(lanes[22], AND64(NOT64(lanes[23]), lanes[24])), XOR64(lanes[23], AND64(NOT64(lanes[24]), lanes[20])), XOR64(lanes[24], AND64(NOT64(lanes[20]), lanes[21])), XOR64(lanes[20], AND64(NOT64(lanes[21]), lanes[22])), XOR64(lanes[21], AND64(NOT64(lanes[22]), lanes[23]))
      end
    end
  end
  404. -- SHA512 implementation for "LuaJIT 2.1 + FFI" branch
  -- 64-bit pattern 0xA5A5A5A5A5A5A5A5, assembled arithmetically:
  -- the literal 0xA5A5A5A5A5A5A5A5LL would raise a syntax error on other Lua versions
  local A5_long = 0xA5A5A5A5 * int64(2^32 + 1)

  -- Returns its 64-bit argument XORed with the A5 pattern
  function XOR64A5(long)
     local flipped = XOR64(long, A5_long)
     return flipped
  end
  function sha512_feed_128(H, _, str, offs, size)
     -- Processes bytes (offs+1)..(offs+size) of str in 128-byte blocks, updating the eight 64-bit values H[1..8] in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 128.
     -- The second parameter is unused in this branch.
     local W, K = common_W_FFI_int64, sha2_K_lo
     for pos = offs, offs + size - 1, 128 do
        -- load 16 big-endian 64-bit words
        for j = 0, 15 do
           pos = pos + 8
           local b1, b2, b3, b4, b5, b6, b7, b8 = byte(str, pos - 7, pos)   -- slow, but doesn't depend on endianness
           local hi32 = OR(SHL(b1, 24), SHL(b2, 16), SHL(b3, 8), b4)
           local lo32 = OR(SHL(b5, 24), SHL(b6, 16), SHL(b7, 8), b8)
           W[j] = OR64(hi32 * int64(2^32), uint32(int32(lo32)))
        end
        -- extend the schedule to 80 words
        for j = 16, 79 do
           local w15, w2 = W[j-15], W[j-2]
           local s0 = XOR64(ROR64(w15, 1), ROR64(w15, 8), SHR64(w15, 7))
           local s1 = XOR64(ROR64(w2, 19), ROL64(w2, 3), SHR64(w2, 6))
           W[j] = s0 + s1 + W[j-7] + W[j-16]
        end
        local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
        -- 80 rounds, written out eight per loop iteration
        for j = 0, 79, 8 do
           local t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+1] + W[j]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+2] + W[j+1]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+3] + W[j+2]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+4] + W[j+3]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+5] + W[j+4]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+6] + W[j+5]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+7] + W[j+6]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
           t1 = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+8] + W[j+7]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + t1
        end
        -- fold the working variables back into the state
        H[1], H[2], H[3], H[4] = a + H[1], b + H[2], c + H[3], d + H[4]
        H[5], H[6], H[7], H[8] = e + H[5], f + H[6], g + H[7], h + H[8]
     end
  end
  459. else -- LuaJIT 2.0 doesn't support 64-bit bitwise operations
  460. -- SHA512 implementation for "LuaJIT 2.0 + FFI" branch
  -- Union that overlays one int64 with a pair of int32 halves
  local union64 = ffi.typeof"union{int64_t i64; struct{int32_t lo, hi;} i32;}"
  do  -- make sure the struct is endianness-compatible
     local probe = union64(1)
     local halves_are_swapped = probe.i32.lo < probe.i32.hi
     if halves_are_swapped then
        -- the value 1 landed in the field called "hi", so declare the halves in the opposite order
        union64 = ffi.typeof"union{int64_t i64; struct{int32_t hi, lo;} i32;}"
     end
  end
  local unions64 = ffi.typeof("$[?]", union64)
  -- this array of three unions is used for fast splitting of int64 into int32_high and int32_low
  local U = unions64(3)
  470. -- "xorrific" 64-bit functions :-)
  471. -- the int64 input is split into two int32 halves, 32-bit bitwise operations are applied to the halves, and the result is converted back to int64
  472. -- these functions are needed because bit.* functions in LuaJIT 2.0 don't work with int64_t
  local function XORROR64_1(a)
     -- return XOR64(ROR64(a, 1), ROR64(a, 8), SHR64(a, 7))
     -- computed on the two 32-bit halves obtained through the scratch union U[0]
     U[0].i64 = a
     local lo, hi = U[0].i32.lo, U[0].i32.hi
     local ror1_lo, ror1_hi = OR(SHR(lo, 1), SHL(hi, 31)), OR(SHR(hi, 1), SHL(lo, 31))
     local ror8_lo, ror8_hi = OR(SHR(lo, 8), SHL(hi, 24)), OR(SHR(hi, 8), SHL(lo, 24))
     local shr7_lo, shr7_hi = OR(SHR(lo, 7), SHL(hi, 25)), SHR(hi, 7)   -- plain shift: nothing enters the high half
     local res_lo = XOR(ror1_lo, ror8_lo, shr7_lo)
     local res_hi = XOR(ror1_hi, ror8_hi, shr7_hi)
     return res_hi * int64(2^32) + uint32(int32(res_lo))
  end
  local function XORROR64_2(b)
     -- return XOR64(ROR64(b, 19), ROL64(b, 3), SHR64(b, 6))
     -- computed on the two 32-bit halves obtained through the scratch union U[0]
     U[0].i64 = b
     local lo, hi = U[0].i32.lo, U[0].i32.hi
     local ror19_lo, ror19_hi = OR(SHR(lo, 19), SHL(hi, 13)), OR(SHR(hi, 19), SHL(lo, 13))
     local rol3_lo, rol3_hi = OR(SHL(lo, 3), SHR(hi, 29)), OR(SHL(hi, 3), SHR(lo, 29))
     local shr6_lo, shr6_hi = OR(SHR(lo, 6), SHL(hi, 26)), SHR(hi, 6)   -- plain shift: nothing enters the high half
     local res_lo = XOR(ror19_lo, rol3_lo, shr6_lo)
     local res_hi = XOR(ror19_hi, rol3_hi, shr6_hi)
     return res_hi * int64(2^32) + uint32(int32(res_lo))
  end
  local function XORROR64_3(e)
     -- return XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23))
     -- computed on the two 32-bit halves obtained through the scratch union U[0]
     U[0].i64 = e
     local lo, hi = U[0].i32.lo, U[0].i32.hi
     local ror14_lo, ror14_hi = OR(SHR(lo, 14), SHL(hi, 18)), OR(SHR(hi, 14), SHL(lo, 18))
     local ror18_lo, ror18_hi = OR(SHR(lo, 18), SHL(hi, 14)), OR(SHR(hi, 18), SHL(lo, 14))
     local rol23_lo, rol23_hi = OR(SHL(lo, 23), SHR(hi, 9)), OR(SHL(hi, 23), SHR(lo, 9))
     local res_lo = XOR(ror14_lo, ror18_lo, rol23_lo)
     local res_hi = XOR(ror14_hi, ror18_hi, rol23_hi)
     return res_hi * int64(2^32) + uint32(int32(res_lo))
  end
  local function XORROR64_6(a)
     -- return XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30))
     -- computed on the two 32-bit halves obtained through the scratch union U[0]
     U[0].i64 = a
     local lo, hi = U[0].i32.lo, U[0].i32.hi
     local ror28_lo, ror28_hi = OR(SHR(lo, 28), SHL(hi, 4)), OR(SHR(hi, 28), SHL(lo, 4))
     local rol30_lo, rol30_hi = OR(SHL(lo, 30), SHR(hi, 2)), OR(SHL(hi, 30), SHR(lo, 2))
     local rol25_lo, rol25_hi = OR(SHL(lo, 25), SHR(hi, 7)), OR(SHL(hi, 25), SHR(lo, 7))
     local res_lo = XOR(ror28_lo, rol30_lo, rol25_lo)
     local res_hi = XOR(ror28_hi, rol30_hi, rol25_hi)
     return res_hi * int64(2^32) + uint32(int32(res_lo))
  end
  local function XORROR64_4(e, f, g)
     -- return XOR64(g, AND64(e, XOR64(f, g)))
     -- the three operands are split into 32-bit halves through the scratch unions U[0..2]
     U[0].i64 = f
     local f_lo, f_hi = U[0].i32.lo, U[0].i32.hi
     U[1].i64 = g
     local g_lo, g_hi = U[1].i32.lo, U[1].i32.hi
     U[2].i64 = e
     local e_lo, e_hi = U[2].i32.lo, U[2].i32.hi
     local lo = XOR(g_lo, AND(e_lo, XOR(f_lo, g_lo)))
     local hi = XOR(g_hi, AND(e_hi, XOR(f_hi, g_hi)))
     return hi * int64(2^32) + uint32(int32(lo))
  end
  local function XORROR64_5(a, b, c)
     -- return XOR64(AND64(XOR64(a, b), c), AND64(a, b))
     -- the three operands are split into 32-bit halves through the scratch unions U[0..2]
     U[0].i64 = a
     local a_lo, a_hi = U[0].i32.lo, U[0].i32.hi
     U[1].i64 = b
     local b_lo, b_hi = U[1].i32.lo, U[1].i32.hi
     U[2].i64 = c
     local c_lo, c_hi = U[2].i32.lo, U[2].i32.hi
     local lo = XOR(AND(XOR(a_lo, b_lo), c_lo), AND(a_lo, b_lo))
     local hi = XOR(AND(XOR(a_hi, b_hi), c_hi), AND(a_hi, b_hi))
     return hi * int64(2^32) + uint32(int32(lo))
  end
  function XOR64A5(long)
     -- return XOR64(long, 0xA5A5A5A5A5A5A5A5)
     -- each 32-bit half is XORed with 0xA5A5A5A5 separately, then the halves are joined again
     U[0].i64 = long
     local lo32 = XOR(U[0].i32.lo, 0xA5A5A5A5)
     local hi32 = XOR(U[0].i32.hi, 0xA5A5A5A5)
     return hi32 * int64(2^32) + uint32(int32(lo32))
  end
  function HEX64(long)
     -- Returns HEX() of the high 32-bit half followed by HEX() of the low 32-bit half
     U[0].i64 = long
     local hi32, lo32 = U[0].i32.hi, U[0].i32.lo
     return HEX(hi32)..HEX(lo32)
  end
  function sha512_feed_128(H, _, str, offs, size)
     -- Processes bytes (offs+1)..(offs+size) of str in 128-byte blocks, updating the eight 64-bit values H[1..8] in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 128.
     -- The second parameter is unused in this branch.
     -- All 64-bit bitwise work is delegated to the XORROR64_* helpers; only additions are done on int64 directly.
     local W, K = common_W_FFI_int64, sha2_K_lo
     for pos = offs, offs + size - 1, 128 do
        -- load 16 big-endian 64-bit words
        for j = 0, 15 do
           pos = pos + 8
           local b1, b2, b3, b4, b5, b6, b7, b8 = byte(str, pos - 7, pos)   -- slow, but doesn't depend on endianness
           local hi32 = OR(SHL(b1, 24), SHL(b2, 16), SHL(b3, 8), b4)
           local lo32 = OR(SHL(b5, 24), SHL(b6, 16), SHL(b7, 8), b8)
           W[j] = hi32 * int64(2^32) + uint32(int32(lo32))
        end
        -- extend the schedule to 80 words
        for j = 16, 79 do
           local s0 = XORROR64_1(W[j-15])
           local s1 = XORROR64_2(W[j-2])
           W[j] = s0 + s1 + W[j-7] + W[j-16]
        end
        local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
        -- 80 rounds, written out eight per loop iteration
        for j = 0, 79, 8 do
           local t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+1] + W[j]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+2] + W[j+1]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+3] + W[j+2]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+4] + W[j+3]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+5] + W[j+4]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+6] + W[j+5]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+7] + W[j+6]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
           t1 = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+8] + W[j+7]
           h, g, f, e = g, f, e, t1 + d
           d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + t1
        end
        -- fold the working variables back into the state
        H[1], H[2], H[3], H[4] = a + H[1], b + H[2], c + H[3], d + H[4]
        H[5], H[6], H[7], H[8] = e + H[5], f + H[6], g + H[7], h + H[8]
     end
  end
  590. end
  591. -- MD5 implementation for "LuaJIT with FFI" branch
  function md5_feed_64(H, str, offs, size)
     -- Processes bytes (offs+1)..(offs+size) of str in 64-byte blocks, updating the four 32-bit values H[1..4] in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     local W, K = common_W_FFI_int32, md5_K
     for pos = offs, offs + size - 1, 64 do
        -- load 16 little-endian 32-bit words
        for j = 0, 15 do
           pos = pos + 4
           local b1, b2, b3, b4 = byte(str, pos - 3, pos)   -- slow, but doesn't depend on endianness
           W[j] = OR(SHL(b4, 24), SHL(b3, 16), SHL(b2, 8), b1)
        end
        local a, b, c, d = H[1], H[2], H[3], H[4]
        -- steps 1..16: message words are taken in their natural order
        for j = 0, 15, 4 do
           a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+1] + W[j] + a), 7) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+2] + W[j+1] + a), 12) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+3] + W[j+2] + a), 17) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+4] + W[j+3] + a), 22) + b)
        end
        -- steps 17..32: word indices are derived from 5*j, reduced modulo 16 by AND(..., 15)
        for j = 16, 31, 4 do
           local q = 5*j
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+1] + W[AND(q + 1, 15)] + a), 5) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+2] + W[AND(q + 6, 15)] + a), 9) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+3] + W[AND(q - 5, 15)] + a), 14) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+4] + W[AND(q, 15)] + a), 20) + b)
        end
        -- steps 33..48: word indices are derived from 3*j
        for j = 32, 47, 4 do
           local q = 3*j
           a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+1] + W[AND(q + 5, 15)] + a), 4) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+2] + W[AND(q + 8, 15)] + a), 11) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+3] + W[AND(q - 5, 15)] + a), 16) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+4] + W[AND(q - 2, 15)] + a), 23) + b)
        end
        -- steps 49..64: word indices are derived from 7*j
        for j = 48, 63, 4 do
           local q = 7*j
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+1] + W[AND(q, 15)] + a), 6) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+2] + W[AND(q + 7, 15)] + a), 10) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+3] + W[AND(q - 2, 15)] + a), 15) + b)
           a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+4] + W[AND(q + 5, 15)] + a), 21) + b)
        end
        -- fold the working variables back into the state
        H[1] = NORM(a + H[1])
        H[2] = NORM(b + H[2])
        H[3] = NORM(c + H[3])
        H[4] = NORM(d + H[4])
     end
  end
  632. -- SHA-1 implementation for "LuaJIT with FFI" branch
  function sha1_feed_64(H, str, offs, size)
     -- Processes bytes (offs+1)..(offs+size) of str in 64-byte blocks, updating the five 32-bit values H[1..5] in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     local W = common_W_FFI_int32
     -- additive constants of the four 20-step stages
     local K_00_19 = 0x5A827999   -- floor(2^30 * sqrt(2))
     local K_20_39 = 0x6ED9EBA1   -- 2^30 * sqrt(3)
     local K_40_59 = 0x8F1BBCDC   -- 2^30 * sqrt(5)
     local K_60_79 = 0xCA62C1D6   -- 2^30 * sqrt(10)
     for pos = offs, offs + size - 1, 64 do
        -- load 16 big-endian 32-bit words
        for j = 0, 15 do
           pos = pos + 4
           local b1, b2, b3, b4 = byte(str, pos - 3, pos)   -- slow, but doesn't depend on endianness
           W[j] = OR(SHL(b1, 24), SHL(b2, 16), SHL(b3, 8), b4)
        end
        -- extend the schedule to 80 words
        for j = 16, 79 do
           local mixed = XOR(W[j-3], W[j-8], W[j-14], W[j-16])
           W[j] = ROL(mixed, 1)
        end
        local a, b, c, d, e = H[1], H[2], H[3], H[4], H[5]
        for j = 0, 19, 5 do
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j] + K_00_19 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+1] + K_00_19 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+2] + K_00_19 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+3] + K_00_19 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+4] + K_00_19 + e))
        end
        for j = 20, 39, 5 do
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j] + K_20_39 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + K_20_39 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + K_20_39 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + K_20_39 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + K_20_39 + e))
        end
        for j = 40, 59, 5 do
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j] + K_40_59 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+1] + K_40_59 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+2] + K_40_59 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+3] + K_40_59 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+4] + K_40_59 + e))
        end
        for j = 60, 79, 5 do
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j] + K_60_79 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + K_60_79 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + K_60_79 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + K_60_79 + e))
           e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + K_60_79 + e))
        end
        -- fold the working variables back into the state
        H[1] = NORM(a + H[1])
        H[2] = NORM(b + H[2])
        H[3] = NORM(c + H[3])
        H[4] = NORM(d + H[4])
        H[5] = NORM(e + H[5])
     end
  end
  677. end
  678. -- SHA3 implementation for "LuaJIT 2.0 + FFI" and "LuaJIT without FFI" branches
  679. if branch == "FFI" and not is_LuaJIT_21 or branch == "LJ" then
  if branch == "FFI" then
     -- With FFI the 32-bit lane halves live in an int32_t array instead of a Lua table
     local lanes_arr32 = ffi.typeof"int32_t[31]"  -- 25 + 5 + 1 (due to 1-based indexing)

     -- Returns a newly constructed lanes_arr32 object
     function create_array_of_lanes()
        local lanes = lanes_arr32()
        return lanes
     end
  end
  function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)
     -- Absorbs bytes (offs+1)..(offs+size) of str into the state, one block at a time,
     -- running 24 permutation rounds after every block.
     -- Every 64-bit lane is kept as two 32-bit halves: lanes_lo[i] and lanes_hi[i].
     -- Indices 1..25 hold the state, indices 26..30 are scratch for the column parities.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of block_size_in_bytes,
     -- block_size_in_bytes is a positive multiple of 8.
     local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi
     local lanes_per_block = SHR(block_size_in_bytes, 3)
     for pos = offs, offs + size - 1, block_size_in_bytes do
        -- XOR the block into the state; each lane is 8 little-endian bytes (low half first)
        for j = 1, lanes_per_block do
           local b1, b2, b3, b4 = byte(str, pos + 1, pos + 4)
           lanes_lo[j] = XOR(lanes_lo[j], OR(SHL(b4, 24), SHL(b3, 16), SHL(b2, 8), b1))
           local b5, b6, b7, b8 = byte(str, pos + 5, pos + 8)
           lanes_hi[j] = XOR(lanes_hi[j], OR(SHL(b8, 24), SHL(b7, 16), SHL(b6, 8), b5))
           pos = pos + 8
        end
        for round_idx = 1, 24 do
           -- parity of each of the five columns, for both halves
           for col = 1, 5 do
              lanes_lo[25 + col] = XOR(lanes_lo[col], lanes_lo[col + 5], lanes_lo[col + 10], lanes_lo[col + 15], lanes_lo[col + 20])
           end
           for col = 1, 5 do
              lanes_hi[25 + col] = XOR(lanes_hi[col], lanes_hi[col + 5], lanes_hi[col + 10], lanes_hi[col + 15], lanes_hi[col + 20])
           end
           -- every group below mixes one (D_lo, D_hi) pair into five lanes, rotating and relocating them;
           -- 64-bit rotations are built from paired 32-bit shifts of the two halves;
           -- the multiple assignments rely on all right-hand sides being evaluated before any store
           local D_lo = XOR(lanes_lo[26], SHL(lanes_lo[28], 1), SHR(lanes_hi[28], 31))
           local D_hi = XOR(lanes_hi[26], SHL(lanes_hi[28], 1), SHR(lanes_lo[28], 31))
           lanes_lo[2], lanes_hi[2], lanes_lo[7], lanes_hi[7], lanes_lo[12], lanes_hi[12], lanes_lo[17], lanes_hi[17] = XOR(SHR(XOR(D_lo, lanes_lo[7]), 20), SHL(XOR(D_hi, lanes_hi[7]), 12)), XOR(SHR(XOR(D_hi, lanes_hi[7]), 20), SHL(XOR(D_lo, lanes_lo[7]), 12)), XOR(SHR(XOR(D_lo, lanes_lo[17]), 19), SHL(XOR(D_hi, lanes_hi[17]), 13)), XOR(SHR(XOR(D_hi, lanes_hi[17]), 19), SHL(XOR(D_lo, lanes_lo[17]), 13)), XOR(SHL(XOR(D_lo, lanes_lo[2]), 1), SHR(XOR(D_hi, lanes_hi[2]), 31)), XOR(SHL(XOR(D_hi, lanes_hi[2]), 1), SHR(XOR(D_lo, lanes_lo[2]), 31)), XOR(SHL(XOR(D_lo, lanes_lo[12]), 10), SHR(XOR(D_hi, lanes_hi[12]), 22)), XOR(SHL(XOR(D_hi, lanes_hi[12]), 10), SHR(XOR(D_lo, lanes_lo[12]), 22))
           local L, H = XOR(D_lo, lanes_lo[22]), XOR(D_hi, lanes_hi[22])
           lanes_lo[22], lanes_hi[22] = XOR(SHL(L, 2), SHR(H, 30)), XOR(SHL(H, 2), SHR(L, 30))
           D_lo = XOR(lanes_lo[27], SHL(lanes_lo[29], 1), SHR(lanes_hi[29], 31))
           D_hi = XOR(lanes_hi[27], SHL(lanes_hi[29], 1), SHR(lanes_lo[29], 31))
           lanes_lo[3], lanes_hi[3], lanes_lo[8], lanes_hi[8], lanes_lo[13], lanes_hi[13], lanes_lo[23], lanes_hi[23] = XOR(SHR(XOR(D_lo, lanes_lo[13]), 21), SHL(XOR(D_hi, lanes_hi[13]), 11)), XOR(SHR(XOR(D_hi, lanes_hi[13]), 21), SHL(XOR(D_lo, lanes_lo[13]), 11)), XOR(SHR(XOR(D_lo, lanes_lo[23]), 3), SHL(XOR(D_hi, lanes_hi[23]), 29)), XOR(SHR(XOR(D_hi, lanes_hi[23]), 3), SHL(XOR(D_lo, lanes_lo[23]), 29)), XOR(SHL(XOR(D_lo, lanes_lo[8]), 6), SHR(XOR(D_hi, lanes_hi[8]), 26)), XOR(SHL(XOR(D_hi, lanes_hi[8]), 6), SHR(XOR(D_lo, lanes_lo[8]), 26)), XOR(SHR(XOR(D_lo, lanes_lo[3]), 2), SHL(XOR(D_hi, lanes_hi[3]), 30)), XOR(SHR(XOR(D_hi, lanes_hi[3]), 2), SHL(XOR(D_lo, lanes_lo[3]), 30))
           L, H = XOR(D_lo, lanes_lo[18]), XOR(D_hi, lanes_hi[18])
           lanes_lo[18], lanes_hi[18] = XOR(SHL(L, 15), SHR(H, 17)), XOR(SHL(H, 15), SHR(L, 17))
           D_lo = XOR(lanes_lo[28], SHL(lanes_lo[30], 1), SHR(lanes_hi[30], 31))
           D_hi = XOR(lanes_hi[28], SHL(lanes_hi[30], 1), SHR(lanes_lo[30], 31))
           lanes_lo[4], lanes_hi[4], lanes_lo[9], lanes_hi[9], lanes_lo[19], lanes_hi[19], lanes_lo[24], lanes_hi[24] = XOR(SHL(XOR(D_lo, lanes_lo[19]), 21), SHR(XOR(D_hi, lanes_hi[19]), 11)), XOR(SHL(XOR(D_hi, lanes_hi[19]), 21), SHR(XOR(D_lo, lanes_lo[19]), 11)), XOR(SHL(XOR(D_lo, lanes_lo[4]), 28), SHR(XOR(D_hi, lanes_hi[4]), 4)), XOR(SHL(XOR(D_hi, lanes_hi[4]), 28), SHR(XOR(D_lo, lanes_lo[4]), 4)), XOR(SHR(XOR(D_lo, lanes_lo[24]), 8), SHL(XOR(D_hi, lanes_hi[24]), 24)), XOR(SHR(XOR(D_hi, lanes_hi[24]), 8), SHL(XOR(D_lo, lanes_lo[24]), 24)), XOR(SHR(XOR(D_lo, lanes_lo[9]), 9), SHL(XOR(D_hi, lanes_hi[9]), 23)), XOR(SHR(XOR(D_hi, lanes_hi[9]), 9), SHL(XOR(D_lo, lanes_lo[9]), 23))
           L, H = XOR(D_lo, lanes_lo[14]), XOR(D_hi, lanes_hi[14])
           lanes_lo[14], lanes_hi[14] = XOR(SHL(L, 25), SHR(H, 7)), XOR(SHL(H, 25), SHR(L, 7))
           D_lo = XOR(lanes_lo[29], SHL(lanes_lo[26], 1), SHR(lanes_hi[26], 31))
           D_hi = XOR(lanes_hi[29], SHL(lanes_hi[26], 1), SHR(lanes_lo[26], 31))
           lanes_lo[5], lanes_hi[5], lanes_lo[15], lanes_hi[15], lanes_lo[20], lanes_hi[20], lanes_lo[25], lanes_hi[25] = XOR(SHL(XOR(D_lo, lanes_lo[25]), 14), SHR(XOR(D_hi, lanes_hi[25]), 18)), XOR(SHL(XOR(D_hi, lanes_hi[25]), 14), SHR(XOR(D_lo, lanes_lo[25]), 18)), XOR(SHL(XOR(D_lo, lanes_lo[20]), 8), SHR(XOR(D_hi, lanes_hi[20]), 24)), XOR(SHL(XOR(D_hi, lanes_hi[20]), 8), SHR(XOR(D_lo, lanes_lo[20]), 24)), XOR(SHL(XOR(D_lo, lanes_lo[5]), 27), SHR(XOR(D_hi, lanes_hi[5]), 5)), XOR(SHL(XOR(D_hi, lanes_hi[5]), 27), SHR(XOR(D_lo, lanes_lo[5]), 5)), XOR(SHR(XOR(D_lo, lanes_lo[15]), 25), SHL(XOR(D_hi, lanes_hi[15]), 7)), XOR(SHR(XOR(D_hi, lanes_hi[15]), 25), SHL(XOR(D_lo, lanes_lo[15]), 7))
           L, H = XOR(D_lo, lanes_lo[10]), XOR(D_hi, lanes_hi[10])
           lanes_lo[10], lanes_hi[10] = XOR(SHL(L, 20), SHR(H, 12)), XOR(SHL(H, 20), SHR(L, 12))
           D_lo = XOR(lanes_lo[30], SHL(lanes_lo[27], 1), SHR(lanes_hi[27], 31))
           D_hi = XOR(lanes_hi[30], SHL(lanes_hi[27], 1), SHR(lanes_lo[27], 31))
           lanes_lo[6], lanes_hi[6], lanes_lo[11], lanes_hi[11], lanes_lo[16], lanes_hi[16], lanes_lo[21], lanes_hi[21] = XOR(SHL(XOR(D_lo, lanes_lo[11]), 3), SHR(XOR(D_hi, lanes_hi[11]), 29)), XOR(SHL(XOR(D_hi, lanes_hi[11]), 3), SHR(XOR(D_lo, lanes_lo[11]), 29)), XOR(SHL(XOR(D_lo, lanes_lo[21]), 18), SHR(XOR(D_hi, lanes_hi[21]), 14)), XOR(SHL(XOR(D_hi, lanes_hi[21]), 18), SHR(XOR(D_lo, lanes_lo[21]), 14)), XOR(SHR(XOR(D_lo, lanes_lo[6]), 28), SHL(XOR(D_hi, lanes_hi[6]), 4)), XOR(SHR(XOR(D_hi, lanes_hi[6]), 28), SHL(XOR(D_lo, lanes_lo[6]), 4)), XOR(SHR(XOR(D_lo, lanes_lo[16]), 23), SHL(XOR(D_hi, lanes_hi[16]), 9)), XOR(SHR(XOR(D_hi, lanes_hi[16]), 23), SHL(XOR(D_lo, lanes_lo[16]), 9))
           lanes_lo[1], lanes_hi[1] = XOR(D_lo, lanes_lo[1]), XOR(D_hi, lanes_hi[1])
           -- non-linear step on the low halves, one row of five lanes per statement; the round constant goes into lane 1
           lanes_lo[1], lanes_lo[2], lanes_lo[3], lanes_lo[4], lanes_lo[5] = XOR(lanes_lo[1], AND(NOT(lanes_lo[2]), lanes_lo[3]), RC_lo[round_idx]), XOR(lanes_lo[2], AND(NOT(lanes_lo[3]), lanes_lo[4])), XOR(lanes_lo[3], AND(NOT(lanes_lo[4]), lanes_lo[5])), XOR(lanes_lo[4], AND(NOT(lanes_lo[5]), lanes_lo[1])), XOR(lanes_lo[5], AND(NOT(lanes_lo[1]), lanes_lo[2]))
           lanes_lo[6], lanes_lo[7], lanes_lo[8], lanes_lo[9], lanes_lo[10] = XOR(lanes_lo[9], AND(NOT(lanes_lo[10]), lanes_lo[6])), XOR(lanes_lo[10], AND(NOT(lanes_lo[6]), lanes_lo[7])), XOR(lanes_lo[6], AND(NOT(lanes_lo[7]), lanes_lo[8])), XOR(lanes_lo[7], AND(NOT(lanes_lo[8]), lanes_lo[9])), XOR(lanes_lo[8], AND(NOT(lanes_lo[9]), lanes_lo[10]))
           lanes_lo[11], lanes_lo[12], lanes_lo[13], lanes_lo[14], lanes_lo[15] = XOR(lanes_lo[12], AND(NOT(lanes_lo[13]), lanes_lo[14])), XOR(lanes_lo[13], AND(NOT(lanes_lo[14]), lanes_lo[15])), XOR(lanes_lo[14], AND(NOT(lanes_lo[15]), lanes_lo[11])), XOR(lanes_lo[15], AND(NOT(lanes_lo[11]), lanes_lo[12])), XOR(lanes_lo[11], AND(NOT(lanes_lo[12]), lanes_lo[13]))
           lanes_lo[16], lanes_lo[17], lanes_lo[18], lanes_lo[19], lanes_lo[20] = XOR(lanes_lo[20], AND(NOT(lanes_lo[16]), lanes_lo[17])), XOR(lanes_lo[16], AND(NOT(lanes_lo[17]), lanes_lo[18])), XOR(lanes_lo[17], AND(NOT(lanes_lo[18]), lanes_lo[19])), XOR(lanes_lo[18], AND(NOT(lanes_lo[19]), lanes_lo[20])), XOR(lanes_lo[19], AND(NOT(lanes_lo[20]), lanes_lo[16]))
           lanes_lo[21], lanes_lo[22], lanes_lo[23], lanes_lo[24], lanes_lo[25] = XOR(lanes_lo[23], AND(NOT(lanes_lo[24]), lanes_lo[25])), XOR(lanes_lo[24], AND(NOT(lanes_lo[25]), lanes_lo[21])), XOR(lanes_lo[25], AND(NOT(lanes_lo[21]), lanes_lo[22])), XOR(lanes_lo[21], AND(NOT(lanes_lo[22]), lanes_lo[23])), XOR(lanes_lo[22], AND(NOT(lanes_lo[23]), lanes_lo[24]))
           -- the same step on the high halves
           lanes_hi[1], lanes_hi[2], lanes_hi[3], lanes_hi[4], lanes_hi[5] = XOR(lanes_hi[1], AND(NOT(lanes_hi[2]), lanes_hi[3]), RC_hi[round_idx]), XOR(lanes_hi[2], AND(NOT(lanes_hi[3]), lanes_hi[4])), XOR(lanes_hi[3], AND(NOT(lanes_hi[4]), lanes_hi[5])), XOR(lanes_hi[4], AND(NOT(lanes_hi[5]), lanes_hi[1])), XOR(lanes_hi[5], AND(NOT(lanes_hi[1]), lanes_hi[2]))
           lanes_hi[6], lanes_hi[7], lanes_hi[8], lanes_hi[9], lanes_hi[10] = XOR(lanes_hi[9], AND(NOT(lanes_hi[10]), lanes_hi[6])), XOR(lanes_hi[10], AND(NOT(lanes_hi[6]), lanes_hi[7])), XOR(lanes_hi[6], AND(NOT(lanes_hi[7]), lanes_hi[8])), XOR(lanes_hi[7], AND(NOT(lanes_hi[8]), lanes_hi[9])), XOR(lanes_hi[8], AND(NOT(lanes_hi[9]), lanes_hi[10]))
           lanes_hi[11], lanes_hi[12], lanes_hi[13], lanes_hi[14], lanes_hi[15] = XOR(lanes_hi[12], AND(NOT(lanes_hi[13]), lanes_hi[14])), XOR(lanes_hi[13], AND(NOT(lanes_hi[14]), lanes_hi[15])), XOR(lanes_hi[14], AND(NOT(lanes_hi[15]), lanes_hi[11])), XOR(lanes_hi[15], AND(NOT(lanes_hi[11]), lanes_hi[12])), XOR(lanes_hi[11], AND(NOT(lanes_hi[12]), lanes_hi[13]))
           lanes_hi[16], lanes_hi[17], lanes_hi[18], lanes_hi[19], lanes_hi[20] = XOR(lanes_hi[20], AND(NOT(lanes_hi[16]), lanes_hi[17])), XOR(lanes_hi[16], AND(NOT(lanes_hi[17]), lanes_hi[18])), XOR(lanes_hi[17], AND(NOT(lanes_hi[18]), lanes_hi[19])), XOR(lanes_hi[18], AND(NOT(lanes_hi[19]), lanes_hi[20])), XOR(lanes_hi[19], AND(NOT(lanes_hi[20]), lanes_hi[16]))
           lanes_hi[21], lanes_hi[22], lanes_hi[23], lanes_hi[24], lanes_hi[25] = XOR(lanes_hi[23], AND(NOT(lanes_hi[24]), lanes_hi[25])), XOR(lanes_hi[24], AND(NOT(lanes_hi[25]), lanes_hi[21])), XOR(lanes_hi[25], AND(NOT(lanes_hi[21]), lanes_hi[22])), XOR(lanes_hi[21], AND(NOT(lanes_hi[22]), lanes_hi[23])), XOR(lanes_hi[22], AND(NOT(lanes_hi[23]), lanes_hi[24]))
        end
     end
  end
  742. end
  743. if branch == "LJ" then
  744. -- SHA256 implementation for "LuaJIT without FFI" branch
  function sha256_feed_64(H, str, offs, size)
     -- Processes bytes (offs+1)..(offs+size) of str in 64-byte blocks, updating the eight 32-bit values H[1..8] in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     local W, K = common_W, sha2_K_hi
     for pos = offs, offs + size - 1, 64 do
        -- load 16 big-endian 32-bit words (the schedule is 1-based in this branch)
        for j = 1, 16 do
           pos = pos + 4
           local b1, b2, b3, b4 = byte(str, pos - 3, pos)
           W[j] = OR(SHL(b1, 24), SHL(b2, 16), SHL(b3, 8), b4)
        end
        -- extend the schedule to 64 words
        for j = 17, 64 do
           local w15, w2 = W[j-15], W[j-2]
           local sigma_sum = NORM( XOR(ROR(w15, 7), ROL(w15, 14), SHR(w15, 3)) + XOR(ROL(w2, 15), ROL(w2, 13), SHR(w2, 10)) )
           local older_sum = NORM( W[j-7] + W[j-16] )
           W[j] = NORM( sigma_sum + older_sum )
        end
        local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
        for j = 1, 64, 8 do  -- Thanks to Peter Cawley for this workaround (unroll the loop to avoid "PHI shuffling too complex" due to PHIs overlap)
           local t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j] + W[j] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+1] + W[j+1] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+2] + W[j+2] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+3] + W[j+3] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+4] + W[j+4] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+5] + W[j+5] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+6] + W[j+6] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
           t1 = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+7] + W[j+7] + h) )
           h, g, f, e = g, f, e, NORM(d + t1)
           d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + t1 )
        end
        -- fold the working variables back into the state
        H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])
        H[5], H[6], H[7], H[8] = NORM(e + H[5]), NORM(f + H[6]), NORM(g + H[7]), NORM(h + H[8])
     end
  end
  local function ADD64_4(a_lo, a_hi, b_lo, b_hi, c_lo, c_hi, d_lo, d_hi)
     -- Adds four 64-bit values, each given as a (low, high) pair of 32-bit halves.
     -- Returns NORM(low half of the sum), NORM(high half of the sum including the carry out of the low halves).
     -- Low halves are reduced with % 2^32 before summing, so that the carry can be taken with floor().
     local low_total = a_lo % 2^32 + b_lo % 2^32 + c_lo % 2^32 + d_lo % 2^32
     local high_total = a_hi + b_hi + c_hi + d_hi
     local carry = floor(low_total / 2^32)
     return NORM( low_total ), NORM( high_total + carry )
  end
  796. if LuaJIT_arch == "x86" then -- Special trick is required to avoid "PHI shuffling too complex" on x86 platform
  -- SHA512 implementation for "LuaJIT x86 without FFI" branch
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 128-byte block per
  -- outer iteration. Every 64-bit word is kept as two 32-bit halves (`*_lo` / `*_hi`).
  --   H_lo, H_hi : 8-element arrays with the low/high halves of the hash state (updated in place)
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- NOTE(review): AND/OR/XOR/SHL/SHR/NORM, byte, floor, common_W and the K tables are defined
  -- outside this block; they look like 32-bit bit-library wrappers -- confirm against their definitions.
  function sha512_feed_128(H_lo, H_hi, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 128
    -- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k]
    local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi
    for pos = offs, offs + size - 1, 128 do
      -- Read the block as 32 big-endian 32-bit values; `pos` is advanced inside the iteration
      -- to walk through the 128 bytes.
      for j = 1, 16*2 do
        pos = pos + 4
        local a, b, c, d = byte(str, pos - 3, pos)
        W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
      end
      -- Message schedule: jj is the index of the low half of word k = jj/2.
      for jj = 17*2, 80*2, 2 do
        -- word k-15: t = (x rotr 1) xor (x rotr 8) xor (x shr 7), computed per half
        local a_lo, a_hi = W[jj-30], W[jj-31]
        local t_lo = XOR(OR(SHR(a_lo, 1), SHL(a_hi, 31)), OR(SHR(a_lo, 8), SHL(a_hi, 24)), OR(SHR(a_lo, 7), SHL(a_hi, 25)))
        local t_hi = XOR(OR(SHR(a_hi, 1), SHL(a_lo, 31)), OR(SHR(a_hi, 8), SHL(a_lo, 24)), SHR(a_hi, 7))
        -- word k-2: u = (x rotr 19) xor (x rotr 61) xor (x shr 6), computed per half
        local b_lo, b_hi = W[jj-4], W[jj-5]
        local u_lo = XOR(OR(SHR(b_lo, 19), SHL(b_hi, 13)), OR(SHL(b_lo, 3), SHR(b_hi, 29)), OR(SHR(b_lo, 6), SHL(b_hi, 26)))
        local u_hi = XOR(OR(SHR(b_hi, 19), SHL(b_lo, 13)), OR(SHL(b_hi, 3), SHR(b_lo, 29)), SHR(b_hi, 6))
        -- word k = t + u + word k-7 + word k-16 (64-bit addition with carry)
        W[jj], W[jj-1] = ADD64_4(t_lo, t_hi, u_lo, u_hi, W[jj-14], W[jj-15], W[jj-32], W[jj-33])
      end
      -- Working variables a..h start from the current hash state.
      local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
      local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
      local zero = 0
      for j = 1, 80 do
        -- t: bitwise select (f where e has 1-bits, g elsewhere)
        local t_lo = XOR(g_lo, AND(e_lo, XOR(f_lo, g_lo)))
        local t_hi = XOR(g_hi, AND(e_hi, XOR(f_hi, g_hi)))
        -- u: (e rotr 14) xor (e rotr 18) xor (e rotr 41)
        local u_lo = XOR(OR(SHR(e_lo, 14), SHL(e_hi, 18)), OR(SHR(e_lo, 18), SHL(e_hi, 14)), OR(SHL(e_lo, 23), SHR(e_hi, 9)))
        local u_hi = XOR(OR(SHR(e_hi, 14), SHL(e_lo, 18)), OR(SHR(e_hi, 18), SHL(e_lo, 14)), OR(SHL(e_hi, 23), SHR(e_lo, 9)))
        -- z = u + t + h + K[j] + W[j]; the low sum is kept unreduced so its overflow can be carried
        local sum_lo = u_lo % 2^32 + t_lo % 2^32 + h_lo % 2^32 + K_lo[j] + W[2*j] % 2^32
        local z_lo, z_hi = NORM( sum_lo ), NORM( u_hi + t_hi + h_hi + K_hi[j] + W[2*j-1] + floor(sum_lo / 2^32) )
        zero = zero + zero -- this trick is needed to avoid "PHI shuffling too complex" due to PHIs overlap
        -- `zero` stays 0, so OR(zero, x) is not meant to change the value being shifted along;
        -- the extra operation exists only for the JIT workaround named above.
        h_lo, h_hi, g_lo, g_hi, f_lo, f_hi = OR(zero, g_lo), OR(zero, g_hi), OR(zero, f_lo), OR(zero, f_hi), OR(zero, e_lo), OR(zero, e_hi)
        -- e = z + d
        local sum_lo = z_lo % 2^32 + d_lo % 2^32
        e_lo, e_hi = NORM( sum_lo ), NORM( z_hi + d_hi + floor(sum_lo / 2^32) )
        d_lo, d_hi, c_lo, c_hi, b_lo, b_hi = OR(zero, c_lo), OR(zero, c_hi), OR(zero, b_lo), OR(zero, b_hi), OR(zero, a_lo), OR(zero, a_hi)
        -- After the shift b holds the previous a: u = (b rotr 28) xor (b rotr 34) xor (b rotr 39)
        u_lo = XOR(OR(SHR(b_lo, 28), SHL(b_hi, 4)), OR(SHL(b_lo, 30), SHR(b_hi, 2)), OR(SHL(b_lo, 25), SHR(b_hi, 7)))
        u_hi = XOR(OR(SHR(b_hi, 28), SHL(b_lo, 4)), OR(SHL(b_hi, 30), SHR(b_lo, 2)), OR(SHL(b_hi, 25), SHR(b_lo, 7)))
        -- t: bitwise majority of b, c, d
        t_lo = OR(AND(d_lo, c_lo), AND(b_lo, XOR(d_lo, c_lo)))
        t_hi = OR(AND(d_hi, c_hi), AND(b_hi, XOR(d_hi, c_hi)))
        -- a = z + t + u
        local sum_lo = z_lo % 2^32 + t_lo % 2^32 + u_lo % 2^32
        a_lo, a_hi = NORM( sum_lo ), NORM( z_hi + t_hi + u_hi + floor(sum_lo / 2^32) )
      end
      -- Add the working variables back into the hash state (64-bit additions; unused operands are 0).
      H_lo[1], H_hi[1] = ADD64_4(H_lo[1], H_hi[1], a_lo, a_hi, 0, 0, 0, 0)
      H_lo[2], H_hi[2] = ADD64_4(H_lo[2], H_hi[2], b_lo, b_hi, 0, 0, 0, 0)
      H_lo[3], H_hi[3] = ADD64_4(H_lo[3], H_hi[3], c_lo, c_hi, 0, 0, 0, 0)
      H_lo[4], H_hi[4] = ADD64_4(H_lo[4], H_hi[4], d_lo, d_hi, 0, 0, 0, 0)
      H_lo[5], H_hi[5] = ADD64_4(H_lo[5], H_hi[5], e_lo, e_hi, 0, 0, 0, 0)
      H_lo[6], H_hi[6] = ADD64_4(H_lo[6], H_hi[6], f_lo, f_hi, 0, 0, 0, 0)
      H_lo[7], H_hi[7] = ADD64_4(H_lo[7], H_hi[7], g_lo, g_hi, 0, 0, 0, 0)
      H_lo[8], H_hi[8] = ADD64_4(H_lo[8], H_hi[8], h_lo, h_hi, 0, 0, 0, 0)
    end
  end
  849. else -- all platforms except x86
  -- SHA512 implementation for "LuaJIT non-x86 without FFI" branch
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 128-byte block per
  -- outer iteration. Every 64-bit word is kept as two 32-bit halves (`*_lo` / `*_hi`).
  --   H_lo, H_hi : 8-element arrays with the low/high halves of the hash state (updated in place)
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- NOTE(review): AND/OR/XOR/SHL/SHR/NORM, byte, floor, common_W and the K tables are defined
  -- outside this block; they look like 32-bit bit-library wrappers -- confirm against their definitions.
  function sha512_feed_128(H_lo, H_hi, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 128
    -- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k]
    local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi
    for pos = offs, offs + size - 1, 128 do
      -- Read the block as 32 big-endian 32-bit values; `pos` is advanced inside the iteration
      -- to walk through the 128 bytes.
      for j = 1, 16*2 do
        pos = pos + 4
        local a, b, c, d = byte(str, pos - 3, pos)
        W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
      end
      -- Message schedule: jj is the index of the low half of word k = jj/2.
      for jj = 17*2, 80*2, 2 do
        -- word k-15: t = (x rotr 1) xor (x rotr 8) xor (x shr 7), computed per half
        local a_lo, a_hi = W[jj-30], W[jj-31]
        local t_lo = XOR(OR(SHR(a_lo, 1), SHL(a_hi, 31)), OR(SHR(a_lo, 8), SHL(a_hi, 24)), OR(SHR(a_lo, 7), SHL(a_hi, 25)))
        local t_hi = XOR(OR(SHR(a_hi, 1), SHL(a_lo, 31)), OR(SHR(a_hi, 8), SHL(a_lo, 24)), SHR(a_hi, 7))
        -- word k-2: u = (x rotr 19) xor (x rotr 61) xor (x shr 6), computed per half
        local b_lo, b_hi = W[jj-4], W[jj-5]
        local u_lo = XOR(OR(SHR(b_lo, 19), SHL(b_hi, 13)), OR(SHL(b_lo, 3), SHR(b_hi, 29)), OR(SHR(b_lo, 6), SHL(b_hi, 26)))
        local u_hi = XOR(OR(SHR(b_hi, 19), SHL(b_lo, 13)), OR(SHL(b_hi, 3), SHR(b_lo, 29)), SHR(b_hi, 6))
        -- word k = t + u + word k-7 + word k-16 (64-bit addition with carry)
        W[jj], W[jj-1] = ADD64_4(t_lo, t_hi, u_lo, u_hi, W[jj-14], W[jj-15], W[jj-32], W[jj-33])
      end
      -- Working variables a..h start from the current hash state.
      local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
      local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
      for j = 1, 80 do
        -- t: bitwise select (f where e has 1-bits, g elsewhere)
        local t_lo = XOR(g_lo, AND(e_lo, XOR(f_lo, g_lo)))
        local t_hi = XOR(g_hi, AND(e_hi, XOR(f_hi, g_hi)))
        -- u: (e rotr 14) xor (e rotr 18) xor (e rotr 41)
        local u_lo = XOR(OR(SHR(e_lo, 14), SHL(e_hi, 18)), OR(SHR(e_lo, 18), SHL(e_hi, 14)), OR(SHL(e_lo, 23), SHR(e_hi, 9)))
        local u_hi = XOR(OR(SHR(e_hi, 14), SHL(e_lo, 18)), OR(SHR(e_hi, 18), SHL(e_lo, 14)), OR(SHL(e_hi, 23), SHR(e_lo, 9)))
        -- z = u + t + h + K[j] + W[j]; the low sum is kept unreduced so its overflow can be carried
        local sum_lo = u_lo % 2^32 + t_lo % 2^32 + h_lo % 2^32 + K_lo[j] + W[2*j] % 2^32
        local z_lo, z_hi = NORM( sum_lo ), NORM( u_hi + t_hi + h_hi + K_hi[j] + W[2*j-1] + floor(sum_lo / 2^32) )
        -- Shift the upper registers: h <- g, g <- f, f <- e
        h_lo, h_hi, g_lo, g_hi, f_lo, f_hi = g_lo, g_hi, f_lo, f_hi, e_lo, e_hi
        -- e = z + d
        local sum_lo = z_lo % 2^32 + d_lo % 2^32
        e_lo, e_hi = NORM( sum_lo ), NORM( z_hi + d_hi + floor(sum_lo / 2^32) )
        -- Shift the lower registers: d <- c, c <- b, b <- a
        d_lo, d_hi, c_lo, c_hi, b_lo, b_hi = c_lo, c_hi, b_lo, b_hi, a_lo, a_hi
        -- b now holds the previous a: u = (b rotr 28) xor (b rotr 34) xor (b rotr 39)
        u_lo = XOR(OR(SHR(b_lo, 28), SHL(b_hi, 4)), OR(SHL(b_lo, 30), SHR(b_hi, 2)), OR(SHL(b_lo, 25), SHR(b_hi, 7)))
        u_hi = XOR(OR(SHR(b_hi, 28), SHL(b_lo, 4)), OR(SHL(b_hi, 30), SHR(b_lo, 2)), OR(SHL(b_hi, 25), SHR(b_lo, 7)))
        -- t: bitwise majority of b, c, d
        t_lo = OR(AND(d_lo, c_lo), AND(b_lo, XOR(d_lo, c_lo)))
        t_hi = OR(AND(d_hi, c_hi), AND(b_hi, XOR(d_hi, c_hi)))
        -- a = z + u + t
        local sum_lo = z_lo % 2^32 + u_lo % 2^32 + t_lo % 2^32
        a_lo, a_hi = NORM( sum_lo ), NORM( z_hi + u_hi + t_hi + floor(sum_lo / 2^32) )
      end
      -- Add the working variables back into the hash state (64-bit additions; unused operands are 0).
      H_lo[1], H_hi[1] = ADD64_4(H_lo[1], H_hi[1], a_lo, a_hi, 0, 0, 0, 0)
      H_lo[2], H_hi[2] = ADD64_4(H_lo[2], H_hi[2], b_lo, b_hi, 0, 0, 0, 0)
      H_lo[3], H_hi[3] = ADD64_4(H_lo[3], H_hi[3], c_lo, c_hi, 0, 0, 0, 0)
      H_lo[4], H_hi[4] = ADD64_4(H_lo[4], H_hi[4], d_lo, d_hi, 0, 0, 0, 0)
      H_lo[5], H_hi[5] = ADD64_4(H_lo[5], H_hi[5], e_lo, e_hi, 0, 0, 0, 0)
      H_lo[6], H_hi[6] = ADD64_4(H_lo[6], H_hi[6], f_lo, f_hi, 0, 0, 0, 0)
      H_lo[7], H_hi[7] = ADD64_4(H_lo[7], H_hi[7], g_lo, g_hi, 0, 0, 0, 0)
      H_lo[8], H_hi[8] = ADD64_4(H_lo[8], H_hi[8], h_lo, h_hi, 0, 0, 0, 0)
    end
  end
  900. end
  -- MD5 implementation for "LuaJIT without FFI" branch
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and updates the four state words H[1..4] in place.
  --   H          : array with the 4 words of the MD5 state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- NOTE(review): AND/OR/XOR/NOT/SHL/ROL/NORM, byte, common_W and md5_K are defined outside
  -- this block -- confirm their exact semantics there.
  function md5_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W, K = common_W, md5_K
    for pos = offs, offs + size - 1, 64 do
      -- Read the block as 16 little-endian 32-bit words (first byte is least significant).
      for j = 1, 16 do
        pos = pos + 4
        local a, b, c, d = byte(str, pos - 3, pos)
        W[j] = OR(SHL(d, 24), SHL(c, 16), SHL(b, 8), a)
      end
      local a, b, c, d = H[1], H[2], H[3], H[4]
      -- In every step the right-hand side is evaluated with the old a, b, c, d before the
      -- multiple assignment rotates the registers. Each loop body unrolls four steps.
      -- Steps 1..16: message words taken in order, left rotations 7/12/17/22.
      for j = 1, 16, 4 do
        a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j ] + W[j ] + a), 7) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+1] + W[j+1] + a), 12) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+2] + W[j+2] + a), 17) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+3] + W[j+3] + a), 22) + b)
      end
      -- Steps 17..32: message word index derived from g (masked to 0..15, then +1 for
      -- 1-based W), left rotations 5/9/14/20.
      for j = 17, 32, 4 do
        local g = 5*j-4
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j ] + W[AND(g , 15) + 1] + a), 5) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+1] + W[AND(g + 5, 15) + 1] + a), 9) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+2] + W[AND(g + 10, 15) + 1] + a), 14) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+3] + W[AND(g - 1, 15) + 1] + a), 20) + b)
      end
      -- Steps 33..48: three-way XOR of b, c, d, left rotations 4/11/16/23.
      for j = 33, 48, 4 do
        local g = 3*j+2
        a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j ] + W[AND(g , 15) + 1] + a), 4) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+1] + W[AND(g + 3, 15) + 1] + a), 11) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+2] + W[AND(g + 6, 15) + 1] + a), 16) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+3] + W[AND(g - 7, 15) + 1] + a), 23) + b)
      end
      -- Steps 49..64: c xor (b or not d), left rotations 6/10/15/21.
      for j = 49, 64, 4 do
        local g = j*7
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j ] + W[AND(g - 7, 15) + 1] + a), 6) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+1] + W[AND(g , 15) + 1] + a), 10) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+2] + W[AND(g + 7, 15) + 1] + a), 15) + b)
        a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+3] + W[AND(g - 2, 15) + 1] + a), 21) + b)
      end
      -- Add the working registers back into the state.
      H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])
    end
  end
  -- SHA-1 implementation for "LuaJIT without FFI" branch
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and updates the five state words H[1..5] in place.
  --   H          : array with the 5 words of the SHA-1 state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- NOTE(review): AND/OR/XOR/SHL/ROL/ROR/NORM, byte and common_W are defined outside this
  -- block -- confirm their exact semantics there.
  function sha1_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W = common_W
    for pos = offs, offs + size - 1, 64 do
      -- Read the block as 16 big-endian 32-bit words.
      for j = 1, 16 do
        pos = pos + 4
        local a, b, c, d = byte(str, pos - 3, pos)
        W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
      end
      -- Extend to 80 words: XOR of four earlier words, rotated left by 1.
      for j = 17, 80 do
        W[j] = ROL(XOR(W[j-3], W[j-8], W[j-14], W[j-16]), 1)
      end
      local a, b, c, d, e = H[1], H[2], H[3], H[4], H[5]
      -- Each loop body unrolls five steps. The right-hand side of every multiple assignment
      -- is evaluated with the old register values before the registers are rotated.
      -- Steps 1..20: bitwise select (c where b has 1-bits, d elsewhere).
      for j = 1, 20, 5 do
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j] + 0x5A827999 + e)) -- constant = floor(2^30 * sqrt(2))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+1] + 0x5A827999 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+2] + 0x5A827999 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+3] + 0x5A827999 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+4] + 0x5A827999 + e))
      end
      -- Steps 21..40: three-way XOR of b, c, d.
      for j = 21, 40, 5 do
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j] + 0x6ED9EBA1 + e)) -- 2^30 * sqrt(3)
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + 0x6ED9EBA1 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + 0x6ED9EBA1 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + 0x6ED9EBA1 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + 0x6ED9EBA1 + e))
      end
      -- Steps 41..60: bitwise majority of b, c, d.
      for j = 41, 60, 5 do
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j] + 0x8F1BBCDC + e)) -- 2^30 * sqrt(5)
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+1] + 0x8F1BBCDC + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+2] + 0x8F1BBCDC + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+3] + 0x8F1BBCDC + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+4] + 0x8F1BBCDC + e))
      end
      -- Steps 61..80: three-way XOR of b, c, d again, with the last constant.
      for j = 61, 80, 5 do
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j] + 0xCA62C1D6 + e)) -- 2^30 * sqrt(10)
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + 0xCA62C1D6 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + 0xCA62C1D6 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + 0xCA62C1D6 + e))
        e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + 0xCA62C1D6 + e))
      end
      -- Add the working registers back into the state.
      H[1], H[2], H[3], H[4], H[5] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4]), NORM(e + H[5])
    end
  end
  987. end
  988. if branch == "INT64" then
  989. -- implementation for Lua 5.3/5.4
  990. hi_factor = 4294967296
  991. hi_factor_keccak = 4294967296
  992. lanes_index_base = 1
  993. HEX64, XOR64A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed = load[[
  994. local md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo = ...
  995. local string_format, string_unpack = string.format, string.unpack
  -- Renders x as exactly sixteen lowercase hexadecimal digits, zero-padded on the left.
  local function HEX64(x)
    local digits = string_format("%016x", x)
    return digits
  end
  -- XORs x with the 64-bit pattern made of the byte 0xA5 repeated eight times.
  local function XOR64A5(x)
    local pattern = 0xa5a5a5a5a5a5a5a5
    return pattern ~ x
  end
  -- Bitwise exclusive-or of the two integer arguments.
  local function XOR_BYTE(x, y)
    local mixed = x ~ y
    return mixed
  end
  -- Scratch array for the message schedule; the feed functions below all alias it as W.
  local common_W = {}
  -- SHA-256 block processing using 64-bit integers (Lua 5.3/5.4 branch).
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and writes the updated state back into H[1..8].
  --   H          : array with the 8 words of the hash state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- The state words are not masked to 32 bits here; only the schedule words are.
  -- NOTE(review): presumably the caller masks the state when producing the digest -- verify.
  local function sha256_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W, K = common_W, sha2_K_hi
    local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
    for pos = offs + 1, offs + size, 64 do
      -- Sixteen big-endian unsigned 32-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack(">I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4", str, pos)
      for j = 17, 64 do
        -- Duplicating the 32-bit word into both halves of the 64-bit integer lets a plain
        -- right shift act as a 32-bit rotation in the low 32 bits; shifting by 32+n brings
        -- down the upper copy shifted by n (a non-rotating shift).
        local a = W[j-15]
        a = a<<32 | a
        local b = W[j-2]
        b = b<<32 | b
        -- '+' binds tighter than '&', so the mask applies to the whole sum.
        W[j] = (a>>7 ~ a>>18 ~ a>>35) + (b>>17 ~ b>>19 ~ b>>42) + W[j-7] + W[j-16] & (1<<32)-1
      end
      local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
      for j = 1, 64 do
        -- '&' binds tighter than '|': e becomes (e<<32) | (e & 0xFFFFFFFF), i.e. its low
        -- 32 bits duplicated into both halves so that >> rotates.
        e = e<<32 | e & (1<<32)-1
        local z = (e>>6 ~ e>>11 ~ e>>25) + (g ~ e & (f ~ g)) + h + K[j] + W[j]
        h = g
        g = f
        f = e
        e = z + d
        d = c
        c = b
        b = a
        -- Same duplication for a; at this point c and d hold the previous b and c.
        a = a<<32 | a & (1<<32)-1
        a = z + ((a ~ c) & d ~ a & c) + (a>>2 ~ a>>13 ~ a>>22)
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
      h5 = e + h5
      h6 = f + h6
      h7 = g + h7
      h8 = h + h8
    end
    H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
  end
  -- SHA-512 block processing using native 64-bit integers (Lua 5.3/5.4 branch).
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 128-byte block per
  -- outer iteration, and writes the updated state back into H[1..8].
  --   H          : array with the 8 64-bit words of the hash state
  --   _          : unused (keeps the parameter list aligned with the split lo/hi variants)
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing. Additions rely on 64-bit integer wrap-around.
  local function sha512_feed_128(H, _, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 128
    local W, K = common_W, sha2_K_lo
    local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
    for pos = offs + 1, offs + size, 128 do
      -- Sixteen big-endian 64-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack(">i8i8i8i8i8i8i8i8i8i8i8i8i8i8i8i8", str, pos)
      for j = 17, 80 do
        local a = W[j-15]
        local b = W[j-2]
        -- Each rotation is written as a pair x>>n ~ x<<(64-n); the lone a>>7 and b>>6 are plain shifts.
        -- a: rotr 1, rotr 8, shr 7;  b: rotr 19, rotr 61, shr 6
        W[j] = (a >> 1 ~ a >> 7 ~ a >> 8 ~ a << 56 ~ a << 63) + (b >> 6 ~ b >> 19 ~ b >> 61 ~ b << 3 ~ b << 45) + W[j-7] + W[j-16]
      end
      local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
      for j = 1, 80 do
        -- e: rotr 14, rotr 18, rotr 41; plus bitwise select (f where e has 1-bits, g elsewhere)
        local z = (e >> 14 ~ e >> 18 ~ e >> 41 ~ e << 23 ~ e << 46 ~ e << 50) + (g ~ e & (f ~ g)) + h + K[j] + W[j]
        h = g
        g = f
        f = e
        e = z + d
        d = c
        c = b
        b = a
        -- Here c and d already hold the previous b and c; a: rotr 28, rotr 34, rotr 39
        a = z + ((a ~ c) & d ~ a & c) + (a >> 28 ~ a >> 34 ~ a >> 39 ~ a << 25 ~ a << 30 ~ a << 36)
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
      h5 = e + h5
      h6 = f + h6
      h7 = g + h7
      h8 = h + h8
    end
    H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
  end
  -- MD5 block processing using 64-bit integers (Lua 5.3/5.4 branch).
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and writes the updated state back into H[1..4].
  --   H          : array with the 4 words of the MD5 state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- `s` holds 32 minus the left-rotation amount; md5_next_shift (defined outside this block)
  -- maps the current `s` to the one for the next step.
  -- NOTE(review): the exact cycle of md5_next_shift is not visible here -- confirm.
  local function md5_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W, K, md5_next_shift = common_W, md5_K, md5_next_shift
    local h1, h2, h3, h4 = H[1], H[2], H[3], H[4]
    for pos = offs + 1, offs + size, 64 do
      -- Sixteen little-endian unsigned 32-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack("<I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4", str, pos)
      local a, b, c, d = h1, h2, h3, h4
      local s = 32-7
      for j = 1, 16 do
        local F = (d ~ b & (c ~ d)) + a + K[j] + W[j]
        a = d
        d = c
        c = b
        -- (F<<32) | (F & 0xFFFFFFFF) duplicates the low 32 bits of F into both halves, so
        -- shifting right by s leaves F rotated left by 32-s in the low 32 bits.
        b = ((F<<32 | F & (1<<32)-1) >> s) + b
        s = md5_next_shift[s]
      end
      s = 32-5
      for j = 17, 32 do
        -- '&' binds looser than '+', '-', '*': the index is ((5*j-4) & 15) + 1.
        local F = (c ~ d & (b ~ c)) + a + K[j] + W[(5*j-4 & 15) + 1]
        a = d
        d = c
        c = b
        b = ((F<<32 | F & (1<<32)-1) >> s) + b
        s = md5_next_shift[s]
      end
      s = 32-4
      for j = 33, 48 do
        local F = (b ~ c ~ d) + a + K[j] + W[(3*j+2 & 15) + 1]
        a = d
        d = c
        c = b
        b = ((F<<32 | F & (1<<32)-1) >> s) + b
        s = md5_next_shift[s]
      end
      s = 32-6
      for j = 49, 64 do
        local F = (c ~ (b | ~d)) + a + K[j] + W[(j*7-7 & 15) + 1]
        a = d
        d = c
        c = b
        b = ((F<<32 | F & (1<<32)-1) >> s) + b
        s = md5_next_shift[s]
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
    end
    H[1], H[2], H[3], H[4] = h1, h2, h3, h4
  end
  -- SHA-1 block processing using 64-bit integers (Lua 5.3/5.4 branch).
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and writes the updated state back into H[1..5].
  --   H          : array with the 5 words of the SHA-1 state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- Rotations are done by duplicating the low 32 bits of a value into both halves of a
  -- 64-bit integer -- (x<<32) | (x & 0xFFFFFFFF) -- and shifting right.
  local function sha1_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W = common_W
    local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5]
    for pos = offs + 1, offs + size, 64 do
      -- Sixteen big-endian unsigned 32-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack(">I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4", str, pos)
      for j = 17, 80 do
        local a = W[j-3] ~ W[j-8] ~ W[j-14] ~ W[j-16]
        -- Duplicate, shift left by 1, keep the upper 32 bits: a rotated left by 1.
        W[j] = (a<<32 | a) << 1 >> 32
      end
      local a, b, c, d, e = h1, h2, h3, h4, h5
      for j = 1, 20 do
        -- (.. >> 27) is a rotated left by 5 (in the low 32 bits)
        local z = ((a<<32 | a & (1<<32)-1) >> 27) + (d ~ b & (c ~ d)) + 0x5A827999 + W[j] + e -- constant = floor(2^30 * sqrt(2))
        e = d
        d = c
        -- b rotated right by 2 (in the low 32 bits)
        c = (b<<32 | b & (1<<32)-1) >> 2
        b = a
        a = z
      end
      for j = 21, 40 do
        local z = ((a<<32 | a & (1<<32)-1) >> 27) + (b ~ c ~ d) + 0x6ED9EBA1 + W[j] + e -- 2^30 * sqrt(3)
        e = d
        d = c
        c = (b<<32 | b & (1<<32)-1) >> 2
        b = a
        a = z
      end
      for j = 41, 60 do
        local z = ((a<<32 | a & (1<<32)-1) >> 27) + ((b ~ c) & d ~ b & c) + 0x8F1BBCDC + W[j] + e -- 2^30 * sqrt(5)
        e = d
        d = c
        c = (b<<32 | b & (1<<32)-1) >> 2
        b = a
        a = z
      end
      for j = 61, 80 do
        local z = ((a<<32 | a & (1<<32)-1) >> 27) + (b ~ c ~ d) + 0xCA62C1D6 + W[j] + e -- 2^30 * sqrt(10)
        e = d
        d = c
        c = (b<<32 | b & (1<<32)-1) >> 2
        b = a
        a = z
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
      h5 = e + h5
    end
    H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5
  end
  -- Table of string.unpack formats, indexed below by the number of 64-bit words in a block.
  -- NOTE(review): build_keccak_format is defined outside this chunk -- confirm what it returns.
  local keccak_format_i8 = build_keccak_format("i8")
  -- Keccak absorb step using native 64-bit integers (Lua 5.3/5.4 branch).
  -- For every block of `block_size_in_bytes` bytes the message words are XORed into the
  -- first lanes and 24 rounds are applied to all 25 lanes.
  --   lanes               : array of 25 64-bit lanes (updated in place)
  --   _                   : unused
  --   str                 : string holding the message bytes
  --   offs, size          : byte offset and byte count of the data to consume
  --   block_size_in_bytes : bytes absorbed per permutation
  -- Returns nothing.
  local function keccak_feed(lanes, _, str, offs, size, block_size_in_bytes)
    -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
    local RC = sha3_RC_lo
    local qwords_qty = block_size_in_bytes / 8
    local keccak_format = keccak_format_i8[qwords_qty]
    for pos = offs + 1, offs + size, block_size_in_bytes do
      -- XOR the message words of this block into the leading lanes.
      local qwords_from_message = {string_unpack(keccak_format, str, pos)}
      for j = 1, qwords_qty do
        lanes[j] = lanes[j] ~ qwords_from_message[j]
      end
      -- Work on locals for the duration of the 24 rounds.
      local L01, L02, L03, L04, L05, L06, L07, L08, L09, L10, L11, L12, L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24, L25 =
      lanes[1], lanes[2], lanes[3], lanes[4], lanes[5], lanes[6], lanes[7], lanes[8], lanes[9], lanes[10], lanes[11], lanes[12], lanes[13],
      lanes[14], lanes[15], lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23], lanes[24], lanes[25]
      for round_idx = 1, 24 do
        -- Column parities: XOR of the five lanes whose indices differ by multiples of 5.
        local C1 = L01 ~ L06 ~ L11 ~ L16 ~ L21
        local C2 = L02 ~ L07 ~ L12 ~ L17 ~ L22
        local C3 = L03 ~ L08 ~ L13 ~ L18 ~ L23
        local C4 = L04 ~ L09 ~ L14 ~ L19 ~ L24
        local C5 = L05 ~ L10 ~ L15 ~ L20 ~ L25
        -- For each column: D = one parity xor another parity rotated left by 1. Every lane of
        -- the column is XORed with D, rotated left by a fixed amount (x<<n ~ x>>(64-n)) and
        -- stored into a lane variable of the same column group, generally a different one.
        local D = C1 ~ C3<<1 ~ C3>>63
        local T0 = D ~ L02
        local T1 = D ~ L07
        local T2 = D ~ L12
        local T3 = D ~ L17
        local T4 = D ~ L22
        L02 = T1<<44 ~ T1>>20
        L07 = T3<<45 ~ T3>>19
        L12 = T0<<1 ~ T0>>63
        L17 = T2<<10 ~ T2>>54
        L22 = T4<<2 ~ T4>>62
        D = C2 ~ C4<<1 ~ C4>>63
        T0 = D ~ L03
        T1 = D ~ L08
        T2 = D ~ L13
        T3 = D ~ L18
        T4 = D ~ L23
        L03 = T2<<43 ~ T2>>21
        L08 = T4<<61 ~ T4>>3
        L13 = T1<<6 ~ T1>>58
        L18 = T3<<15 ~ T3>>49
        L23 = T0<<62 ~ T0>>2
        D = C3 ~ C5<<1 ~ C5>>63
        T0 = D ~ L04
        T1 = D ~ L09
        T2 = D ~ L14
        T3 = D ~ L19
        T4 = D ~ L24
        L04 = T3<<21 ~ T3>>43
        L09 = T0<<28 ~ T0>>36
        L14 = T2<<25 ~ T2>>39
        L19 = T4<<56 ~ T4>>8
        L24 = T1<<55 ~ T1>>9
        D = C4 ~ C1<<1 ~ C1>>63
        T0 = D ~ L05
        T1 = D ~ L10
        T2 = D ~ L15
        T3 = D ~ L20
        T4 = D ~ L25
        L05 = T4<<14 ~ T4>>50
        L10 = T1<<20 ~ T1>>44
        L15 = T3<<8 ~ T3>>56
        L20 = T0<<27 ~ T0>>37
        L25 = T2<<39 ~ T2>>25
        -- First column: L01 is only XORed with D (no rotation, stays in place).
        D = C5 ~ C2<<1 ~ C2>>63
        T1 = D ~ L06
        T2 = D ~ L11
        T3 = D ~ L16
        T4 = D ~ L21
        L06 = T2<<3 ~ T2>>61
        L11 = T4<<18 ~ T4>>46
        L16 = T1<<36 ~ T1>>28
        L21 = T3<<41 ~ T3>>23
        L01 = D ~ L01
        -- Row step: each new lane is x ~ ((~y) & z) over lanes of the same group of five
        -- (unary ~ binds tightest, then &, then binary ~). All right-hand sides are
        -- evaluated before any assignment, so they see the pre-step values.
        L01, L02, L03, L04, L05 = L01 ~ ~L02 & L03, L02 ~ ~L03 & L04, L03 ~ ~L04 & L05, L04 ~ ~L05 & L01, L05 ~ ~L01 & L02
        L06, L07, L08, L09, L10 = L09 ~ ~L10 & L06, L10 ~ ~L06 & L07, L06 ~ ~L07 & L08, L07 ~ ~L08 & L09, L08 ~ ~L09 & L10
        L11, L12, L13, L14, L15 = L12 ~ ~L13 & L14, L13 ~ ~L14 & L15, L14 ~ ~L15 & L11, L15 ~ ~L11 & L12, L11 ~ ~L12 & L13
        L16, L17, L18, L19, L20 = L20 ~ ~L16 & L17, L16 ~ ~L17 & L18, L17 ~ ~L18 & L19, L18 ~ ~L19 & L20, L19 ~ ~L20 & L16
        L21, L22, L23, L24, L25 = L23 ~ ~L24 & L25, L24 ~ ~L25 & L21, L25 ~ ~L21 & L22, L21 ~ ~L22 & L23, L22 ~ ~L23 & L24
        -- Mix the per-round constant into the first lane.
        L01 = L01 ~ RC[round_idx]
      end
      -- Store the permuted lanes back into the caller's array.
      lanes[1] = L01
      lanes[2] = L02
      lanes[3] = L03
      lanes[4] = L04
      lanes[5] = L05
      lanes[6] = L06
      lanes[7] = L07
      lanes[8] = L08
      lanes[9] = L09
      lanes[10] = L10
      lanes[11] = L11
      lanes[12] = L12
      lanes[13] = L13
      lanes[14] = L14
      lanes[15] = L15
      lanes[16] = L16
      lanes[17] = L17
      lanes[18] = L18
      lanes[19] = L19
      lanes[20] = L20
      lanes[21] = L21
      lanes[22] = L22
      lanes[23] = L23
      lanes[24] = L24
      lanes[25] = L25
    end
  end
  1291. return HEX64, XOR64A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed
  1292. ]](md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo)
  1293. end
  1294. if branch == "INT32" then
  1295. -- implementation for Lua 5.3/5.4 having non-standard numbers config "int32"+"double" (built with LUA_INT_TYPE=LUA_INT_INT)
  1296. K_lo_modulo = 2^32
  -- Renders x as exactly eight lowercase hexadecimal digits, zero-padded on the left.
  function HEX(x)
    local digits = string_format("%08x", x)
    return digits
  end
  1300. XOR32A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed = load[[
  1301. local md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo, sha3_RC_hi = ...
  1302. local string_unpack, floor = string.unpack, math.floor
  -- XORs x with the 32-bit pattern made of the byte 0xA5 repeated four times.
  local function XOR32A5(x)
    local pattern = 0xA5A5A5A5
    return pattern ~ x
  end
  -- Bitwise exclusive-or of the two integer arguments.
  local function XOR_BYTE(x, y)
    local combined = x ~ y
    return combined
  end
  -- Scratch array for the message schedule; the feed functions below all alias it as W.
  local common_W = {}
  -- SHA-256 block processing for the "int32" integer configuration.
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and writes the updated state back into H[1..8].
  --   H          : array with the 8 words of the hash state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- Rotations are written as shift pairs x>>n ~ x<<(32-n).
  -- NOTE(review): this relies on integers being 32 bits wide so that shifts and additions
  -- wrap at 32 bits -- confirm against the build configuration this branch targets.
  local function sha256_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W, K = common_W, sha2_K_hi
    local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
    for pos = offs + 1, offs + size, 64 do
      -- Sixteen big-endian signed 32-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack(">i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)
      for j = 17, 64 do
        local a, b = W[j-15], W[j-2]
        -- a: rotr 7, rotr 18, shr 3;  b: rotr 17, rotr 19, shr 10
        W[j] = (a>>7 ~ a<<25 ~ a<<14 ~ a>>18 ~ a>>3) + (b<<15 ~ b>>17 ~ b<<13 ~ b>>19 ~ b>>10) + W[j-7] + W[j-16]
      end
      local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
      for j = 1, 64 do
        -- e: rotr 6, rotr 11, rotr 25; plus bitwise select (f where e has 1-bits, g elsewhere)
        local z = (e>>6 ~ e<<26 ~ e>>11 ~ e<<21 ~ e>>25 ~ e<<7) + (g ~ e & (f ~ g)) + h + K[j] + W[j]
        h = g
        g = f
        f = e
        e = z + d
        d = c
        c = b
        b = a
        -- Here c and d already hold the previous b and c; a: rotr 2, rotr 13, rotr 22
        a = z + ((a ~ c) & d ~ a & c) + (a>>2 ~ a<<30 ~ a>>13 ~ a<<19 ~ a<<10 ~ a>>22)
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
      h5 = e + h5
      h6 = f + h6
      h7 = g + h7
      h8 = h + h8
    end
    H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
  end
  -- SHA-512 block processing for the "int32" integer configuration.
  -- Every 64-bit word is kept as two 32-bit halves (`*_lo` / `*_hi`). Processes `size` bytes
  -- of `str` starting after byte offset `offs`, one 128-byte block per outer iteration.
  --   H_lo, H_hi : 8-element arrays with the low/high halves of the hash state (updated in place)
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- Carry handling: low halves are reduced with `% 2^32` (a float modulo, so the sum of low
  -- halves is a non-negative float), the overflow floor(sum / 2^32) is added to the high
  -- half, and `0|((sum + 2^31) % 2^32 - 2^31)` folds the sum back into the signed 32-bit
  -- range and converts it to an integer.
  local function sha512_feed_128(H_lo, H_hi, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 128
    -- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k]
    local floor, W, K_lo, K_hi = floor, common_W, sha2_K_lo, sha2_K_hi
    local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
    local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
    for pos = offs + 1, offs + size, 128 do
      -- Thirty-two big-endian signed 32-bit values: high half then low half of each word.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16],
      W[17], W[18], W[19], W[20], W[21], W[22], W[23], W[24], W[25], W[26], W[27], W[28], W[29], W[30], W[31], W[32] =
      string_unpack(">i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)
      -- Message schedule: jj is the index of the low half of word k = jj/2.
      for jj = 17*2, 80*2, 2 do
        -- a = word k-15, b = word k-2
        local a_lo, a_hi, b_lo, b_hi = W[jj-30], W[jj-31], W[jj-4], W[jj-5]
        -- Low half: (a rotr 1 ~ a rotr 8 ~ a shr 7) + (b rotr 19 ~ b rotr 61 ~ b shr 6)
        -- + word k-7 + word k-16, kept unreduced so the overflow can be carried.
        local tmp =
        (a_lo>>1 ~ a_hi<<31 ~ a_lo>>8 ~ a_hi<<24 ~ a_lo>>7 ~ a_hi<<25) % 2^32
        + (b_lo>>19 ~ b_hi<<13 ~ b_lo<<3 ~ b_hi>>29 ~ b_lo>>6 ~ b_hi<<26) % 2^32
        + W[jj-14] % 2^32 + W[jj-32] % 2^32
        -- High half of the same sum plus the carry out of the low half.
        W[jj-1] =
        (a_hi>>1 ~ a_lo<<31 ~ a_hi>>8 ~ a_lo<<24 ~ a_hi>>7)
        + (b_hi>>19 ~ b_lo<<13 ~ b_hi<<3 ~ b_lo>>29 ~ b_hi>>6)
        + W[jj-15] + W[jj-33] + floor(tmp / 2^32)
        W[jj] = 0|((tmp + 2^31) % 2^32 - 2^31)
      end
      local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
      local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
      for j = 1, 80 do
        local jj = 2*j
        -- z = (e rotr 14 ~ e rotr 18 ~ e rotr 41) + select(e, f, g) + h + K[j] + W[j]
        local z_lo = (e_lo>>14 ~ e_hi<<18 ~ e_lo>>18 ~ e_hi<<14 ~ e_lo<<23 ~ e_hi>>9) % 2^32 + (g_lo ~ e_lo & (f_lo ~ g_lo)) % 2^32 + h_lo % 2^32 + K_lo[j] + W[jj] % 2^32
        local z_hi = (e_hi>>14 ~ e_lo<<18 ~ e_hi>>18 ~ e_lo<<14 ~ e_hi<<23 ~ e_lo>>9) + (g_hi ~ e_hi & (f_hi ~ g_hi)) + h_hi + K_hi[j] + W[jj-1] + floor(z_lo / 2^32)
        z_lo = z_lo % 2^32
        h_lo = g_lo
        h_hi = g_hi
        g_lo = f_lo
        g_hi = f_hi
        f_lo = e_lo
        f_hi = e_hi
        -- e = z + d; e_lo temporarily holds the unreduced low sum so the carry can be taken from it
        e_lo = z_lo + d_lo % 2^32
        e_hi = z_hi + d_hi + floor(e_lo / 2^32)
        e_lo = 0|((e_lo + 2^31) % 2^32 - 2^31)
        d_lo = c_lo
        d_hi = c_hi
        c_lo = b_lo
        c_hi = b_hi
        b_lo = a_lo
        b_hi = a_hi
        -- a = z + majority(b, c, d) + (b rotr 28 ~ b rotr 34 ~ b rotr 39), where b holds the previous a
        z_lo = z_lo + (d_lo & c_lo ~ b_lo & (d_lo ~ c_lo)) % 2^32 + (b_lo>>28 ~ b_hi<<4 ~ b_lo<<30 ~ b_hi>>2 ~ b_lo<<25 ~ b_hi>>7) % 2^32
        a_hi = z_hi + (d_hi & c_hi ~ b_hi & (d_hi ~ c_hi)) + (b_hi>>28 ~ b_lo<<4 ~ b_hi<<30 ~ b_lo>>2 ~ b_hi<<25 ~ b_lo>>7) + floor(z_lo / 2^32)
        a_lo = 0|((z_lo + 2^31) % 2^32 - 2^31)
      end
      -- Add the working variables into the running state. a_lo is reused as a scratch
      -- variable for the unreduced low sum of each addition.
      a_lo = h1_lo % 2^32 + a_lo % 2^32
      h1_hi = h1_hi + a_hi + floor(a_lo / 2^32)
      h1_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h2_lo % 2^32 + b_lo % 2^32
      h2_hi = h2_hi + b_hi + floor(a_lo / 2^32)
      h2_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h3_lo % 2^32 + c_lo % 2^32
      h3_hi = h3_hi + c_hi + floor(a_lo / 2^32)
      h3_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h4_lo % 2^32 + d_lo % 2^32
      h4_hi = h4_hi + d_hi + floor(a_lo / 2^32)
      h4_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h5_lo % 2^32 + e_lo % 2^32
      h5_hi = h5_hi + e_hi + floor(a_lo / 2^32)
      h5_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h6_lo % 2^32 + f_lo % 2^32
      h6_hi = h6_hi + f_hi + floor(a_lo / 2^32)
      h6_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h7_lo % 2^32 + g_lo % 2^32
      h7_hi = h7_hi + g_hi + floor(a_lo / 2^32)
      h7_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
      a_lo = h8_lo % 2^32 + h_lo % 2^32
      h8_hi = h8_hi + h_hi + floor(a_lo / 2^32)
      h8_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
    end
    H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
    H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  end
  -- MD5 block processing for the "int32" integer configuration.
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and writes the updated state back into H[1..4].
  --   H          : array with the 4 words of the MD5 state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- `s` holds 32 minus the left-rotation amount; md5_next_shift (defined outside this block)
  -- maps the current `s` to the one for the next step.
  -- NOTE(review): the exact cycle of md5_next_shift is not visible here -- confirm.
  local function md5_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W, K, md5_next_shift = common_W, md5_K, md5_next_shift
    local h1, h2, h3, h4 = H[1], H[2], H[3], H[4]
    for pos = offs + 1, offs + size, 64 do
      -- Sixteen little-endian signed 32-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack("<i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)
      local a, b, c, d = h1, h2, h3, h4
      local s = 32-7
      for j = 1, 16 do
        local F = (d ~ b & (c ~ d)) + a + K[j] + W[j]
        a = d
        d = c
        c = b
        -- '-' binds tighter than '<<': this is (F << (32-s)) | (F >> s), i.e. F rotated
        -- left by 32-s when integers are 32 bits wide.
        b = (F << 32-s | F>>s) + b
        s = md5_next_shift[s]
      end
      s = 32-5
      for j = 17, 32 do
        -- '&' binds looser than '+', '-', '*': the index is ((5*j-4) & 15) + 1.
        local F = (c ~ d & (b ~ c)) + a + K[j] + W[(5*j-4 & 15) + 1]
        a = d
        d = c
        c = b
        b = (F << 32-s | F>>s) + b
        s = md5_next_shift[s]
      end
      s = 32-4
      for j = 33, 48 do
        local F = (b ~ c ~ d) + a + K[j] + W[(3*j+2 & 15) + 1]
        a = d
        d = c
        c = b
        b = (F << 32-s | F>>s) + b
        s = md5_next_shift[s]
      end
      s = 32-6
      for j = 49, 64 do
        local F = (c ~ (b | ~d)) + a + K[j] + W[(j*7-7 & 15) + 1]
        a = d
        d = c
        c = b
        b = (F << 32-s | F>>s) + b
        s = md5_next_shift[s]
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
    end
    H[1], H[2], H[3], H[4] = h1, h2, h3, h4
  end
  -- SHA-1 block processing for the "int32" integer configuration.
  -- Processes `size` bytes of `str` starting after byte offset `offs`, one 64-byte block per
  -- outer iteration, and writes the updated state back into H[1..5].
  --   H          : array with the 5 words of the SHA-1 state
  --   str        : string holding the message bytes
  --   offs, size : byte offset and byte count of the data to consume
  -- Returns nothing.
  -- Rotations are written as shift pairs x<<n ~ x>>(32-n).
  -- NOTE(review): this relies on integers being 32 bits wide so that shifts and additions
  -- wrap at 32 bits -- confirm against the build configuration this branch targets.
  local function sha1_feed_64(H, str, offs, size)
    -- offs >= 0, size >= 0, size is multiple of 64
    local W = common_W
    local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5]
    for pos = offs + 1, offs + size, 64 do
      -- Sixteen big-endian signed 32-bit words.
      W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
      string_unpack(">i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)
      for j = 17, 80 do
        local a = W[j-3] ~ W[j-8] ~ W[j-14] ~ W[j-16]
        -- a rotated left by 1
        W[j] = a << 1 ~ a >> 31
      end
      local a, b, c, d, e = h1, h2, h3, h4, h5
      for j = 1, 20 do
        -- (a rotl 5) + select(b, c, d) + constant + W[j] + e
        local z = (a << 5 ~ a >> 27) + (d ~ b & (c ~ d)) + 0x5A827999 + W[j] + e -- constant = floor(2^30 * sqrt(2))
        e = d
        d = c
        -- b rotated left by 30
        c = b << 30 ~ b >> 2
        b = a
        a = z
      end
      for j = 21, 40 do
        local z = (a << 5 ~ a >> 27) + (b ~ c ~ d) + 0x6ED9EBA1 + W[j] + e -- 2^30 * sqrt(3)
        e = d
        d = c
        c = b << 30 ~ b >> 2
        b = a
        a = z
      end
      for j = 41, 60 do
        local z = (a << 5 ~ a >> 27) + ((b ~ c) & d ~ b & c) + 0x8F1BBCDC + W[j] + e -- 2^30 * sqrt(5)
        e = d
        d = c
        c = b << 30 ~ b >> 2
        b = a
        a = z
      end
      for j = 61, 80 do
        local z = (a << 5 ~ a >> 27) + (b ~ c ~ d) + 0xCA62C1D6 + W[j] + e -- 2^30 * sqrt(10)
        e = d
        d = c
        c = b << 30 ~ b >> 2
        b = a
        a = z
      end
      h1 = a + h1
      h2 = b + h2
      h3 = c + h3
      h4 = d + h4
      h5 = e + h5
    end
    H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5
  end
  1523. local keccak_format_i4i4 = build_keccak_format("i4i4")
  -- keccak_format_i4i4 is indexed below by the number of 8-byte words in a block and yields the
  -- format string handed to string_unpack (how build_keccak_format builds it is not visible here).
  1524. local function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)
  1525. -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
  -- Absorbs bytes offs+1 .. offs+size of str into the Keccak state, one block of
  -- block_size_in_bytes at a time: the block is XORed into the first lanes, then the
  -- 24-round permutation is applied to all 25 lanes.
  -- The state is 25 lanes, each stored as two halves: lanes_lo[i] and lanes_hi[i], i = 1..25.
  -- Both tables are updated in place; nothing is returned.
  -- Rotations below are written as shift pairs on the two halves, which presumes that integers
  -- are 32 bits wide in this branch (NOTE(review): confirm against the branch selection code).
  1526. local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi
  1527. local qwords_qty = block_size_in_bytes / 8
  1528. local keccak_format = keccak_format_i4i4[qwords_qty]
  1529. for pos = offs + 1, offs + size, block_size_in_bytes do
  -- unpack the block; consecutive pairs of unpacked values are the (lo, hi) halves of one lane
  1530. local dwords_from_message = {string_unpack(keccak_format, str, pos)}
  1531. for j = 1, qwords_qty do
  1532. lanes_lo[j] = lanes_lo[j] ~ dwords_from_message[2*j-1]
  1533. lanes_hi[j] = lanes_hi[j] ~ dwords_from_message[2*j]
  1534. end
  -- keep the whole state in locals while the permutation runs
  1535. local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi,
  1536. L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi,
  1537. L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi =
  1538. lanes_lo[1], lanes_hi[1], lanes_lo[2], lanes_hi[2], lanes_lo[3], lanes_hi[3], lanes_lo[4], lanes_hi[4], lanes_lo[5], lanes_hi[5],
  1539. lanes_lo[6], lanes_hi[6], lanes_lo[7], lanes_hi[7], lanes_lo[8], lanes_hi[8], lanes_lo[9], lanes_hi[9], lanes_lo[10], lanes_hi[10],
  1540. lanes_lo[11], lanes_hi[11], lanes_lo[12], lanes_hi[12], lanes_lo[13], lanes_hi[13], lanes_lo[14], lanes_hi[14], lanes_lo[15], lanes_hi[15],
  1541. lanes_lo[16], lanes_hi[16], lanes_lo[17], lanes_hi[17], lanes_lo[18], lanes_hi[18], lanes_lo[19], lanes_hi[19], lanes_lo[20], lanes_hi[20],
  1542. lanes_lo[21], lanes_hi[21], lanes_lo[22], lanes_hi[22], lanes_lo[23], lanes_hi[23], lanes_lo[24], lanes_hi[24], lanes_lo[25], lanes_hi[25]
  1543. for round_idx = 1, 24 do
  -- column parities: Ck is the XOR of lanes k, k+5, k+10, k+15, k+20
  1544. local C1_lo = L01_lo ~ L06_lo ~ L11_lo ~ L16_lo ~ L21_lo
  1545. local C1_hi = L01_hi ~ L06_hi ~ L11_hi ~ L16_hi ~ L21_hi
  1546. local C2_lo = L02_lo ~ L07_lo ~ L12_lo ~ L17_lo ~ L22_lo
  1547. local C2_hi = L02_hi ~ L07_hi ~ L12_hi ~ L17_hi ~ L22_hi
  1548. local C3_lo = L03_lo ~ L08_lo ~ L13_lo ~ L18_lo ~ L23_lo
  1549. local C3_hi = L03_hi ~ L08_hi ~ L13_hi ~ L18_hi ~ L23_hi
  1550. local C4_lo = L04_lo ~ L09_lo ~ L14_lo ~ L19_lo ~ L24_lo
  1551. local C4_hi = L04_hi ~ L09_hi ~ L14_hi ~ L19_hi ~ L24_hi
  1552. local C5_lo = L05_lo ~ L10_lo ~ L15_lo ~ L20_lo ~ L25_lo
  1553. local C5_hi = L05_hi ~ L10_hi ~ L15_hi ~ L20_hi ~ L25_hi
  -- column 2 (lanes 2, 7, 12, 17, 22): D = C1 ~ (C3 rotated left by 1 as a lo/hi pair);
  -- T0..T4 are the five lanes of the column after XOR with D
  1554. local D_lo = C1_lo ~ C3_lo<<1 ~ C3_hi>>31
  1555. local D_hi = C1_hi ~ C3_hi<<1 ~ C3_lo>>31
  1556. local T0_lo = D_lo ~ L02_lo
  1557. local T0_hi = D_hi ~ L02_hi
  1558. local T1_lo = D_lo ~ L07_lo
  1559. local T1_hi = D_hi ~ L07_hi
  1560. local T2_lo = D_lo ~ L12_lo
  1561. local T2_hi = D_hi ~ L12_hi
  1562. local T3_lo = D_lo ~ L17_lo
  1563. local T3_hi = D_hi ~ L17_hi
  1564. local T4_lo = D_lo ~ L22_lo
  1565. local T4_hi = D_hi ~ L22_hi
  -- each T value is rotated by a fixed amount (lo/hi pair rotation) and stored into a lane slot
  -- that may differ from the one it came from, so the theta, rho and pi steps of Keccak are fused here
  1566. L02_lo = T1_lo>>20 ~ T1_hi<<12
  1567. L02_hi = T1_hi>>20 ~ T1_lo<<12
  1568. L07_lo = T3_lo>>19 ~ T3_hi<<13
  1569. L07_hi = T3_hi>>19 ~ T3_lo<<13
  1570. L12_lo = T0_lo<<1 ~ T0_hi>>31
  1571. L12_hi = T0_hi<<1 ~ T0_lo>>31
  1572. L17_lo = T2_lo<<10 ~ T2_hi>>22
  1573. L17_hi = T2_hi<<10 ~ T2_lo>>22
  1574. L22_lo = T4_lo<<2 ~ T4_hi>>30
  1575. L22_hi = T4_hi<<2 ~ T4_lo>>30
  -- column 3 (lanes 3, 8, 13, 18, 23): D = C2 ~ rotl(C4, 1)
  1576. D_lo = C2_lo ~ C4_lo<<1 ~ C4_hi>>31
  1577. D_hi = C2_hi ~ C4_hi<<1 ~ C4_lo>>31
  1578. T0_lo = D_lo ~ L03_lo
  1579. T0_hi = D_hi ~ L03_hi
  1580. T1_lo = D_lo ~ L08_lo
  1581. T1_hi = D_hi ~ L08_hi
  1582. T2_lo = D_lo ~ L13_lo
  1583. T2_hi = D_hi ~ L13_hi
  1584. T3_lo = D_lo ~ L18_lo
  1585. T3_hi = D_hi ~ L18_hi
  1586. T4_lo = D_lo ~ L23_lo
  1587. T4_hi = D_hi ~ L23_hi
  1588. L03_lo = T2_lo>>21 ~ T2_hi<<11
  1589. L03_hi = T2_hi>>21 ~ T2_lo<<11
  1590. L08_lo = T4_lo>>3 ~ T4_hi<<29
  1591. L08_hi = T4_hi>>3 ~ T4_lo<<29
  1592. L13_lo = T1_lo<<6 ~ T1_hi>>26
  1593. L13_hi = T1_hi<<6 ~ T1_lo>>26
  1594. L18_lo = T3_lo<<15 ~ T3_hi>>17
  1595. L18_hi = T3_hi<<15 ~ T3_lo>>17
  1596. L23_lo = T0_lo>>2 ~ T0_hi<<30
  1597. L23_hi = T0_hi>>2 ~ T0_lo<<30
  -- column 4 (lanes 4, 9, 14, 19, 24): D = C3 ~ rotl(C5, 1)
  1598. D_lo = C3_lo ~ C5_lo<<1 ~ C5_hi>>31
  1599. D_hi = C3_hi ~ C5_hi<<1 ~ C5_lo>>31
  1600. T0_lo = D_lo ~ L04_lo
  1601. T0_hi = D_hi ~ L04_hi
  1602. T1_lo = D_lo ~ L09_lo
  1603. T1_hi = D_hi ~ L09_hi
  1604. T2_lo = D_lo ~ L14_lo
  1605. T2_hi = D_hi ~ L14_hi
  1606. T3_lo = D_lo ~ L19_lo
  1607. T3_hi = D_hi ~ L19_hi
  1608. T4_lo = D_lo ~ L24_lo
  1609. T4_hi = D_hi ~ L24_hi
  1610. L04_lo = T3_lo<<21 ~ T3_hi>>11
  1611. L04_hi = T3_hi<<21 ~ T3_lo>>11
  1612. L09_lo = T0_lo<<28 ~ T0_hi>>4
  1613. L09_hi = T0_hi<<28 ~ T0_lo>>4
  1614. L14_lo = T2_lo<<25 ~ T2_hi>>7
  1615. L14_hi = T2_hi<<25 ~ T2_lo>>7
  1616. L19_lo = T4_lo>>8 ~ T4_hi<<24
  1617. L19_hi = T4_hi>>8 ~ T4_lo<<24
  1618. L24_lo = T1_lo>>9 ~ T1_hi<<23
  1619. L24_hi = T1_hi>>9 ~ T1_lo<<23
  -- column 5 (lanes 5, 10, 15, 20, 25): D = C4 ~ rotl(C1, 1)
  1620. D_lo = C4_lo ~ C1_lo<<1 ~ C1_hi>>31
  1621. D_hi = C4_hi ~ C1_hi<<1 ~ C1_lo>>31
  1622. T0_lo = D_lo ~ L05_lo
  1623. T0_hi = D_hi ~ L05_hi
  1624. T1_lo = D_lo ~ L10_lo
  1625. T1_hi = D_hi ~ L10_hi
  1626. T2_lo = D_lo ~ L15_lo
  1627. T2_hi = D_hi ~ L15_hi
  1628. T3_lo = D_lo ~ L20_lo
  1629. T3_hi = D_hi ~ L20_hi
  1630. T4_lo = D_lo ~ L25_lo
  1631. T4_hi = D_hi ~ L25_hi
  1632. L05_lo = T4_lo<<14 ~ T4_hi>>18
  1633. L05_hi = T4_hi<<14 ~ T4_lo>>18
  1634. L10_lo = T1_lo<<20 ~ T1_hi>>12
  1635. L10_hi = T1_hi<<20 ~ T1_lo>>12
  1636. L15_lo = T3_lo<<8 ~ T3_hi>>24
  1637. L15_hi = T3_hi<<8 ~ T3_lo>>24
  1638. L20_lo = T0_lo<<27 ~ T0_hi>>5
  1639. L20_hi = T0_hi<<27 ~ T0_lo>>5
  1640. L25_lo = T2_lo>>25 ~ T2_hi<<7
  1641. L25_hi = T2_hi>>25 ~ T2_lo<<7
  -- column 1 (lanes 1, 6, 11, 16, 21): D = C5 ~ rotl(C2, 1).
  -- No T0 here: lane 1 is only XORed with D (last two lines of this group), it is not rotated.
  1642. D_lo = C5_lo ~ C2_lo<<1 ~ C2_hi>>31
  1643. D_hi = C5_hi ~ C2_hi<<1 ~ C2_lo>>31
  1644. T1_lo = D_lo ~ L06_lo
  1645. T1_hi = D_hi ~ L06_hi
  1646. T2_lo = D_lo ~ L11_lo
  1647. T2_hi = D_hi ~ L11_hi
  1648. T3_lo = D_lo ~ L16_lo
  1649. T3_hi = D_hi ~ L16_hi
  1650. T4_lo = D_lo ~ L21_lo
  1651. T4_hi = D_hi ~ L21_hi
  1652. L06_lo = T2_lo<<3 ~ T2_hi>>29
  1653. L06_hi = T2_hi<<3 ~ T2_lo>>29
  1654. L11_lo = T4_lo<<18 ~ T4_hi>>14
  1655. L11_hi = T4_hi<<18 ~ T4_lo>>14
  1656. L16_lo = T1_lo>>28 ~ T1_hi<<4
  1657. L16_hi = T1_hi>>28 ~ T1_lo<<4
  1658. L21_lo = T3_lo>>23 ~ T3_hi<<9
  1659. L21_hi = T3_hi>>23 ~ T3_lo<<9
  1660. L01_lo = D_lo ~ L01_lo
  1661. L01_hi = D_hi ~ L01_hi
  -- nonlinear step (chi): every new lane is x ~ (~y & z) for three lanes of the same group of five.
  -- Each group is one multiple assignment, so all right-hand sides see the values from before the step.
  1662. L01_lo, L02_lo, L03_lo, L04_lo, L05_lo = L01_lo ~ ~L02_lo & L03_lo, L02_lo ~ ~L03_lo & L04_lo, L03_lo ~ ~L04_lo & L05_lo, L04_lo ~ ~L05_lo & L01_lo, L05_lo ~ ~L01_lo & L02_lo
  1663. L01_hi, L02_hi, L03_hi, L04_hi, L05_hi = L01_hi ~ ~L02_hi & L03_hi, L02_hi ~ ~L03_hi & L04_hi, L03_hi ~ ~L04_hi & L05_hi, L04_hi ~ ~L05_hi & L01_hi, L05_hi ~ ~L01_hi & L02_hi
  1664. L06_lo, L07_lo, L08_lo, L09_lo, L10_lo = L09_lo ~ ~L10_lo & L06_lo, L10_lo ~ ~L06_lo & L07_lo, L06_lo ~ ~L07_lo & L08_lo, L07_lo ~ ~L08_lo & L09_lo, L08_lo ~ ~L09_lo & L10_lo
  1665. L06_hi, L07_hi, L08_hi, L09_hi, L10_hi = L09_hi ~ ~L10_hi & L06_hi, L10_hi ~ ~L06_hi & L07_hi, L06_hi ~ ~L07_hi & L08_hi, L07_hi ~ ~L08_hi & L09_hi, L08_hi ~ ~L09_hi & L10_hi
  1666. L11_lo, L12_lo, L13_lo, L14_lo, L15_lo = L12_lo ~ ~L13_lo & L14_lo, L13_lo ~ ~L14_lo & L15_lo, L14_lo ~ ~L15_lo & L11_lo, L15_lo ~ ~L11_lo & L12_lo, L11_lo ~ ~L12_lo & L13_lo
  1667. L11_hi, L12_hi, L13_hi, L14_hi, L15_hi = L12_hi ~ ~L13_hi & L14_hi, L13_hi ~ ~L14_hi & L15_hi, L14_hi ~ ~L15_hi & L11_hi, L15_hi ~ ~L11_hi & L12_hi, L11_hi ~ ~L12_hi & L13_hi
  1668. L16_lo, L17_lo, L18_lo, L19_lo, L20_lo = L20_lo ~ ~L16_lo & L17_lo, L16_lo ~ ~L17_lo & L18_lo, L17_lo ~ ~L18_lo & L19_lo, L18_lo ~ ~L19_lo & L20_lo, L19_lo ~ ~L20_lo & L16_lo
  1669. L16_hi, L17_hi, L18_hi, L19_hi, L20_hi = L20_hi ~ ~L16_hi & L17_hi, L16_hi ~ ~L17_hi & L18_hi, L17_hi ~ ~L18_hi & L19_hi, L18_hi ~ ~L19_hi & L20_hi, L19_hi ~ ~L20_hi & L16_hi
  1670. L21_lo, L22_lo, L23_lo, L24_lo, L25_lo = L23_lo ~ ~L24_lo & L25_lo, L24_lo ~ ~L25_lo & L21_lo, L25_lo ~ ~L21_lo & L22_lo, L21_lo ~ ~L22_lo & L23_lo, L22_lo ~ ~L23_lo & L24_lo
  1671. L21_hi, L22_hi, L23_hi, L24_hi, L25_hi = L23_hi ~ ~L24_hi & L25_hi, L24_hi ~ ~L25_hi & L21_hi, L25_hi ~ ~L21_hi & L22_hi, L21_hi ~ ~L22_hi & L23_hi, L22_hi ~ ~L23_hi & L24_hi
  -- XOR this round's constant into lane 1 (iota)
  1672. L01_lo = L01_lo ~ RC_lo[round_idx]
  1673. L01_hi = L01_hi ~ RC_hi[round_idx]
  1674. end
  -- write the permuted state back into the caller's tables
  1675. lanes_lo[1] = L01_lo
  1676. lanes_hi[1] = L01_hi
  1677. lanes_lo[2] = L02_lo
  1678. lanes_hi[2] = L02_hi
  1679. lanes_lo[3] = L03_lo
  1680. lanes_hi[3] = L03_hi
  1681. lanes_lo[4] = L04_lo
  1682. lanes_hi[4] = L04_hi
  1683. lanes_lo[5] = L05_lo
  1684. lanes_hi[5] = L05_hi
  1685. lanes_lo[6] = L06_lo
  1686. lanes_hi[6] = L06_hi
  1687. lanes_lo[7] = L07_lo
  1688. lanes_hi[7] = L07_hi
  1689. lanes_lo[8] = L08_lo
  1690. lanes_hi[8] = L08_hi
  1691. lanes_lo[9] = L09_lo
  1692. lanes_hi[9] = L09_hi
  1693. lanes_lo[10] = L10_lo
  1694. lanes_hi[10] = L10_hi
  1695. lanes_lo[11] = L11_lo
  1696. lanes_hi[11] = L11_hi
  1697. lanes_lo[12] = L12_lo
  1698. lanes_hi[12] = L12_hi
  1699. lanes_lo[13] = L13_lo
  1700. lanes_hi[13] = L13_hi
  1701. lanes_lo[14] = L14_lo
  1702. lanes_hi[14] = L14_hi
  1703. lanes_lo[15] = L15_lo
  1704. lanes_hi[15] = L15_hi
  1705. lanes_lo[16] = L16_lo
  1706. lanes_hi[16] = L16_hi
  1707. lanes_lo[17] = L17_lo
  1708. lanes_hi[17] = L17_hi
  1709. lanes_lo[18] = L18_lo
  1710. lanes_hi[18] = L18_hi
  1711. lanes_lo[19] = L19_lo
  1712. lanes_hi[19] = L19_hi
  1713. lanes_lo[20] = L20_lo
  1714. lanes_hi[20] = L20_hi
  1715. lanes_lo[21] = L21_lo
  1716. lanes_hi[21] = L21_hi
  1717. lanes_lo[22] = L22_lo
  1718. lanes_hi[22] = L22_hi
  1719. lanes_lo[23] = L23_lo
  1720. lanes_hi[23] = L23_hi
  1721. lanes_lo[24] = L24_lo
  1722. lanes_hi[24] = L24_hi
  1723. lanes_lo[25] = L25_lo
  1724. lanes_hi[25] = L25_hi
  1725. end
  1726. end
  1727. return XOR32A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed
  1728. ]](md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo, sha3_RC_hi)
  1729. end
  1730. if branch == "LIB32" or branch == "EMUL" then
  1731. -- implementation for Lua 5.1/5.2 (with or without bitwise library available)
  function sha256_feed_64(H, str, offs, size)
     -- Feeds whole 64-byte blocks of "str" into the hash state H[1..8], which is updated in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     -- The bytes consumed are str[offs+1 .. offs+size].
     local schedule, round_K = common_W, sha2_K_hi
     local s1, s2, s3, s4, s5, s6, s7, s8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
     for block_offs = offs, offs + size - 1, 64 do
        -- words 1..16: the block itself, four bytes per word, most significant byte first
        local cursor = block_offs
        for t = 1, 16 do
           local b1, b2, b3, b4 = byte(str, cursor + 1, cursor + 4)
           schedule[t] = ((b1 * 256 + b2) * 256 + b3) * 256 + b4
           cursor = cursor + 4
        end
        -- words 17..64: schedule expansion; the sums are stored as computed, without reduction
        for t = 17, 64 do
           local w15, w2 = schedule[t-15], schedule[t-2]
           schedule[t] = XOR(ROR(w15, 7), ROL(w15, 14), SHR(w15, 3)) + XOR(ROL(w2, 15), ROL(w2, 13), SHR(w2, 10)) + schedule[t-7] + schedule[t-16]
        end
        local a, b, c, d, e, f, g, h = s1, s2, s3, s4, s5, s6, s7, s8
        for t = 1, 64 do
           local z = XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + AND(e, f) + AND(-1-e, g) + h + round_K[t] + schedule[t]
           -- one multiple assignment: every right-hand side uses the values from before this round
           a, b, c, d, e, f, g, h =
              z + AND(c, b) + AND(a, XOR(c, b)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)),
              a, b, c, z + d, e, f, g
        end
        -- add the working variables to the state and reduce each word modulo 2^32
        s1 = (a + s1) % 4294967296
        s2 = (b + s2) % 4294967296
        s3 = (c + s3) % 4294967296
        s4 = (d + s4) % 4294967296
        s5 = (e + s5) % 4294967296
        s6 = (f + s6) % 4294967296
        s7 = (g + s7) % 4294967296
        s8 = (h + s8) % 4294967296
     end
     H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = s1, s2, s3, s4, s5, s6, s7, s8
  end
  1763. function sha512_feed_128(H_lo, H_hi, str, offs, size)
  1764. -- offs >= 0, size >= 0, size is multiple of 128
  1765. -- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k]
  -- Feeds whole 128-byte blocks of str (bytes offs+1 .. offs+size) into the hash state.
  -- Every 64-bit quantity is kept as two numbers, a low and a high 32-bit half:
  -- H_lo[i] / H_hi[i] (i = 1..8) hold the state words and are updated in place.
  1766. local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi
  1767. local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
  1768. local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
  1769. for pos = offs, offs + size - 1, 128 do
  -- read the block as 32 words of 4 bytes each, most significant byte first;
  -- pos is advanced here only to walk through the block
  -- (NOTE(review): this assumes that assigning to the for variable does not disturb the outer iteration)
  1770. for j = 1, 16*2 do
  1771. pos = pos + 4
  1772. local a, b, c, d = byte(str, pos - 3, pos)
  1773. W[j] = ((a * 256 + b) * 256 + c) * 256 + d
  1774. end
  -- expand the schedule to 80 two-slot words; jj is the slot of the low half being produced.
  -- tmp1 is the unreduced sum for the low half, tmp2 its value modulo 2^32, and
  -- (tmp1 - tmp2) / 4294967296 is the carry added into the high half.
  -- The high half W[jj-1] is stored without a modulo reduction.
  1775. for jj = 17*2, 80*2, 2 do
  1776. local a_lo, a_hi, b_lo, b_hi = W[jj-30], W[jj-31], W[jj-4], W[jj-5]
  1777. local tmp1 = XOR(SHR(a_lo, 1) + SHL(a_hi, 31), SHR(a_lo, 8) + SHL(a_hi, 24), SHR(a_lo, 7) + SHL(a_hi, 25)) % 4294967296 + XOR(SHR(b_lo, 19) + SHL(b_hi, 13), SHL(b_lo, 3) + SHR(b_hi, 29), SHR(b_lo, 6) + SHL(b_hi, 26)) % 4294967296 + W[jj-14] + W[jj-32]
  1778. local tmp2 = tmp1 % 4294967296
  1779. W[jj-1] = XOR(SHR(a_hi, 1) + SHL(a_lo, 31), SHR(a_hi, 8) + SHL(a_lo, 24), SHR(a_hi, 7)) + XOR(SHR(b_hi, 19) + SHL(b_lo, 13), SHL(b_hi, 3) + SHR(b_lo, 29), SHR(b_hi, 6)) + W[jj-15] + W[jj-33] + (tmp1 - tmp2) / 4294967296
  1780. W[jj] = tmp2
  1781. end
  1782. local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
  1783. local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  -- 80 rounds. z_lo / z_hi is the per-round temporary; low halves are reduced modulo 2^32 and the
  -- carry (sum - sum % 2^32) / 2^32 is added to the matching high half.
  -- The high halves e_hi and a_hi are not reduced inside this loop.
  1784. for j = 1, 80 do
  1785. local jj = 2*j
  1786. local tmp1 = XOR(SHR(e_lo, 14) + SHL(e_hi, 18), SHR(e_lo, 18) + SHL(e_hi, 14), SHL(e_lo, 23) + SHR(e_hi, 9)) % 4294967296 + (AND(e_lo, f_lo) + AND(-1-e_lo, g_lo)) % 4294967296 + h_lo + K_lo[j] + W[jj]
  1787. local z_lo = tmp1 % 4294967296
  1788. local z_hi = XOR(SHR(e_hi, 14) + SHL(e_lo, 18), SHR(e_hi, 18) + SHL(e_lo, 14), SHL(e_hi, 23) + SHR(e_lo, 9)) + AND(e_hi, f_hi) + AND(-1-e_hi, g_hi) + h_hi + K_hi[j] + W[jj-1] + (tmp1 - z_lo) / 4294967296
  1789. h_lo = g_lo
  1790. h_hi = g_hi
  1791. g_lo = f_lo
  1792. g_hi = f_hi
  1793. f_lo = e_lo
  1794. f_hi = e_hi
  1795. tmp1 = z_lo + d_lo
  1796. e_lo = tmp1 % 4294967296
  1797. e_hi = z_hi + d_hi + (tmp1 - e_lo) / 4294967296
  1798. d_lo = c_lo
  1799. d_hi = c_hi
  1800. c_lo = b_lo
  1801. c_hi = b_hi
  1802. b_lo = a_lo
  1803. b_hi = a_hi
  -- from here on b holds the previous a, c the previous b and d the previous c,
  -- which is why the new a is computed from b, c and d
  1804. tmp1 = z_lo + (AND(d_lo, c_lo) + AND(b_lo, XOR(d_lo, c_lo))) % 4294967296 + XOR(SHR(b_lo, 28) + SHL(b_hi, 4), SHL(b_lo, 30) + SHR(b_hi, 2), SHL(b_lo, 25) + SHR(b_hi, 7)) % 4294967296
  1805. a_lo = tmp1 % 4294967296
  1806. a_hi = z_hi + (AND(d_hi, c_hi) + AND(b_hi, XOR(d_hi, c_hi))) + XOR(SHR(b_hi, 28) + SHL(b_lo, 4), SHL(b_hi, 30) + SHR(b_lo, 2), SHL(b_hi, 25) + SHR(b_lo, 7)) + (tmp1 - a_lo) / 4294967296
  1807. end
  -- add the working variables to the state, word by word: a_lo is reused as scratch for each
  -- low-half sum, the carry goes into the high half, and the high half is reduced modulo 2^32
  1808. a_lo = h1_lo + a_lo
  1809. h1_lo = a_lo % 4294967296
  1810. h1_hi = (h1_hi + a_hi + (a_lo - h1_lo) / 4294967296) % 4294967296
  1811. a_lo = h2_lo + b_lo
  1812. h2_lo = a_lo % 4294967296
  1813. h2_hi = (h2_hi + b_hi + (a_lo - h2_lo) / 4294967296) % 4294967296
  1814. a_lo = h3_lo + c_lo
  1815. h3_lo = a_lo % 4294967296
  1816. h3_hi = (h3_hi + c_hi + (a_lo - h3_lo) / 4294967296) % 4294967296
  1817. a_lo = h4_lo + d_lo
  1818. h4_lo = a_lo % 4294967296
  1819. h4_hi = (h4_hi + d_hi + (a_lo - h4_lo) / 4294967296) % 4294967296
  1820. a_lo = h5_lo + e_lo
  1821. h5_lo = a_lo % 4294967296
  1822. h5_hi = (h5_hi + e_hi + (a_lo - h5_lo) / 4294967296) % 4294967296
  1823. a_lo = h6_lo + f_lo
  1824. h6_lo = a_lo % 4294967296
  1825. h6_hi = (h6_hi + f_hi + (a_lo - h6_lo) / 4294967296) % 4294967296
  1826. a_lo = h7_lo + g_lo
  1827. h7_lo = a_lo % 4294967296
  1828. h7_hi = (h7_hi + g_hi + (a_lo - h7_lo) / 4294967296) % 4294967296
  1829. a_lo = h8_lo + h_lo
  1830. h8_lo = a_lo % 4294967296
  1831. h8_hi = (h8_hi + h_hi + (a_lo - h8_lo) / 4294967296) % 4294967296
  1832. end
  1833. H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
  1834. H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  1835. end
  function md5_feed_64(H, str, offs, size)
     -- Feeds whole 64-byte blocks of "str" into the MD5 state H[1..4], which is updated in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     -- The bytes consumed are str[offs+1 .. offs+size].
     local words, round_K, next_shift = common_W, md5_K, md5_next_shift
     local s1, s2, s3, s4 = H[1], H[2], H[3], H[4]
     for block_offs = offs, offs + size - 1, 64 do
        -- sixteen words of four bytes each, least significant byte first
        local cursor = block_offs
        for t = 1, 16 do
           local b1, b2, b3, b4 = byte(str, cursor + 1, cursor + 4)
           words[t] = ((b4 * 256 + b3) * 256 + b2) * 256 + b1
           cursor = cursor + 4
        end
        local a, b, c, d = s1, s2, s3, s4
        -- "rot" is the count handed to ROR; it starts at 32-n for each group of 16 rounds and the
        -- next count is looked up in the next_shift table.
        -- In every round the right-hand sides of the multiple assignment use the pre-round values.
        local rot = 32-7
        for t = 1, 16 do
           a, b, c, d = d, ROR(AND(b, c) + AND(-1-b, d) + a + round_K[t] + words[t], rot) + b, b, c
           rot = next_shift[rot]
        end
        rot = 32-5
        for t = 17, 32 do
           a, b, c, d = d, ROR(AND(d, b) + AND(-1-d, c) + a + round_K[t] + words[(5*t-4) % 16 + 1], rot) + b, b, c
           rot = next_shift[rot]
        end
        rot = 32-4
        for t = 33, 48 do
           a, b, c, d = d, ROR(XOR(XOR(b, c), d) + a + round_K[t] + words[(3*t+2) % 16 + 1], rot) + b, b, c
           rot = next_shift[rot]
        end
        rot = 32-6
        for t = 49, 64 do
           a, b, c, d = d, ROR(XOR(c, OR(b, -1-d)) + a + round_K[t] + words[(t*7-7) % 16 + 1], rot) + b, b, c
           rot = next_shift[rot]
        end
        -- add the working variables to the state and reduce each word modulo 2^32
        s1 = (a + s1) % 4294967296
        s2 = (b + s2) % 4294967296
        s3 = (c + s3) % 4294967296
        s4 = (d + s4) % 4294967296
     end
     H[1], H[2], H[3], H[4] = s1, s2, s3, s4
  end
  function sha1_feed_64(H, str, offs, size)
     -- Feeds whole 64-byte blocks of "str" into the SHA-1 state H[1..5], which is updated in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     -- The bytes consumed are str[offs+1 .. offs+size].
     local schedule = common_W
     local s1, s2, s3, s4, s5 = H[1], H[2], H[3], H[4], H[5]
     for block_offs = offs, offs + size - 1, 64 do
        -- words 1..16: the block itself, four bytes per word, most significant byte first
        local cursor = block_offs
        for t = 1, 16 do
           local b1, b2, b3, b4 = byte(str, cursor + 1, cursor + 4)
           schedule[t] = ((b1 * 256 + b2) * 256 + b3) * 256 + b4
           cursor = cursor + 4
        end
        -- words 17..80: XOR of four earlier words, passed through ROL(..., 1)
        for t = 17, 80 do
           schedule[t] = ROL(XOR(schedule[t-3], schedule[t-8], schedule[t-14], schedule[t-16]), 1)
        end
        local a, b, c, d, e = s1, s2, s3, s4, s5
        -- In every round the right-hand sides of the multiple assignment use the pre-round values.
        for t = 1, 20 do
           local z = ROL(a, 5) + AND(b, c) + AND(-1-b, d) + 0x5A827999 + schedule[t] + e   -- constant = floor(2^30 * sqrt(2))
           a, b, c, d, e = z, a, ROR(b, 2), c, d
        end
        for t = 21, 40 do
           local z = ROL(a, 5) + XOR(b, c, d) + 0x6ED9EBA1 + schedule[t] + e   -- 2^30 * sqrt(3)
           a, b, c, d, e = z, a, ROR(b, 2), c, d
        end
        for t = 41, 60 do
           local z = ROL(a, 5) + AND(d, c) + AND(b, XOR(d, c)) + 0x8F1BBCDC + schedule[t] + e   -- 2^30 * sqrt(5)
           a, b, c, d, e = z, a, ROR(b, 2), c, d
        end
        for t = 61, 80 do
           local z = ROL(a, 5) + XOR(b, c, d) + 0xCA62C1D6 + schedule[t] + e   -- 2^30 * sqrt(10)
           a, b, c, d, e = z, a, ROR(b, 2), c, d
        end
        -- add the working variables to the state and reduce each word modulo 2^32
        s1 = (a + s1) % 4294967296
        s2 = (b + s2) % 4294967296
        s3 = (c + s3) % 4294967296
        s4 = (d + s4) % 4294967296
        s5 = (e + s5) % 4294967296
     end
     H[1], H[2], H[3], H[4], H[5] = s1, s2, s3, s4, s5
  end
  1944. function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)
  1945. -- This is an example of a Lua function having 79 local variables :-)
  1946. -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
  -- Absorbs bytes offs+1 .. offs+size of str into the Keccak state, one block of
  -- block_size_in_bytes at a time: the block is XORed into the first lanes, then the
  -- 24-round permutation is applied to all 25 lanes.
  -- The state is 25 lanes, each stored as two 32-bit halves: lanes_lo[i] and lanes_hi[i], i = 1..25.
  -- Both tables are updated in place; nothing is returned.
  -- Shifts are done with plain arithmetic here:
  --   (x % 2^32 - x % 2^k) / 2^k  drops the low k bits of x taken modulo 2^32 (a right shift by k),
  --   x * 2^m                     is the matching left shift by m.
  -- Left-shifted terms carry an extra "% 2^32" only where the factor is 2^20 or larger
  -- (presumably to keep intermediate values exactly representable as doubles - TODO confirm);
  -- the other results may exceed 32 bits and are passed to XOR / AND as they are.
  1947. local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi
  1948. local qwords_qty = block_size_in_bytes / 8
  1949. for pos = offs, offs + size - 1, block_size_in_bytes do
  -- XOR the block into the state: each lane takes 8 bytes, the first 4 form the low half and the
  -- next 4 the high half, least significant byte first in both.
  -- pos is advanced here only to walk through the block
  -- (NOTE(review): this assumes that assigning to the for variable does not disturb the outer iteration).
  1950. for j = 1, qwords_qty do
  1951. local a, b, c, d = byte(str, pos + 1, pos + 4)
  1952. lanes_lo[j] = XOR(lanes_lo[j], ((d * 256 + c) * 256 + b) * 256 + a)
  1953. pos = pos + 8
  1954. a, b, c, d = byte(str, pos - 3, pos)
  1955. lanes_hi[j] = XOR(lanes_hi[j], ((d * 256 + c) * 256 + b) * 256 + a)
  1956. end
  -- keep the whole state in locals while the permutation runs
  1957. local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi,
  1958. L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi,
  1959. L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi =
  1960. lanes_lo[1], lanes_hi[1], lanes_lo[2], lanes_hi[2], lanes_lo[3], lanes_hi[3], lanes_lo[4], lanes_hi[4], lanes_lo[5], lanes_hi[5],
  1961. lanes_lo[6], lanes_hi[6], lanes_lo[7], lanes_hi[7], lanes_lo[8], lanes_hi[8], lanes_lo[9], lanes_hi[9], lanes_lo[10], lanes_hi[10],
  1962. lanes_lo[11], lanes_hi[11], lanes_lo[12], lanes_hi[12], lanes_lo[13], lanes_hi[13], lanes_lo[14], lanes_hi[14], lanes_lo[15], lanes_hi[15],
  1963. lanes_lo[16], lanes_hi[16], lanes_lo[17], lanes_hi[17], lanes_lo[18], lanes_hi[18], lanes_lo[19], lanes_hi[19], lanes_lo[20], lanes_hi[20],
  1964. lanes_lo[21], lanes_hi[21], lanes_lo[22], lanes_hi[22], lanes_lo[23], lanes_hi[23], lanes_lo[24], lanes_hi[24], lanes_lo[25], lanes_hi[25]
  1965. for round_idx = 1, 24 do
  -- column parities: Ck is the XOR of lanes k, k+5, k+10, k+15, k+20
  1966. local C1_lo = XOR(L01_lo, L06_lo, L11_lo, L16_lo, L21_lo)
  1967. local C1_hi = XOR(L01_hi, L06_hi, L11_hi, L16_hi, L21_hi)
  1968. local C2_lo = XOR(L02_lo, L07_lo, L12_lo, L17_lo, L22_lo)
  1969. local C2_hi = XOR(L02_hi, L07_hi, L12_hi, L17_hi, L22_hi)
  1970. local C3_lo = XOR(L03_lo, L08_lo, L13_lo, L18_lo, L23_lo)
  1971. local C3_hi = XOR(L03_hi, L08_hi, L13_hi, L18_hi, L23_hi)
  1972. local C4_lo = XOR(L04_lo, L09_lo, L14_lo, L19_lo, L24_lo)
  1973. local C4_hi = XOR(L04_hi, L09_hi, L14_hi, L19_hi, L24_hi)
  1974. local C5_lo = XOR(L05_lo, L10_lo, L15_lo, L20_lo, L25_lo)
  1975. local C5_hi = XOR(L05_hi, L10_hi, L15_hi, L20_hi, L25_hi)
  -- column 2 (lanes 2, 7, 12, 17, 22): D = C1 XOR (C3 rotated left by 1 as a lo/hi pair);
  -- T0..T4 are the five lanes of the column after XOR with D
  1976. local D_lo = XOR(C1_lo, C3_lo * 2 + (C3_hi % 2^32 - C3_hi % 2^31) / 2^31)
  1977. local D_hi = XOR(C1_hi, C3_hi * 2 + (C3_lo % 2^32 - C3_lo % 2^31) / 2^31)
  1978. local T0_lo = XOR(D_lo, L02_lo)
  1979. local T0_hi = XOR(D_hi, L02_hi)
  1980. local T1_lo = XOR(D_lo, L07_lo)
  1981. local T1_hi = XOR(D_hi, L07_hi)
  1982. local T2_lo = XOR(D_lo, L12_lo)
  1983. local T2_hi = XOR(D_hi, L12_hi)
  1984. local T3_lo = XOR(D_lo, L17_lo)
  1985. local T3_hi = XOR(D_hi, L17_hi)
  1986. local T4_lo = XOR(D_lo, L22_lo)
  1987. local T4_hi = XOR(D_hi, L22_hi)
  -- each T value is rotated by a fixed amount (lo/hi pair rotation) and stored into a lane slot
  -- that may differ from the one it came from, so the theta, rho and pi steps of Keccak are fused here
  1988. L02_lo = (T1_lo % 2^32 - T1_lo % 2^20) / 2^20 + T1_hi * 2^12
  1989. L02_hi = (T1_hi % 2^32 - T1_hi % 2^20) / 2^20 + T1_lo * 2^12
  1990. L07_lo = (T3_lo % 2^32 - T3_lo % 2^19) / 2^19 + T3_hi * 2^13
  1991. L07_hi = (T3_hi % 2^32 - T3_hi % 2^19) / 2^19 + T3_lo * 2^13
  1992. L12_lo = T0_lo * 2 + (T0_hi % 2^32 - T0_hi % 2^31) / 2^31
  1993. L12_hi = T0_hi * 2 + (T0_lo % 2^32 - T0_lo % 2^31) / 2^31
  1994. L17_lo = T2_lo * 2^10 + (T2_hi % 2^32 - T2_hi % 2^22) / 2^22
  1995. L17_hi = T2_hi * 2^10 + (T2_lo % 2^32 - T2_lo % 2^22) / 2^22
  1996. L22_lo = T4_lo * 2^2 + (T4_hi % 2^32 - T4_hi % 2^30) / 2^30
  1997. L22_hi = T4_hi * 2^2 + (T4_lo % 2^32 - T4_lo % 2^30) / 2^30
  -- column 3 (lanes 3, 8, 13, 18, 23): D = C2 XOR rotl(C4, 1)
  1998. D_lo = XOR(C2_lo, C4_lo * 2 + (C4_hi % 2^32 - C4_hi % 2^31) / 2^31)
  1999. D_hi = XOR(C2_hi, C4_hi * 2 + (C4_lo % 2^32 - C4_lo % 2^31) / 2^31)
  2000. T0_lo = XOR(D_lo, L03_lo)
  2001. T0_hi = XOR(D_hi, L03_hi)
  2002. T1_lo = XOR(D_lo, L08_lo)
  2003. T1_hi = XOR(D_hi, L08_hi)
  2004. T2_lo = XOR(D_lo, L13_lo)
  2005. T2_hi = XOR(D_hi, L13_hi)
  2006. T3_lo = XOR(D_lo, L18_lo)
  2007. T3_hi = XOR(D_hi, L18_hi)
  2008. T4_lo = XOR(D_lo, L23_lo)
  2009. T4_hi = XOR(D_hi, L23_hi)
  2010. L03_lo = (T2_lo % 2^32 - T2_lo % 2^21) / 2^21 + T2_hi * 2^11
  2011. L03_hi = (T2_hi % 2^32 - T2_hi % 2^21) / 2^21 + T2_lo * 2^11
  2012. L08_lo = (T4_lo % 2^32 - T4_lo % 2^3) / 2^3 + T4_hi * 2^29 % 2^32
  2013. L08_hi = (T4_hi % 2^32 - T4_hi % 2^3) / 2^3 + T4_lo * 2^29 % 2^32
  2014. L13_lo = T1_lo * 2^6 + (T1_hi % 2^32 - T1_hi % 2^26) / 2^26
  2015. L13_hi = T1_hi * 2^6 + (T1_lo % 2^32 - T1_lo % 2^26) / 2^26
  2016. L18_lo = T3_lo * 2^15 + (T3_hi % 2^32 - T3_hi % 2^17) / 2^17
  2017. L18_hi = T3_hi * 2^15 + (T3_lo % 2^32 - T3_lo % 2^17) / 2^17
  2018. L23_lo = (T0_lo % 2^32 - T0_lo % 2^2) / 2^2 + T0_hi * 2^30 % 2^32
  2019. L23_hi = (T0_hi % 2^32 - T0_hi % 2^2) / 2^2 + T0_lo * 2^30 % 2^32
  -- column 4 (lanes 4, 9, 14, 19, 24): D = C3 XOR rotl(C5, 1)
  2020. D_lo = XOR(C3_lo, C5_lo * 2 + (C5_hi % 2^32 - C5_hi % 2^31) / 2^31)
  2021. D_hi = XOR(C3_hi, C5_hi * 2 + (C5_lo % 2^32 - C5_lo % 2^31) / 2^31)
  2022. T0_lo = XOR(D_lo, L04_lo)
  2023. T0_hi = XOR(D_hi, L04_hi)
  2024. T1_lo = XOR(D_lo, L09_lo)
  2025. T1_hi = XOR(D_hi, L09_hi)
  2026. T2_lo = XOR(D_lo, L14_lo)
  2027. T2_hi = XOR(D_hi, L14_hi)
  2028. T3_lo = XOR(D_lo, L19_lo)
  2029. T3_hi = XOR(D_hi, L19_hi)
  2030. T4_lo = XOR(D_lo, L24_lo)
  2031. T4_hi = XOR(D_hi, L24_hi)
  2032. L04_lo = T3_lo * 2^21 % 2^32 + (T3_hi % 2^32 - T3_hi % 2^11) / 2^11
  2033. L04_hi = T3_hi * 2^21 % 2^32 + (T3_lo % 2^32 - T3_lo % 2^11) / 2^11
  2034. L09_lo = T0_lo * 2^28 % 2^32 + (T0_hi % 2^32 - T0_hi % 2^4) / 2^4
  2035. L09_hi = T0_hi * 2^28 % 2^32 + (T0_lo % 2^32 - T0_lo % 2^4) / 2^4
  2036. L14_lo = T2_lo * 2^25 % 2^32 + (T2_hi % 2^32 - T2_hi % 2^7) / 2^7
  2037. L14_hi = T2_hi * 2^25 % 2^32 + (T2_lo % 2^32 - T2_lo % 2^7) / 2^7
  2038. L19_lo = (T4_lo % 2^32 - T4_lo % 2^8) / 2^8 + T4_hi * 2^24 % 2^32
  2039. L19_hi = (T4_hi % 2^32 - T4_hi % 2^8) / 2^8 + T4_lo * 2^24 % 2^32
  2040. L24_lo = (T1_lo % 2^32 - T1_lo % 2^9) / 2^9 + T1_hi * 2^23 % 2^32
  2041. L24_hi = (T1_hi % 2^32 - T1_hi % 2^9) / 2^9 + T1_lo * 2^23 % 2^32
  -- column 5 (lanes 5, 10, 15, 20, 25): D = C4 XOR rotl(C1, 1)
  2042. D_lo = XOR(C4_lo, C1_lo * 2 + (C1_hi % 2^32 - C1_hi % 2^31) / 2^31)
  2043. D_hi = XOR(C4_hi, C1_hi * 2 + (C1_lo % 2^32 - C1_lo % 2^31) / 2^31)
  2044. T0_lo = XOR(D_lo, L05_lo)
  2045. T0_hi = XOR(D_hi, L05_hi)
  2046. T1_lo = XOR(D_lo, L10_lo)
  2047. T1_hi = XOR(D_hi, L10_hi)
  2048. T2_lo = XOR(D_lo, L15_lo)
  2049. T2_hi = XOR(D_hi, L15_hi)
  2050. T3_lo = XOR(D_lo, L20_lo)
  2051. T3_hi = XOR(D_hi, L20_hi)
  2052. T4_lo = XOR(D_lo, L25_lo)
  2053. T4_hi = XOR(D_hi, L25_hi)
  2054. L05_lo = T4_lo * 2^14 + (T4_hi % 2^32 - T4_hi % 2^18) / 2^18
  2055. L05_hi = T4_hi * 2^14 + (T4_lo % 2^32 - T4_lo % 2^18) / 2^18
  2056. L10_lo = T1_lo * 2^20 % 2^32 + (T1_hi % 2^32 - T1_hi % 2^12) / 2^12
  2057. L10_hi = T1_hi * 2^20 % 2^32 + (T1_lo % 2^32 - T1_lo % 2^12) / 2^12
  2058. L15_lo = T3_lo * 2^8 + (T3_hi % 2^32 - T3_hi % 2^24) / 2^24
  2059. L15_hi = T3_hi * 2^8 + (T3_lo % 2^32 - T3_lo % 2^24) / 2^24
  2060. L20_lo = T0_lo * 2^27 % 2^32 + (T0_hi % 2^32 - T0_hi % 2^5) / 2^5
  2061. L20_hi = T0_hi * 2^27 % 2^32 + (T0_lo % 2^32 - T0_lo % 2^5) / 2^5
  2062. L25_lo = (T2_lo % 2^32 - T2_lo % 2^25) / 2^25 + T2_hi * 2^7
  2063. L25_hi = (T2_hi % 2^32 - T2_hi % 2^25) / 2^25 + T2_lo * 2^7
  -- column 1 (lanes 1, 6, 11, 16, 21): D = C5 XOR rotl(C2, 1).
  -- No T0 here: lane 1 is only XORed with D (last two lines of this group), it is not rotated.
  2064. D_lo = XOR(C5_lo, C2_lo * 2 + (C2_hi % 2^32 - C2_hi % 2^31) / 2^31)
  2065. D_hi = XOR(C5_hi, C2_hi * 2 + (C2_lo % 2^32 - C2_lo % 2^31) / 2^31)
  2066. T1_lo = XOR(D_lo, L06_lo)
  2067. T1_hi = XOR(D_hi, L06_hi)
  2068. T2_lo = XOR(D_lo, L11_lo)
  2069. T2_hi = XOR(D_hi, L11_hi)
  2070. T3_lo = XOR(D_lo, L16_lo)
  2071. T3_hi = XOR(D_hi, L16_hi)
  2072. T4_lo = XOR(D_lo, L21_lo)
  2073. T4_hi = XOR(D_hi, L21_hi)
  2074. L06_lo = T2_lo * 2^3 + (T2_hi % 2^32 - T2_hi % 2^29) / 2^29
  2075. L06_hi = T2_hi * 2^3 + (T2_lo % 2^32 - T2_lo % 2^29) / 2^29
  2076. L11_lo = T4_lo * 2^18 + (T4_hi % 2^32 - T4_hi % 2^14) / 2^14
  2077. L11_hi = T4_hi * 2^18 + (T4_lo % 2^32 - T4_lo % 2^14) / 2^14
  2078. L16_lo = (T1_lo % 2^32 - T1_lo % 2^28) / 2^28 + T1_hi * 2^4
  2079. L16_hi = (T1_hi % 2^32 - T1_hi % 2^28) / 2^28 + T1_lo * 2^4
  2080. L21_lo = (T3_lo % 2^32 - T3_lo % 2^23) / 2^23 + T3_hi * 2^9
  2081. L21_hi = (T3_hi % 2^32 - T3_hi % 2^23) / 2^23 + T3_lo * 2^9
  2082. L01_lo = XOR(D_lo, L01_lo)
  2083. L01_hi = XOR(D_hi, L01_hi)
  -- nonlinear step (chi): every new lane is XOR(x, AND(-1-y, z)) for three lanes of the same group
  -- of five; -1-y stands in for the bitwise complement of y.
  -- Each group is one multiple assignment, so all right-hand sides see the values from before the step.
  2084. L01_lo, L02_lo, L03_lo, L04_lo, L05_lo = XOR(L01_lo, AND(-1-L02_lo, L03_lo)), XOR(L02_lo, AND(-1-L03_lo, L04_lo)), XOR(L03_lo, AND(-1-L04_lo, L05_lo)), XOR(L04_lo, AND(-1-L05_lo, L01_lo)), XOR(L05_lo, AND(-1-L01_lo, L02_lo))
  2085. L01_hi, L02_hi, L03_hi, L04_hi, L05_hi = XOR(L01_hi, AND(-1-L02_hi, L03_hi)), XOR(L02_hi, AND(-1-L03_hi, L04_hi)), XOR(L03_hi, AND(-1-L04_hi, L05_hi)), XOR(L04_hi, AND(-1-L05_hi, L01_hi)), XOR(L05_hi, AND(-1-L01_hi, L02_hi))
  2086. L06_lo, L07_lo, L08_lo, L09_lo, L10_lo = XOR(L09_lo, AND(-1-L10_lo, L06_lo)), XOR(L10_lo, AND(-1-L06_lo, L07_lo)), XOR(L06_lo, AND(-1-L07_lo, L08_lo)), XOR(L07_lo, AND(-1-L08_lo, L09_lo)), XOR(L08_lo, AND(-1-L09_lo, L10_lo))
  2087. L06_hi, L07_hi, L08_hi, L09_hi, L10_hi = XOR(L09_hi, AND(-1-L10_hi, L06_hi)), XOR(L10_hi, AND(-1-L06_hi, L07_hi)), XOR(L06_hi, AND(-1-L07_hi, L08_hi)), XOR(L07_hi, AND(-1-L08_hi, L09_hi)), XOR(L08_hi, AND(-1-L09_hi, L10_hi))
  2088. L11_lo, L12_lo, L13_lo, L14_lo, L15_lo = XOR(L12_lo, AND(-1-L13_lo, L14_lo)), XOR(L13_lo, AND(-1-L14_lo, L15_lo)), XOR(L14_lo, AND(-1-L15_lo, L11_lo)), XOR(L15_lo, AND(-1-L11_lo, L12_lo)), XOR(L11_lo, AND(-1-L12_lo, L13_lo))
  2089. L11_hi, L12_hi, L13_hi, L14_hi, L15_hi = XOR(L12_hi, AND(-1-L13_hi, L14_hi)), XOR(L13_hi, AND(-1-L14_hi, L15_hi)), XOR(L14_hi, AND(-1-L15_hi, L11_hi)), XOR(L15_hi, AND(-1-L11_hi, L12_hi)), XOR(L11_hi, AND(-1-L12_hi, L13_hi))
  2090. L16_lo, L17_lo, L18_lo, L19_lo, L20_lo = XOR(L20_lo, AND(-1-L16_lo, L17_lo)), XOR(L16_lo, AND(-1-L17_lo, L18_lo)), XOR(L17_lo, AND(-1-L18_lo, L19_lo)), XOR(L18_lo, AND(-1-L19_lo, L20_lo)), XOR(L19_lo, AND(-1-L20_lo, L16_lo))
  2091. L16_hi, L17_hi, L18_hi, L19_hi, L20_hi = XOR(L20_hi, AND(-1-L16_hi, L17_hi)), XOR(L16_hi, AND(-1-L17_hi, L18_hi)), XOR(L17_hi, AND(-1-L18_hi, L19_hi)), XOR(L18_hi, AND(-1-L19_hi, L20_hi)), XOR(L19_hi, AND(-1-L20_hi, L16_hi))
  2092. L21_lo, L22_lo, L23_lo, L24_lo, L25_lo = XOR(L23_lo, AND(-1-L24_lo, L25_lo)), XOR(L24_lo, AND(-1-L25_lo, L21_lo)), XOR(L25_lo, AND(-1-L21_lo, L22_lo)), XOR(L21_lo, AND(-1-L22_lo, L23_lo)), XOR(L22_lo, AND(-1-L23_lo, L24_lo))
  2093. L21_hi, L22_hi, L23_hi, L24_hi, L25_hi = XOR(L23_hi, AND(-1-L24_hi, L25_hi)), XOR(L24_hi, AND(-1-L25_hi, L21_hi)), XOR(L25_hi, AND(-1-L21_hi, L22_hi)), XOR(L21_hi, AND(-1-L22_hi, L23_hi)), XOR(L22_hi, AND(-1-L23_hi, L24_hi))
  -- combine this round's constant into lane 1 (iota)
  2094. L01_lo = XOR(L01_lo, RC_lo[round_idx])
  2095. L01_hi = L01_hi + RC_hi[round_idx] -- RC_hi[] is either 0 or 0x80000000, so we could use fast addition instead of slow XOR
  2096. end
  -- write the permuted state back into the caller's tables
  2097. lanes_lo[1] = L01_lo
  2098. lanes_hi[1] = L01_hi
  2099. lanes_lo[2] = L02_lo
  2100. lanes_hi[2] = L02_hi
  2101. lanes_lo[3] = L03_lo
  2102. lanes_hi[3] = L03_hi
  2103. lanes_lo[4] = L04_lo
  2104. lanes_hi[4] = L04_hi
  2105. lanes_lo[5] = L05_lo
  2106. lanes_hi[5] = L05_hi
  2107. lanes_lo[6] = L06_lo
  2108. lanes_hi[6] = L06_hi
  2109. lanes_lo[7] = L07_lo
  2110. lanes_hi[7] = L07_hi
  2111. lanes_lo[8] = L08_lo
  2112. lanes_hi[8] = L08_hi
  2113. lanes_lo[9] = L09_lo
  2114. lanes_hi[9] = L09_hi
  2115. lanes_lo[10] = L10_lo
  2116. lanes_hi[10] = L10_hi
  2117. lanes_lo[11] = L11_lo
  2118. lanes_hi[11] = L11_hi
  2119. lanes_lo[12] = L12_lo
  2120. lanes_hi[12] = L12_hi
  2121. lanes_lo[13] = L13_lo
  2122. lanes_hi[13] = L13_hi
  2123. lanes_lo[14] = L14_lo
  2124. lanes_hi[14] = L14_hi
  2125. lanes_lo[15] = L15_lo
  2126. lanes_hi[15] = L15_hi
  2127. lanes_lo[16] = L16_lo
  2128. lanes_hi[16] = L16_hi
  2129. lanes_lo[17] = L17_lo
  2130. lanes_hi[17] = L17_hi
  2131. lanes_lo[18] = L18_lo
  2132. lanes_hi[18] = L18_hi
  2133. lanes_lo[19] = L19_lo
  2134. lanes_hi[19] = L19_hi
  2135. lanes_lo[20] = L20_lo
  2136. lanes_hi[20] = L20_hi
  2137. lanes_lo[21] = L21_lo
  2138. lanes_hi[21] = L21_hi
  2139. lanes_lo[22] = L22_lo
  2140. lanes_hi[22] = L22_hi
  2141. lanes_lo[23] = L23_lo
  2142. lanes_hi[23] = L23_hi
  2143. lanes_lo[24] = L24_lo
  2144. lanes_hi[24] = L24_hi
  2145. lanes_lo[25] = L25_lo
  2146. lanes_hi[25] = L25_hi
  2147. end
  2148. end
  2149. end
  2150. --------------------------------------------------------------------------------
  2151. -- MAGIC NUMBERS CALCULATOR
  2152. --------------------------------------------------------------------------------
  2153. -- Q:
  2154. -- Is 53-bit "double" math enough to calculate square roots and cube roots of primes with 64 correct bits after decimal point?
  2155. -- A:
  2156. -- Yes, 53-bit "double" arithmetic is enough.
  2157. -- We could obtain first 40 bits by direct calculation of p^(1/3) and next 40 bits by one step of Newton's method.
  2158. do
  2159. local function mul(src1, src2, factor, result_length)
  2160. -- src1, src2 - long integers (arrays of digits in base 2^24)
  2161. -- factor - small integer
  2162. -- returns long integer result (src1 * src2 * factor) and its floating point approximation
  2163. local result, carry, value, weight = {}, 0.0, 0.0, 1.0
  2164. for j = 1, result_length do
  2165. for k = math_max(1, j + 1 - #src2), math_min(j, #src1) do
  2166. carry = carry + factor * src1[k] * src2[j + 1 - k] -- "int32" is not enough for multiplication result, that's why "factor" must be of type "double"
  2167. end
  2168. local digit = carry % 2^24
  2169. result[j] = floor(digit)
  2170. carry = (carry - digit) / 2^24
  2171. value = value + digit * weight
  2172. weight = weight * 2^24
  2173. end
  2174. return result, value
  2175. end
   -- Generates the SHA-2 constants from the fractional parts of roots of consecutive primes.
   --    idx               number of primes handled so far (= number of sha2_K entries already written)
   --    step              increment table indexed by (n % 6): starting from 4 it makes "p" walk 2, 3, 5, 7, 11, 13, 17, ...
   --                      (2, 3 and then only the numbers equal to 1 or 5 modulo 6); the same table makes "d" walk 5, 7, 11, ...
   --    p                 current prime candidate
   --    one               the long integer 1, used with mul() to split a big number into base-2^24 digits
   --    sqrt_hi, sqrt_lo  tables that currently receive the square-root words (they start as sha2_H_hi / sha2_H_lo)
   -- NOTE(review): hi_factor and K_lo_modulo are defined outside this block; presumably they control how the high
   -- 32-bit word is folded into the "lo" value on builds with 64-bit integers -- confirm against their definitions
   local idx, step, p, one, sqrt_hi, sqrt_lo = 0, {4, 1, 2, -2, 2}, 4, {1}, sha2_H_hi, sha2_H_lo
   repeat
      p = p + step[p % 6]
      local d = 1
      -- trial division: the inner loop ends either by "break" (p is prime) or when a divisor d of p is found
      repeat
         d = d + step[d % 6]
         if d*d > p then -- next prime number is found
            -- cube root: R = floor(root * 2^40) stored as two base-2^24 digits (R[1] is the low one)
            local root = p^(1/3)
            local R = root * 2^40
            R = mul({R - R % 1}, one, 1.0, 2)
            -- delta = (-R^3) modulo 2^96; p * 2^120 is a multiple of 2^96, so delta approximates p * 2^120 - R^3
            local _, delta = mul(R, mul(R, R, 1.0, 4), -1.0, 4)
            -- hi = fractional bits 1..32 of the root (bits 8..39 of R)
            local hi = R[2] % 65536 * 65536 + floor(R[1] / 256)
            -- lo = fractional bits 33..40 (low 8 bits of R) followed by 24 more bits from one Newton step:
            -- delta / (3 * R^2) * 2^24, rewritten with 1 / root^2 == root / p
            local lo = R[1] % 256 * 16777216 + floor(delta * (2^-56 / 3) * root / p)
            if idx < 16 then
               -- the first 16 primes additionally provide square roots (initial hash values)
               root = p^(1/2)
               R = root * 2^40
               R = mul({R - R % 1}, one, 1.0, 2)
               -- delta = (-R^2) modulo 2^48, which approximates p * 2^80 - R^2
               _, delta = mul(R, R, -1.0, 2)
               -- these "hi", "lo" and "idx" shadow the outer ones only until the end of this "if" block,
               -- so the cube-root words and the prime counter used below are not affected
               local hi = R[2] % 65536 * 65536 + floor(R[1] / 256)
               -- Newton step for the square root: delta / (2 * R) * 2^24
               local lo = R[1] % 256 * 16777216 + floor(delta * 2^-17 / root)
               local idx = idx % 8 + 1   -- slot 1..8 inside the current group of eight primes
               -- written for all 16 primes: values of primes 9..16 overwrite those of primes 1..8
               sha2_H_ext256[224][idx] = lo
               sqrt_hi[idx], sqrt_lo[idx] = hi, lo + hi * hi_factor
               if idx > 7 then
                  -- the group of eight is complete: the following square roots go to the tables stored under key 384
                  sqrt_hi, sqrt_lo = sha2_H_ext512_hi[384], sha2_H_ext512_lo[384]
               end
            end
            idx = idx + 1
            sha2_K_hi[idx], sha2_K_lo[idx] = hi, lo % K_lo_modulo + hi * hi_factor
            break
         end
      until p % d == 0
   until idx > 79   -- stop after 80 primes
  2209. end
  2210. -- Calculating IVs for SHA512/224 and SHA512/256
  2211. for width = 224, 256, 32 do
  2212. local H_lo, H_hi = {}
  2213. if XOR64A5 then
  2214. for j = 1, 8 do
  2215. H_lo[j] = XOR64A5(sha2_H_lo[j])
  2216. end
  2217. else
  2218. H_hi = {}
  2219. for j = 1, 8 do
  2220. H_lo[j] = XOR32A5(sha2_H_lo[j])
  2221. H_hi[j] = XOR32A5(sha2_H_hi[j])
  2222. end
  2223. end
  2224. sha512_feed_128(H_lo, H_hi, "SHA-512/"..tostring(width).."\128"..string_rep("\0", 115).."\88", 0, 128)
  2225. sha2_H_ext512_lo[width] = H_lo
  2226. sha2_H_ext512_hi[width] = H_hi
  2227. end
  2228. -- Constants for MD5
  2229. do
  2230. local sin, abs, modf = math.sin, math.abs, math.modf
  2231. for idx = 1, 64 do
  2232. -- we can't use formula floor(abs(sin(idx))*2^32) because its result may be beyond integer range on Lua built with 32-bit integers
  2233. local hi, lo = modf(abs(sin(idx)) * 2^16)
  2234. md5_K[idx] = hi * 65536 + floor(lo * 2^16)
  2235. end
  2236. end
  2237. -- Constants for SHA3
  2238. do
  2239. local sh_reg = 29
  2240. local function next_bit()
  2241. local r = sh_reg % 2
  2242. sh_reg = XOR_BYTE((sh_reg - r) / 2, 142 * r)
  2243. return r
  2244. end
  2245. for idx = 1, 24 do
  2246. local lo, m = 0
  2247. for _ = 1, 6 do
  2248. m = m and m * m * 2 or 1
  2249. lo = lo + next_bit() * m
  2250. end
  2251. local hi = next_bit() * m
  2252. sha3_RC_hi[idx], sha3_RC_lo[idx] = hi, lo + hi * hi_factor_keccak
  2253. end
  2254. end
  2255. --------------------------------------------------------------------------------
  2256. -- MAIN FUNCTIONS
  2257. --------------------------------------------------------------------------------
  2258. local function sha256ext(width, message)
  2259. -- Create an instance (private objects for current calculation)
  2260. local H, length, tail = {unpack(sha2_H_ext256[width])}, 0.0, ""
  2261. local function partial(message_part)
  2262. if message_part then
  2263. if tail then
  2264. length = length + #message_part
  2265. local offs = 0
  2266. if tail ~= "" and #tail + #message_part >= 64 then
  2267. offs = 64 - #tail
  2268. sha256_feed_64(H, tail..sub(message_part, 1, offs), 0, 64)
  2269. tail = ""
  2270. end
  2271. local size = #message_part - offs
  2272. local size_tail = size % 64
  2273. sha256_feed_64(H, message_part, offs, size - size_tail)
  2274. tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2275. return partial
  2276. else
  2277. error("Adding more chunks is not allowed after receiving the result", 2)
  2278. end
  2279. else
  2280. if tail then
  2281. local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64 + 1)}
  2282. tail = nil
  2283. -- Assuming user data length is shorter than (2^53)-9 bytes
  2284. -- Anyway, it looks very unrealistic that someone would spend more than a year of calculations to process 2^53 bytes of data by using this Lua script :-)
  2285. -- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
  2286. length = length * (8 / 256^7) -- convert "byte-counter" to "bit-counter" and move decimal point to the left
  2287. for j = 4, 10 do
  2288. length = length % 1 * 256
  2289. final_blocks[j] = char(floor(length))
  2290. end
  2291. final_blocks = table_concat(final_blocks)
  2292. sha256_feed_64(H, final_blocks, 0, #final_blocks)
  2293. local max_reg = width / 32
  2294. for j = 1, max_reg do
  2295. H[j] = HEX(H[j])
  2296. end
  2297. H = table_concat(H, "", 1, max_reg)
  2298. end
  2299. return H
  2300. end
  2301. end
  2302. if message then
  2303. -- Actually perform calculations and return the SHA256 digest of a message
  2304. return partial(message)()
  2305. else
  2306. -- Return function for chunk-by-chunk loading
  2307. -- User should feed every chunk of input data as single argument to this function and finally get SHA256 digest by invoking this function without an argument
  2308. return partial
  2309. end
  2310. end
  2311. local function sha512ext(width, message)
  2312. -- Create an instance (private objects for current calculation)
  2313. local length, tail, H_lo, H_hi = 0.0, "", {unpack(sha2_H_ext512_lo[width])}, not HEX64 and {unpack(sha2_H_ext512_hi[width])}
  2314. local function partial(message_part)
  2315. if message_part then
  2316. if tail then
  2317. length = length + #message_part
  2318. local offs = 0
  2319. if tail ~= "" and #tail + #message_part >= 128 then
  2320. offs = 128 - #tail
  2321. sha512_feed_128(H_lo, H_hi, tail..sub(message_part, 1, offs), 0, 128)
  2322. tail = ""
  2323. end
  2324. local size = #message_part - offs
  2325. local size_tail = size % 128
  2326. sha512_feed_128(H_lo, H_hi, message_part, offs, size - size_tail)
  2327. tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2328. return partial
  2329. else
  2330. error("Adding more chunks is not allowed after receiving the result", 2)
  2331. end
  2332. else
  2333. if tail then
  2334. local final_blocks = {tail, "\128", string_rep("\0", (-17-length) % 128 + 9)}
  2335. tail = nil
  2336. -- Assuming user data length is shorter than (2^53)-17 bytes
  2337. -- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
  2338. length = length * (8 / 256^7) -- convert "byte-counter" to "bit-counter" and move floating point to the left
  2339. for j = 4, 10 do
  2340. length = length % 1 * 256
  2341. final_blocks[j] = char(floor(length))
  2342. end
  2343. final_blocks = table_concat(final_blocks)
  2344. sha512_feed_128(H_lo, H_hi, final_blocks, 0, #final_blocks)
  2345. local max_reg = ceil(width / 64)
  2346. if HEX64 then
  2347. for j = 1, max_reg do
  2348. H_lo[j] = HEX64(H_lo[j])
  2349. end
  2350. else
  2351. for j = 1, max_reg do
  2352. H_lo[j] = HEX(H_hi[j])..HEX(H_lo[j])
  2353. end
  2354. H_hi = nil
  2355. end
  2356. H_lo = sub(table_concat(H_lo, "", 1, max_reg), 1, width / 4)
  2357. end
  2358. return H_lo
  2359. end
  2360. end
  2361. if message then
  2362. -- Actually perform calculations and return the SHA512 digest of a message
  2363. return partial(message)()
  2364. else
  2365. -- Return function for chunk-by-chunk loading
  2366. -- User should feed every chunk of input data as single argument to this function and finally get SHA512 digest by invoking this function without an argument
  2367. return partial
  2368. end
  2369. end
  2370. local function md5(message)
  2371. -- Create an instance (private objects for current calculation)
  2372. local H, length, tail = {unpack(md5_sha1_H, 1, 4)}, 0.0, ""
  2373. local function partial(message_part)
  2374. if message_part then
  2375. if tail then
  2376. length = length + #message_part
  2377. local offs = 0
  2378. if tail ~= "" and #tail + #message_part >= 64 then
  2379. offs = 64 - #tail
  2380. md5_feed_64(H, tail..sub(message_part, 1, offs), 0, 64)
  2381. tail = ""
  2382. end
  2383. local size = #message_part - offs
  2384. local size_tail = size % 64
  2385. md5_feed_64(H, message_part, offs, size - size_tail)
  2386. tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2387. return partial
  2388. else
  2389. error("Adding more chunks is not allowed after receiving the result", 2)
  2390. end
  2391. else
  2392. if tail then
  2393. local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64)}
  2394. tail = nil
  2395. length = length * 8 -- convert "byte-counter" to "bit-counter"
  2396. for j = 4, 11 do
  2397. local low_byte = length % 256
  2398. final_blocks[j] = char(low_byte)
  2399. length = (length - low_byte) / 256
  2400. end
  2401. final_blocks = table_concat(final_blocks)
  2402. md5_feed_64(H, final_blocks, 0, #final_blocks)
  2403. for j = 1, 4 do
  2404. H[j] = HEX(H[j])
  2405. end
  2406. H = gsub(table_concat(H), "(..)(..)(..)(..)", "%4%3%2%1")
  2407. end
  2408. return H
  2409. end
  2410. end
  2411. if message then
  2412. -- Actually perform calculations and return the MD5 digest of a message
  2413. return partial(message)()
  2414. else
  2415. -- Return function for chunk-by-chunk loading
  2416. -- User should feed every chunk of input data as single argument to this function and finally get MD5 digest by invoking this function without an argument
  2417. return partial
  2418. end
  2419. end
  2420. local function sha1(message)
  2421. -- Create an instance (private objects for current calculation)
  2422. local H, length, tail = {unpack(md5_sha1_H)}, 0.0, ""
  2423. local function partial(message_part)
  2424. if message_part then
  2425. if tail then
  2426. length = length + #message_part
  2427. local offs = 0
  2428. if tail ~= "" and #tail + #message_part >= 64 then
  2429. offs = 64 - #tail
  2430. sha1_feed_64(H, tail..sub(message_part, 1, offs), 0, 64)
  2431. tail = ""
  2432. end
  2433. local size = #message_part - offs
  2434. local size_tail = size % 64
  2435. sha1_feed_64(H, message_part, offs, size - size_tail)
  2436. tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2437. return partial
  2438. else
  2439. error("Adding more chunks is not allowed after receiving the result", 2)
  2440. end
  2441. else
  2442. if tail then
  2443. local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64 + 1)}
  2444. tail = nil
  2445. -- Assuming user data length is shorter than (2^53)-9 bytes
  2446. -- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
  2447. length = length * (8 / 256^7) -- convert "byte-counter" to "bit-counter" and move decimal point to the left
  2448. for j = 4, 10 do
  2449. length = length % 1 * 256
  2450. final_blocks[j] = char(floor(length))
  2451. end
  2452. final_blocks = table_concat(final_blocks)
  2453. sha1_feed_64(H, final_blocks, 0, #final_blocks)
  2454. for j = 1, 5 do
  2455. H[j] = HEX(H[j])
  2456. end
  2457. H = table_concat(H)
  2458. end
  2459. return H
  2460. end
  2461. end
  2462. if message then
  2463. -- Actually perform calculations and return the SHA-1 digest of a message
  2464. return partial(message)()
  2465. else
  2466. -- Return function for chunk-by-chunk loading
  2467. -- User should feed every chunk of input data as single argument to this function and finally get SHA-1 digest by invoking this function without an argument
  2468. return partial
  2469. end
  2470. end
  2471. local function keccak(block_size_in_bytes, digest_size_in_bytes, is_SHAKE, message)
  2472. -- "block_size_in_bytes" is multiple of 8
  2473. if type(digest_size_in_bytes) ~= "number" then
  2474. -- arguments in SHAKE are swapped:
  2475. -- NIST FIPS 202 defines SHAKE(message,num_bits)
  2476. -- this module defines SHAKE(num_bytes,message)
  2477. -- it's easy to forget about this swap, hence the check
  2478. error("Argument 'digest_size_in_bytes' must be a number", 2)
  2479. end
  2480. -- Create an instance (private objects for current calculation)
  2481. local tail, lanes_lo, lanes_hi = "", create_array_of_lanes(), hi_factor_keccak == 0 and create_array_of_lanes()
  2482. local result
  2483. --~ pad the input N using the pad function, yielding a padded bit string P with a length divisible by r (such that n = len(P)/r is integer),
  2484. --~ break P into n consecutive r-bit pieces P0, ..., Pn-1 (last is zero-padded)
  2485. --~ initialize the state S to a string of b 0 bits.
  2486. --~ absorb the input into the state: For each block Pi,
  2487. --~ extend Pi at the end by a string of c 0 bits, yielding one of length b,
  2488. --~ XOR that with S and
  2489. --~ apply the block permutation f to the result, yielding a new state S
  2490. --~ initialize Z to be the empty string
  2491. --~ while the length of Z is less than d:
  2492. --~ append the first r bits of S to Z
  2493. --~ if Z is still less than d bits long, apply f to S, yielding a new state S.
  2494. --~ truncate Z to d bits
  2495. local function partial(message_part)
  2496. if message_part then
  2497. if tail then
  2498. local offs = 0
  2499. if tail ~= "" and #tail + #message_part >= block_size_in_bytes then
  2500. offs = block_size_in_bytes - #tail
  2501. keccak_feed(lanes_lo, lanes_hi, tail..sub(message_part, 1, offs), 0, block_size_in_bytes, block_size_in_bytes)
  2502. tail = ""
  2503. end
  2504. local size = #message_part - offs
  2505. local size_tail = size % block_size_in_bytes
  2506. keccak_feed(lanes_lo, lanes_hi, message_part, offs, size - size_tail, block_size_in_bytes)
  2507. tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2508. return partial
  2509. else
  2510. error("Adding more chunks is not allowed after receiving the result", 2)
  2511. end
  2512. else
  2513. if tail then
  2514. -- append the following bits to the message: for usual SHA3: 011(0*)1, for SHAKE: 11111(0*)1
  2515. local gap_start = is_SHAKE and 31 or 6
  2516. tail = tail..(#tail + 1 == block_size_in_bytes and char(gap_start + 128) or char(gap_start)..string_rep("\0", (-2 - #tail) % block_size_in_bytes).."\128")
  2517. keccak_feed(lanes_lo, lanes_hi, tail, 0, #tail, block_size_in_bytes)
  2518. tail = nil
  2519. local lanes_used = 0
  2520. local total_lanes = floor(block_size_in_bytes / 8)
  2521. local qwords = {}
  2522. local function get_next_qwords_of_digest(qwords_qty)
  2523. -- returns not more than 'qwords_qty' qwords ('qwords_qty' might be non-integer)
  2524. -- doesn't go across keccak-buffer boundary
  2525. -- block_size_in_bytes is a multiple of 8, so, keccak-buffer contains integer number of qwords
  2526. if lanes_used >= total_lanes then
  2527. keccak_feed(lanes_lo, lanes_hi, "\0\0\0\0\0\0\0\0", 0, 8, 8)
  2528. lanes_used = 0
  2529. end
  2530. qwords_qty = floor(math_min(qwords_qty, total_lanes - lanes_used))
  2531. if hi_factor_keccak ~= 0 then
  2532. for j = 1, qwords_qty do
  2533. qwords[j] = HEX64(lanes_lo[lanes_used + j - 1 + lanes_index_base])
  2534. end
  2535. else
  2536. for j = 1, qwords_qty do
  2537. qwords[j] = HEX(lanes_hi[lanes_used + j])..HEX(lanes_lo[lanes_used + j])
  2538. end
  2539. end
  2540. lanes_used = lanes_used + qwords_qty
  2541. return
  2542. gsub(table_concat(qwords, "", 1, qwords_qty), "(..)(..)(..)(..)(..)(..)(..)(..)", "%8%7%6%5%4%3%2%1"),
  2543. qwords_qty * 8
  2544. end
  2545. local parts = {} -- digest parts
  2546. local last_part, last_part_size = "", 0
  2547. local function get_next_part_of_digest(bytes_needed)
  2548. -- returns 'bytes_needed' bytes, for arbitrary integer 'bytes_needed'
  2549. bytes_needed = bytes_needed or 1
  2550. if bytes_needed <= last_part_size then
  2551. last_part_size = last_part_size - bytes_needed
  2552. local part_size_in_nibbles = bytes_needed * 2
  2553. local result = sub(last_part, 1, part_size_in_nibbles)
  2554. last_part = sub(last_part, part_size_in_nibbles + 1)
  2555. return result
  2556. end
  2557. local parts_qty = 0
  2558. if last_part_size > 0 then
  2559. parts_qty = 1
  2560. parts[parts_qty] = last_part
  2561. bytes_needed = bytes_needed - last_part_size
  2562. end
  2563. -- repeats until the length is enough
  2564. while bytes_needed >= 8 do
  2565. local next_part, next_part_size = get_next_qwords_of_digest(bytes_needed / 8)
  2566. parts_qty = parts_qty + 1
  2567. parts[parts_qty] = next_part
  2568. bytes_needed = bytes_needed - next_part_size
  2569. end
  2570. if bytes_needed > 0 then
  2571. last_part, last_part_size = get_next_qwords_of_digest(1)
  2572. parts_qty = parts_qty + 1
  2573. parts[parts_qty] = get_next_part_of_digest(bytes_needed)
  2574. else
  2575. last_part, last_part_size = "", 0
  2576. end
  2577. return table_concat(parts, "", 1, parts_qty)
  2578. end
  2579. if digest_size_in_bytes < 0 then
  2580. result = get_next_part_of_digest
  2581. else
  2582. result = get_next_part_of_digest(digest_size_in_bytes)
  2583. end
  2584. end
  2585. return result
  2586. end
  2587. end
  2588. if message then
  2589. -- Actually perform calculations and return the SHA3 digest of a message
  2590. return partial(message)()
  2591. else
  2592. -- Return function for chunk-by-chunk loading
  2593. -- User should feed every chunk of input data as single argument to this function and finally get SHA3 digest by invoking this function without an argument
  2594. return partial
  2595. end
  2596. end
  2597. local hex2bin, bin2base64, base642bin
  2598. do
  2599. function hex2bin(hex_string)
  2600. return (gsub(hex_string, "%x%x",
  2601. function (hh)
  2602. return char(tonumber(hh, 16))
  2603. end
  2604. ))
  2605. end
  2606. local base64_symbols = {
  2607. ['+'] = 62, ['-'] = 62, [62] = '+',
  2608. ['/'] = 63, ['_'] = 63, [63] = '/',
  2609. ['='] = -1, ['.'] = -1, [-1] = '='
  2610. }
  2611. local symbol_index = 0
  2612. for j, pair in ipairs{'AZ', 'az', '09'} do
  2613. for ascii = byte(pair), byte(pair, 2) do
  2614. local ch = char(ascii)
  2615. base64_symbols[ch] = symbol_index
  2616. base64_symbols[symbol_index] = ch
  2617. symbol_index = symbol_index + 1
  2618. end
  2619. end
  2620. function bin2base64(binary_string)
  2621. local result = {}
  2622. for pos = 1, #binary_string, 3 do
  2623. local c1, c2, c3, c4 = byte(sub(binary_string, pos, pos + 2)..'\0', 1, -1)
  2624. result[#result + 1] =
  2625. base64_symbols[floor(c1 / 4)]
  2626. ..base64_symbols[c1 % 4 * 16 + floor(c2 / 16)]
  2627. ..base64_symbols[c3 and c2 % 16 * 4 + floor(c3 / 64) or -1]
  2628. ..base64_symbols[c4 and c3 % 64 or -1]
  2629. end
  2630. return table_concat(result)
  2631. end
  2632. function base642bin(base64_string)
  2633. local result, chars_qty = {}, 3
  2634. for pos, ch in gmatch(gsub(base64_string, '%s+', ''), '()(.)') do
  2635. local code = base64_symbols[ch]
  2636. if code < 0 then
  2637. chars_qty = chars_qty - 1
  2638. code = 0
  2639. end
  2640. local idx = pos % 4
  2641. if idx > 0 then
  2642. result[-idx] = code
  2643. else
  2644. local c1 = result[-1] * 4 + floor(result[-2] / 16)
  2645. local c2 = (result[-2] % 16) * 16 + floor(result[-3] / 4)
  2646. local c3 = (result[-3] % 4) * 64 + code
  2647. result[#result + 1] = sub(char(c1, c2, c3), 1, chars_qty)
  2648. end
  2649. end
  2650. return table_concat(result)
  2651. end
  2652. end
  2653. local block_size_for_HMAC -- this table will be initialized at the end of the module
  2654. local function pad_and_xor(str, result_length, byte_for_xor)
  2655. return gsub(str, ".",
  2656. function(c)
  2657. return char(XOR_BYTE(byte(c), byte_for_xor))
  2658. end
  2659. )..string_rep(char(byte_for_xor), result_length - #str)
  2660. end
  2661. local function hmac(hash_func, key, message)
  2662. -- Create an instance (private objects for current calculation)
  2663. local block_size = block_size_for_HMAC[hash_func]
  2664. if not block_size then
  2665. error("Unknown hash function", 2)
  2666. end
  2667. if #key > block_size then
  2668. key = hex2bin(hash_func(key))
  2669. end
  2670. local append = hash_func()(pad_and_xor(key, block_size, 0x36))
  2671. local result
  2672. local function partial(message_part)
  2673. if not message_part then
  2674. result = result or hash_func(pad_and_xor(key, block_size, 0x5C)..hex2bin(append()))
  2675. return result
  2676. elseif result then
  2677. error("Adding more chunks is not allowed after receiving the result", 2)
  2678. else
  2679. append(message_part)
  2680. return partial
  2681. end
  2682. end
  2683. if message then
  2684. -- Actually perform calculations and return the HMAC of a message
  2685. return partial(message)()
  2686. else
  2687. -- Return function for chunk-by-chunk loading of a message
  2688. -- User should feed every chunk of the message as single argument to this function and finally get HMAC by invoking this function without an argument
  2689. return partial
  2690. end
  2691. end
  2692. local sha = {
  2693. md5 = md5, -- MD5
  2694. sha1 = sha1, -- SHA-1
  2695. -- SHA2 hash functions:
  2696. sha224 = function (message) return sha256ext(224, message) end, -- SHA-224
  2697. sha256 = function (message) return sha256ext(256, message) end, -- SHA-256
  2698. sha512_224 = function (message) return sha512ext(224, message) end, -- SHA-512/224
  2699. sha512_256 = function (message) return sha512ext(256, message) end, -- SHA-512/256
  2700. sha384 = function (message) return sha512ext(384, message) end, -- SHA-384
  2701. sha512 = function (message) return sha512ext(512, message) end, -- SHA-512
  2702. -- SHA3 hash functions:
  2703. sha3_224 = function (message) return keccak((1600 - 2 * 224) / 8, 224 / 8, false, message) end, -- SHA3-224
  2704. sha3_256 = function (message) return keccak((1600 - 2 * 256) / 8, 256 / 8, false, message) end, -- SHA3-256
  2705. sha3_384 = function (message) return keccak((1600 - 2 * 384) / 8, 384 / 8, false, message) end, -- SHA3-384
  2706. sha3_512 = function (message) return keccak((1600 - 2 * 512) / 8, 512 / 8, false, message) end, -- SHA3-512
  2707. shake128 = function (digest_size_in_bytes, message) return keccak((1600 - 2 * 128) / 8, digest_size_in_bytes, true, message) end, -- SHAKE128
  2708. shake256 = function (digest_size_in_bytes, message) return keccak((1600 - 2 * 256) / 8, digest_size_in_bytes, true, message) end, -- SHAKE256
  2709. -- misc utilities:
  2710. hmac = hmac, -- HMAC(hash_func, key, message) is applicable to any hash function from this module except SHAKE*
  2711. hex2bin = hex2bin, -- converts hexadecimal representation to binary string
  2712. base642bin = base642bin, -- converts base64 representation to binary string
  2713. bin2base64 = bin2base64, -- converts binary string to base64 representation
  2714. }
  2715. block_size_for_HMAC = {
  2716. [sha.md5] = 64,
  2717. [sha.sha1] = 64,
  2718. [sha.sha224] = 64,
  2719. [sha.sha256] = 64,
  2720. [sha.sha512_224] = 128,
  2721. [sha.sha512_256] = 128,
  2722. [sha.sha384] = 128,
  2723. [sha.sha512] = 128,
  2724. [sha.sha3_224] = (1600 - 2 * 224) / 8,
  2725. [sha.sha3_256] = (1600 - 2 * 256) / 8,
  2726. [sha.sha3_384] = (1600 - 2 * 384) / 8,
  2727. [sha.sha3_512] = (1600 - 2 * 512) / 8,
  2728. }
  2729. return sha