Crypto++  8.0
Free C++ class library of cryptographic schemes
sha.cpp
1 // sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
2 
3 // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
4 // implemented Intel SHA extensions based on Intel articles and code by
5 // Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
6 // code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
7 // All code is in the public domain.
8 
9 // In August 2017 JW reworked the internals to align all the implementations.
10 // Formerly all hashes were software based, IterHashBase handled endian conversions,
11 // and IterHashBase dispatched a single block to SHA{N}::Transform. SHA{N}::Transform
12 // then performed the single block hashing. It was repeated for multiple blocks.
13 //
14 // The rework added SHA{N}::HashMultipleBlocks (class) and SHA{N}_HashMultipleBlocks
15 // (free standing). There are also hardware accelerated variations. Callers enter
16 // SHA{N}::HashMultipleBlocks (class), and the function calls SHA{N}_HashMultipleBlocks
17 // (free standing) or SHA{N}_HashBlock (free standing) as a fallback.
18 //
19 // An added wrinkle is that hardware is little endian, C++ is big endian, and callers use
20 // big endian, so SHA{N}_HashMultipleBlocks accepts a ByteOrder for the incoming data
21 // arrangement. Hardware based SHA{N}_HashMultipleBlocks can often perform the endian
22 // swap much more easily by setting an EPI mask. Endian swap incurs no penalty on Intel SHA,
23 // and a 4-instruction penalty on ARM SHA. Under C++ the full software based swap penalty
24 // is incurred due to use of ByteReverse().
25 //
26 // The rework also removed the hacked-in pointers to implementations.
27 
28 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
29 
30 #include "pch.h"
31 #include "config.h"
32 
33 #if CRYPTOPP_MSC_VERSION
34 # pragma warning(disable: 4100 4731)
35 #endif
36 
37 #ifndef CRYPTOPP_IMPORTS
38 #ifndef CRYPTOPP_GENERATE_X64_MASM
39 
40 #include "secblock.h"
41 #include "sha.h"
42 #include "misc.h"
43 #include "cpu.h"
44 
45 #if defined(CRYPTOPP_DISABLE_SHA_ASM)
46 # undef CRYPTOPP_X86_ASM_AVAILABLE
47 # undef CRYPTOPP_X32_ASM_AVAILABLE
48 # undef CRYPTOPP_X64_ASM_AVAILABLE
49 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
50 #endif
51 
52 NAMESPACE_BEGIN(CryptoPP)
53 
// Prototypes of the hardware-accelerated multi-block routines. They are only
// declared in this file; each prototype is compiled in only when the matching
// CRYPTOPP_*_AVAILABLE macro is set. The callers in this file pass the chaining
// state, the input words, a length (SHA{N}::BLOCKSIZE for a single block, or the
// caller's remaining length) and the ByteOrder describing how the input is arranged.
// presumably the definitions live in sha_simd.cpp -- TODO confirm.
54 #if CRYPTOPP_SHANI_AVAILABLE
55 extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
56 extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
57 #endif
58 
59 #if CRYPTOPP_ARM_SHA1_AVAILABLE
60 extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
61 #endif
62 
63 #if CRYPTOPP_ARM_SHA2_AVAILABLE
64 extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
65 #endif
66 
// NOTE(review): this SHA-512 prototype takes word32 pointers while the POWER8
// SHA-512 prototype below takes word64 pointers -- confirm against the definition
// before calling it.
67 #if CRYPTOPP_ARM_SHA512_AVAILABLE
68 extern void SHA512_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
69 #endif
70 
71 #if CRYPTOPP_POWER8_SHA_AVAILABLE
72 extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order);
73 extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order);
74 #endif
75 
76 // The extern declarations give the constant tables external linkage so they can be
77 // shared with sha_simd.cpp. Adding them also cleared http://github.com/weidai11/cryptopp/issues/502
78 extern const word32 SHA256_K[64];
79 extern const word64 SHA512_K[80];
80 
// Table of 80 64-bit constants for the SHA-512 family. Declared extern above so
// other translation units can use it; in this file the SSE2 assembly loads its
// address with "lea ebx, SHA512_K".
// presumably these are the per-round constants K_t of FIPS 180-4 -- confirm.
// CRYPTOPP_ALIGN_DATA(16) presumably requests 16-byte alignment (going by the macro name).
81 CRYPTOPP_ALIGN_DATA(16)
82 const word64 SHA512_K[80] = {
83  W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
84  W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
85  W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
86  W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
87  W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
88  W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
89  W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
90  W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
91  W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
92  W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
93  W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
94  W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
95  W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
96  W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
97  W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
98  W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
99  W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
100  W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
101  W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
102  W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
103  W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
104  W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
105  W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
106  W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
107  W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
108  W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
109  W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
110  W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
111  W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
112  W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
113  W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
114  W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
115  W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
116  W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
117  W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
118  W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
119  W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
120  W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
121  W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
122  W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
123 };
124 
// SHA-256 round constants: 64 32-bit words, one per round. The C++ round macro
// R(i) adds SHA256_K[i+j] to the round sum, and the SSE2 assembly walks the same
// table through the si register (Kt(i)). Declared extern above so other
// translation units can use it.
// CRYPTOPP_ALIGN_DATA(16) presumably requests 16-byte alignment (going by the macro name).
125 CRYPTOPP_ALIGN_DATA(16)
126 const word32 SHA256_K[64] = {
127 
128  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
129  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
130  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
131  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
132  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
133  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
134  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
135  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
136  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
137  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
138  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
139  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
140  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
141  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
142  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
143  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
144 };
145 
146 ////////////////////////////////
147 // start of Steve Reid's code //
148 ////////////////////////////////
149 
150 ANONYMOUS_NAMESPACE_BEGIN
151 
152 #define blk0(i) (W[i] = data[i])
153 #define blk1(i) (W[i&15] = rotlConstant<1>(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15]))
154 
155 #define f1(x,y,z) (z^(x&(y^z)))
156 #define f2(x,y,z) (x^y^z)
157 #define f3(x,y,z) ((x&y)|(z&(x|y)))
158 #define f4(x,y,z) (x^y^z)
159 
160 /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
161 #define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
162 #define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
163 #define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlConstant<5>(v);w=rotlConstant<30>(w);
164 #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlConstant<5>(v);w=rotlConstant<30>(w);
165 #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlConstant<5>(v);w=rotlConstant<30>(w);
166 
167 void SHA1_HashBlock_CXX(word32 *state, const word32 *data)
168 {
169  CRYPTOPP_ASSERT(state);
170  CRYPTOPP_ASSERT(data);
171 
172  word32 W[16];
173  /* Copy context->state[] to working vars */
174  word32 a = state[0];
175  word32 b = state[1];
176  word32 c = state[2];
177  word32 d = state[3];
178  word32 e = state[4];
179  /* 4 rounds of 20 operations each. Loop unrolled. */
180  R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
181  R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
182  R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
183  R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
184  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
185  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
186  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
187  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
188  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
189  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
190  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
191  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
192  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
193  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
194  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
195  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
196  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
197  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
198  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
199  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
200  /* Add the working vars back into context.state[] */
201  state[0] += a;
202  state[1] += b;
203  state[2] += c;
204  state[3] += d;
205  state[4] += e;
206 }
207 
208 #undef blk0
209 #undef blk1
210 #undef f1
211 #undef f2
212 #undef f3
213 #undef f4
214 #undef R1
215 #undef R2
216 #undef R3
217 #undef R4
218 
219 ANONYMOUS_NAMESPACE_END
220 
221 //////////////////////////////
222 // end of Steve Reid's code //
223 //////////////////////////////
224 
225 std::string SHA1::AlgorithmProvider() const
226 {
227 #if CRYPTOPP_SHANI_AVAILABLE
228  if (HasSHA())
229  return "SHANI";
230 #endif
231 #if CRYPTOPP_SSE2_ASM_AVAILABLE
232  if (HasSSE2())
233  return "SSE2";
234 #endif
235 #if CRYPTOPP_ARM_SHA1_AVAILABLE
236  if (HasSHA1())
237  return "ARMv8";
238 #endif
239  return "C++";
240 }
241 
242 void SHA1::InitState(HashWordType *state)
243 {
244  state[0] = 0x67452301;
245  state[1] = 0xEFCDAB89;
246  state[2] = 0x98BADCFE;
247  state[3] = 0x10325476;
248  state[4] = 0xC3D2E1F0;
249 }
250 
251 void SHA1::Transform(word32 *state, const word32 *data)
252 {
253  CRYPTOPP_ASSERT(state);
254  CRYPTOPP_ASSERT(data);
255 
256 #if CRYPTOPP_SHANI_AVAILABLE
257  if (HasSHA())
258  {
259  SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
260  return;
261  }
262 #endif
263 #if CRYPTOPP_ARM_SHA1_AVAILABLE
264  if (HasSHA1())
265  {
266  SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
267  return;
268  }
269 #endif
270 
271  SHA1_HashBlock_CXX(state, data);
272 }
273 
274 size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
275 {
276  CRYPTOPP_ASSERT(input);
277  CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
278 
279 #if CRYPTOPP_SHANI_AVAILABLE
280  if (HasSHA())
281  {
282  SHA1_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
283  return length & (SHA1::BLOCKSIZE - 1);
284  }
285 #endif
286 #if CRYPTOPP_ARM_SHA1_AVAILABLE
287  if (HasSHA1())
288  {
289  SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
290  return length & (SHA1::BLOCKSIZE - 1);
291  }
292 #endif
293 
294  const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
295  word32 *dataBuf = this->DataBuf();
296  do
297  {
298  if (noReverse)
299  {
300  SHA1_HashBlock_CXX(m_state, input);
301  }
302  else
303  {
304  ByteReverse(dataBuf, input, SHA1::BLOCKSIZE);
305  SHA1_HashBlock_CXX(m_state, dataBuf);
306  }
307 
308  input += SHA1::BLOCKSIZE/sizeof(word32);
309  length -= SHA1::BLOCKSIZE;
310  }
311  while (length >= SHA1::BLOCKSIZE);
312  return length;
313 }
314 
315 // *************************************************************
316 
317 ANONYMOUS_NAMESPACE_BEGIN
318 
319 #define a(i) T[(0-i)&7]
320 #define b(i) T[(1-i)&7]
321 #define c(i) T[(2-i)&7]
322 #define d(i) T[(3-i)&7]
323 #define e(i) T[(4-i)&7]
324 #define f(i) T[(5-i)&7]
325 #define g(i) T[(6-i)&7]
326 #define h(i) T[(7-i)&7]
327 
328 #define blk0(i) (W[i] = data[i])
329 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
330 
331 #define Ch(x,y,z) (z^(x&(y^z)))
332 #define Maj(x,y,z) (y^((x^y)&(y^z)))
333 
334 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
335  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
336 
337 // for SHA256
338 #define s0(x) (rotrConstant<7>(x)^rotrConstant<18>(x)^(x>>3))
339 #define s1(x) (rotrConstant<17>(x)^rotrConstant<19>(x)^(x>>10))
340 #define S0(x) (rotrConstant<2>(x)^rotrConstant<13>(x)^rotrConstant<22>(x))
341 #define S1(x) (rotrConstant<6>(x)^rotrConstant<11>(x)^rotrConstant<25>(x))
342 
343 void SHA256_HashBlock_CXX(word32 *state, const word32 *data)
344 {
345  word32 W[16]={0}, T[8];
346  /* Copy context->state[] to working vars */
347  memcpy(T, state, sizeof(T));
348  /* 64 operations, partially loop unrolled */
349  for (unsigned int j=0; j<64; j+=16)
350  {
351  R( 0); R( 1); R( 2); R( 3);
352  R( 4); R( 5); R( 6); R( 7);
353  R( 8); R( 9); R(10); R(11);
354  R(12); R(13); R(14); R(15);
355  }
356  /* Add the working vars back into context.state[] */
357  state[0] += a(0);
358  state[1] += b(0);
359  state[2] += c(0);
360  state[3] += d(0);
361  state[4] += e(0);
362  state[5] += f(0);
363  state[6] += g(0);
364  state[7] += h(0);
365 }
366 
367 #undef Ch
368 #undef Maj
369 #undef s0
370 #undef s1
371 #undef S0
372 #undef S1
373 #undef blk0
374 #undef blk1
375 #undef blk2
376 #undef R
377 
378 #undef a
379 #undef b
380 #undef c
381 #undef d
382 #undef e
383 #undef f
384 #undef g
385 #undef h
386 
387 ANONYMOUS_NAMESPACE_END
388 
// Names the implementation used for bulk SHA-224/SHA-256 hashing on this
// machine. The checks run in the same order, and under the same preprocessor
// conditions, as the dispatch in SHA256::HashMultipleBlocks and
// SHA224::HashMultipleBlocks. In particular the SSE2 routine is also reported
// when it is supplied through CRYPTOPP_X64_MASM_AVAILABLE, because the dispatch
// uses it in that configuration too.
std::string SHA256_AlgorithmProvider()
{
#if CRYPTOPP_SHANI_AVAILABLE
    if (HasSHA())
        return "SHANI";
#endif
#if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
    if (HasSSE2())
        return "SSE2";
#endif
#if CRYPTOPP_ARM_SHA2_AVAILABLE
    if (HasSHA2())
        return "ARMv8";
#endif
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
    if (HasSHA256())
        return "Power8";
#endif
    return "C++";
}
409 
410 std::string SHA224::AlgorithmProvider() const
411 {
412  return SHA256_AlgorithmProvider();
413 }
414 
415 void SHA224::InitState(HashWordType *state)
416 {
417  static const word32 s[8] = {
418  0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
419  0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
420  memcpy(state, s, sizeof(s));
421 }
422 
423 void SHA256::InitState(HashWordType *state)
424 {
425  static const word32 s[8] = {
426  0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
427  0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
428  memcpy(state, s, sizeof(s));
429 }
430 #endif // Not CRYPTOPP_GENERATE_X64_MASM
431 
432 #if defined(CRYPTOPP_X86_ASM_AVAILABLE)
433 
434 ANONYMOUS_NAMESPACE_BEGIN
435 
436 void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len)
437 {
438  #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
439  #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
440  #define G(i) H(i+1)
441  #define F(i) H(i+2)
442  #define E(i) H(i+3)
443  #define D(i) H(i+4)
444  #define C(i) H(i+5)
445  #define B(i) H(i+6)
446  #define A(i) H(i+7)
447  #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
448  #define Wt_2(i) Wt((i)-2)
449  #define Wt_15(i) Wt((i)-15)
450  #define Wt_7(i) Wt((i)-7)
451  #define K_END [BASE+8*4+16*4+0*WORD_SZ]
452  #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
453  #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
454  #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
455  #define Kt(i) WORD_REG(si)+(i)*4
456 #if CRYPTOPP_BOOL_X86
457  #define BASE esp+4
458 #elif defined(__GNUC__)
459  #define BASE r8
460 #else
461  #define BASE rsp
462 #endif
463 
464 #define RA0(i, edx, edi) \
465  AS2( add edx, [Kt(i)] )\
466  AS2( add edx, [Wt(i)] )\
467  AS2( add edx, H(i) )\
468 
469 #define RA1(i, edx, edi)
470 
471 #define RB0(i, edx, edi)
472 
473 #define RB1(i, edx, edi) \
474  AS2( mov AS_REG_7d, [Wt_2(i)] )\
475  AS2( mov edi, [Wt_15(i)])\
476  AS2( mov ebx, AS_REG_7d )\
477  AS2( shr AS_REG_7d, 10 )\
478  AS2( ror ebx, 17 )\
479  AS2( xor AS_REG_7d, ebx )\
480  AS2( ror ebx, 2 )\
481  AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
482  AS2( add ebx, [Wt_7(i)])\
483  AS2( mov AS_REG_7d, edi )\
484  AS2( shr AS_REG_7d, 3 )\
485  AS2( ror edi, 7 )\
486  AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
487  AS2( xor AS_REG_7d, edi )\
488  AS2( add edx, [Kt(i)])\
489  AS2( ror edi, 11 )\
490  AS2( add edx, H(i) )\
491  AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
492  AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
493  AS2( mov [Wt(i)], AS_REG_7d)\
494  AS2( add edx, AS_REG_7d )\
495 
496 #define ROUND(i, r, eax, ecx, edi, edx)\
497  /* in: edi = E */\
498  /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
499  AS2( mov edx, F(i) )\
500  AS2( xor edx, G(i) )\
501  AS2( and edx, edi )\
502  AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
503  AS2( mov AS_REG_7d, edi )\
504  AS2( ror edi, 6 )\
505  AS2( ror AS_REG_7d, 25 )\
506  RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
507  AS2( xor AS_REG_7d, edi )\
508  AS2( ror edi, 5 )\
509  AS2( xor AS_REG_7d, edi )/* S1(E) */\
510  AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
511  RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
512  /* in: ecx = A, eax = B^C, edx = T1 */\
513  /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
514  AS2( mov ebx, ecx )\
515  AS2( xor ecx, B(i) )/* A^B */\
516  AS2( and eax, ecx )\
517  AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
518  AS2( mov AS_REG_7d, ebx )\
519  AS2( ror ebx, 2 )\
520  AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
521  AS2( add edx, D(i) )\
522  AS2( mov D(i), edx )\
523  AS2( ror AS_REG_7d, 22 )\
524  AS2( xor AS_REG_7d, ebx )\
525  AS2( ror ebx, 11 )\
526  AS2( xor AS_REG_7d, ebx )\
527  AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
528  AS2( mov H(i), eax )\
529 
530 // Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
531 // complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
532 #if CRYPTOPP_BOOL_X64
533 #define SWAP_COPY(i) \
534  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
535  AS1( bswap WORD_REG(bx))\
536  AS2( mov [Wt(i*2+1)], WORD_REG(bx))
537 #else // X86 and X32
538 #define SWAP_COPY(i) \
539  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
540  AS1( bswap WORD_REG(bx))\
541  AS2( mov [Wt(i)], WORD_REG(bx))
542 #endif
543 
544 #if defined(__GNUC__)
545  #if CRYPTOPP_BOOL_X64
547  #endif
548  __asm__ __volatile__
549  (
550  #if CRYPTOPP_BOOL_X64
551  "lea %4, %%r8;"
552  #endif
553  INTEL_NOPREFIX
554 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
555  ALIGN 8
556  SHA256_HashMultipleBlocks_SSE2 PROC FRAME
557  rex_push_reg rsi
558  push_reg rdi
559  push_reg rbx
560  push_reg rbp
561  alloc_stack(LOCALS_SIZE+8)
562  .endprolog
563  mov rdi, r8
564  lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
565 #endif
566 
567 #if CRYPTOPP_BOOL_X86
568  #ifndef __GNUC__
569  AS2( mov edi, [len])
570  AS2( lea WORD_REG(si), [SHA256_K+48*4])
571  #endif
572  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
573  AS_PUSH_IF86(bx)
574  #endif
575 
576  AS_PUSH_IF86(bp)
577  AS2( mov ebx, esp)
578  AS2( and esp, -16)
579  AS2( sub WORD_REG(sp), LOCALS_SIZE)
580  AS_PUSH_IF86(bx)
581 #endif
582  AS2( mov STATE_SAVE, WORD_REG(cx))
583  AS2( mov DATA_SAVE, WORD_REG(dx))
584  AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
585  AS2( mov DATA_END, WORD_REG(ax))
586  AS2( mov K_END, WORD_REG(si))
587 
588 #if CRYPTOPP_SSE2_ASM_AVAILABLE
589 #if CRYPTOPP_BOOL_X86
590  AS2( test edi, 1)
591  ASJ( jnz, 2, f)
592  AS1( dec DWORD PTR K_END)
593 #endif
594  AS2( movdqu xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
595  AS2( movdqu xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
596 #endif
597 
598 #if CRYPTOPP_BOOL_X86
599 #if CRYPTOPP_SSE2_ASM_AVAILABLE
600  ASJ( jmp, 0, f)
601 #endif
602  ASL(2) // non-SSE2
603  AS2( mov esi, ecx)
604  AS2( lea edi, A(0))
605  AS2( mov ecx, 8)
606 ATT_NOPREFIX
607  AS1( rep movsd)
608 INTEL_NOPREFIX
609  AS2( mov esi, K_END)
610  ASJ( jmp, 3, f)
611 #endif
612 
613 #if CRYPTOPP_SSE2_ASM_AVAILABLE
614  ASL(0)
615  AS2( movdqu E(0), xmm1)
616  AS2( movdqu A(0), xmm0)
617 #endif
618 #if CRYPTOPP_BOOL_X86
619  ASL(3)
620 #endif
621  AS2( sub WORD_REG(si), 48*4)
622  SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
623  SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
624 #if CRYPTOPP_BOOL_X86
625  SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
626  SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
627 #endif
628  AS2( mov edi, E(0)) // E
629  AS2( mov eax, B(0)) // B
630  AS2( xor eax, C(0)) // B^C
631  AS2( mov ecx, A(0)) // A
632 
633  ROUND(0, 0, eax, ecx, edi, edx)
634  ROUND(1, 0, ecx, eax, edx, edi)
635  ROUND(2, 0, eax, ecx, edi, edx)
636  ROUND(3, 0, ecx, eax, edx, edi)
637  ROUND(4, 0, eax, ecx, edi, edx)
638  ROUND(5, 0, ecx, eax, edx, edi)
639  ROUND(6, 0, eax, ecx, edi, edx)
640  ROUND(7, 0, ecx, eax, edx, edi)
641  ROUND(8, 0, eax, ecx, edi, edx)
642  ROUND(9, 0, ecx, eax, edx, edi)
643  ROUND(10, 0, eax, ecx, edi, edx)
644  ROUND(11, 0, ecx, eax, edx, edi)
645  ROUND(12, 0, eax, ecx, edi, edx)
646  ROUND(13, 0, ecx, eax, edx, edi)
647  ROUND(14, 0, eax, ecx, edi, edx)
648  ROUND(15, 0, ecx, eax, edx, edi)
649 
650  ASL(1)
651  AS2(add WORD_REG(si), 4*16)
652  ROUND(0, 1, eax, ecx, edi, edx)
653  ROUND(1, 1, ecx, eax, edx, edi)
654  ROUND(2, 1, eax, ecx, edi, edx)
655  ROUND(3, 1, ecx, eax, edx, edi)
656  ROUND(4, 1, eax, ecx, edi, edx)
657  ROUND(5, 1, ecx, eax, edx, edi)
658  ROUND(6, 1, eax, ecx, edi, edx)
659  ROUND(7, 1, ecx, eax, edx, edi)
660  ROUND(8, 1, eax, ecx, edi, edx)
661  ROUND(9, 1, ecx, eax, edx, edi)
662  ROUND(10, 1, eax, ecx, edi, edx)
663  ROUND(11, 1, ecx, eax, edx, edi)
664  ROUND(12, 1, eax, ecx, edi, edx)
665  ROUND(13, 1, ecx, eax, edx, edi)
666  ROUND(14, 1, eax, ecx, edi, edx)
667  ROUND(15, 1, ecx, eax, edx, edi)
668  AS2( cmp WORD_REG(si), K_END)
669  ATT_NOPREFIX
670  ASJ( jb, 1, b)
671  INTEL_NOPREFIX
672 
673  AS2( mov WORD_REG(dx), DATA_SAVE)
674  AS2( add WORD_REG(dx), 64)
675  AS2( mov AS_REG_7, STATE_SAVE)
676  AS2( mov DATA_SAVE, WORD_REG(dx))
677 
678 #if CRYPTOPP_SSE2_ASM_AVAILABLE
679 #if CRYPTOPP_BOOL_X86
680  AS2( test DWORD PTR K_END, 1)
681  ASJ( jz, 4, f)
682 #endif
683  AS2( movdqu xmm1, XMMWORD_PTR [AS_REG_7+1*16])
684  AS2( movdqu xmm0, XMMWORD_PTR [AS_REG_7+0*16])
685  AS2( paddd xmm1, E(0))
686  AS2( paddd xmm0, A(0))
687  AS2( movdqu [AS_REG_7+1*16], xmm1)
688  AS2( movdqu [AS_REG_7+0*16], xmm0)
689  AS2( cmp WORD_REG(dx), DATA_END)
690  ATT_NOPREFIX
691  ASJ( jb, 0, b)
692  INTEL_NOPREFIX
693 #endif
694 
695 #if CRYPTOPP_BOOL_X86
696 #if CRYPTOPP_SSE2_ASM_AVAILABLE
697  ASJ( jmp, 5, f)
698  ASL(4) // non-SSE2
699 #endif
700  AS2( add [AS_REG_7+0*4], ecx) // A
701  AS2( add [AS_REG_7+4*4], edi) // E
702  AS2( mov eax, B(0))
703  AS2( mov ebx, C(0))
704  AS2( mov ecx, D(0))
705  AS2( add [AS_REG_7+1*4], eax)
706  AS2( add [AS_REG_7+2*4], ebx)
707  AS2( add [AS_REG_7+3*4], ecx)
708  AS2( mov eax, F(0))
709  AS2( mov ebx, G(0))
710  AS2( mov ecx, H(0))
711  AS2( add [AS_REG_7+5*4], eax)
712  AS2( add [AS_REG_7+6*4], ebx)
713  AS2( add [AS_REG_7+7*4], ecx)
714  AS2( mov ecx, AS_REG_7d)
715  AS2( cmp WORD_REG(dx), DATA_END)
716  ASJ( jb, 2, b)
717 #if CRYPTOPP_SSE2_ASM_AVAILABLE
718  ASL(5)
719 #endif
720 #endif
721 
722  AS_POP_IF86(sp)
723  AS_POP_IF86(bp)
724  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
725  AS_POP_IF86(bx)
726  #endif
727 
728 #ifdef CRYPTOPP_GENERATE_X64_MASM
729  add rsp, LOCALS_SIZE+8
730  pop rbp
731  pop rbx
732  pop rdi
733  pop rsi
734  ret
735  SHA256_HashMultipleBlocks_SSE2 ENDP
736 #endif
737 
738 #ifdef __GNUC__
739  ATT_PREFIX
740  :
741  : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
742  #if CRYPTOPP_BOOL_X64
743  , "m" (workspace[0])
744  #endif
745  : "memory", "cc", "%eax"
746  #if CRYPTOPP_BOOL_X64
747  , "%rbx", "%r8", "%r10"
748  #endif
749  );
750 #endif
751 }
752 
753 ANONYMOUS_NAMESPACE_END
754 
755 #endif // CRYPTOPP_X86_ASM_AVAILABLE
756 
757 #ifndef CRYPTOPP_GENERATE_X64_MASM
758 
// Prototype for the SSE2 routine when CRYPTOPP_X64_MASM_AVAILABLE is set. It is
// declared with C linkage, matching the undecorated PROC name that the
// CRYPTOPP_GENERATE_X64_MASM pass of this file emits.
// presumably the routine is then linked in from the separately assembled MASM
// output rather than compiled from the inline asm -- TODO confirm.
759 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
760 extern "C" {
761 void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len);
762 }
763 #endif
764 
765 std::string SHA256::AlgorithmProvider() const
766 {
767  return SHA256_AlgorithmProvider();
768 }
769 
770 void SHA256::Transform(word32 *state, const word32 *data)
771 {
772  CRYPTOPP_ASSERT(state);
773  CRYPTOPP_ASSERT(data);
774 
775 #if CRYPTOPP_SHANI_AVAILABLE
776  if (HasSHA())
777  {
778  SHA256_HashMultipleBlocks_SHANI(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
779  return;
780  }
781 #endif
782 #if CRYPTOPP_ARM_SHA2_AVAILABLE
783  if (HasSHA2())
784  {
785  SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
786  return;
787  }
788 #endif
789 #if CRYPTOPP_POWER8_SHA_AVAILABLE
790  if (HasSHA256())
791  {
792  SHA256_HashMultipleBlocks_POWER8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
793  return;
794  }
795 #endif
796 
797  SHA256_HashBlock_CXX(state, data);
798 }
799 
800 size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
801 {
802  CRYPTOPP_ASSERT(input);
803  CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
804 
805 #if CRYPTOPP_SHANI_AVAILABLE
806  if (HasSHA())
807  {
808  SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
809  return length & (SHA256::BLOCKSIZE - 1);
810  }
811 #endif
812 #if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
813  if (HasSSE2())
814  {
815  const size_t res = length & (SHA256::BLOCKSIZE - 1);
816  SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
817  return res;
818  }
819 #endif
820 #if CRYPTOPP_ARM_SHA2_AVAILABLE
821  if (HasSHA2())
822  {
823  SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
824  return length & (SHA256::BLOCKSIZE - 1);
825  }
826 #endif
827 #if CRYPTOPP_POWER8_SHA_AVAILABLE
828  if (HasSHA256())
829  {
830  SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
831  return length & (SHA256::BLOCKSIZE - 1);
832  }
833 #endif
834 
835  const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
836  word32 *dataBuf = this->DataBuf();
837  do
838  {
839  if (noReverse)
840  {
841  SHA256_HashBlock_CXX(m_state, input);
842  }
843  else
844  {
845  ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
846  SHA256_HashBlock_CXX(m_state, dataBuf);
847  }
848 
849  input += SHA256::BLOCKSIZE/sizeof(word32);
850  length -= SHA256::BLOCKSIZE;
851  }
852  while (length >= SHA256::BLOCKSIZE);
853  return length;
854 }
855 
856 size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
857 {
858  CRYPTOPP_ASSERT(input);
859  CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
860 
861 #if CRYPTOPP_SHANI_AVAILABLE
862  if (HasSHA())
863  {
864  SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
865  return length & (SHA256::BLOCKSIZE - 1);
866  }
867 #endif
868 #if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
869  if (HasSSE2())
870  {
871  const size_t res = length & (SHA256::BLOCKSIZE - 1);
872  SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
873  return res;
874  }
875 #endif
876 #if CRYPTOPP_ARM_SHA2_AVAILABLE
877  if (HasSHA2())
878  {
879  SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
880  return length & (SHA256::BLOCKSIZE - 1);
881  }
882 #endif
883 #if CRYPTOPP_POWER8_SHA_AVAILABLE
884  if (HasSHA256())
885  {
886  SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
887  return length & (SHA256::BLOCKSIZE - 1);
888  }
889 #endif
890 
891  const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
892  word32 *dataBuf = this->DataBuf();
893  do
894  {
895  if (noReverse)
896  {
897  SHA256_HashBlock_CXX(m_state, input);
898  }
899  else
900  {
901  ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
902  SHA256_HashBlock_CXX(m_state, dataBuf);
903  }
904 
905  input += SHA256::BLOCKSIZE/sizeof(word32);
906  length -= SHA256::BLOCKSIZE;
907  }
908  while (length >= SHA256::BLOCKSIZE);
909  return length;
910 }
911 
912 // *************************************************************
913 
// Provider string shared by SHA-384 and SHA-512: "SSE2" when SSE2 assembly is
// compiled in and the CPU reports SSE2, else "Power8" when POWER8 SHA support
// is compiled in and reported for SHA-512, else "C++".
std::string SHA512_AlgorithmProvider()
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2())
    {
        return "SSE2";
    }
#endif
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
    if (HasSHA512())
    {
        return "Power8";
    }
#endif
    // Nothing accelerated applies.
    return "C++";
}
926 
927 std::string SHA384::AlgorithmProvider() const
928 {
929  return SHA512_AlgorithmProvider();
930 }
931 
932 std::string SHA512::AlgorithmProvider() const
933 {
934  return SHA512_AlgorithmProvider();
935 }
936 
937 void SHA384::InitState(HashWordType *state)
938 {
939  const word64 s[8] = {
940  W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
941  W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
942  W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
943  W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
944  memcpy(state, s, sizeof(s));
945 }
946 
947 void SHA512::InitState(HashWordType *state)
948 {
949  const word64 s[8] = {
950  W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
951  W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
952  W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
953  W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
954  memcpy(state, s, sizeof(s));
955 }
956 
957 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
958 // 32-bit x86 only: hashes one SHA-512 block with hand-written MMX/SSE2 inline assembly.
959 ANONYMOUS_NAMESPACE_BEGIN
960 
961 // No inlining due to https://github.com/weidai11/cryptopp/issues/684
962 // g++ -DNDEBUG -g2 -O3 -pthread -pipe -c sha.cpp
963 // sha.cpp: Assembler messages:
964 // sha.cpp:1155: Error: symbol `SHA512_Round' is already defined
965 // sha.cpp:1155: Error: symbol `SHA512_Round' is already defined
966 
// SHA512_HashBlock_SSE2 folds one message block into the eight-word state.
//  state - addressed through ecx: copied into a stack buffer on entry and
//          updated in place by SSE2_CombineState at the end.
//  data  - sixteen 64-bit message words addressed through edx. Rounds 0-15
//          load [edx+eax*8] unchanged, so no byte swapping is done here.
// The round constants (SHA512_K) are addressed through ebx, the round counter
// lives in eax, edi points into the working-variable buffer and esi into the
// message-schedule buffer.
// NOTE(review): the non-GCC path assumes CRYPTOPP_FASTCALL delivers state in
// ecx and data in edx and that CRYPTOPP_NAKED suppresses the compiler's
// prologue/epilogue - confirm against the macro definitions.
967 CRYPTOPP_NOINLINE CRYPTOPP_NAKED
968 void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const word64 *data)
969 {
970 #ifdef __GNUC__
971  __asm__ __volatile__
972  (
973  INTEL_NOPREFIX
974  AS_PUSH_IF86( bx) // presumably saves ebx on 32-bit x86; paired with AS_POP_IF86( bx) below - confirm
975  AS2( mov ebx, eax) // eax arrives holding SHA512_K (input operand a); keep the table pointer in ebx
976 #else
977  AS1( push ebx) // ebx, esi and edi are saved here and popped in reverse order before ret
978  AS1( push esi)
979  AS1( push edi)
980  AS2( lea ebx, SHA512_K) // ebx = address of the round-constant table
981 #endif
982 // Build a 16-byte aligned scratch frame; the caller's esp is carried in eax until it is pushed.
983  AS2( mov eax, esp) // remember the original stack pointer
984  AS2( and esp, 0xfffffff0) // round esp down to a 16-byte boundary
985  AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
986  AS_PUSH_IF86( ax) // presumably pushes the saved esp; AS_POP_IF86( sp) restores it at the end - confirm
987  AS2( xor eax, eax) // eax = round counter, starts at 0
988 
989  AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
990  AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
991 // Copy the eight state words from [ecx] to the buffer at edi; word 0 (a) is cached in mm4, word 4 (e) in mm5.
992  AS2( movdqu xmm0, [ecx+0*16])
993  AS2( movdq2q mm4, xmm0)
994  AS2( movdqu [edi+0*16], xmm0)
995  AS2( movdqu xmm0, [ecx+1*16])
996  AS2( movdqu [edi+1*16], xmm0)
997  AS2( movdqu xmm0, [ecx+2*16])
998  AS2( movdq2q mm5, xmm0)
999  AS2( movdqu [edi+2*16], xmm0)
1000  AS2( movdqu xmm0, [ecx+3*16])
1001  AS2( movdqu [edi+3*16], xmm0)
1002  ASJ( jmp, 0, f) // jump over the SHA512_Round subroutine to label 0 (first 16 rounds)
1003 // SSE2_S0_S1: r = rotr(r,a) ^ rotr(r,b) ^ rotr(r,c) on one 64-bit MMX register; mm6 and mm7 are scratch.
1004 #define SSE2_S0_S1(r, a, b, c) \
1005  AS2( movq mm6, r)\
1006  AS2( psrlq r, a)\
1007  AS2( movq mm7, r)\
1008  AS2( psllq mm6, 64-c)\
1009  AS2( pxor mm7, mm6)\
1010  AS2( psrlq r, b-a)\
1011  AS2( pxor mm7, r)\
1012  AS2( psllq mm6, c-b)\
1013  AS2( pxor mm7, mm6)\
1014  AS2( psrlq r, c-b)\
1015  AS2( pxor r, mm7)\
1016  AS2( psllq mm6, b-a)\
1017  AS2( pxor r, mm6)
1018 // SSE2_s0: for each 64-bit half of r, r = rotr(r,a) ^ (r >> b) ^ rotr(r,c); xmm6 and xmm7 are scratch.
1019 #define SSE2_s0(r, a, b, c) \
1020  AS2( movdqu xmm6, r)\
1021  AS2( psrlq r, a)\
1022  AS2( movdqu xmm7, r)\
1023  AS2( psllq xmm6, 64-c)\
1024  AS2( pxor xmm7, xmm6)\
1025  AS2( psrlq r, b-a)\
1026  AS2( pxor xmm7, r)\
1027  AS2( psrlq r, c-b)\
1028  AS2( pxor r, xmm7)\
1029  AS2( psllq xmm6, c-a)\
1030  AS2( pxor r, xmm6)
1031 // SSE2_s1: for each 64-bit half of r, r = (r >> a) ^ rotr(r,b) ^ rotr(r,c); xmm6 and xmm7 are scratch.
1032 #define SSE2_s1(r, a, b, c) \
1033  AS2( movdqu xmm6, r)\
1034  AS2( psrlq r, a)\
1035  AS2( movdqu xmm7, r)\
1036  AS2( psllq xmm6, 64-c)\
1037  AS2( pxor xmm7, xmm6)\
1038  AS2( psrlq r, b-a)\
1039  AS2( pxor xmm7, r)\
1040  AS2( psllq xmm6, c-b)\
1041  AS2( pxor xmm7, xmm6)\
1042  AS2( psrlq r, c-b)\
1043  AS2( pxor r, xmm7)
1044  ASL(SHA512_Round) // local subroutine: one round, entered with call and left through the ret below
1045 
1046  // k + w is in mm0, a is in mm4, e is in mm5
1047  AS2( paddq mm0, [edi+7*8]) // h
1048  AS2( movq mm2, [edi+5*8]) // f
1049  AS2( movq mm3, [edi+6*8]) // g
1050  AS2( pxor mm2, mm3)
1051  AS2( pand mm2, mm5)
1052  SSE2_S0_S1(mm5,14,18,41)
1053  AS2( pxor mm2, mm3)
1054  AS2( paddq mm0, mm2) // h += Ch(e,f,g)
1055  AS2( paddq mm5, mm0) // h += S1(e)
1056  AS2( movq mm2, [edi+1*8]) // b
1057  AS2( movq mm1, mm2)
1058  AS2( por mm2, mm4)
1059  AS2( pand mm2, [edi+2*8]) // c
1060  AS2( pand mm1, mm4)
1061  AS2( por mm1, mm2)
1062  AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
1063  AS2( paddq mm5, [edi+3*8]) // e = d + h
1064  AS2( movq [edi+3*8], mm5)
1065  AS2( movq [edi+11*8], mm5) // same value stored again 8 qwords higher; presumably keeps the window valid after edi is rewound
1066  SSE2_S0_S1(mm4,28,34,39) // S0(a)
1067  AS2( paddq mm4, mm1) // a = temp + S0(a)
1068  AS2( movq [edi-8], mm4) // new a goes one qword below the current window
1069  AS2( movq [edi+7*8], mm4) // and is stored again 8 qwords higher
1070  AS1( ret)
1071 
1072  // first 16 rounds
1073  ASL(0)
1074  AS2( movq mm0, [edx+eax*8]) // mm0 = data[eax]
1075  AS2( movq [esi+eax*8], mm0) // keep the word in the schedule buffer
1076  AS2( movq [esi+eax*8+16*8], mm0) // second copy 16 qwords higher
1077  AS2( paddq mm0, [ebx+eax*8]) // mm0 = W + K for this round
1078  ASC( call, SHA512_Round)
1079 
1080  AS1( inc eax)
1081  AS2( sub edi, 8) // slide the working-variable window down one qword
1082  AS2( test eax, 7)
1083  ASJ( jnz, 0, b)
1084  AS2( add edi, 8*8) // every 8 rounds move the window back up by 8 qwords
1085  AS2( cmp eax, 16)
1086  ASJ( jne, 0, b)
1087 
1088  // rest of the rounds
1089  AS2( movdqu xmm0, [esi+(16-2)*8]) // preload W[i-2], W[i-1]
1090  ASL(1)
1091  // data expansion, W[i-2] already in xmm0
1092  AS2( movdqu xmm3, [esi]) // W[i-16], W[i-15]
1093  AS2( paddq xmm3, [esi+(16-7)*8]) // + W[i-7], W[i-6]
1094  AS2( movdqu xmm2, [esi+(16-15)*8]) // W[i-15], W[i-14]
1095  SSE2_s1(xmm0, 6, 19, 61)
1096  AS2( paddq xmm0, xmm3)
1097  SSE2_s0(xmm2, 1, 7, 8)
1098  AS2( paddq xmm0, xmm2) // xmm0 = two new schedule words W[i], W[i+1]
1099  AS2( movdq2q mm0, xmm0) // low half: W[i]
1100  AS2( movhlps xmm1, xmm0) // high half moved to the low half of xmm1: W[i+1]
1101  AS2( paddq mm0, [ebx+eax*8]) // mm0 = W[i] + K[i]
1102  AS2( movlps [esi], xmm0) // store both new words in the schedule buffer ...
1103  AS2( movlps [esi+8], xmm1)
1104  AS2( movlps [esi+8*16], xmm0) // ... and again 16 qwords higher
1105  AS2( movlps [esi+8*17], xmm1)
1106  // 2 rounds
1107  ASC( call, SHA512_Round)
1108  AS2( sub edi, 8)
1109  AS2( movdq2q mm0, xmm1)
1110  AS2( paddq mm0, [ebx+eax*8+8]) // mm0 = W[i+1] + K[i+1]
1111  ASC( call, SHA512_Round)
1112  // update indices and loop
1113  AS2( add esi, 16) // advance the schedule pointer by two words
1114  AS2( add eax, 2)
1115  AS2( sub edi, 8)
1116  AS2( test eax, 7)
1117  ASJ( jnz, 1, b)
1118  // do housekeeping every 8 rounds
1119  AS2( mov esi, 0xf)
1120  AS2( and esi, eax)
1121  AS2( lea esi, [esp+4+20*8+8+esi*8]) // esi = schedule base + (eax mod 16) qwords
1122  AS2( add edi, 8*8)
1123  AS2( cmp eax, 80) // 80 rounds in total
1124  ASJ( jne, 1, b)
1125 // SSE2_CombineState(i): adds working words 2i and 2i+1 at [edi+i*16] to the state at [ecx+i*16] and stores the sum back.
1126 #define SSE2_CombineState(i) \
1127  AS2( movdqu xmm0, [edi+i*16])\
1128  AS2( paddq xmm0, [ecx+i*16])\
1129  AS2( movdqu [ecx+i*16], xmm0)
1130 // NOTE(review): paddq takes [ecx+i*16] as a memory operand while every other access to state uses movdqu - confirm state is always 16-byte aligned.
1131  SSE2_CombineState(0)
1132  SSE2_CombineState(1)
1133  SSE2_CombineState(2)
1134  SSE2_CombineState(3)
1135 // Restore the caller's stack pointer and leave MMX mode.
1136  AS_POP_IF86( sp)
1137  AS1( emms) // empty the MMX state before returning to code that may use x87
1138 
1139 #if defined(__GNUC__)
1140  AS_POP_IF86( bx)
1141  ATT_PREFIX
1142  :
1143  : "a" (SHA512_K), "c" (state), "d" (data)
1144  : "%esi", "%edi", "memory", "cc"
1145  );
1146 #else
1147  AS1( pop edi)
1148  AS1( pop esi)
1149  AS1( pop ebx)
1150  AS1( ret) // explicit return; presumably needed because CRYPTOPP_NAKED omits the epilogue - confirm
1151 #endif
1152 }
1153 
1154 ANONYMOUS_NAMESPACE_END
1155 
1156 #endif // CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
1157 
1158 ANONYMOUS_NAMESPACE_BEGIN
1159 // a(i)..h(i): the eight working variables as seen by round i. Offsetting the index into T by i renames them each round instead of moving data.
1160 #define a(i) T[(0-i)&7]
1161 #define b(i) T[(1-i)&7]
1162 #define c(i) T[(2-i)&7]
1163 #define d(i) T[(3-i)&7]
1164 #define e(i) T[(4-i)&7]
1165 #define f(i) T[(5-i)&7]
1166 #define g(i) T[(6-i)&7]
1167 #define h(i) T[(7-i)&7]
1168 // blk0 copies message word i into W; blk2 updates slot i&15 of the 16-word circular schedule W in place. Both yield the word.
1169 #define blk0(i) (W[i]=data[i])
1170 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
1171 // Bitwise choose (x selects y or z) and majority, written with xor/and only.
1172 #define Ch(x,y,z) (z^(x&(y^z)))
1173 #define Maj(x,y,z) (y^((x^y)&(y^z)))
1174 // Lower-case s0/s1 feed the message schedule (blk2); upper-case S0/S1 are used by the round macro R.
1175 #define s0(x) (rotrConstant<1>(x)^rotrConstant<8>(x)^(x>>7))
1176 #define s1(x) (rotrConstant<19>(x)^rotrConstant<61>(x)^(x>>6))
1177 #define S0(x) (rotrConstant<28>(x)^rotrConstant<34>(x)^rotrConstant<39>(x))
1178 #define S1(x) (rotrConstant<14>(x)^rotrConstant<18>(x)^rotrConstant<41>(x))
1179 // R(i): one round. Relies on j, T, W and data from the expansion site: j==0 takes the word from data (blk0), otherwise from the schedule (blk2).
1180 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\
1181  (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
1182 // Portable C++ path: hashes the 16 word64 values at data into state[0..7]. data is used as-is; no byte swapping happens here.
1183 void SHA512_HashBlock_CXX(word64 *state, const word64 *data)
1184 {
1185  CRYPTOPP_ASSERT(state);
1186  CRYPTOPP_ASSERT(data);
1187 
1188  word64 W[16]={0}, T[8]; // W: 16-word message schedule, T: working variables addressed through a(i)..h(i)
1189 
1190  /* Copy context->state[] to working vars */
1191  std::memcpy(T, state, sizeof(T));
1192 
1193  /* 80 operations, partially loop unrolled */
1194  for (unsigned int j=0; j<80; j+=16) // five passes of 16 rounds; j is the round base read by R
1195  {
1196  R( 0); R( 1); R( 2); R( 3);
1197  R( 4); R( 5); R( 6); R( 7);
1198  R( 8); R( 9); R(10); R(11);
1199  R(12); R(13); R(14); R(15);
1200  }
1201 // Add the working variables back into the caller's state (a(0)..h(0) are T[0]..T[7]).
1202  state[0] += a(0);
1203  state[1] += b(0);
1204  state[2] += c(0);
1205  state[3] += d(0);
1206  state[4] += e(0);
1207  state[5] += f(0);
1208  state[6] += g(0);
1209  state[7] += h(0);
1210 }
1211 
1212 ANONYMOUS_NAMESPACE_END
1213 
1214 void SHA512::Transform(word64 *state, const word64 *data)
1215 {
1216  // Hash one block of data into state with the first implementation that is
1217  // both compiled in and reported by the CPU; the C++ routine is the fallback.
1218  CRYPTOPP_ASSERT(state);
1219  CRYPTOPP_ASSERT(data);
1220 
1221 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
1222  if (HasSSE2())
1223  return SHA512_HashBlock_SSE2(state, data);
1224 #endif
1225 
1226 #if CRYPTOPP_POWER8_SHA_AVAILABLE
1227  // The POWER8 routine is a multi-block entry point; SHA512::BLOCKSIZE limits it to one block.
1228  if (HasSHA512())
1229  SHA512_HashMultipleBlocks_POWER8(state, data, SHA512::BLOCKSIZE, BIG_ENDIAN_ORDER);
1230  else
1231 #endif
1232  // Portable path: runs unconditionally when the POWER8 code is compiled out,
1233  // otherwise it is the else-branch of the HasSHA512() test above.
1234  SHA512_HashBlock_CXX(state, data);
1235 }
1236 // Remove the short helper macros used by the hash rounds so these names cannot leak into later code.
1237 #undef Ch
1238 #undef Maj
1239 
1240 #undef s0
1241 #undef s1
1242 #undef S0
1243 #undef S1
1244 
1245 #undef blk0
1246 #undef blk1 // no blk1 is defined in the SHA-512 section; presumably left over from an earlier section - confirm
1247 #undef blk2
1248 
1249 #undef R
1250 
1251 #undef a
1252 #undef b
1253 #undef c
1254 #undef d
1255 #undef e
1256 #undef f
1257 #undef g
1258 #undef h
1259 
1260 NAMESPACE_END
1261 
1262 #endif // Not CRYPTOPP_GENERATE_X64_MASM
1263 #endif // Not CRYPTOPP_IMPORTS
bool HasSHA()
Determines SHA availability.
Definition: cpu.h:189
bool NativeByteOrderIs(ByteOrder order)
Determines whether order follows native byte ordering.
Definition: misc.h:1156
Utility functions for the Crypto++ library.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
Definition: sha.cpp:225
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:143
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:937
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
Definition: sha.cpp:765
Library configuration file.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
Definition: sha.cpp:927
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
Definition: sha.cpp:251
byte order is little-endian
Definition: cryptlib.h:145
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
Definition: sha.cpp:410
Classes and functions for secure memory allocations.
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:415
bool HasSHA256()
Determine if a PowerPC processor has SHA256 available.
Definition: cpu.h:695
ByteOrder GetByteOrder() const
Provides the byte order of the hash.
Definition: iterhash.h:144
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:242
Precompiled header file.
bool HasSHA1()
Determine if an ARM processor has SHA1 available.
Definition: cpu.h:469
Fixed size stack-based SecBlock with 16-byte alignment.
Definition: secblock.h:1089
bool HasSHA512()
Determine if an ARM processor has SHA512 available.
Definition: cpu.h:509
byte order is big-endian
Definition: cryptlib.h:147
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:69
Functions for CPU features and intrinsics.
Classes for SHA-1 and SHA-2 family of message digests.
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
Definition: sha.cpp:1214
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:116
bool HasSHA2()
Determine if an ARM processor has SHA256 available.
Definition: cpu.h:489
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:947
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
Definition: sha.cpp:770
Crypto++ library namespace.
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:423
byte ByteReverse(byte value)
Reverses bytes in an 8-bit value.
Definition: misc.h:1940
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
Definition: sha.cpp:932