Crypto++ 8.0
Free C++ class library of cryptographic schemes
gcm.cpp
1 // gcm.cpp - originally written and placed in the public domain by Wei Dai.
2 // ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3 // multiply routines are less efficient because they shadow x86.
4 // The precomputed key table integration makes it tricky to use the
5 // more efficient ARMv8 implementation of the multiply and reduce.
6 
7 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8 
9 #include "pch.h"
10 #include "config.h"
11 
12 #ifndef CRYPTOPP_IMPORTS
13 #ifndef CRYPTOPP_GENERATE_X64_MASM
14 
15 // Visual Studio .Net 2003 compiler crash
16 #if defined(_MSC_VER) && (_MSC_VER < 1400)
17 # pragma optimize("", off)
18 #endif
19 
20 #include "gcm.h"
21 #include "cpu.h"
22 
23 #if defined(CRYPTOPP_DISABLE_GCM_ASM)
24 # undef CRYPTOPP_X86_ASM_AVAILABLE
25 # undef CRYPTOPP_X32_ASM_AVAILABLE
26 # undef CRYPTOPP_X64_ASM_AVAILABLE
27 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
28 #endif
29 
30 NAMESPACE_BEGIN(CryptoPP)
31 
32 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33 // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34 // 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35 #if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36 // 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37 # define USE_MOVD_REG32 1
38 #elif defined(__GNUC__) || defined(_MSC_VER)
39 // 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40 # define USE_MOVD_REG32_OR_REG64 1
41 #else
42 // 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43 # define USE_MOV_REG32_OR_REG64 1
44 #endif
45 #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46 
47 // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
48 #define M128_CAST(x) ((__m128i *)(void *)(x))
49 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
50 
// Shared 256-entry reduction table used by the 2K-table software GHASH path
// (see AuthenticateBlocks, case 0) to fold bytes shifted off the 128-bit
// product back into the field. Built lazily in SetKeyWithoutResync.
51 word16 GCM_Base::s_reductionTable[256];
// One-time initialization flag for s_reductionTable. NOTE(review): volatile
// does not synchronize threads; concurrent first-time keying appears to rely
// on the table fill being deterministic/idempotent -- confirm this meets the
// library's thread-safety requirements.
52 volatile bool GCM_Base::s_reductionTableInitialized = false;
53 
// Advances the CTR-mode counter by 256 blocks: increments the big-endian
// counter starting at its fourth-from-last byte over 3 bytes, which leaves
// the low counter byte untouched (effectively counter += 256).
54 void GCM_Base::GCTR::IncrementCounterBy256()
55 {
56  IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
57 }
58 
59 static inline void Xor16(byte *a, const byte *b, const byte *c)
60 {
61  CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
62  CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
63  CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
64  ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
65  ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
66 }
67 
68 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
69 // SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
70 // a source file with a SSE architecture switch. Also see GH #226 and GH #284.
71 extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
72 #endif // SSE2
73 
74 #if CRYPTOPP_ARM_NEON_AVAILABLE
75 extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
76 #endif
77 
78 #if CRYPTOPP_POWER7_AVAILABLE
79 extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
80 #endif
81 
82 #if CRYPTOPP_CLMUL_AVAILABLE
83 extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
84 extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
85 const unsigned int s_cltableSizeInBlocks = 8;
86 extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
87 #endif // CRYPTOPP_CLMUL_AVAILABLE
88 
89 #if CRYPTOPP_ARM_PMULL_AVAILABLE
90 extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
91 extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
92 const unsigned int s_cltableSizeInBlocks = 8;
93 extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
94 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE
95 
96 #if CRYPTOPP_POWER8_VMULL_AVAILABLE
97 extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
98 extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
99 const unsigned int s_cltableSizeInBlocks = 8;
100 extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
101 #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
102 
// Keys the underlying block cipher and precomputes the GHASH key material:
// the hash subkey H = E_K(0^128) plus a multiplication table whose size and
// layout depend on the best implementation available at runtime (carryless
// multiply, 64K tables, or 2K tables).
103 void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
104 {
105  BlockCipher &blockCipher = AccessBlockCipher();
106  blockCipher.SetKey(userKey, keylength, params);
107 
108  // GCM is only defined for 16-byte block ciphers at the moment.
109  // However, variable blocksize support means we have to defer
110  // blocksize checks to runtime after the key is set. Also see
111  // https://github.com/weidai11/cryptopp/issues/408.
112  const unsigned int blockSize = blockCipher.BlockSize();
113  CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
114  if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
115  throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
116 
117  int tableSize, i, j, k;
118 
// When a carryless-multiply implementation is compiled in and detected at
// runtime, the table is a fixed s_cltableSizeInBlocks*blockSize bytes and
// any user-requested TableSize is read only to silence warnings, then
// ignored.
119 #if CRYPTOPP_CLMUL_AVAILABLE
120  if (HasCLMUL())
121  {
122  // Avoid "parameter not used" error and suppress Coverity finding
123  (void)params.GetIntValue(Name::TableSize(), tableSize);
124  tableSize = s_cltableSizeInBlocks * blockSize;
125  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
126  }
127  else
128 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
129  if (HasPMULL())
130  {
131  // Avoid "parameter not used" error and suppress Coverity finding
132  (void)params.GetIntValue(Name::TableSize(), tableSize);
133  tableSize = s_cltableSizeInBlocks * blockSize;
134  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
135  }
136  else
137 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
138  if (HasPMULL())
139  {
140  // Avoid "parameter not used" error and suppress Coverity finding
141  (void)params.GetIntValue(Name::TableSize(), tableSize);
142  tableSize = s_cltableSizeInBlocks * blockSize;
143  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
144  }
145  else
146 #endif
147  {
// Software path: the table is either 64K or 2K, chosen by the TableSize
// name-value pair when supplied, else by the mode's GetTablesOption().
148  if (params.GetIntValue(Name::TableSize(), tableSize))
149  tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
150  else
151  tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;
152 
153  //#if defined(_MSC_VER) && (_MSC_VER < 1400)
154  // VC 2003 workaround: compiler generates bad code for 64K tables
155  //tableSize = 2*1024;
156  //#endif
157  }
158 
// m_buffer holds 3 blocks of per-key state followed by the multiplication
// table. NOTE(review): MulTable()/HashKey() presumably index into m_buffer
// at fixed offsets -- confirm against their definitions in gcm.h.
159  m_buffer.resize(3*blockSize + tableSize);
160  byte *mulTable = MulTable();
161  byte *hashKey = HashKey();
// Hash subkey H = E_K(0^128), computed in place.
162  memset(hashKey, 0, REQUIRED_BLOCKSIZE);
163  blockCipher.ProcessBlock(hashKey);
164 
165 #if CRYPTOPP_CLMUL_AVAILABLE
166  if (HasCLMUL())
167  {
168  GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
169  return;
170  }
171 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
172  if (HasPMULL())
173  {
174  GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
175  return;
176  }
177 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
178  if (HasPMULL())
179  {
180  GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
181  return;
182  }
183 #endif
184 
// NOTE(review): the embedded numbering jumps 185 -> 187 here; upstream
// gcm.cpp declares `typedef BlockGetAndPut<word64, BigEndian> Block;` at
// this point, which appears to have been lost in extraction. Restore it
// when syncing with the canonical source -- `Block` below otherwise has no
// visible definition.
185  word64 V0, V1;
187  Block::Get(hashKey)(V0)(V1);
188 
189  if (tableSize == 64*1024)
190  {
// 64K tables: 16 sub-tables (one per message byte position), each with 256
// entries of 16 bytes. Seed each sub-table's power-of-two entries with
// successive halvings of H in GF(2^128); the remaining entries are filled
// by XOR below, exploiting linearity of the field multiply.
191  for (i=0; i<128; i++)
192  {
193  k = i%8;
194  Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
195 
// Divide V by x in GF(2^128): logical right shift with a conditional fold
// of the GCM reduction constant 0xE1 into the top byte.
196  int x = (int)V1 & 1;
197  V1 = (V1>>1) | (V0<<63);
198  V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
199  }
200 
// Complete each sub-table: entry 0 is zero and entry (j+k) = entry j XOR
// entry k, using the fastest 16-byte XOR available on this CPU.
201  for (i=0; i<16; i++)
202  {
203  memset(mulTable+i*256*16, 0, 16);
204 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
205  if (HasSSE2())
206  for (j=2; j<=0x80; j*=2)
207  for (k=1; k<j; k++)
208  GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
209  else
210 #elif CRYPTOPP_ARM_NEON_AVAILABLE
211  if (HasNEON())
212  for (j=2; j<=0x80; j*=2)
213  for (k=1; k<j; k++)
214  GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
215  else
216 #elif CRYPTOPP_POWER7_AVAILABLE
217  if (HasPower7())
218  for (j=2; j<=0x80; j*=2)
219  for (k=1; k<j; k++)
220  GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
221  else
222 #endif
223  for (j=2; j<=0x80; j*=2)
224  for (k=1; k<j; k++)
225  Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
226  }
227  }
228  else
229  {
// 2K tables: lazily build the shared byte-reduction table on first use.
// NOTE(review): the volatile flag does not synchronize concurrent callers;
// the fill is deterministic so a first-use race is likely benign, but
// confirm against the library's thread-safety guarantees.
230  if (!s_reductionTableInitialized)
231  {
232  s_reductionTable[0] = 0;
233  word16 x = 0x01c2;
234  s_reductionTable[1] = ByteReverse(x);
235  for (unsigned int ii=2; ii<=0x80; ii*=2)
236  {
237  x <<= 1;
238  s_reductionTable[ii] = ByteReverse(x);
239  for (unsigned int jj=1; jj<ii; jj++)
240  s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
241  }
242  s_reductionTableInitialized = true;
243  }
244 
// Seed the nibble-indexed 2K tables (two banks of 4 sub-tables, 256 bytes
// each, at mulTable and mulTable+1024) with halvings of H, as above.
245  for (i=0; i<128-24; i++)
246  {
247  k = i%32;
248  if (k < 4)
249  Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
250  else if (k < 8)
251  Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
252 
253  int x = (int)V1 & 1;
254  V1 = (V1>>1) | (V0<<63);
255  V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
256  }
257 
// Complete each 16-entry sub-table (j up to 8, so indices 0..15) in both
// banks by XOR of power-of-two entries, as in the 64K case.
258  for (i=0; i<4; i++)
259  {
260  memset(mulTable+i*256, 0, 16);
261  memset(mulTable+1024+i*256, 0, 16);
262 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
263  if (HasSSE2())
264  for (j=2; j<=8; j*=2)
265  for (k=1; k<j; k++)
266  {
267  GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
268  GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
269  }
270  else
271 #elif CRYPTOPP_ARM_NEON_AVAILABLE
272  if (HasNEON())
273  for (j=2; j<=8; j*=2)
274  for (k=1; k<j; k++)
275  {
276  GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
277  GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
278  }
279  else
280 #elif CRYPTOPP_POWER7_AVAILABLE
281  if (HasPower7())
282  for (j=2; j<=8; j*=2)
283  for (k=1; k<j; k++)
284  {
285  GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
286  GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
287  }
288  else
289 #endif
290  for (j=2; j<=8; j*=2)
291  for (k=1; k<j; k++)
292  {
293  Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
294  Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
295  }
296  }
297  }
298 }
299 
// Lets an accelerated GHASH implementation convert the hash buffer between
// its internal byte order and the canonical order (see the per-platform
// GCM_ReverseHashBufferIfNeeded_* helpers). A no-op when no carryless-
// multiply path is compiled in or available at runtime.
300 inline void GCM_Base::ReverseHashBufferIfNeeded()
301 {
302 #if CRYPTOPP_CLMUL_AVAILABLE
303  if (HasCLMUL())
304  {
305  GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
306  }
307 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
308  if (HasPMULL())
309  {
310  GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
311  }
312 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
313  if (HasPMULL())
314  {
315  GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
316  }
317 #endif
318 }
319 
// Derives the pre-counter block J0 from the IV per NIST SP 800-38D: for a
// 96-bit IV, J0 = IV || 0^31 || 1; otherwise J0 = GHASH over the zero-padded
// IV followed by a length block. Then keys/repositions the CTR engine and
// clears the GHASH accumulator for the upcoming AAD/message.
320 void GCM_Base::Resync(const byte *iv, size_t len)
321 {
322  BlockCipher &cipher = AccessBlockCipher();
323  byte *hashBuffer = HashBuffer();
324 
325  if (len == 12)
326  {
// 96-bit IV fast path: J0 = IV || 0x00000001, no hashing required.
327  memcpy(hashBuffer, iv, len);
328  memset(hashBuffer+len, 0, 3);
329  hashBuffer[len+3] = 1;
330  }
331  else
332  {
333  size_t origLen = len;
334  memset(hashBuffer, 0, HASH_BLOCKSIZE);
335 
// Hash all whole 16-byte blocks of the IV; AuthenticateBlocks returns the
// residual byte count.
336  if (len >= HASH_BLOCKSIZE)
337  {
338  len = GCM_Base::AuthenticateBlocks(iv, len);
339  iv += (origLen - len);
340  }
341 
// Zero-pad and hash any partial trailing block of the IV.
342  if (len > 0)
343  {
344  memcpy(m_buffer, iv, len);
345  memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
346  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
347  }
348 
// Final GHASH block for IV derivation: 64-bit zero followed by the IV
// length in bits, big-endian.
349  PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
350  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
351 
352  ReverseHashBufferIfNeeded();
353  }
354 
355  if (m_state >= State_IVSet)
356  m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
357  else
358  m_ctr.SetCipherWithIV(cipher, hashBuffer);
359 
// Skip the first keystream block: E_K(J0) is reserved for masking the final
// tag (AuthenticateLastFooterBlock seeks back to 0 to consume it).
360  m_ctr.Seek(HASH_BLOCKSIZE);
361 
// Reset the GHASH accumulator; hashing of AAD/ciphertext starts from zero.
362  memset(hashBuffer, 0, HASH_BLOCKSIZE);
363 }
364 
// Preferred input alignment: 16 bytes when the SSE2 or Power7 paths are in
// use (the SSE2 code in this file uses aligned movdqa loads of the hash
// state), 4 bytes for NEON, otherwise whatever the underlying cipher wants.
365 unsigned int GCM_Base::OptimalDataAlignment() const
366 {
367  return
368 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
369  HasSSE2() ? 16 :
370 #elif CRYPTOPP_ARM_NEON_AVAILABLE
371  HasNEON() ? 4 :
372 #elif CRYPTOPP_POWER7_AVAILABLE
373  HasPower7() ? 16 :
374 #endif
375  GetBlockCipher().OptimalDataAlignment();
376 }
377 
378 #if CRYPTOPP_MSC_VERSION
379 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
380 #endif
381 
382 #endif // Not CRYPTOPP_GENERATE_X64_MASM
383 
384 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
385 extern "C" {
386 void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
387 void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
388 }
389 #endif
390 
391 #ifndef CRYPTOPP_GENERATE_X64_MASM
392 
// Absorbs as many whole 16-byte blocks of `data` as possible into the GHASH
// accumulator (HashBuffer()), returning the count of unprocessed tail bytes
// (len % 16). Dispatches to a carryless-multiply implementation when one is
// compiled in and available at runtime; otherwise selects one of the
// table-driven software/SSE2 paths below.
393 size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
394 {
395 #if CRYPTOPP_CLMUL_AVAILABLE
396  if (HasCLMUL())
397  {
398  return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
399  }
400 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
401  if (HasPMULL())
402  {
403  return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
404  }
405 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
406  if (HasPMULL())
407  {
408  return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
409  }
410 #endif
411 
// NOTE(review): the embedded numbering jumps 411 -> 413 here; upstream
// gcm.cpp declares `typedef BlockGetAndPut<word64, NativeByteOrder> Block;`
// at this point, which appears to have been lost in extraction -- the
// `Block::Get` uses below otherwise have no visible definition.
413  word64 *hashBuffer = (word64 *)(void *)HashBuffer();
414  CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));
415 
// Path selector: bit 1 set when the 64K tables were built (buffer size),
// bit 0 set when the SSE2 assembly is compiled in and available.
416  switch (2*(m_buffer.size()>=64*1024)
417 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
418  + HasSSE2()
419 //#elif CRYPTOPP_ARM_NEON_AVAILABLE
420 // + HasNEON()
421 #endif
422  )
423  {
// Portable 2K-table path: multiply the accumulator by H one 4-bit nibble at
// a time via the per-nibble tables, then reduce byte-wise with
// s_reductionTable.
424  case 0: // non-SSE2 and 2K tables
425  {
426  byte *mulTable = MulTable();
427  word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
428 
429  do
430  {
431  word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
432  Block::Get(data)(y0)(y1);
433  x0 ^= y0;
434  x1 ^= y1;
435 
436  data += HASH_BLOCKSIZE;
437  len -= HASH_BLOCKSIZE;
438 
439  #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
440 
441  #if (CRYPTOPP_LITTLE_ENDIAN)
442  #if CRYPTOPP_BOOL_SLOW_WORD64
443  word32 z0 = (word32)x0;
444  word32 z1 = (word32)(x0>>32);
445  word32 z2 = (word32)x1;
446  word32 z3 = (word32)(x1>>32);
447  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
448  #else
449  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
450  #endif
451  #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
452  #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
453  #else
454  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
455  #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
456  #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
457  #endif
458 
459  #define GF_MUL_32BY128(op, a, b, c) \
460  a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
461  a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
462  b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
463  b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
464  c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
465  c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
466  d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
467  d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
468 
469  GF_MUL_32BY128(=, 0, 0, 0)
470  GF_MUL_32BY128(^=, 0, 1, 1)
471  GF_MUL_32BY128(^=, 1, 0, 2)
472  GF_MUL_32BY128(^=, 1, 1, 3)
473 
// Byte-wise reduction of the partial products (a,b,c,d) back into 128 bits
// using the shared reduction table.
474  word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
475  GF_SHIFT_8(d)
476  c0 ^= d0; c1 ^= d1;
477  r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
478  GF_SHIFT_8(c)
479  b0 ^= c0; b1 ^= c1;
480  r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
481  GF_SHIFT_8(b)
482  a0 ^= b0; a1 ^= b1;
// NOTE(review): the embedded numbering jumps 482 -> 484 here; upstream
// gcm.cpp folds the accumulated reduction word `r` into a0 at this point
// (via ConditionalByteReverse -- see the dangling reference to it in this
// dump's cross-reference residue). As extracted, `r` is computed but never
// consumed, which would break the hash; restore the missing line when
// syncing with the canonical source.
484  x0 = a0; x1 = a1;
485  }
486  while (len >= HASH_BLOCKSIZE);
487 
488  hashBuffer[0] = x0; hashBuffer[1] = x1;
489  return len;
490  }
491 
// Portable 64K-table path: multiply by H one byte at a time; the tables are
// pre-reduced so no separate reduction step is needed.
492  case 2: // non-SSE2 and 64K tables
493  {
494  byte *mulTable = MulTable();
495  word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
496 
497  do
498  {
499  word64 y0, y1, a0, a1;
500  Block::Get(data)(y0)(y1);
501  x0 ^= y0;
502  x1 ^= y1;
503 
504  data += HASH_BLOCKSIZE;
505  len -= HASH_BLOCKSIZE;
506 
507  #undef READ_TABLE_WORD64_COMMON
508  #undef READ_TABLE_WORD64
509 
510  #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
511 
512  #if (CRYPTOPP_LITTLE_ENDIAN)
513  #if CRYPTOPP_BOOL_SLOW_WORD64
514  word32 z0 = (word32)x0;
515  word32 z1 = (word32)(x0>>32);
516  word32 z2 = (word32)x1;
517  word32 z3 = (word32)(x1>>32);
518  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
519  #else
520  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
521  #endif
522  #else
523  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
524  #endif
525 
526  #define GF_MUL_8BY128(op, b, c, d) \
527  a0 op READ_TABLE_WORD64(b, c, d, 0);\
528  a1 op READ_TABLE_WORD64(b, c, d, 1);\
529 
530  GF_MUL_8BY128(=, 0, 0, 0)
531  GF_MUL_8BY128(^=, 0, 0, 1)
532  GF_MUL_8BY128(^=, 0, 0, 2)
533  GF_MUL_8BY128(^=, 0, 0, 3)
534  GF_MUL_8BY128(^=, 0, 1, 0)
535  GF_MUL_8BY128(^=, 0, 1, 1)
536  GF_MUL_8BY128(^=, 0, 1, 2)
537  GF_MUL_8BY128(^=, 0, 1, 3)
538  GF_MUL_8BY128(^=, 1, 2, 0)
539  GF_MUL_8BY128(^=, 1, 2, 1)
540  GF_MUL_8BY128(^=, 1, 2, 2)
541  GF_MUL_8BY128(^=, 1, 2, 3)
542  GF_MUL_8BY128(^=, 1, 3, 0)
543  GF_MUL_8BY128(^=, 1, 3, 1)
544  GF_MUL_8BY128(^=, 1, 3, 2)
545  GF_MUL_8BY128(^=, 1, 3, 3)
546 
547  x0 = a0; x1 = a1;
548  }
549  while (len >= HASH_BLOCKSIZE);
550 
551  hashBuffer[0] = x0; hashBuffer[1] = x1;
552  return len;
553  }
554 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
555 
// On MASM builds the SSE2 routines are assembled separately (generated from
// the inline-asm cases below via the cl /EP /P command in the file header).
556 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
557  case 1: // SSE2 and 2K tables
558  GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
559  return len % 16;
560  case 3: // SSE2 and 64K tables
561  GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
562  return len % 16;
563 #endif
564 
// Inline-assembly SSE2 paths. The same text doubles as the MASM source when
// CRYPTOPP_GENERATE_X64_MASM is defined.
565 #if CRYPTOPP_SSE2_ASM_AVAILABLE
566  case 1: // SSE2 and 2K tables
567  {
568  #ifdef __GNUC__
569  __asm__ __volatile__
570  (
571  INTEL_NOPREFIX
572  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
573  ALIGN 8
574  GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
575  rex_push_reg rsi
576  push_reg rdi
577  push_reg rbx
578  .endprolog
579  mov rsi, r8
580  mov r11, r9
581  #else
582  AS2( mov WORD_REG(cx), data )
583  AS2( mov WORD_REG(dx), len )
584  AS2( mov WORD_REG(si), hashBuffer )
585  AS2( shr WORD_REG(dx), 4 )
586  #endif
587 
588  #if CRYPTOPP_BOOL_X32
589  AS1(push rbx)
590  AS1(push rbp)
591  #else
592  AS_PUSH_IF86( bx)
593  AS_PUSH_IF86( bp)
594  #endif
595 
596  #ifdef __GNUC__
597  AS2( mov AS_REG_7, WORD_REG(di))
598  #elif CRYPTOPP_BOOL_X86
599  AS2( lea AS_REG_7, s_reductionTable)
600  #endif
601 
602  AS2( movdqa xmm0, [WORD_REG(si)] )
603 
604  #define MUL_TABLE_0 WORD_REG(si) + 32
605  #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
606  #define RED_TABLE AS_REG_7
607 
608  ASL(0)
609  AS2( movdqu xmm4, [WORD_REG(cx)] )
610  AS2( pxor xmm0, xmm4 )
611 
612  AS2( movd ebx, xmm0 )
613  AS2( mov eax, AS_HEX(f0f0f0f0) )
614  AS2( and eax, ebx )
615  AS2( shl ebx, 4 )
616  AS2( and ebx, AS_HEX(f0f0f0f0) )
617  AS2( movzx edi, ah )
618  AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
619  AS2( movzx edi, al )
620  AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
621  AS2( shr eax, 16 )
622  AS2( movzx edi, ah )
623  AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
624  AS2( movzx edi, al )
625  AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
626 
627  #define SSE2_MUL_32BITS(i) \
628  AS2( psrldq xmm0, 4 )\
629  AS2( movd eax, xmm0 )\
630  AS2( and eax, AS_HEX(f0f0f0f0) )\
631  AS2( movzx edi, bh )\
632  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
633  AS2( movzx edi, bl )\
634  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
635  AS2( shr ebx, 16 )\
636  AS2( movzx edi, bh )\
637  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
638  AS2( movzx edi, bl )\
639  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
640  AS2( movd ebx, xmm0 )\
641  AS2( shl ebx, 4 )\
642  AS2( and ebx, AS_HEX(f0f0f0f0) )\
643  AS2( movzx edi, ah )\
644  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
645  AS2( movzx edi, al )\
646  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
647  AS2( shr eax, 16 )\
648  AS2( movzx edi, ah )\
649  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
650  AS2( movzx edi, al )\
651  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
652 
653  SSE2_MUL_32BITS(1)
654  SSE2_MUL_32BITS(2)
655  SSE2_MUL_32BITS(3)
656 
657  AS2( movzx edi, bh )
658  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
659  AS2( movzx edi, bl )
660  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
661  AS2( shr ebx, 16 )
662  AS2( movzx edi, bh )
663  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
664  AS2( movzx edi, bl )
665  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
666 
667  AS2( movdqa xmm0, xmm3 )
668  AS2( pslldq xmm3, 1 )
669  AS2( pxor xmm2, xmm3 )
670  AS2( movdqa xmm1, xmm2 )
671  AS2( pslldq xmm2, 1 )
672  AS2( pxor xmm5, xmm2 )
673 
674  AS2( psrldq xmm0, 15 )
675 #if USE_MOVD_REG32
676  AS2( movd edi, xmm0 )
677 #elif USE_MOV_REG32_OR_REG64
678  AS2( mov WORD_REG(di), xmm0 )
679 #else // GNU Assembler
680  AS2( movd WORD_REG(di), xmm0 )
681 #endif
682  AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
683  AS2( shl eax, 8 )
684 
685  AS2( movdqa xmm0, xmm5 )
686  AS2( pslldq xmm5, 1 )
687  AS2( pxor xmm4, xmm5 )
688 
689  AS2( psrldq xmm1, 15 )
690 #if USE_MOVD_REG32
691  AS2( movd edi, xmm1 )
692 #elif USE_MOV_REG32_OR_REG64
693  AS2( mov WORD_REG(di), xmm1 )
694 #else
695  AS2( movd WORD_REG(di), xmm1 )
696 #endif
697  AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
698  AS2( shl eax, 8 )
699 
700  AS2( psrldq xmm0, 15 )
701 #if USE_MOVD_REG32
702  AS2( movd edi, xmm0 )
703 #elif USE_MOV_REG32_OR_REG64
704  AS2( mov WORD_REG(di), xmm0 )
705 #else
706  AS2( movd WORD_REG(di), xmm0 )
707 #endif
708  AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
709 
710  AS2( movd xmm0, eax )
711  AS2( pxor xmm0, xmm4 )
712 
713  AS2( add WORD_REG(cx), 16 )
714  AS2( sub WORD_REG(dx), 1 )
715  // ATT_NOPREFIX
716  ASJ( jnz, 0, b )
717  INTEL_NOPREFIX
718  AS2( movdqa [WORD_REG(si)], xmm0 )
719 
720  #if CRYPTOPP_BOOL_X32
721  AS1(pop rbp)
722  AS1(pop rbx)
723  #else
724  AS_POP_IF86( bp)
725  AS_POP_IF86( bx)
726  #endif
727 
728  #ifdef __GNUC__
729  ATT_PREFIX
730  :
731  : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
732  : "memory", "cc", "%eax"
733  #if CRYPTOPP_BOOL_X64
734  , "%ebx", "%r11"
735  #endif
736  );
737  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
738  pop rbx
739  pop rdi
740  pop rsi
741  ret
742  GCM_AuthenticateBlocks_2K_SSE2 ENDP
743  #endif
744 
745  return len%16;
746  }
747  case 3: // SSE2 and 64K tables
748  {
749  #ifdef __GNUC__
750  __asm__ __volatile__
751  (
752  INTEL_NOPREFIX
753  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
754  ALIGN 8
755  GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
756  rex_push_reg rsi
757  push_reg rdi
758  .endprolog
759  mov rsi, r8
760  #else
761  AS2( mov WORD_REG(cx), data )
762  AS2( mov WORD_REG(dx), len )
763  AS2( mov WORD_REG(si), hashBuffer )
764  AS2( shr WORD_REG(dx), 4 )
765  #endif
766 
767  AS2( movdqa xmm0, [WORD_REG(si)] )
768 
769  #undef MUL_TABLE
770  #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
771 
772  ASL(1)
773  AS2( movdqu xmm1, [WORD_REG(cx)] )
774  AS2( pxor xmm1, xmm0 )
775  AS2( pxor xmm0, xmm0 )
776 
777  #undef SSE2_MUL_32BITS
778  #define SSE2_MUL_32BITS(i) \
779  AS2( movd eax, xmm1 )\
780  AS2( psrldq xmm1, 4 )\
781  AS2( movzx edi, al )\
782  AS2( add WORD_REG(di), WORD_REG(di) )\
783  AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
784  AS2( movzx edi, ah )\
785  AS2( add WORD_REG(di), WORD_REG(di) )\
786  AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
787  AS2( shr eax, 16 )\
788  AS2( movzx edi, al )\
789  AS2( add WORD_REG(di), WORD_REG(di) )\
790  AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
791  AS2( movzx edi, ah )\
792  AS2( add WORD_REG(di), WORD_REG(di) )\
793  AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
794 
795  SSE2_MUL_32BITS(0)
796  SSE2_MUL_32BITS(1)
797  SSE2_MUL_32BITS(2)
798  SSE2_MUL_32BITS(3)
799 
800  AS2( add WORD_REG(cx), 16 )
801  AS2( sub WORD_REG(dx), 1 )
802  // ATT_NOPREFIX
803  ASJ( jnz, 1, b )
804  INTEL_NOPREFIX
805  AS2( movdqa [WORD_REG(si)], xmm0 )
806 
807  #ifdef __GNUC__
808  ATT_PREFIX
809  :
810  : "c" (data), "d" (len/16), "S" (hashBuffer)
811  : "memory", "cc", "%edi", "%eax"
812  );
813  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
814  pop rdi
815  pop rsi
816  ret
817  GCM_AuthenticateBlocks_64K_SSE2 ENDP
818  #endif
819 
820  return len%16;
821  }
822 #endif
823 #ifndef CRYPTOPP_GENERATE_X64_MASM
824  }
825 
826  return len%16;
827 }
828 
829 void GCM_Base::AuthenticateLastHeaderBlock()
830 {
831  if (m_bufferedDataLength > 0)
832  {
833  memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
834  m_bufferedDataLength = 0;
835  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
836  }
837 }
838 
// Finalizes GHASH over the message: flushes any buffered partial block,
// then hashes the GCM lengths block -- the AAD length and message length,
// each as a 64-bit big-endian bit count.
839 void GCM_Base::AuthenticateLastConfidentialBlock()
840 {
841  GCM_Base::AuthenticateLastHeaderBlock();
842  PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
843  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
844 }
845 
// Produces the authentication tag: rewinds CTR mode to keystream offset 0
// (the E_K(J0) block reserved by Resync's Seek), converts the hash state to
// canonical byte order if an accelerated path was used, and encrypts the
// GHASH value, writing the first macSize bytes to mac.
846 void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
847 {
848  m_ctr.Seek(0);
849  ReverseHashBufferIfNeeded();
850  m_ctr.ProcessData(mac, HashBuffer(), macSize);
851 }
852 
853 NAMESPACE_END
854 
855 #endif // Not CRYPTOPP_GENERATE_X64_MASM
856 #endif
An invalid argument was detected.
Definition: cryptlib.h:202
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition: cryptlib.cpp:58
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: gcm.cpp:365
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition: misc.h:1194
Library configuration file.
Access a block of memory.
Definition: misc.h:2500
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: cryptlib.cpp:190
byte order is little-endian
Definition: cryptlib.h:145
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1250
bool HasPower7()
Determine if a PowerPC processor has Power7 available.
Definition: cpu.h:627
Use a table with 64K entries.
Definition: gcm.h:27
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1111
const char * TableSize()
int, in bytes
Definition: argnames.h:81
bool HasCLMUL()
Determines Carryless Multiply availability.
Definition: cpu.h:177
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2081
Precompiled header file.
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:851
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: gcm.h:36
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:69
const char * BlockSize()
int, in bytes
Definition: argnames.h:27
Functions for CPU features and intrinsics.
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:116
GCM block cipher mode of operation.
Access a block of memory.
Definition: misc.h:2463
Crypto++ library namespace.
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:386
byte ByteReverse(byte value)
Reverses bytes in a 8-bit value.
Definition: misc.h:1940
bool HasPMULL()
Determine if an ARM processor provides Polynomial Multiplication.
Definition: cpu.h:408
bool HasNEON()
Determine if an ARM processor has Advanced SIMD available.
Definition: cpu.h:387
Interface for retrieving values given their names.
Definition: cryptlib.h:293