Crypto++ 8.8
Free C++ class library of cryptographic schemes
vmac.cpp
1 // vmac.cpp - originally written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if defined(CRYPTOPP_MSC_VERSION) && !CRYPTOPP_BOOL_SLOW_WORD64
13 #include <intrin.h>
14 #endif
15 
16 #if defined(CRYPTOPP_DISABLE_VMAC_ASM)
17 # undef CRYPTOPP_X86_ASM_AVAILABLE
18 # undef CRYPTOPP_X32_ASM_AVAILABLE
19 # undef CRYPTOPP_X64_ASM_AVAILABLE
20 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
21 #endif
22 
23 #if CRYPTOPP_MSC_VERSION
24 # pragma warning(disable: 4731)
25 #endif
26 
27 ANONYMOUS_NAMESPACE_BEGIN
28 
29 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
30 using CryptoPP::word128;
31 using CryptoPP::word64;
32 # define VMAC_BOOL_WORD128 1
33 #else
34 using CryptoPP::word64;
35 # define VMAC_BOOL_WORD128 0
36 #endif
37 
38 #ifdef __BORLANDC__
39 #define const // Turbo C++ 2006 workaround
40 #endif
41 const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
42 const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
43 const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
44 const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
45 const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
46 #ifdef __BORLANDC__
47 #undef const
48 #endif
49 
50 #if VMAC_BOOL_WORD128
51 // workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
52 # if defined(__powerpc__) && defined (CRYPTOPP_GCC_VERSION) && (CRYPTOPP_GCC_VERSION < 50300)
53 # define m126 ((word128(m62)<<64)|m64)
54 # else
55 const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
56 # endif
57 #endif
58 
59 ANONYMOUS_NAMESPACE_END
60 
61 NAMESPACE_BEGIN(CryptoPP)
62 
63 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
64 {
65  int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
66  if (digestLength != 8 && digestLength != 16)
67  throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
68  m_is128 = digestLength == 16;
69 
70  m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
71  if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
72  throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
73 
74  AllocateBlocks();
75 
76  BlockCipher &cipher = AccessCipher();
77  cipher.SetKey(userKey, keylength, params);
78  const unsigned int blockSize = cipher.BlockSize();
79  const unsigned int blockSizeInWords = blockSize / sizeof(word64);
80  SecBlock<word64, AllocatorWithCleanup<word64, true> > out(blockSizeInWords);
81  AlignedSecByteBlock in;
82  in.CleanNew(blockSize);
83  size_t i;
84 
85  /* Fill nh key */
86  in[0] = 0x80;
87  cipher.AdvancedProcessBlocks(in, NULLPTR, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
88  ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
89 
90  /* Fill poly key */
91  in[0] = 0xC0;
92  in[15] = 0;
93  for (i = 0; i <= (size_t)m_is128; i++)
94  {
95  cipher.ProcessBlock(in, out.BytePtr());
96  m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
97  m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
98  in[15]++;
99  }
100 
101  /* Fill ip key */
102  in[0] = 0xE0;
103  in[15] = 0;
104  word64 *l3Key = m_l3Key();
105  CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
106 
107  for (i = 0; i <= (size_t)m_is128; i++)
108  do
109  {
110  cipher.ProcessBlock(in, out.BytePtr());
111  l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
112  l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
113  in[15]++;
114  } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
115 
116  m_padCached = false;
117  size_t nonceLength;
118  const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
119  Resynchronize(nonce, (int)nonceLength);
120 }
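
UncheckedSetKey derives the three VHASH subkeys by encrypting counter blocks whose first byte tags the purpose: 0x80 for the NH key, 0xC0 for the polynomial keys (masked with mpoly), and 0xE0 for the L3 keys, which are rejection-sampled until both words fall below p64. From the caller's side all of this is driven by a single keying call. A minimal keying sketch, assuming AES and the library's usual headers:

    #include "vmac.h"
    #include "aes.h"
    #include "osrng.h"

    using namespace CryptoPP;

    AutoSeededRandomPool prng;
    SecByteBlock key(AES::DEFAULT_KEYLENGTH), iv(AES::BLOCKSIZE);
    prng.GenerateBlock(key, key.size());
    prng.GenerateBlock(iv, iv.size());
    iv[0] &= 0x7f;   // keep the nonce below 2^127, as GetNextIV below does

    VMAC<AES> vmac;  // 128-bit tag by default; VMAC<AES, 64> for 64-bit tags
    vmac.SetKeyWithIV(key, key.size(), iv, iv.size());
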
121 
122 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
123 {
124  SimpleKeyingInterface::GetNextIV(rng, IV);
125  IV[0] &= 0x7f;
126 }
127 
128 void VMAC_Base::Resynchronize(const byte *nonce, int len)
129 {
130  size_t length = ThrowIfInvalidIVLength(len);
131  size_t s = IVSize();
132  byte *storedNonce = m_nonce();
133 
134  if (m_is128)
135  {
136  std::memset(storedNonce, 0, s-length);
137  std::memcpy(storedNonce+s-length, nonce, length);
138  AccessCipher().ProcessBlock(storedNonce, m_pad());
139  }
140  else
141  {
142  if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
143  {
144  m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
145  for (size_t i=0; m_padCached && i<s-length; i++)
146  m_padCached = (storedNonce[i] == 0);
147  }
148  if (!m_padCached)
149  {
150  std::memset(storedNonce, 0, s-length);
151  std::memcpy(storedNonce+s-length, nonce, length-1);
152  storedNonce[s-1] = nonce[length-1] & 0xfe;
153  AccessCipher().ProcessBlock(storedNonce, m_pad());
154  m_padCached = true;
155  }
156  storedNonce[s-1] = nonce[length-1];
157  }
158  m_isFirstBlock = true;
159  Restart();
160 }
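
For 64-bit tags, Resynchronize caches the cipher pad computed from the nonce with its final bit cleared, so two nonces that differ only in that bit share one block-cipher invocation; the stored final bit later selects which half of the pad TruncatedFinal adds. Per message, a caller just supplies a fresh nonce. A sketch continuing the keying example above (names carried over from that sketch); note that a nonce must never be reused for two different messages under the same key:

    byte nonce[AES::BLOCKSIZE], tag[16];
    vmac.GetNextIV(prng, nonce);                 // top bit cleared for VMAC
    vmac.Resynchronize(nonce, (int)sizeof(nonce));
    vmac.Update((const byte*)"abc", 3);
    vmac.Final(tag);
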
161 
162 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
163 {
164  CRYPTOPP_UNUSED(data);
165  CRYPTOPP_ASSERT(false);
166  throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
167 }
168 
169 unsigned int VMAC_Base::OptimalDataAlignment() const
170 {
171  return
172 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
173  HasSSE2() ? 16 :
174 #endif
175  GetCipher().OptimalDataAlignment();
176 }
177 
178 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
179 #if CRYPTOPP_MSC_VERSION
180 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
181 #endif
182 
183 CRYPTOPP_NOINLINE
184 void VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
185 {
186  const word64 *nhK = m_nhKey();
187  word64 *polyS = (word64*)(void*)m_polyState();
188  word32 L1KeyLength = m_L1KeyLength;
189 
190  // These are used in the ASM, but some analysis services miss them.
191  CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart);
192  CRYPTOPP_UNUSED(L1KeyLength);
193  CRYPTOPP_UNUSED(blocksRemainingInWord64);
194 
195  // This inline ASM is tricky, and downright difficult on 32-bit when
196  // PIC is in effect. The ASM uses all the general purpose registers
197  // and all the XMM registers on 32-bit machines. When PIC is in effect
198  // on a 32-bit machine, GCC uses EBX as a base register for PLT. Saving
199  // EBX with 'mov %%ebx, %0' and restoring EBX with 'mov %0, %%ebx'
200  // causes GCC to generate 'mov -0x40(%ebx), %ebx' for the restore. That
201  // obviously won't work because EBX is no longer valid. We can push and
202  // pop EBX, but that breaks the stack-based references. Attempting to
203  // sidestep with clobber lists results in "error: ‘asm’ operand has
204  // impossible constraints". Eventually, we found we could save EBX to
205  // ESP-20, which is one word below our stack in the frame.
206 #ifdef __GNUC__
207  __asm__ __volatile__
208  (
209 # if CRYPTOPP_BOOL_X86
210  // Hack. Save EBX for PIC. Do NOT 'push EBX' here.
211  // GCC issues 'mov ESP+8, EBX' to load L1KeyLength.
212  // A push breaks the reference to L1KeyLength.
213  AS2( mov %%ebx, -20(%%esp))
214 # endif
215  // L1KeyLength into EBX.
216  // GCC generates 'mov ESP+8, EBX'.
217  AS2( mov %0, %%ebx)
218  INTEL_NOPREFIX
219 #else
220  #if defined(__INTEL_COMPILER)
221  char isFirstBlock = m_isFirstBlock;
222  AS2( mov ebx, [L1KeyLength])
223  AS2( mov dl, [isFirstBlock])
224  #else
225  AS2( mov ecx, this)
226  AS2( mov ebx, [ecx+m_L1KeyLength])
227  AS2( mov dl, [ecx+m_isFirstBlock])
228  #endif
229  AS2( mov eax, tagPart)
230  AS2( shl eax, 4)
231  AS2( mov edi, nhK)
232  AS2( add edi, eax)
233  AS2( add eax, eax)
234  AS2( add eax, polyS)
235 
236  AS2( mov esi, data)
237  AS2( mov ecx, blocksRemainingInWord64)
238 #endif
239 
240  AS2( shr ebx, 3)
241  AS_PUSH_IF86( bp)
242  AS2( sub esp, 12)
243  ASL(4)
244  AS2( mov ebp, ebx)
245  AS2( cmp ecx, ebx)
246  AS2( cmovl ebp, ecx)
247  AS2( sub ecx, ebp)
248  AS2( lea ebp, [edi+8*ebp]) // end of nhK
249  AS2( movq mm6, [esi])
250  AS2( paddq mm6, [edi])
251  AS2( movq mm5, [esi+8])
252  AS2( paddq mm5, [edi+8])
253  AS2( add esi, 16)
254  AS2( add edi, 16)
255  AS2( movq mm4, mm6)
256  ASS( pshufw mm2, mm6, 1, 0, 3, 2)
257  AS2( pmuludq mm6, mm5)
258  ASS( pshufw mm3, mm5, 1, 0, 3, 2)
259  AS2( pmuludq mm5, mm2)
260  AS2( pmuludq mm2, mm3)
261  AS2( pmuludq mm3, mm4)
262  AS2( pxor mm7, mm7)
263  AS2( movd [esp], mm6)
264  AS2( psrlq mm6, 32)
265  AS2( movd [esp+4], mm5)
266  AS2( psrlq mm5, 32)
267  AS2( cmp edi, ebp)
268  ASJ( je, 1, f)
269  ASL(0)
270  AS2( movq mm0, [esi])
271  AS2( paddq mm0, [edi])
272  AS2( movq mm1, [esi+8])
273  AS2( paddq mm1, [edi+8])
274  AS2( add esi, 16)
275  AS2( add edi, 16)
276  AS2( movq mm4, mm0)
277  AS2( paddq mm5, mm2)
278  ASS( pshufw mm2, mm0, 1, 0, 3, 2)
279  AS2( pmuludq mm0, mm1)
280  AS2( movd [esp+8], mm3)
281  AS2( psrlq mm3, 32)
282  AS2( paddq mm5, mm3)
283  ASS( pshufw mm3, mm1, 1, 0, 3, 2)
284  AS2( pmuludq mm1, mm2)
285  AS2( pmuludq mm2, mm3)
286  AS2( pmuludq mm3, mm4)
287  AS2( movd mm4, [esp])
288  AS2( paddq mm7, mm4)
289  AS2( movd mm4, [esp+4])
290  AS2( paddq mm6, mm4)
291  AS2( movd mm4, [esp+8])
292  AS2( paddq mm6, mm4)
293  AS2( movd [esp], mm0)
294  AS2( psrlq mm0, 32)
295  AS2( paddq mm6, mm0)
296  AS2( movd [esp+4], mm1)
297  AS2( psrlq mm1, 32)
298  AS2( paddq mm5, mm1)
299  AS2( cmp edi, ebp)
300  ASJ( jne, 0, b)
301  ASL(1)
302  AS2( paddq mm5, mm2)
303  AS2( movd [esp+8], mm3)
304  AS2( psrlq mm3, 32)
305  AS2( paddq mm5, mm3)
306  AS2( movd mm4, [esp])
307  AS2( paddq mm7, mm4)
308  AS2( movd mm4, [esp+4])
309  AS2( paddq mm6, mm4)
310  AS2( movd mm4, [esp+8])
311  AS2( paddq mm6, mm4)
312  AS2( lea ebp, [8*ebx])
313  AS2( sub edi, ebp) // reset edi to start of nhK
314 
315  AS2( movd [esp], mm7)
316  AS2( psrlq mm7, 32)
317  AS2( paddq mm6, mm7)
318  AS2( movd [esp+4], mm6)
319  AS2( psrlq mm6, 32)
320  AS2( paddq mm5, mm6)
321  AS2( psllq mm5, 2)
322  AS2( psrlq mm5, 2)
323 
324 #define a0 [eax+2*4]
325 #define a1 [eax+3*4]
326 #define a2 [eax+0*4]
327 #define a3 [eax+1*4]
328 #define k0 [eax+2*8+2*4]
329 #define k1 [eax+2*8+3*4]
330 #define k2 [eax+2*8+0*4]
331 #define k3 [eax+2*8+1*4]
332 
333  AS2( test dl, dl)
334  ASJ( jz, 2, f)
335  AS2( movd mm1, k0)
336  AS2( movd mm0, [esp])
337  AS2( paddq mm0, mm1)
338  AS2( movd a0, mm0)
339  AS2( psrlq mm0, 32)
340  AS2( movd mm1, k1)
341  AS2( movd mm2, [esp+4])
342  AS2( paddq mm1, mm2)
343  AS2( paddq mm0, mm1)
344  AS2( movd a1, mm0)
345  AS2( psrlq mm0, 32)
346  AS2( paddq mm5, k2)
347  AS2( paddq mm0, mm5)
348  AS2( movq a2, mm0)
349  AS2( xor edx, edx)
350  ASJ( jmp, 3, f)
351  ASL(2)
352  AS2( movd mm0, a3)
353  AS2( movq mm4, mm0)
354  AS2( pmuludq mm0, k3) // a3*k3
355  AS2( movd mm1, a0)
356  AS2( pmuludq mm1, k2) // a0*k2
357  AS2( movd mm2, a1)
358  AS2( movd mm6, k1)
359  AS2( pmuludq mm2, mm6) // a1*k1
360  AS2( movd mm3, a2)
361  AS2( psllq mm0, 1)
362  AS2( paddq mm0, mm5)
363  AS2( movq mm5, mm3)
364  AS2( movd mm7, k0)
365  AS2( pmuludq mm3, mm7) // a2*k0
366  AS2( pmuludq mm4, mm7) // a3*k0
367  AS2( pmuludq mm5, mm6) // a2*k1
368  AS2( paddq mm0, mm1)
369  AS2( movd mm1, a1)
370  AS2( paddq mm4, mm5)
371  AS2( movq mm5, mm1)
372  AS2( pmuludq mm1, k2) // a1*k2
373  AS2( paddq mm0, mm2)
374  AS2( movd mm2, a0)
375  AS2( paddq mm0, mm3)
376  AS2( movq mm3, mm2)
377  AS2( pmuludq mm2, k3) // a0*k3
378  AS2( pmuludq mm3, mm7) // a0*k0
379  AS2( movd [esp+8], mm0)
380  AS2( psrlq mm0, 32)
381  AS2( pmuludq mm7, mm5) // a1*k0
382  AS2( pmuludq mm5, k3) // a1*k3
383  AS2( paddq mm0, mm1)
384  AS2( movd mm1, a2)
385  AS2( pmuludq mm1, k2) // a2*k2
386  AS2( paddq mm0, mm2)
387  AS2( paddq mm0, mm4)
388  AS2( movq mm4, mm0)
389  AS2( movd mm2, a3)
390  AS2( pmuludq mm2, mm6) // a3*k1
391  AS2( pmuludq mm6, a0) // a0*k1
392  AS2( psrlq mm0, 31)
393  AS2( paddq mm0, mm3)
394  AS2( movd mm3, [esp])
395  AS2( paddq mm0, mm3)
396  AS2( movd mm3, a2)
397  AS2( pmuludq mm3, k3) // a2*k3
398  AS2( paddq mm5, mm1)
399  AS2( movd mm1, a3)
400  AS2( pmuludq mm1, k2) // a3*k2
401  AS2( paddq mm5, mm2)
402  AS2( movd mm2, [esp+4])
403  AS2( psllq mm5, 1)
404  AS2( paddq mm0, mm5)
405  AS2( psllq mm4, 33)
406  AS2( movd a0, mm0)
407  AS2( psrlq mm0, 32)
408  AS2( paddq mm6, mm7)
409  AS2( movd mm7, [esp+8])
410  AS2( paddq mm0, mm6)
411  AS2( paddq mm0, mm2)
412  AS2( paddq mm3, mm1)
413  AS2( psllq mm3, 1)
414  AS2( paddq mm0, mm3)
415  AS2( psrlq mm4, 1)
416  AS2( movd a1, mm0)
417  AS2( psrlq mm0, 32)
418  AS2( por mm4, mm7)
419  AS2( paddq mm0, mm4)
420  AS2( movq a2, mm0)
421 
422 #undef a0
423 #undef a1
424 #undef a2
425 #undef a3
426 #undef k0
427 #undef k1
428 #undef k2
429 #undef k3
430 
431  ASL(3)
432  AS2( test ecx, ecx)
433  ASJ( jnz, 4, b)
434  AS2( add esp, 12)
435  AS_POP_IF86( bp)
436  AS1( emms)
437 #ifdef __GNUC__
438  ATT_PREFIX
439 # if CRYPTOPP_BOOL_X86
440  // Restore EBX for PIC
441  AS2( mov -20(%%esp), %%ebx)
442 # endif
443  :
444  : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data),
445  "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
446  : "memory", "cc"
447  );
448 #endif
449 }
450 #endif
451 
452 #if VMAC_BOOL_WORD128
453  #define DeclareNH(a) word128 a=0
454  #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
455  #define AccumulateNH(a, b, c) a += word128(b)*(c)
456  #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
457 #else
458  #if CRYPTOPP_MSC_VERSION >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
459  #define MUL32(a, b) __emulu(word32(a), word32(b))
460  #else
461  #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
462  #endif
463  #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
464  #define DeclareNH(a) word64 a##0=0, a##1=0
465  #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
466  #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
467  #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
468  #elif defined(CRYPTOPP_MSC_VERSION) && !CRYPTOPP_BOOL_SLOW_WORD64
469  #define DeclareNH(a) word64 a##0=0, a##1=0
470  #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
471  #define AccumulateNH(a, b, c) {\
472  word64 ph, pl;\
473  pl = _umul128(b,c,&ph);\
474  a##0 += pl;\
475  a##1 += ph + (a##0 < pl);}
476  #else
477  #define VMAC_BOOL_32BIT 1
478  #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
479  #define MUL64(rh,rl,i1,i2) \
480  { word64 _i1 = (i1), _i2 = (i2); \
481  word64 m1= MUL32(_i1,_i2>>32); \
482  word64 m2= MUL32(_i1>>32,_i2); \
483  rh = MUL32(_i1>>32,_i2>>32); \
484  rl = MUL32(_i1,_i2); \
485  ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
486  ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
487  }
488  #define AccumulateNH(a, b, c) {\
489  word64 p = MUL32(b, c);\
490  a##1 += word32((p)>>32);\
491  a##0 += word32(p);\
492  p = MUL32((b)>>32, c);\
493  a##2 += word32((p)>>32);\
494  a##1 += word32(p);\
495  p = MUL32((b)>>32, (c)>>32);\
496  a##2 += p;\
497  p = MUL32(b, (c)>>32);\
498  a##1 += word32(p);\
499  a##2 += word32(p>>32);}
500  #endif
501 #endif
502 #ifndef VMAC_BOOL_32BIT
503  #define VMAC_BOOL_32BIT 0
504 #endif
505 #ifndef ADD128
506  #define ADD128(rh,rl,ih,il) \
507  { word64 _il = (il); \
508  (rl) += (_il); \
509  (rh) += (ih) + ((rl) < (_il)); \
510  }
511 #endif
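
The 32-bit fallback above builds the 64x64->128 multiply from four 32x32->64 partial products, folding the two middle terms in with ADD128-style carry propagation. The same schoolbook decomposition written out standalone, checked against a compiler 128-bit type where one exists (a sketch; mul64 is an illustrative name, not a library function):

    #include <cstdint>
    #include <cassert>

    // rh:rl = a*b from four 32x32->64 products, as in the MUL64 fallback.
    static void mul64(uint64_t a, uint64_t b, uint64_t &rh, uint64_t &rl)
    {
        uint64_t m1 = (a & 0xffffffffULL) * (b >> 32);   // lo(a)*hi(b)
        uint64_t m2 = (a >> 32) * (b & 0xffffffffULL);   // hi(a)*lo(b)
        rh = (a >> 32) * (b >> 32);                      // hi(a)*hi(b)
        rl = (a & 0xffffffffULL) * (b & 0xffffffffULL);  // lo(a)*lo(b)
        uint64_t t = rl + (m1 << 32);                    // ADD128 step for m1
        rh += (m1 >> 32) + (t < rl); rl = t;
        t = rl + (m2 << 32);                             // ADD128 step for m2
        rh += (m2 >> 32) + (t < rl); rl = t;
    }

    int main()
    {
        uint64_t h, l;
        mul64(0xfffffffffffffeffULL, 0x1fffffff1fffffffULL, h, l);
    #ifdef __SIZEOF_INT128__
        unsigned __int128 p = (unsigned __int128)0xfffffffffffffeffULL
                            * 0x1fffffff1fffffffULL;
        assert(h == (uint64_t)(p >> 64) && l == (uint64_t)p);
    #endif
        (void)h; (void)l;
        return 0;
    }
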
512 
513 template <bool T_128BitTag>
514 void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
515 {
516  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
517  CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
518 
519  #define INNER_LOOP_ITERATION(j) {\
520  word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
521  word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
522  AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
523  if (T_128BitTag)\
524  AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
525  }
526 
527  size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
528  size_t innerLoopEnd = L1KeyLengthInWord64;
529  const word64 *nhK = m_nhKey();
530  word64 *polyS = (word64*)(void*)m_polyState();
531  bool isFirstBlock = true;
532  size_t i;
533 
534  #if !VMAC_BOOL_32BIT
535  #if VMAC_BOOL_WORD128
536  word128 a1=0, a2=0;
537  #else
538  word64 ah1=0, al1=0, ah2=0, al2=0;
539  #endif
540  word64 kh1, kl1, kh2, kl2;
541  kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
542  if (T_128BitTag)
543  {
544  kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
545  }
546  #endif
547 
548  do
549  {
550  DeclareNH(nhA);
551  DeclareNH(nhB);
552 
553  i = 0;
554  if (blocksRemainingInWord64 < L1KeyLengthInWord64)
555  {
556  if (blocksRemainingInWord64 % 8)
557  {
558  innerLoopEnd = blocksRemainingInWord64 % 8;
559  for (; i<innerLoopEnd; i+=2)
560  INNER_LOOP_ITERATION(0);
561  }
562  innerLoopEnd = blocksRemainingInWord64;
563  }
564  for (; i<innerLoopEnd; i+=8)
565  {
566  INNER_LOOP_ITERATION(0);
567  INNER_LOOP_ITERATION(1);
568  INNER_LOOP_ITERATION(2);
569  INNER_LOOP_ITERATION(3);
570  }
571  blocksRemainingInWord64 -= innerLoopEnd;
572  data += innerLoopEnd;
573 
574  #if VMAC_BOOL_32BIT
575  word32 nh0[2], nh1[2];
576  word64 nh2[2];
577 
578  nh0[0] = word32(nhA0);
579  nhA1 += (nhA0 >> 32);
580  nh1[0] = word32(nhA1);
581  nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
582 
583  if (T_128BitTag)
584  {
585  nh0[1] = word32(nhB0);
586  nhB1 += (nhB0 >> 32);
587  nh1[1] = word32(nhB1);
588  nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
589  }
590 
591  #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
592  #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
593  #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
594  #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
595  #define aHi ((polyS+i*4)[0])
596  #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
597  #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
598  #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
599  #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
600  #define kHi ((polyS+i*4+2)[0])
601 
602  if (isFirstBlock)
603  {
604  isFirstBlock = false;
605  if (m_isFirstBlock)
606  {
607  m_isFirstBlock = false;
608  for (i=0; i<=(size_t)T_128BitTag; i++)
609  {
610  word64 t = (word64)nh0[i] + k0;
611  a0 = (word32)t;
612  t = (t >> 32) + nh1[i] + k1;
613  a1 = (word32)t;
614  aHi = (t >> 32) + nh2[i] + kHi;
615  }
616  continue;
617  }
618  }
619  for (i=0; i<=(size_t)T_128BitTag; i++)
620  {
621  word64 p, t;
622  word32 t2;
623 
624  p = MUL32(a3, 2*k3);
625  p += nh2[i];
626  p += MUL32(a0, k2);
627  p += MUL32(a1, k1);
628  p += MUL32(a2, k0);
629  t2 = (word32)p;
630  p >>= 32;
631  p += MUL32(a0, k3);
632  p += MUL32(a1, k2);
633  p += MUL32(a2, k1);
634  p += MUL32(a3, k0);
635  t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
636  p >>= 31;
637  p += nh0[i];
638  p += MUL32(a0, k0);
639  p += MUL32(a1, 2*k3);
640  p += MUL32(a2, 2*k2);
641  p += MUL32(a3, 2*k1);
642  t2 = (word32)p;
643  p >>= 32;
644  p += nh1[i];
645  p += MUL32(a0, k1);
646  p += MUL32(a1, k0);
647  p += MUL32(a2, 2*k3);
648  p += MUL32(a3, 2*k2);
649  a0 = t2;
650  a1 = (word32)p;
651  aHi = (p >> 32) + t;
652  }
653 
654  #undef a0
655  #undef a1
656  #undef a2
657  #undef a3
658  #undef aHi
659  #undef k0
660  #undef k1
661  #undef k2
662  #undef k3
663  #undef kHi
664  #else // #if VMAC_BOOL_32BIT
665  if (isFirstBlock)
666  {
667  isFirstBlock = false;
668  if (m_isFirstBlock)
669  {
670  m_isFirstBlock = false;
671  #if VMAC_BOOL_WORD128
672  #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
673 
674  first_poly_step(a1, kh1, kl1, nhA);
675  if (T_128BitTag)
676  first_poly_step(a2, kh2, kl2, nhB);
677  #else
678  #define first_poly_step(ah, al, kh, kl, mh, ml) {\
679  mh &= m62;\
680  ADD128(mh, ml, kh, kl); \
681  ah = mh; al = ml;}
682 
683  first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
684  if (T_128BitTag)
685  first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
686  #endif
687  continue;
688  }
689  else
690  {
691  #if VMAC_BOOL_WORD128
692  a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
693  #else
694  ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
695  #endif
696  if (T_128BitTag)
697  {
698  #if VMAC_BOOL_WORD128
699  a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
700  #else
701  ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
702  #endif
703  }
704  }
705  }
706 
707  #if VMAC_BOOL_WORD128
708  #define poly_step(a, kh, kl, m) \
709  { word128 t1, t2, t3, t4;\
710  Multiply128(t2, a>>64, kl);\
711  Multiply128(t3, a, kh);\
712  Multiply128(t1, a, kl);\
713  Multiply128(t4, a>>64, 2*kh);\
714  t2 += t3;\
715  t4 += t1;\
716  t2 += t4>>64;\
717  a = (word128(word64(t2)&m63) << 64) | word64(t4);\
718  t2 *= 2;\
719  a += m & m126;\
720  a += t2>>64;}
721 
722  poly_step(a1, kh1, kl1, nhA);
723  if (T_128BitTag)
724  poly_step(a2, kh2, kl2, nhB);
725  #else
726  #define poly_step(ah, al, kh, kl, mh, ml) \
727  { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
728  /* compute ab*cd, put bd into result registers */ \
729  MUL64(t2h,t2l,ah,kl); \
730  MUL64(t3h,t3l,al,kh); \
731  MUL64(t1h,t1l,ah,2*kh); \
732  MUL64(ah,al,al,kl); \
733  /* add together ad + bc */ \
734  ADD128(t2h,t2l,t3h,t3l); \
735  /* add 2 * ac to result */ \
736  ADD128(ah,al,t1h,t1l); \
737  /* now (ah,al), (t2l,2*t2h) need summing */ \
738  /* first add the high registers, carrying into t2h */ \
739  ADD128(t2h,ah,z,t2l); \
740  /* double t2h and add top bit of ah */ \
741  t2h += t2h + (ah >> 63); \
742  ah &= m63; \
743  /* now add the low registers */ \
744  mh &= m62; \
745  ADD128(ah,al,mh,ml); \
746  ADD128(ah,al,z,t2h); \
747  }
748 
749  poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
750  if (T_128BitTag)
751  poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
752  #endif
753  #endif // #if VMAC_BOOL_32BIT
754  } while (blocksRemainingInWord64);
755 
756  #if VMAC_BOOL_WORD128
757  (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
758  if (T_128BitTag)
759  {
760  (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
761  }
762  #elif !VMAC_BOOL_32BIT
763  (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
764  if (T_128BitTag)
765  {
766  (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
767  }
768  #endif
769 }
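
The template above interleaves two layers: the NH inner loop (INNER_LOOP_ITERATION) compresses each L1KeyLength-byte chunk into an accumulator that is later masked to 126 bits (m126), and poly_step absorbs each compressed value into a polynomial evaluated modulo 2^127-1. The NH layer alone, as a scalar sketch using a compiler 128-bit type (illustrative; the real code goes through the DeclareNH/AccumulateNH machinery selected above):

    #include <cstdint>
    #include <cstddef>

    // nh(m, k) = sum_i (m[2i]+k[2i]) * (m[2i+1]+k[2i+1]), with the
    // additions taken mod 2^64 and the sum mod 2^128, as in AccumulateNH.
    #ifdef __SIZEOF_INT128__
    unsigned __int128 nh(const uint64_t *m, const uint64_t *k, size_t words)
    {
        unsigned __int128 acc = 0;
        for (size_t i = 0; i + 1 < words; i += 2)
            acc += (unsigned __int128)(m[i] + k[i]) * (m[i+1] + k[i+1]);
        return acc;
    }
    #endif
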
770 
771 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
772 {
773 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
774  if (HasSSE2())
775  {
776  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
777  if (m_is128)
778  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
779  m_isFirstBlock = false;
780  }
781  else
782 #endif
783  {
784  if (m_is128)
785  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
786  else
787  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
788  }
789 }
790 
791 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
792 {
793  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
794  VHASH_Update(data, (length-remaining)/8);
795  return remaining;
796 }
797 
798 word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
799 {
800  word64 rh, rl, t, z=0;
801  word64 p1 = input[0], p2 = input[1];
802  word64 k1 = l3Key[0], k2 = l3Key[1];
803 
804  /* fully reduce (p1,p2)+(len,0) mod p127 */
805  t = p1 >> 63;
806  p1 &= m63;
807  ADD128(p1, p2, len, t);
808  /* At this point, (p1,p2) is at most 2^127+(len<<64) */
809  t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
810  ADD128(p1, p2, z, t);
811  p1 &= m63;
812 
813  /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
814  t = p1 + (p2 >> 32);
815  t += (t >> 32);
816  t += (word32)t > 0xfffffffeU;
817  p1 += (t >> 32);
818  p2 += (p1 << 32);
819 
820  /* compute (p1+k1)%p64 and (p2+k2)%p64 */
821  p1 += k1;
822  p1 += (0 - (p1 < k1)) & 257;
823  p2 += k2;
824  p2 += (0 - (p2 < k2)) & 257;
825 
826  /* compute (p1+k1)*(p2+k2)%p64 */
827  MUL64(rh, rl, p1, p2);
828  t = rh >> 56;
829  ADD128(t, rl, z, rh);
830  rh <<= 8;
831  ADD128(t, rl, z, rh);
832  t += t << 8;
833  rl += t;
834  rl += (0 - (rl < t)) & 257;
835  rl += (0 - (rl > p64-1)) & 257;
836  return rl;
837 }
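
L3Hash folds the 128-bit polynomial state down to 64 bits: reduce modulo 2^127-1, split across 2^64-2^32, offset each half with an L3 key word modulo p64 = 2^64-257, and multiply. The product is then reduced modulo p64 via the congruence 2^64 = 257 (mod p64), which is what the rh>>56 and t<<8 shuffling implements. The same congruence stated directly, as a sketch with a compiler 128-bit type (illustrative, not the branch-light form used above):

    #include <cstdint>

    // Reduce a 128-bit x modulo p64 = 2^64 - 257, using 2^64 = 257 (mod p64),
    // i.e. hi:lo = hi*257 + lo (mod p64). Illustrative, not constant-time.
    #ifdef __SIZEOF_INT128__
    uint64_t mod_p64(unsigned __int128 x)
    {
        const uint64_t p64 = 0xfffffffffffffeffULL;
        while (x >> 64)
            x = (unsigned __int128)(uint64_t)(x >> 64) * 257 + (uint64_t)x;
        uint64_t r = (uint64_t)x;   // now r < 2^64 < 2*p64
        return r >= p64 ? r - p64 : r;
    }
    #endif
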
838 
839 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
840 {
841  CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
842  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
843  size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
844 
845  if (len)
846  {
847  std::memset(m_data()+len, 0, (0-len)%16);
848  VHASH_Update(DataBuf(), ((len+15)/16)*2);
849  len *= 8; // convert to bits
850  }
851  else if (m_isFirstBlock)
852  {
853  // special case for empty string
854  m_polyState()[0] = m_polyState()[2];
855  m_polyState()[1] = m_polyState()[3];
856  if (m_is128)
857  {
858  m_polyState()[4] = m_polyState()[6];
859  m_polyState()[5] = m_polyState()[7];
860  }
861  }
862 
863  if (m_is128)
864  {
865  word64 t[2];
866  t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
867  t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
868  if (size == 16)
869  {
870  PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
871  PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
872  }
873  else
874  {
875  t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
876  t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
877  std::memcpy(mac, t, size);
878  }
879  }
880  else
881  {
882  word64 t = L3Hash(m_polyState(), m_l3Key(), len);
883  t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
884  if (size == 8)
885  PutWord(false, BIG_ENDIAN_ORDER, mac, t);
886  else
887  {
888  t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
889  std::memcpy(mac, &t, size);
890  }
891  }
892 }
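
TruncatedFinal finishes by adding the nonce-derived cipher pad to the L3 output, using the whole pad for 128-bit tags and the half selected by the nonce's last bit for 64-bit tags. End to end, computing and checking a tag looks like this sketch (reusing vmac and nonce from the earlier sketches; the verifier resynchronizes with the sender's nonce):

    byte tag[16];
    const byte msg[] = { 'a', 'b', 'c' };

    vmac.Resynchronize(nonce, (int)sizeof(nonce));
    vmac.CalculateDigest(tag, msg, sizeof(msg));

    vmac.Resynchronize(nonce, (int)sizeof(nonce));  // same nonce to verify
    bool ok = vmac.VerifyDigest(tag, msg, sizeof(msg));
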
893 
894 NAMESPACE_END