// Crypto++ 8.0 — a free C++ class library of cryptographic schemes
// vmac.cpp
1 // vmac.cpp - originally written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
13 #include <intrin.h>
14 #endif
15 
16 #if defined(CRYPTOPP_DISABLE_VMAC_ASM)
17 # undef CRYPTOPP_X86_ASM_AVAILABLE
18 # undef CRYPTOPP_X32_ASM_AVAILABLE
19 # undef CRYPTOPP_X64_ASM_AVAILABLE
20 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
21 #endif
22 
23 #if CRYPTOPP_MSC_VERSION
24 # pragma warning(disable: 4731)
25 #endif
26 
27 ANONYMOUS_NAMESPACE_BEGIN
28 
29 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
30 using CryptoPP::word128;
31 using CryptoPP::word64;
32 # define VMAC_BOOL_WORD128 1
33 #else
34 using CryptoPP::word64;
35 # define VMAC_BOOL_WORD128 0
36 #endif
37 
38 #ifdef __BORLANDC__
39 #define const // Turbo C++ 2006 workaround
40 #endif
41 const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
42 const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
43 const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
44 const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
45 const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
46 #ifdef __BORLANDC__
47 #undef const
48 #endif
49 #if VMAC_BOOL_WORD128
50 #ifdef __powerpc__
51 // workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
52 #define m126 ((word128(m62)<<64)|m64)
53 #else
54 const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
55 #endif
56 #endif
57 
58 ANONYMOUS_NAMESPACE_END
59 
60 NAMESPACE_BEGIN(CryptoPP)
61 
62 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
63 {
64  int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
65  if (digestLength != 8 && digestLength != 16)
66  throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
67  m_is128 = digestLength == 16;
68 
69  m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
70  if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
71  throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
72 
73  AllocateBlocks();
74 
75  BlockCipher &cipher = AccessCipher();
76  cipher.SetKey(userKey, keylength, params);
77  const unsigned int blockSize = cipher.BlockSize();
78  const unsigned int blockSizeInWords = blockSize / sizeof(word64);
81  in.CleanNew(blockSize);
82  size_t i;
83 
84  /* Fill nh key */
85  in[0] = 0x80;
86  cipher.AdvancedProcessBlocks(in, NULLPTR, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
87  ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
88 
89  /* Fill poly key */
90  in[0] = 0xC0;
91  in[15] = 0;
92  for (i = 0; i <= (size_t)m_is128; i++)
93  {
94  cipher.ProcessBlock(in, out.BytePtr());
95  m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
96  m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
97  in[15]++;
98  }
99 
100  /* Fill ip key */
101  in[0] = 0xE0;
102  in[15] = 0;
103  word64 *l3Key = m_l3Key();
104  CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
105 
106  for (i = 0; i <= (size_t)m_is128; i++)
107  do
108  {
109  cipher.ProcessBlock(in, out.BytePtr());
110  l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
111  l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
112  in[15]++;
113  } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
114 
115  m_padCached = false;
116  size_t nonceLength;
117  const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
118  Resynchronize(nonce, (int)nonceLength);
119 }
120 
122 {
124  IV[0] &= 0x7f;
125 }
126 
127 void VMAC_Base::Resynchronize(const byte *nonce, int len)
128 {
129  size_t length = ThrowIfInvalidIVLength(len);
130  size_t s = IVSize();
131  byte *storedNonce = m_nonce();
132 
133  if (m_is128)
134  {
135  memset(storedNonce, 0, s-length);
136  memcpy(storedNonce+s-length, nonce, length);
137  AccessCipher().ProcessBlock(storedNonce, m_pad());
138  }
139  else
140  {
141  if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
142  {
143  m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
144  for (size_t i=0; m_padCached && i<s-length; i++)
145  m_padCached = (storedNonce[i] == 0);
146  }
147  if (!m_padCached)
148  {
149  memset(storedNonce, 0, s-length);
150  memcpy(storedNonce+s-length, nonce, length-1);
151  storedNonce[s-1] = nonce[length-1] & 0xfe;
152  AccessCipher().ProcessBlock(storedNonce, m_pad());
153  m_padCached = true;
154  }
155  storedNonce[s-1] = nonce[length-1];
156  }
157  m_isFirstBlock = true;
158  Restart();
159 }
160 
161 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
162 {
163  CRYPTOPP_UNUSED(data);
164  CRYPTOPP_ASSERT(false);
165  throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
166 }
167 
169 {
170  return
171 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
172  HasSSE2() ? 16 :
173 #endif
174  GetCipher().OptimalDataAlignment();
175 }
176 
#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731)	// frame pointer register 'ebp' modified by inline assembly code
#endif

// VHASH first-layer (NH) and second-layer (polynomial) update, hand-written
// MMX/SSE2 assembly for 32-bit x86.  Processes `blocksRemainingInWord64`
// 64-bit words of `data` against the NH key, then folds each L1 chunk into
// the 127-bit polynomial accumulator in m_polyState.  `tagPart` selects which
// of the two independent hashes (for 128-bit tags) is updated.
// NOTE: code below is byte-identical to the original; only comments changed.
void
#ifdef __GNUC__
__attribute__ ((noinline))	// Intel Compiler 9.1 workaround
#endif
VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
	CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
	CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

	const word64 *nhK = m_nhKey();
	word64 *polyS = (word64*)(void*)m_polyState();
	word32 L1KeyLength = m_L1KeyLength;

	// These are used in the ASM, but some analysis engines cannot determine it.
	CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
	CRYPTOPP_UNUSED(blocksRemainingInWord64);

#ifdef __GNUC__
	word32 temp;
	__asm__ __volatile__
	(
	AS2(	mov		%%ebx, %0)	// save ebx (PIC register) across the asm block
	AS2(	mov		%1, %%ebx)
	INTEL_NOPREFIX
#else
	#if defined(__INTEL_COMPILER)
	char isFirstBlock = m_isFirstBlock;
	AS2(	mov		ebx, [L1KeyLength])
	AS2(	mov		dl, [isFirstBlock])
	#else
	AS2(	mov		ecx, this)
	AS2(	mov		ebx, [ecx+m_L1KeyLength])
	AS2(	mov		dl, [ecx+m_isFirstBlock])
	#endif
	AS2(	mov		eax, tagPart)
	AS2(	shl		eax, 4)
	AS2(	mov		edi, nhK)
	AS2(	add		edi, eax)
	AS2(	add		eax, eax)
	AS2(	add		eax, polyS)

	AS2(	mov		esi, data)
	AS2(	mov		ecx, blocksRemainingInWord64)
#endif

	AS2(	shr		ebx, 3)			// ebx = L1KeyLength in word64s
	AS_PUSH_IF86(	bp)
	AS2(	sub		esp, 12)		// 12-byte scratch area on the stack
	ASL(4)							// outer loop: one L1 chunk per iteration
	AS2(	mov		ebp, ebx)
	AS2(	cmp		ecx, ebx)
	AS2(	cmovl	ebp, ecx)		// ebp = min(remaining, chunk size)
	AS2(	sub		ecx, ebp)
	AS2(	lea		ebp, [edi+8*ebp])	// end of nhK
	AS2(	movq	mm6, [esi])
	AS2(	paddq	mm6, [edi])
	AS2(	movq	mm5, [esi+8])
	AS2(	paddq	mm5, [edi+8])
	AS2(	add		esi, 16)
	AS2(	add		edi, 16)
	AS2(	movq	mm4, mm6)
	ASS(	pshufw	mm2, mm6, 1, 0, 3, 2)
	AS2(	pmuludq	mm6, mm5)
	ASS(	pshufw	mm3, mm5, 1, 0, 3, 2)
	AS2(	pmuludq	mm5, mm2)
	AS2(	pmuludq	mm2, mm3)
	AS2(	pmuludq	mm3, mm4)
	AS2(	pxor	mm7, mm7)
	AS2(	movd	[esp], mm6)
	AS2(	psrlq	mm6, 32)
	AS2(	movd	[esp+4], mm5)
	AS2(	psrlq	mm5, 32)
	AS2(	cmp		edi, ebp)
	ASJ(	je,		1, f)
	ASL(0)							// NH inner loop: 16 message bytes per pass
	AS2(	movq	mm0, [esi])
	AS2(	paddq	mm0, [edi])
	AS2(	movq	mm1, [esi+8])
	AS2(	paddq	mm1, [edi+8])
	AS2(	add		esi, 16)
	AS2(	add		edi, 16)
	AS2(	movq	mm4, mm0)
	AS2(	paddq	mm5, mm2)
	ASS(	pshufw	mm2, mm0, 1, 0, 3, 2)
	AS2(	pmuludq	mm0, mm1)
	AS2(	movd	[esp+8], mm3)
	AS2(	psrlq	mm3, 32)
	AS2(	paddq	mm5, mm3)
	ASS(	pshufw	mm3, mm1, 1, 0, 3, 2)
	AS2(	pmuludq	mm1, mm2)
	AS2(	pmuludq	mm2, mm3)
	AS2(	pmuludq	mm3, mm4)
	AS2(	movd	mm4, [esp])
	AS2(	paddq	mm7, mm4)
	AS2(	movd	mm4, [esp+4])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+8])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	[esp], mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm6, mm0)
	AS2(	movd	[esp+4], mm1)
	AS2(	psrlq	mm1, 32)
	AS2(	paddq	mm5, mm1)
	AS2(	cmp		edi, ebp)
	ASJ(	jne,	0, b)
	ASL(1)							// flush the last partial products
	AS2(	paddq	mm5, mm2)
	AS2(	movd	[esp+8], mm3)
	AS2(	psrlq	mm3, 32)
	AS2(	paddq	mm5, mm3)
	AS2(	movd	mm4, [esp])
	AS2(	paddq	mm7, mm4)
	AS2(	movd	mm4, [esp+4])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+8])
	AS2(	paddq	mm6, mm4)
	AS2(	lea		ebp, [8*ebx])
	AS2(	sub		edi, ebp)		// reset edi to start of nhK

	AS2(	movd	[esp], mm7)
	AS2(	psrlq	mm7, 32)
	AS2(	paddq	mm6, mm7)
	AS2(	movd	[esp+4], mm6)
	AS2(	psrlq	mm6, 32)
	AS2(	paddq	mm5, mm6)
	AS2(	psllq	mm5, 2)			// mask the NH result to 126 bits
	AS2(	psrlq	mm5, 2)

// Views of the polynomial accumulator (a0..a3) and key (k0..k3) as 32-bit
// halves at eax = polyS + tagPart*4.
#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
	AS2(	test	dl, dl)		// dl = m_isFirstBlock
	ASJ(	jz,		2, f)
	// First chunk: accumulator := NH result + key (no multiplication yet).
	AS2(	movd	mm1, k0)
	AS2(	movd	mm0, [esp])
	AS2(	paddq	mm0, mm1)
	AS2(	movd	a0, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	movd	mm1, k1)
	AS2(	movd	mm2, [esp+4])
	AS2(	paddq	mm1, mm2)
	AS2(	paddq	mm0, mm1)
	AS2(	movd	a1, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm5, k2)
	AS2(	paddq	mm0, mm5)
	AS2(	movq	a2, mm0)
	AS2(	xor		edx, edx)	// clear the first-block flag
	ASJ(	jmp,	3, f)
	ASL(2)
	// Subsequent chunks: a = a*k + NH mod p127, schoolbook 32x32 products.
	AS2(	movd	mm0, a3)
	AS2(	movq	mm4, mm0)
	AS2(	pmuludq	mm0, k3)		// a3*k3
	AS2(	movd	mm1, a0)
	AS2(	pmuludq	mm1, k2)		// a0*k2
	AS2(	movd	mm2, a1)
	AS2(	movd	mm6, k1)
	AS2(	pmuludq	mm2, mm6)		// a1*k1
	AS2(	movd	mm3, a2)
	AS2(	psllq	mm0, 1)
	AS2(	paddq	mm0, mm5)
	AS2(	movq	mm5, mm3)
	AS2(	movd	mm7, k0)
	AS2(	pmuludq	mm3, mm7)		// a2*k0
	AS2(	pmuludq	mm4, mm7)		// a3*k0
	AS2(	pmuludq	mm5, mm6)		// a2*k1
	AS2(	paddq	mm0, mm1)
	AS2(	movd	mm1, a1)
	AS2(	paddq	mm4, mm5)
	AS2(	movq	mm5, mm1)
	AS2(	pmuludq	mm1, k2)		// a1*k2
	AS2(	paddq	mm0, mm2)
	AS2(	movd	mm2, a0)
	AS2(	paddq	mm0, mm3)
	AS2(	movq	mm3, mm2)
	AS2(	pmuludq	mm2, k3)		// a0*k3
	AS2(	pmuludq	mm3, mm7)		// a0*k0
	AS2(	movd	[esp+8], mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	pmuludq	mm7, mm5)		// a1*k0
	AS2(	pmuludq	mm5, k3)		// a1*k3
	AS2(	paddq	mm0, mm1)
	AS2(	movd	mm1, a2)
	AS2(	pmuludq	mm1, k2)		// a2*k2
	AS2(	paddq	mm0, mm2)
	AS2(	paddq	mm0, mm4)
	AS2(	movq	mm4, mm0)
	AS2(	movd	mm2, a3)
	AS2(	pmuludq	mm2, mm6)		// a3*k1
	AS2(	pmuludq	mm6, a0)		// a0*k1
	AS2(	psrlq	mm0, 31)
	AS2(	paddq	mm0, mm3)
	AS2(	movd	mm3, [esp])
	AS2(	paddq	mm0, mm3)
	AS2(	movd	mm3, a2)
	AS2(	pmuludq	mm3, k3)		// a2*k3
	AS2(	paddq	mm5, mm1)
	AS2(	movd	mm1, a3)
	AS2(	pmuludq	mm1, k2)		// a3*k2
	AS2(	paddq	mm5, mm2)
	AS2(	movd	mm2, [esp+4])
	AS2(	psllq	mm5, 1)
	AS2(	paddq	mm0, mm5)
	AS2(	psllq	mm4, 33)
	AS2(	movd	a0, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm6, mm7)
	AS2(	movd	mm7, [esp+8])
	AS2(	paddq	mm0, mm6)
	AS2(	paddq	mm0, mm2)
	AS2(	paddq	mm3, mm1)
	AS2(	psllq	mm3, 1)
	AS2(	paddq	mm0, mm3)
	AS2(	psrlq	mm4, 1)
	AS2(	movd	a1, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	por		mm4, mm7)
	AS2(	paddq	mm0, mm4)
	AS2(	movq	a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

	ASL(3)
	AS2(	test	ecx, ecx)
	ASJ(	jnz,	4, b)			// more message words: next L1 chunk
	AS2(	add		esp, 12)
	AS_POP_IF86(	bp)
	AS1(	emms)					// leave MMX state clean for the FPU
#ifdef __GNUC__
	ATT_PREFIX
	AS2(	mov	%0, %%ebx)			// restore ebx
		: "=m" (temp)
		: "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
		: "memory", "cc"
	);
#endif
}
#endif
431 
// Multi-precision building blocks for the portable VHASH implementation.
// Depending on platform support these expand to native 128-bit arithmetic,
// x64 inline assembly, MSVC intrinsics, or pure 32-bit arithmetic:
//   DeclareNH(a)        - declare an NH accumulator
//   MUL64(rh,rl,i1,i2)  - full 64x64 -> 128-bit multiply
//   AccumulateNH(a,b,c) - a += b*c (128-bit or wider accumulation)
//   ADD128(rh,rl,ih,il) - 128-bit add with carry propagation
// NOTE: code below is byte-identical to the original; only comments changed.
#if VMAC_BOOL_WORD128
	#define DeclareNH(a) word128 a=0
	#define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
	#define AccumulateNH(a, b, c) a += word128(b)*(c)
	#define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
	#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
		#define MUL32(a, b) __emulu(word32(a), word32(b))
	#else
		#define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
	#endif
	#if defined(CRYPTOPP_X64_ASM_AVAILABLE)
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
		#define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
		#define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
	#elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
		#define AccumulateNH(a, b, c) {\
			word64 ph, pl;\
			pl = _umul128(b,c,&ph);\
			a##0 += pl;\
			a##1 += ph + (a##0 < pl);}
	#else
		// Pure 32-bit fallback: NH accumulators are kept as three limbs.
		#define VMAC_BOOL_32BIT 1
		#define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
		#define MUL64(rh,rl,i1,i2) \
			{ word64 _i1 = (i1), _i2 = (i2); \
			word64 m1= MUL32(_i1,_i2>>32); \
			word64 m2= MUL32(_i1>>32,_i2); \
			rh = MUL32(_i1>>32,_i2>>32); \
			rl = MUL32(_i1,_i2); \
			ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
			ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
			}
		#define AccumulateNH(a, b, c) {\
			word64 p = MUL32(b, c);\
			a##1 += word32((p)>>32);\
			a##0 += word32(p);\
			p = MUL32((b)>>32, c);\
			a##2 += word32((p)>>32);\
			a##1 += word32(p);\
			p = MUL32((b)>>32, (c)>>32);\
			a##2 += p;\
			p = MUL32(b, (c)>>32);\
			a##1 += word32(p);\
			a##2 += word32(p>>32);}
	#endif
#endif
#ifndef VMAC_BOOL_32BIT
	#define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
	#define ADD128(rh,rl,ih,il) \
		{ word64 _il = (il); \
		(rl) += (_il); \
		(rh) += (ih) + ((rl) < (_il)); \
		}
#endif

// Portable VHASH update: first-layer NH hash over L1-key-length chunks of
// `data`, each chunk folded into the 127-bit polynomial accumulator held in
// m_polyState.  T_128BitTag selects whether the second, independent hash
// (needed for 128-bit tags) is maintained as well.
template <bool T_128BitTag>
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
	CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
	CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

	// One NH step: two message words (byte-swapped to little-endian) added to
	// two key words, multiplied, and accumulated; the second accumulator uses
	// the key shifted by two words, as the VMAC spec requires.
	#define INNER_LOOP_ITERATION(j) {\
		word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
		word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
		AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
		if (T_128BitTag)\
			AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
		}

	size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
	size_t innerLoopEnd = L1KeyLengthInWord64;
	const word64 *nhK = m_nhKey();
	word64 *polyS = (word64*)(void*)m_polyState();
	bool isFirstBlock = true;
	size_t i;

	#if !VMAC_BOOL_32BIT
		#if VMAC_BOOL_WORD128
			word128 a1=0, a2=0;
		#else
			word64 ah1=0, al1=0, ah2=0, al2=0;
		#endif
		// Polynomial keys live at offsets 2..3 of each 4-word polyS group.
		word64 kh1, kl1, kh2, kl2;
		kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
		if (T_128BitTag)
		{
			kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
		}
	#endif

	do
	{
		DeclareNH(nhA);
		DeclareNH(nhB);

		i = 0;
		if (blocksRemainingInWord64 < L1KeyLengthInWord64)
		{
			// Final partial chunk: peel off the non-multiple-of-8 words first.
			if (blocksRemainingInWord64 % 8)
			{
				innerLoopEnd = blocksRemainingInWord64 % 8;
				for (; i<innerLoopEnd; i+=2)
					INNER_LOOP_ITERATION(0);
			}
			innerLoopEnd = blocksRemainingInWord64;
		}
		// Main NH loop, unrolled 4x (8 word64s per pass).
		for (; i<innerLoopEnd; i+=8)
		{
			INNER_LOOP_ITERATION(0);
			INNER_LOOP_ITERATION(1);
			INNER_LOOP_ITERATION(2);
			INNER_LOOP_ITERATION(3);
		}
		blocksRemainingInWord64 -= innerLoopEnd;
		data += innerLoopEnd;

	#if VMAC_BOOL_32BIT
		// Normalize the 3-limb NH accumulators to (nh0, nh1, nh2 & m62).
		word32 nh0[2], nh1[2];
		word64 nh2[2];

		nh0[0] = word32(nhA0);
		nhA1 += (nhA0 >> 32);
		nh1[0] = word32(nhA1);
		nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

		if (T_128BitTag)
		{
			nh0[1] = word32(nhB0);
			nhB1 += (nhB0 >> 32);
			nh1[1] = word32(nhB1);
			nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
		}

		// Endian-safe 32-bit views of the accumulator and key words in polyS.
		#define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
		#define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
		#define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
		#define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
		#define aHi ((polyS+i*4)[0])
		#define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
		#define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
		#define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
		#define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
		#define kHi ((polyS+i*4+2)[0])

		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				// First chunk ever: accumulator := NH + key, no multiply.
				for (i=0; i<=(size_t)T_128BitTag; i++)
				{
					word64 t = (word64)nh0[i] + k0;
					a0 = (word32)t;
					t = (t >> 32) + nh1[i] + k1;
					a1 = (word32)t;
					aHi = (t >> 32) + nh2[i] + kHi;
				}
				continue;
			}
		}
		// a = a*k + NH mod p127, as 32x32 schoolbook products with the
		// 2^127 reduction folded in via the doubled (2*kX) terms.
		for (i=0; i<=(size_t)T_128BitTag; i++)
		{
			word64 p, t;
			word32 t2;

			p = MUL32(a3, 2*k3);
			p += nh2[i];
			p += MUL32(a0, k2);
			p += MUL32(a1, k1);
			p += MUL32(a2, k0);
			t2 = (word32)p;
			p >>= 32;
			p += MUL32(a0, k3);
			p += MUL32(a1, k2);
			p += MUL32(a2, k1);
			p += MUL32(a3, k0);
			t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
			p >>= 31;
			p += nh0[i];
			p += MUL32(a0, k0);
			p += MUL32(a1, 2*k3);
			p += MUL32(a2, 2*k2);
			p += MUL32(a3, 2*k1);
			t2 = (word32)p;
			p >>= 32;
			p += nh1[i];
			p += MUL32(a0, k1);
			p += MUL32(a1, k0);
			p += MUL32(a2, 2*k3);
			p += MUL32(a3, 2*k2);
			a0 = t2;
			a1 = (word32)p;
			aHi = (p >> 32) + t;
		}

		#undef a0
		#undef a1
		#undef a2
		#undef a3
		#undef aHi
		#undef k0
		#undef k1
		#undef k2
		#undef k3
		#undef kHi
	#else // #if VMAC_BOOL_32BIT
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				// First chunk ever: accumulator := (NH mod 2^126) + key.
				#if VMAC_BOOL_WORD128
					#define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)

					first_poly_step(a1, kh1, kl1, nhA);
					if (T_128BitTag)
						first_poly_step(a2, kh2, kl2, nhB);
				#else
					#define first_poly_step(ah, al, kh, kl, mh, ml) {\
						mh &= m62;\
						ADD128(mh, ml, kh, kl); \
						ah = mh; al = ml;}

					first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
					if (T_128BitTag)
						first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
				#endif
				continue;
			}
			else
			{
				// Resuming a message: reload accumulators from polyS.
				#if VMAC_BOOL_WORD128
					a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
				#else
					ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
				#endif
				if (T_128BitTag)
				{
					#if VMAC_BOOL_WORD128
						a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
					#else
						ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
					#endif
				}
			}
		}

		// a = a*k + NH mod p127 using full 64-bit (or 128-bit) arithmetic.
		#if VMAC_BOOL_WORD128
			#define poly_step(a, kh, kl, m) \
			{ word128 t1, t2, t3, t4;\
				Multiply128(t2, a>>64, kl);\
				Multiply128(t3, a, kh);\
				Multiply128(t1, a, kl);\
				Multiply128(t4, a>>64, 2*kh);\
				t2 += t3;\
				t4 += t1;\
				t2 += t4>>64;\
				a = (word128(word64(t2)&m63) << 64) | word64(t4);\
				t2 *= 2;\
				a += m & m126;\
				a += t2>>64;}

			poly_step(a1, kh1, kl1, nhA);
			if (T_128BitTag)
				poly_step(a2, kh2, kl2, nhB);
		#else
			#define poly_step(ah, al, kh, kl, mh, ml) \
			{ word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
				/* compute ab*cd, put bd into result registers */ \
				MUL64(t2h,t2l,ah,kl); \
				MUL64(t3h,t3l,al,kh); \
				MUL64(t1h,t1l,ah,2*kh); \
				MUL64(ah,al,al,kl); \
				/* add together ad + bc */ \
				ADD128(t2h,t2l,t3h,t3l); \
				/* add 2 * ac to result */ \
				ADD128(ah,al,t1h,t1l); \
				/* now (ah,al), (t2l,2*t2h) need summing */ \
				/* first add the high registers, carrying into t2h */ \
				ADD128(t2h,ah,z,t2l); \
				/* double t2h and add top bit of ah */ \
				t2h += t2h + (ah >> 63); \
				ah &= m63; \
				/* now add the low registers */ \
				mh &= m62; \
				ADD128(ah,al,mh,ml); \
				ADD128(ah,al,z,t2h); \
			}

			poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
			if (T_128BitTag)
				poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
		#endif
	#endif // #if VMAC_BOOL_32BIT
	} while (blocksRemainingInWord64);

	// Persist the accumulators back into m_polyState for the next call.
	#if VMAC_BOOL_WORD128
		(polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
		}
	#elif !VMAC_BOOL_32BIT
		(polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
		}
	#endif
}
750 
751 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
752 {
753 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
754  if (HasSSE2())
755  {
756  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
757  if (m_is128)
758  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
759  m_isFirstBlock = false;
760  }
761  else
762 #endif
763  {
764  if (m_is128)
765  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
766  else
767  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
768  }
769 }
770 
771 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
772 {
773  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
774  VHASH_Update(data, (length-remaining)/8);
775  return remaining;
776 }
777 
// Third-layer (L3) hash: compresses the 127-bit polynomial accumulator
// (input[0], input[1]) together with the residual message bit length `len`
// into a single 64-bit word using the key pair (l3Key[0], l3Key[1]).
// Arithmetic is modulo p64 = 2^64 - 257 (constant defined at the top of the
// file); the carry handling below is exact, so statement order matters.
// NOTE: code is byte-identical to the original; only comments changed.
word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
	word64 rh, rl, t, z=0;
	word64 p1 = input[0], p2 = input[1];
	word64 k1 = l3Key[0], k2 = l3Key[1];

	/* fully reduce (p1,p2)+(len,0) mod p127 */
	t = p1 >> 63;
	p1 &= m63;
	ADD128(p1, p2, len, t);
	/* At this point, (p1,p2) is at most 2^127+(len<<64) */
	t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
	ADD128(p1, p2, z, t);
	p1 &= m63;

	/* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
	t = p1 + (p2 >> 32);
	t += (t >> 32);
	t += (word32)t > 0xfffffffeU;
	p1 += (t >> 32);
	p2 += (p1 << 32);

	/* compute (p1+k1)%p64 and (p2+k2)%p64 */
	p1 += k1;
	p1 += (0 - (p1 < k1)) & 257;	// branch-free conditional reduction
	p2 += k2;
	p2 += (0 - (p2 < k2)) & 257;

	/* compute (p1+k1)*(p2+k2)%p64 */
	MUL64(rh, rl, p1, p2);
	t = rh >> 56;
	ADD128(t, rl, z, rh);
	rh <<= 8;
	ADD128(t, rl, z, rh);
	t += t << 8;
	rl += t;
	rl += (0 - (rl < t)) & 257;
	rl += (0 - (rl > p64-1)) & 257;
	return rl;
}
818 
819 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
820 {
821  CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
822  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
823  size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
824 
825  if (len)
826  {
827  memset(m_data()+len, 0, (0-len)%16);
828  VHASH_Update(DataBuf(), ((len+15)/16)*2);
829  len *= 8; // convert to bits
830  }
831  else if (m_isFirstBlock)
832  {
833  // special case for empty string
834  m_polyState()[0] = m_polyState()[2];
835  m_polyState()[1] = m_polyState()[3];
836  if (m_is128)
837  {
838  m_polyState()[4] = m_polyState()[6];
839  m_polyState()[5] = m_polyState()[7];
840  }
841  }
842 
843  if (m_is128)
844  {
845  word64 t[2];
846  t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
847  t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
848  if (size == 16)
849  {
850  PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
851  PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
852  }
853  else
854  {
857  memcpy(mac, t, size);
858  }
859  }
860  else
861  {
862  word64 t = L3Hash(m_polyState(), m_l3Key(), len);
863  t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
864  if (size == 8)
865  PutWord(false, BIG_ENDIAN_ORDER, mac, t);
866  else
867  {
869  memcpy(mac, &t, size);
870  }
871  }
872 }
873 
874 NAMESPACE_END
Standard names for retrieving values by name when working with NameValuePairs.
const char * DigestSize()
int, in bytes
Definition: argnames.h:79
An invalid argument was detected.
Definition: cryptlib.h:202
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition: cryptlib.cpp:58
T2 ModPowerOf2(const T1 &a, const T2 &b)
Reduces a value to a power of 2.
Definition: misc.h:1004
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:980
VMAC message authentication code base class.
Definition: vmac.h:24
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2396
Secure memory block with allocator and cleanup.
Definition: secblock.h:688
Library configuration file.
Interface for random number generators.
Definition: cryptlib.h:1383
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: cryptlib.cpp:190
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1250
Classes and functions for secure memory allocations.
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1111
Classes for the VMAC message authentication code.
A method was called which was not implemented.
Definition: cryptlib.h:223
unsigned int IVSize() const
Returns length of the IV accepted by this object.
Definition: vmac.h:29
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2081
virtual size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
Encrypt and xor multiple blocks using additional flags.
Definition: cryptlib.cpp:141
Precompiled header file.
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:851
SecBlock using AllocatorWithCleanup<byte, true> typedef.
Definition: secblock.h:1062
byte order is big-endian
Definition: cryptlib.h:147
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:69
Functions for CPU features and intrinsics.
void TruncatedFinal(byte *mac, size_t size)
Computes the hash of the current message.
Definition: vmac.cpp:819
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
Definition: argnames.h:21
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:116
bool VerifyBufsEqual(const byte *buf1, const byte *buf2, size_t count)
Performs a near constant-time comparison of two equally sized buffers.
Definition: misc.cpp:100
const char * L1KeyLength()
int, in bytes
Definition: argnames.h:80
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: vmac.cpp:168
Crypto++ library namespace.
void Resynchronize(const byte *nonce, int length=-1)
Resynchronize with an IV.
Definition: vmac.cpp:127
virtual void GetNextIV(RandomNumberGenerator &rng, byte *iv)
Retrieves a secure IV for the next message.
Definition: cryptlib.cpp:136
void GetNextIV(RandomNumberGenerator &rng, byte *IV)
Retrieves a secure IV for the next message.
Definition: vmac.cpp:121
Interface for retrieving values given their names.
Definition: cryptlib.h:293
byte * BytePtr()
Provides a byte pointer to the first element in the memory block.
Definition: secblock.h:804