Crypto++  8.8
Free C++ class library of cryptographic schemes
lsh256_avx.cpp
// lsh256_avx.cpp - written and placed in the public domain by Jeffrey Walton
// Based on the specification and source code provided by
// Korea Internet & Security Agency (KISA) website. Also
// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.

// We are hitting some sort of GCC bug in the LSH AVX2 code path.
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
// makes using zeroupper a little tricky.

#include "pch.h"
#include "config.h"

#include "lsh.h"
#include "misc.h"

// Squash MS LNK4221 and libtool warnings
extern const char LSH256_AVX_FNAME[] = __FILE__;

#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)

#if defined(CRYPTOPP_AVX2_AVAILABLE)
# include <emmintrin.h>
# include <immintrin.h>
#endif

#if defined(CRYPTOPP_GCC_COMPATIBLE)
# include <x86intrin.h>
#endif

ANONYMOUS_NAMESPACE_BEGIN

/* LSH Constants */

const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
// const unsigned int LSH256_CV_BYTE_LEN = 64;
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// const unsigned int WORD_BIT_LEN = 32;
const unsigned int NUM_STEPS = 26;

const unsigned int ROT_EVEN_ALPHA = 29;
const unsigned int ROT_EVEN_BETA = 1;
const unsigned int ROT_ODD_ALPHA = 5;
const unsigned int ROT_ODD_BETA = 17;

const unsigned int LSH_TYPE_256_256 = 0x0000020;
const unsigned int LSH_TYPE_256_224 = 0x000001C;

// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;

NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// lsh256.cpp
extern const word32 LSH256_IV224[CV_WORD_LEN];
extern const word32 LSH256_IV256[CV_WORD_LEN];
extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END // LSH
NAMESPACE_END // Crypto++

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::rotlFixed;
using CryptoPP::rotlConstant;

using CryptoPP::GetBlock;
using CryptoPP::LittleEndian;
using CryptoPP::ConditionalByteReverse;
using CryptoPP::LITTLE_ENDIAN_ORDER;

typedef byte lsh_u8;
typedef word32 lsh_u32;
typedef word32 lsh_uint;
typedef word32 lsh_err;
typedef word32 lsh_type;

using CryptoPP::LSH::LSH256_IV224;
using CryptoPP::LSH::LSH256_IV256;
using CryptoPP::LSH::LSH256_StepConstants;

struct LSH256_AVX2_Context
{
    LSH256_AVX2_Context(word32* state, word32 algType, word32& remainingBitLength) :
        cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
        last_block(reinterpret_cast<byte*>(state+48)),
        remain_databitlen(remainingBitLength),
        alg_type(static_cast<lsh_type>(algType)) {}

    lsh_u32* cv_l;  // start of our state block
    lsh_u32* cv_r;
    lsh_u32* sub_msgs;
    lsh_u8*  last_block;
    lsh_u32& remain_databitlen;
    lsh_type alg_type;
};

struct LSH256_AVX2_Internal
{
    LSH256_AVX2_Internal(word32* state) :
        submsg_e_l(state+16), submsg_e_r(state+24),
        submsg_o_l(state+32), submsg_o_r(state+40) { }

    lsh_u32* submsg_e_l; /* even left sub-message */
    lsh_u32* submsg_e_r; /* even right sub-message */
    lsh_u32* submsg_o_l; /* odd left sub-message */
    lsh_u32* submsg_o_r; /* odd right sub-message */
};
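
// Layout of the shared word32 state array, derived from the constructor
// offsets above and the AlgorithmType/RemainingBits constants:
//   state[ 0.. 7]  cv_l
//   state[ 8..15]  cv_r
//   state[16..47]  sub_msgs (submsg_e_l, submsg_e_r, submsg_o_l, submsg_o_r; 8 words each)
//   state[48..79]  last_block (128 bytes, viewed through an lsh_u8 pointer)
//   state[80]      AlgorithmType
//   state[81]      RemainingBits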

// Zero the upper 128 bits of all YMM registers on exit. This avoids
// AVX-to-SSE state transition penalties when subsequent code executes
// legacy SSE instructions. GCC Issue 82735,
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735, makes using
// zeroupper a little tricky.

struct AVX_Cleanup
{
    ~AVX_Cleanup() {
        _mm256_zeroupper();
    }
};
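
// Typical use (see lsh256_init_avx2, lsh256_update_avx2 and
// lsh256_final_avx2 below): declare a stack-local AVX_Cleanup at the top
// of a function that uses AVX2 intrinsics so _mm256_zeroupper() runs on
// every return path, including the early returns.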

// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };

/* LSH AlgType Macro */

inline bool LSH_IS_LSH512(lsh_uint val) {
    return (val & 0xf0000) == 0;
}

inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
    return val >> 24;
}

inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
    return val & 0xffff;
}

inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
    return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
}
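
// Worked example of the decoding above, using the constants defined in
// this file. For LSH_TYPE_256_224 = 0x000001C:
//   LSH_GET_HASHBYTE(0x1C)      = 0x1C & 0xffff  = 28 bytes
//   LSH_GET_SMALL_HASHBIT(0x1C) = 0x1C >> 24     = 0
//   LSH_GET_HASHBIT(0x1C)       = (28 << 3) - 0  = 224 bits
// Likewise LSH_TYPE_256_256 = 0x0000020 decodes to 32 bytes / 256 bits.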

inline lsh_u32 loadLE32(lsh_u32 v) {
    return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
}

lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
    return rotlFixed(x, r);
}

// Original code relied upon unaligned lsh_u32 buffer
inline void load_msg_blk(LSH256_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
    _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
    _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
    _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
        _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
}

inline void msg_exp_even(LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
        0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);

    _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)), mask)));
    _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)), mask)));
}

inline void msg_exp_odd(LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;
    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
        0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);

    _mm256_storeu_si256(M256_CAST(submsg_o_l+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)), mask)));
    _mm256_storeu_si256(M256_CAST(submsg_o_r+0), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
        _mm256_shuffle_epi8(
            _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)), mask)));
}

inline void load_sc(const lsh_u32** p_const_v, size_t i)
{
    CRYPTOPP_ASSERT(p_const_v != NULLPTR);

    *p_const_v = &LSH256_StepConstants[i];
}

inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_e_l = i_state->submsg_e_l;
    lsh_u32* submsg_e_r = i_state->submsg_e_r;

    _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0))));
    _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0))));
}

inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u32* submsg_o_l = i_state->submsg_o_l;
    lsh_u32* submsg_o_r = i_state->submsg_o_r;

    _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
    _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
        _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
}

inline void add_blk(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
        _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
}

template <unsigned int R>
inline void rotate_blk(lsh_u32 cv[8])
{
    _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
        _mm256_slli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
        _mm256_srli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), 32-R)));
}

inline void xor_with_const(lsh_u32 cv_l[8], const lsh_u32 const_v[8])
{
    _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
        _mm256_loadu_si256(CONST_M256_CAST(const_v))));
}

inline void rotate_msg_gamma(lsh_u32 cv_r[8])
{
    // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
    _mm256_storeu_si256(M256_CAST(cv_r+0),
        _mm256_shuffle_epi8(_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
            _mm256_set_epi8(
                /* hi lane */ 15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1,
                /* lo lane */ 12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
}
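
// The shuffle mask above implements the per-word byte rotations of
// g_gamma256 = { 0, 8, 16, 24, 24, 16, 8, 0 } within each 128-bit lane.
// For example, in the low lane the word held in bytes 4..7 is written
// back in byte order {7,4,5,6}; for a little-endian word32 that byte
// permutation is exactly a left rotation by 8 bits (gamma = 8).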

inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    __m256i temp = _mm256_shuffle_epi32(
        _mm256_loadu_si256(CONST_M256_CAST(cv_l)), _MM_SHUFFLE(3,1,0,2));
    _mm256_storeu_si256(M256_CAST(cv_r),
        _mm256_shuffle_epi32(
            _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(1,2,3,0)));
    _mm256_storeu_si256(M256_CAST(cv_l),
        _mm256_permute2x128_si256(temp,
            _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,3,0,1)));
    _mm256_storeu_si256(M256_CAST(cv_r),
        _mm256_permute2x128_si256(temp,
            _mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,2,0,0)));
}

/* -------------------------------------------------------- *
 * step function
 * -------------------------------------------------------- */

template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
{
    add_blk(cv_l, cv_r);
    rotate_blk<Alpha>(cv_l);
    xor_with_const(cv_l, const_v);
    add_blk(cv_r, cv_l);
    rotate_blk<Beta>(cv_r);
    add_blk(cv_l, cv_r);
    rotate_msg_gamma(cv_r);
}

/* -------------------------------------------------------- *
 * compression function
 * -------------------------------------------------------- */
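
// compress() applies the NUM_STEPS (26) step functions to one 128-byte
// message block: steps 0 and 1 use the sub-messages as loaded, and each
// later pair of steps first expands the sub-messages (msg_exp_even and
// msg_exp_odd) before the message addition, mix and word permutation.
// A final message addition follows the last step.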

inline void compress(LSH256_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    LSH256_AVX2_Internal s_state(ctx->cv_l);
    LSH256_AVX2_Internal* i_state = &s_state;

    const lsh_u32* const_v = NULL;
    lsh_u32* cv_l = ctx->cv_l;
    lsh_u32* cv_r = ctx->cv_r;

    load_msg_blk(i_state, pdMsgBlk);

    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 0);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    for (size_t i = 1; i < NUM_STEPS / 2; i++)
    {
        msg_exp_even(i_state);
        msg_add_even(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        msg_exp_odd(i_state);
        msg_add_odd(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
}

/* -------------------------------------------------------- */

inline void load_iv(word32 cv_l[8], word32 cv_r[8], const word32 iv[16])
{
    // The IVs are 32-byte aligned, so we can use aligned loads.
    _mm256_storeu_si256(M256_CAST(cv_l+0),
        _mm256_load_si256(CONST_M256_CAST(iv+0)));
    _mm256_storeu_si256(M256_CAST(cv_r+0),
        _mm256_load_si256(CONST_M256_CAST(iv+8)));
}

inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
    _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
}

inline void zero_submsgs(LSH256_AVX2_Context* ctx)
{
    lsh_u32* sub_msgs = ctx->sub_msgs;

    _mm256_storeu_si256(M256_CAST(sub_msgs+ 0), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(sub_msgs+ 8), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(sub_msgs+16), _mm256_setzero_si256());
    _mm256_storeu_si256(M256_CAST(sub_msgs+24), _mm256_setzero_si256());
}

inline void init224(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
}

inline void init256(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
}

/* -------------------------------------------------------- */

inline void fin(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
        _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
        _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
}

/* -------------------------------------------------------- */

inline void get_hash(LSH256_AVX2_Context* ctx, lsh_u8* pbHashVal)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);
    CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

    lsh_uint alg_type = ctx->alg_type;
    lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
    lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

    // Multiplying by sizeof(lsh_u8) looks odd...
    std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
    if (hash_val_bit_len){
        pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
    }
}

/* -------------------------------------------------------- */

lsh_err lsh256_init_avx2(LSH256_AVX2_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    lsh_u32 alg_type = ctx->alg_type;
    const lsh_u32* const_v = NULL;
    ctx->remain_databitlen = 0;

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
    AVX_Cleanup cleanup;

    switch (alg_type)
    {
    case LSH_TYPE_256_256:
        init256(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_256_224:
        init224(ctx);
        return LSH_SUCCESS;
    default:
        break;
    }

    lsh_u32* cv_l = ctx->cv_l;
    lsh_u32* cv_r = ctx->cv_r;

    zero_iv(cv_l, cv_r);
    cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
    cv_l[1] = LSH_GET_HASHBIT(alg_type);

    for (size_t i = 0; i < NUM_STEPS / 2; i++)
    {
        // Mix
        load_sc(&const_v, i * 16);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        load_sc(&const_v, i * 16 + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    return LSH_SUCCESS;
}

lsh_err lsh256_update_avx2(LSH256_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(data != NULLPTR);
    CRYPTOPP_ASSERT(databitlen % 8 == 0);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
    AVX_Cleanup cleanup;

    if (databitlen == 0){
        return LSH_SUCCESS;
    }

    // We are byte oriented. Tail bits will always be 0.
    size_t databytelen = databitlen >> 3;
    // lsh_uint pos2 = databitlen & 0x7;
    const size_t pos2 = 0;

    size_t remain_msg_byte = ctx->remain_databitlen >> 3;
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }
    if (remain_msg_bit > 0){
        return LSH_ERR_INVALID_DATABITLEN;
    }

    if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
    {
        std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
        ctx->remain_databitlen += (lsh_uint)databitlen;
        remain_msg_byte += (lsh_uint)databytelen;
        if (pos2){
            ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        }
        return LSH_SUCCESS;
    }

    if (remain_msg_byte > 0){
        size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
        std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
        compress(ctx, ctx->last_block);
        data += more_byte;
        databytelen -= more_byte;
        remain_msg_byte = 0;
        ctx->remain_databitlen = 0;
    }

    while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
    {
        // This call to compress caused some trouble: the data pointer
        // can become unaligned in the previous block. It is safe here
        // because load_msg_blk() uses unaligned loads.
        compress(ctx, data);
        data += LSH256_MSG_BLK_BYTE_LEN;
        databytelen -= LSH256_MSG_BLK_BYTE_LEN;
    }

    if (databytelen > 0){
        std::memcpy(ctx->last_block, data, databytelen);
        ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
    }

    if (pos2){
        ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        ctx->remain_databitlen += pos2;
    }

    return LSH_SUCCESS;
}

lsh_err lsh256_final_avx2(LSH256_AVX2_Context* ctx, lsh_u8* hashval)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(hashval != NULLPTR);

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
    AVX_Cleanup cleanup;

    // We are byte oriented. Tail bits will always be 0.
    size_t remain_msg_byte = ctx->remain_databitlen >> 3;
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }

    if (remain_msg_bit){
        ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
    }
    else{
        ctx->last_block[remain_msg_byte] = 0x80;
    }
    std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

    compress(ctx, ctx->last_block);

    fin(ctx);
    get_hash(ctx, hashval);

    return LSH_SUCCESS;
}

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)

extern
void LSH256_Base_Restart_AVX2(word32* state)
{
    state[RemainingBits] = 0;
    LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_init_avx2(&ctx);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_init_avx2 failed");
}

extern
void LSH256_Base_Update_AVX2(word32* state, const byte *input, size_t size)
{
    LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_update_avx2(&ctx, input, 8*size);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_update_avx2 failed");
}

extern
void LSH256_Base_TruncatedFinal_AVX2(word32* state, byte *hash, size_t)
{
    LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh256_final_avx2(&ctx, hash);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_final_avx2 failed");
}

NAMESPACE_END

#endif // CRYPTOPP_AVX2_AVAILABLE
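
This AVX2 code path is selected internally by the library's LSH-256 implementation on capable CPUs; applications use the ordinary hash interface declared in lsh.h. A minimal sketch of that usage follows (the message string is illustrative, not a test vector):

    #include "cryptlib.h"
    #include "lsh.h"
    #include "filters.h"
    #include "hex.h"
    #include <iostream>
    #include <string>

    int main()
    {
        CryptoPP::LSH256 hash;
        const std::string message = "Attack at dawn";

        // Compute the digest through the standard HashTransformation interface.
        std::string digest(hash.DigestSize(), '\0');
        hash.Update(reinterpret_cast<const CryptoPP::byte*>(message.data()), message.size());
        hash.Final(reinterpret_cast<CryptoPP::byte*>(&digest[0]));

        // Hex-encode the digest for display.
        std::string encoded;
        CryptoPP::StringSource ss(digest, true,
            new CryptoPP::HexEncoder(new CryptoPP::StringSink(encoded)));

        std::cout << "LSH-256: " << encoded << std::endl;
        return 0;
    }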