Crypto++  8.8
Free C++ class library of cryptographic schemes
lsh256_sse.cpp
1 // lsh256_sse.cpp - written and placed in the public domain by Jeffrey Walton
2 // Based on the specification and source code provided by
3 // Korea Internet & Security Agency (KISA) website. Also
4 // see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5 // and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6 
7 // We are hitting some sort of GCC bug in the LSH AVX2 code path.
8 // Clang is OK on the AVX2 code path. We believe it is GCC Issue
9 // 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10 // makes using zeroupper a little tricky.
11 
12 #include "pch.h"
13 #include "config.h"
14 
15 #include "lsh.h"
16 #include "cpu.h"
17 #include "misc.h"
18 
19 // Squash MS LNK4221 and libtool warnings
20 extern const char LSH256_SSE_FNAME[] = __FILE__;
21 
22 #if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23 
24 #if defined(CRYPTOPP_SSSE3_AVAILABLE)
25 # include <emmintrin.h>
26 # include <tmmintrin.h>
27 #endif
28 
29 #if defined(CRYPTOPP_XOP_AVAILABLE)
30 # include <ammintrin.h>
31 #endif
32 
33 #if defined(CRYPTOPP_GCC_COMPATIBLE)
34 # include <x86intrin.h>
35 #endif
36 
37 ANONYMOUS_NAMESPACE_BEGIN
38 
/* LSH Constants */

// Byte size of one message block and of the largest hash value.
const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
// const unsigned int LSH256_CV_BYTE_LEN = 64;
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;

// Word counts: chaining value, one set of step constants, number of steps.
// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// const unsigned int WORD_BIT_LEN = 32;
const unsigned int NUM_STEPS = 26;

// Rotation amounts passed to mix<>() for even and odd steps.
const unsigned int ROT_EVEN_ALPHA = 29;
const unsigned int ROT_EVEN_BETA = 1;
const unsigned int ROT_ODD_ALPHA = 5;
const unsigned int ROT_ODD_BETA = 17;

// Algorithm type words; the low 16 bits hold the hash length in bytes
// (see LSH_GET_HASHBYTE).
const unsigned int LSH_TYPE_256_256 = 0x20;
const unsigned int LSH_TYPE_256_224 = 0x1C;

// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;

/* Error Code */

const unsigned int LSH_SUCCESS = 0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

// Word positions of the algorithm type and the buffered bit count.
const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;
76 
77 NAMESPACE_END
78 
79 NAMESPACE_BEGIN(CryptoPP)
80 NAMESPACE_BEGIN(LSH)
81 
82 // lsh256.cpp
83 extern const word32 LSH256_IV224[CV_WORD_LEN];
84 extern const word32 LSH256_IV256[CV_WORD_LEN];
85 extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
86 
87 NAMESPACE_END // LSH
88 NAMESPACE_END // Crypto++
89 
90 ANONYMOUS_NAMESPACE_BEGIN
91 
92 using CryptoPP::byte;
93 using CryptoPP::word32;
96 
97 using CryptoPP::GetBlock;
101 
102 typedef byte lsh_u8;
103 typedef word32 lsh_u32;
104 typedef word32 lsh_uint;
105 typedef word32 lsh_err;
106 typedef word32 lsh_type;
107 
108 using CryptoPP::LSH::LSH256_IV224;
109 using CryptoPP::LSH::LSH256_IV256;
110 using CryptoPP::LSH::LSH256_StepConstants;
111 
112 struct LSH256_SSSE3_Context
113 {
114  LSH256_SSSE3_Context(word32* state, word32 algType, word32& remainingBitLength) :
115  cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
116  last_block(reinterpret_cast<byte*>(state+48)),
117  remain_databitlen(remainingBitLength),
118  alg_type(static_cast<lsh_type>(algType)) {}
119 
120  lsh_u32* cv_l; // start of our state block
121  lsh_u32* cv_r;
122  lsh_u32* sub_msgs;
123  lsh_u8* last_block;
124  lsh_u32& remain_databitlen;
125  lsh_type alg_type;
126 };
127 
128 struct LSH256_SSSE3_Internal
129 {
130  LSH256_SSSE3_Internal(word32* state) :
131  submsg_e_l(state+16), submsg_e_r(state+24),
132  submsg_o_l(state+32), submsg_o_r(state+40) { }
133 
134  lsh_u32* submsg_e_l; /* even left sub-message */
135  lsh_u32* submsg_e_r; /* even right sub-message */
136  lsh_u32* submsg_o_l; /* odd left sub-message */
137  lsh_u32* submsg_o_r; /* odd right sub-message */
138 };
139 
140 // const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
141 
142 /* LSH AlgType Macro */
143 
144 inline bool LSH_IS_LSH512(lsh_uint val) {
145  return (val & 0xf0000) == 0;
146 }
147 
148 inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
149  return val >> 24;
150 }
151 
152 inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
153  return val & 0xffff;
154 }
155 
156 inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
157  return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
158 }
159 
160 inline lsh_u32 loadLE32(lsh_u32 v) {
162 }
163 
164 lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
165  return rotlFixed(x, r);
166 }
167 
168 // Original code relied upon unaligned lsh_u32 buffer
169 inline void load_msg_blk(LSH256_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
170 {
171  CRYPTOPP_ASSERT(i_state != NULLPTR);
172  lsh_u32* submsg_e_l = i_state->submsg_e_l;
173  lsh_u32* submsg_e_r = i_state->submsg_e_r;
174  lsh_u32* submsg_o_l = i_state->submsg_o_l;
175  lsh_u32* submsg_o_r = i_state->submsg_o_r;
176 
177  _mm_storeu_si128(M128_CAST(submsg_e_l+0),
178  _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
179  _mm_storeu_si128(M128_CAST(submsg_e_l+4),
180  _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
181  _mm_storeu_si128(M128_CAST(submsg_e_r+0),
182  _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
183  _mm_storeu_si128(M128_CAST(submsg_e_r+4),
184  _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
185  _mm_storeu_si128(M128_CAST(submsg_o_l+0),
186  _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
187  _mm_storeu_si128(M128_CAST(submsg_o_l+4),
188  _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
189  _mm_storeu_si128(M128_CAST(submsg_o_r+0),
190  _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
191  _mm_storeu_si128(M128_CAST(submsg_o_r+4),
192  _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
193 }
194 
195 inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
196 {
197  CRYPTOPP_ASSERT(i_state != NULLPTR);
198 
199  lsh_u32* submsg_e_l = i_state->submsg_e_l;
200  lsh_u32* submsg_e_r = i_state->submsg_e_r;
201  lsh_u32* submsg_o_l = i_state->submsg_o_l;
202  lsh_u32* submsg_o_r = i_state->submsg_o_r;
203 
204  _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi32(
205  _mm_shuffle_epi32(
206  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(3,2,1,0)),
207  _mm_shuffle_epi32(
208  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(1,0,2,3))));
209 
210  _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi32(
211  _mm_shuffle_epi32(
212  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(3,2,1,0)),
213  _mm_shuffle_epi32(
214  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(2,1,0,3))));
215 
216  _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi32(
217  _mm_shuffle_epi32(
218  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(3,2,1,0)),
219  _mm_shuffle_epi32(
220  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(1,0,2,3))));
221 
222  _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi32(
223  _mm_shuffle_epi32(
224  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(3,2,1,0)),
225  _mm_shuffle_epi32(
226  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(2,1,0,3))));
227 }
228 
229 inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
230 {
231  CRYPTOPP_ASSERT(i_state != NULLPTR);
232 
233  lsh_u32* submsg_e_l = i_state->submsg_e_l;
234  lsh_u32* submsg_e_r = i_state->submsg_e_r;
235  lsh_u32* submsg_o_l = i_state->submsg_o_l;
236  lsh_u32* submsg_o_r = i_state->submsg_o_r;
237 
238  _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi32(
239  _mm_shuffle_epi32(
240  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(3,2,1,0)),
241  _mm_shuffle_epi32(
242  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(1,0,2,3))));
243 
244  _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi32(
245  _mm_shuffle_epi32(
246  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(3,2,1,0)),
247  _mm_shuffle_epi32(
248  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(2,1,0,3))));
249 
250  _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi32(
251  _mm_shuffle_epi32(
252  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(3,2,1,0)),
253  _mm_shuffle_epi32(
254  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(1,0,2,3))));
255 
256  _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi32(
257  _mm_shuffle_epi32(
258  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(3,2,1,0)),
259  _mm_shuffle_epi32(
260  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(2,1,0,3))));
261 }
262 
263 inline void load_sc(const lsh_u32** p_const_v, size_t i)
264 {
265  CRYPTOPP_ASSERT(p_const_v != NULLPTR);
266 
267  *p_const_v = &LSH256_StepConstants[i];
268 }
269 
270 inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
271 {
272  CRYPTOPP_ASSERT(i_state != NULLPTR);
273 
274  lsh_u32* submsg_e_l = i_state->submsg_e_l;
275  lsh_u32* submsg_e_r = i_state->submsg_e_r;
276 
277  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_xor_si128(
278  _mm_loadu_si128(CONST_M128_CAST(cv_l+0)),
279  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
280  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
281  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
282  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
283  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_xor_si128(
284  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
285  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
286  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
287  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
288  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
289 }
290 
291 inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
292 {
293  CRYPTOPP_ASSERT(i_state != NULLPTR);
294 
295  lsh_u32* submsg_o_l = i_state->submsg_o_l;
296  lsh_u32* submsg_o_r = i_state->submsg_o_r;
297 
298  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
299  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
300  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
301  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
302  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
303  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
304  _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
305  _mm_loadu_si128(CONST_M128_CAST(cv_r)),
306  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
307  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
308  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
309  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
310 }
311 
312 inline void add_blk(lsh_u32 cv_l[8], const lsh_u32 cv_r[8])
313 {
314  _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi32(
315  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
316  _mm_loadu_si128(CONST_M128_CAST(cv_r))));
317  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi32(
318  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
319  _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
320 }
321 
322 template <unsigned int R>
323 inline void rotate_blk(lsh_u32 cv[8])
324 {
325 #if defined(CRYPTOPP_XOP_AVAILABLE)
326  _mm_storeu_si128(M128_CAST(cv),
327  _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
328  _mm_storeu_si128(M128_CAST(cv+4),
329  _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
330 #else
331  _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
332  _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
333  _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), 32-R)));
334  _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
335  _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
336  _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 32-R)));
337 #endif
338 }
339 
340 inline void xor_with_const(lsh_u32* cv_l, const lsh_u32* const_v)
341 {
342  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
343  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
344  _mm_loadu_si128(CONST_M128_CAST(const_v))));
345  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
346  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
347  _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
348 }
349 
350 inline void rotate_msg_gamma(lsh_u32 cv_r[8])
351 {
352  // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
353  _mm_storeu_si128(M128_CAST(cv_r+0),
354  _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
355  _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
356  _mm_storeu_si128(M128_CAST(cv_r+4),
357  _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
358  _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1)));
359 }
360 
361 inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
362 {
363  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_shuffle_epi32(
364  _mm_loadu_si128(CONST_M128_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
365  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_shuffle_epi32(
366  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
367  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_shuffle_epi32(
368  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
369  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_shuffle_epi32(
370  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
371 
372  __m128i temp = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
373  _mm_storeu_si128(M128_CAST(cv_l+0),
374  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
375  _mm_storeu_si128(M128_CAST(cv_l+4),
376  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
377  _mm_storeu_si128(M128_CAST(cv_r+4),
378  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
379  _mm_storeu_si128(M128_CAST(cv_r+0), temp);
380 }
381 
382 /* -------------------------------------------------------- *
383 * step function
384 * -------------------------------------------------------- */
385 
386 template <unsigned int Alpha, unsigned int Beta>
387 inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
388 {
389  add_blk(cv_l, cv_r);
390  rotate_blk<Alpha>(cv_l);
391  xor_with_const(cv_l, const_v);
392  add_blk(cv_r, cv_l);
393  rotate_blk<Beta>(cv_r);
394  add_blk(cv_l, cv_r);
395  rotate_msg_gamma(cv_r);
396 }
397 
398 /* -------------------------------------------------------- *
399 * compression function
400 * -------------------------------------------------------- */
401 
402 inline void compress(LSH256_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
403 {
404  CRYPTOPP_ASSERT(ctx != NULLPTR);
405 
406  LSH256_SSSE3_Internal s_state(ctx->cv_l);
407  LSH256_SSSE3_Internal* i_state = &s_state;
408 
409  const lsh_u32* const_v = NULL;
410  lsh_u32* cv_l = ctx->cv_l;
411  lsh_u32* cv_r = ctx->cv_r;
412 
413  load_msg_blk(i_state, pdMsgBlk);
414 
415  msg_add_even(cv_l, cv_r, i_state);
416  load_sc(&const_v, 0);
417  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
418  word_perm(cv_l, cv_r);
419 
420  msg_add_odd(cv_l, cv_r, i_state);
421  load_sc(&const_v, 8);
422  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
423  word_perm(cv_l, cv_r);
424 
425  for (size_t i = 1; i < NUM_STEPS / 2; i++)
426  {
427  msg_exp_even(i_state);
428  msg_add_even(cv_l, cv_r, i_state);
429  load_sc(&const_v, 16 * i);
430  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
431  word_perm(cv_l, cv_r);
432 
433  msg_exp_odd(i_state);
434  msg_add_odd(cv_l, cv_r, i_state);
435  load_sc(&const_v, 16 * i + 8);
436  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
437  word_perm(cv_l, cv_r);
438  }
439 
440  msg_exp_even(i_state);
441  msg_add_even(cv_l, cv_r, i_state);
442 }
443 
444 /* -------------------------------------------------------- */
445 
446 inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 iv[16])
447 {
448  _mm_storeu_si128(M128_CAST(cv_l+ 0),
449  _mm_load_si128(CONST_M128_CAST(iv+ 0)));
450  _mm_storeu_si128(M128_CAST(cv_l+ 4),
451  _mm_load_si128(CONST_M128_CAST(iv+ 4)));
452  _mm_storeu_si128(M128_CAST(cv_r+ 0),
453  _mm_load_si128(CONST_M128_CAST(iv+ 8)));
454  _mm_storeu_si128(M128_CAST(cv_r+ 4),
455  _mm_load_si128(CONST_M128_CAST(iv+12)));
456 }
457 
458 inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
459 {
460  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
461  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
462  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
463  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
464 }
465 
466 inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
467 {
468  lsh_u32* sub_msgs = ctx->sub_msgs;
469 
470  _mm_storeu_si128(M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
471  _mm_storeu_si128(M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
472  _mm_storeu_si128(M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
473  _mm_storeu_si128(M128_CAST(sub_msgs+12), _mm_setzero_si128());
474  _mm_storeu_si128(M128_CAST(sub_msgs+16), _mm_setzero_si128());
475  _mm_storeu_si128(M128_CAST(sub_msgs+20), _mm_setzero_si128());
476  _mm_storeu_si128(M128_CAST(sub_msgs+24), _mm_setzero_si128());
477  _mm_storeu_si128(M128_CAST(sub_msgs+28), _mm_setzero_si128());
478 }
479 
480 inline void init224(LSH256_SSSE3_Context* ctx)
481 {
482  CRYPTOPP_ASSERT(ctx != NULLPTR);
483 
484  zero_submsgs(ctx);
485  load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
486 }
487 
488 inline void init256(LSH256_SSSE3_Context* ctx)
489 {
490  CRYPTOPP_ASSERT(ctx != NULLPTR);
491 
492  zero_submsgs(ctx);
493  load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
494 }
495 
496 /* -------------------------------------------------------- */
497 
498 inline void fin(LSH256_SSSE3_Context* ctx)
499 {
500  CRYPTOPP_ASSERT(ctx != NULLPTR);
501 
502  _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
503  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
504  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
505  _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
506  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
507  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
508 }
509 
510 /* -------------------------------------------------------- */
511 
512 inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
513 {
514  CRYPTOPP_ASSERT(ctx != NULLPTR);
515  CRYPTOPP_ASSERT(ctx->alg_type != 0);
516  CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
517 
518  lsh_uint alg_type = ctx->alg_type;
519  lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
520  lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
521 
522  // Multiplying by sizeof(lsh_u8) looks odd...
523  std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
524  if (hash_val_bit_len){
525  pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
526  }
527 }
528 
529 /* -------------------------------------------------------- */
530 
531 lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
532 {
533  CRYPTOPP_ASSERT(ctx != NULLPTR);
534  CRYPTOPP_ASSERT(ctx->alg_type != 0);
535 
536  lsh_u32 alg_type = ctx->alg_type;
537  const lsh_u32* const_v = NULL;
538  ctx->remain_databitlen = 0;
539 
540  switch (alg_type)
541  {
542  case LSH_TYPE_256_256:
543  init256(ctx);
544  return LSH_SUCCESS;
545  case LSH_TYPE_256_224:
546  init224(ctx);
547  return LSH_SUCCESS;
548  default:
549  break;
550  }
551 
552  lsh_u32* cv_l = ctx->cv_l;
553  lsh_u32* cv_r = ctx->cv_r;
554 
555  zero_iv(cv_l, cv_r);
556  cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
557  cv_l[1] = LSH_GET_HASHBIT(alg_type);
558 
559  for (size_t i = 0; i < NUM_STEPS / 2; i++)
560  {
561  //Mix
562  load_sc(&const_v, i * 16);
563  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
564  word_perm(cv_l, cv_r);
565 
566  load_sc(&const_v, i * 16 + 8);
567  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
568  word_perm(cv_l, cv_r);
569  }
570 
571  return LSH_SUCCESS;
572 }
573 
574 lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
575 {
576  CRYPTOPP_ASSERT(ctx != NULLPTR);
577  CRYPTOPP_ASSERT(data != NULLPTR);
578  CRYPTOPP_ASSERT(databitlen % 8 == 0);
579  CRYPTOPP_ASSERT(ctx->alg_type != 0);
580 
581  if (databitlen == 0){
582  return LSH_SUCCESS;
583  }
584 
585  // We are byte oriented. tail bits will always be 0.
586  size_t databytelen = databitlen >> 3;
587  // lsh_uint pos2 = databitlen & 0x7;
588  const size_t pos2 = 0;
589 
590  size_t remain_msg_byte = ctx->remain_databitlen >> 3;
591  // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
592  const size_t remain_msg_bit = 0;
593 
594  if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
595  return LSH_ERR_INVALID_STATE;
596  }
597  if (remain_msg_bit > 0){
598  return LSH_ERR_INVALID_DATABITLEN;
599  }
600 
601  if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
602  {
603  std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
604  ctx->remain_databitlen += (lsh_uint)databitlen;
605  remain_msg_byte += (lsh_uint)databytelen;
606  if (pos2){
607  ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
608  }
609  return LSH_SUCCESS;
610  }
611 
612  if (remain_msg_byte > 0){
613  size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
614  std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
615  compress(ctx, ctx->last_block);
616  data += more_byte;
617  databytelen -= more_byte;
618  remain_msg_byte = 0;
619  ctx->remain_databitlen = 0;
620  }
621 
622  while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
623  {
624  // This call to compress caused some trouble.
625  // The data pointer can become unaligned in the
626  // previous block.
627  compress(ctx, data);
628  data += LSH256_MSG_BLK_BYTE_LEN;
629  databytelen -= LSH256_MSG_BLK_BYTE_LEN;
630  }
631 
632  if (databytelen > 0){
633  std::memcpy(ctx->last_block, data, databytelen);
634  ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
635  }
636 
637  if (pos2){
638  ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
639  ctx->remain_databitlen += pos2;
640  }
641 
642  return LSH_SUCCESS;
643 }
644 
645 lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
646 {
647  CRYPTOPP_ASSERT(ctx != NULLPTR);
648  CRYPTOPP_ASSERT(hashval != NULLPTR);
649 
650  // We are byte oriented. tail bits will always be 0.
651  size_t remain_msg_byte = ctx->remain_databitlen >> 3;
652  // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
653  const size_t remain_msg_bit = 0;
654 
655  if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
656  return LSH_ERR_INVALID_STATE;
657  }
658 
659  if (remain_msg_bit){
660  ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
661  }
662  else{
663  ctx->last_block[remain_msg_byte] = 0x80;
664  }
665  std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
666 
667  compress(ctx, ctx->last_block);
668 
669  fin(ctx);
670  get_hash(ctx, hashval);
671 
672  return LSH_SUCCESS;
673 }
674 
675 ANONYMOUS_NAMESPACE_END // Anonymous
676 
677 NAMESPACE_BEGIN(CryptoPP)
678 
679 extern
680 void LSH256_Base_Restart_SSSE3(word32* state)
681 {
682  state[RemainingBits] = 0;
683  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
684  lsh_err err = lsh256_ssse3_init(&ctx);
685 
686  if (err != LSH_SUCCESS)
687  throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_init failed");
688 }
689 
690 extern
691 void LSH256_Base_Update_SSSE3(word32* state, const byte *input, size_t size)
692 {
693  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
694  lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);
695 
696  if (err != LSH_SUCCESS)
697  throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_update failed");
698 }
699 
700 extern
701 void LSH256_Base_TruncatedFinal_SSSE3(word32* state, byte *hash, size_t)
702 {
703  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
704  lsh_err err = lsh256_ssse3_final(&ctx, hash);
705 
706  if (err != LSH_SUCCESS)
707  throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_final failed");
708 }
709 
710 NAMESPACE_END
711 
712 #endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:164
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:182
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:66
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:72
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:150
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:155
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
Definition: misc.h:1757
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2417
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1808
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68