Crypto++  8.8
Free C++ class library of cryptographic schemes
lsh512_sse.cpp
1 // lsh.cpp - written and placed in the public domain by Jeffrey Walton
2 // Based on the specification and source code provided by
3 // Korea Internet & Security Agency (KISA) website. Also
4 // see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5 // and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6 
7 // We are hitting some sort of GCC bug in the LSH AVX2 code path.
8 // Clang is OK on the AVX2 code path. We believe it is GCC Issue
9 // 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10 // makes using zeroupper a little tricky.
11 
12 #include "pch.h"
13 #include "config.h"
14 
15 #include "lsh.h"
16 #include "misc.h"
17 
18 // Squash MS LNK4221 and libtool warnings
19 extern const char LSH512_SSE_FNAME[] = __FILE__;
20 
21 #if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22 
23 #if defined(CRYPTOPP_SSSE3_AVAILABLE)
24 # include <emmintrin.h>
25 # include <tmmintrin.h>
26 #endif
27 
28 #if defined(CRYPTOPP_XOP_AVAILABLE)
29 # include <ammintrin.h>
30 #endif
31 
32 #if defined(CRYPTOPP_GCC_COMPATIBLE)
33 # include <x86intrin.h>
34 #endif
35 
36 ANONYMOUS_NAMESPACE_BEGIN
37 
38 /* LSH Constants */
39 
// Message block size: 256 bytes (2048 bits) per compression.
const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
// Largest digest produced by the LSH-512 family (LSH-512-512).
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;

// const unsigned int MSG_BLK_WORD_LEN = 32;
// Chaining variable length in 64-bit words (cv_l plus cv_r).
const unsigned int CV_WORD_LEN = 16;
// Step constants consumed per step function (8 words).
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// LSH-512 applies 28 step functions per compression.
const unsigned int NUM_STEPS = 28;

// Rotation amounts for the even and odd mix steps.
const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

// Algorithm type codes. The low 16 bits hold the digest length in
// bytes (see LSH_GET_HASHBYTE); bits 16..19 tag the LSH-512 family
// (see LSH_IS_LSH512).
const unsigned int LSH_TYPE_512_512 = 0x0010040;
const unsigned int LSH_TYPE_512_384 = 0x0010030;
const unsigned int LSH_TYPE_512_256 = 0x0010020;
const unsigned int LSH_TYPE_512_224 = 0x001001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;
76 
77 NAMESPACE_END
78 
NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// lsh512.cpp
// The IVs and step constants are defined in lsh512.cpp and shared
// with this SIMD implementation.
extern const word64 LSH512_IV224[CV_WORD_LEN];
extern const word64 LSH512_IV256[CV_WORD_LEN];
extern const word64 LSH512_IV384[CV_WORD_LEN];
extern const word64 LSH512_IV512[CV_WORD_LEN];
extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END // LSH
NAMESPACE_END // Crypto++
91 
92 ANONYMOUS_NAMESPACE_BEGIN
93 
94 using CryptoPP::byte;
95 using CryptoPP::word32;
96 using CryptoPP::word64;
99 
100 using CryptoPP::GetBlock;
104 
105 using CryptoPP::LSH::LSH512_IV224;
106 using CryptoPP::LSH::LSH512_IV256;
107 using CryptoPP::LSH::LSH512_IV384;
108 using CryptoPP::LSH::LSH512_IV512;
109 using CryptoPP::LSH::LSH512_StepConstants;
110 
111 typedef byte lsh_u8;
112 typedef word32 lsh_u32;
113 typedef word64 lsh_u64;
114 typedef word32 lsh_uint;
115 typedef word32 lsh_err;
116 typedef word32 lsh_type;
117 
// Non-owning view over the caller's word64 state array. The context
// maps fixed offsets of 'state' onto the pieces the LSH-512 routines
// use; no memory is allocated or freed here.
struct LSH512_SSSE3_Context
{
    LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
        cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
        last_block(reinterpret_cast<byte*>(state+48)),
        remain_databitlen(remainingBitLength),
        alg_type(static_cast<lsh_type>(algType)) {}

    lsh_u64* cv_l;  // start of our state block (left chaining variable)
    lsh_u64* cv_r;  // right chaining variable (state+8)
    lsh_u64* sub_msgs;  // expanded sub-message area (state+16)
    lsh_u8* last_block;  // buffered partial message block (state+48)
    lsh_u64& remain_databitlen;  // bits currently buffered in last_block
    lsh_type alg_type;  // LSH_TYPE_512_xxx code
};
133 
// Non-owning view over the sub-message area of the same state array
// used by LSH512_SSSE3_Context (words 16..47 of 'state').
struct LSH512_SSSE3_Internal
{
    LSH512_SSSE3_Internal(word64* state) :
        submsg_e_l(state+16), submsg_e_r(state+24),
        submsg_o_l(state+32), submsg_o_r(state+40) { }

    lsh_u64* submsg_e_l; /* even left sub-message */
    lsh_u64* submsg_e_r; /* even right sub-message */
    lsh_u64* submsg_o_l; /* odd left sub-message */
    lsh_u64* submsg_o_r; /* odd right sub-message */
};
145 
146 // const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
147 
148 /* LSH AlgType Macro */
149 
150 inline bool LSH_IS_LSH512(lsh_uint val) {
151  return (val & 0xf0000) == 0x10000;
152 }
153 
154 inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
155  return val >> 24;
156 }
157 
158 inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
159  return val & 0xffff;
160 }
161 
162 inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
163  return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
164 }
165 
166 inline lsh_u64 loadLE64(lsh_u64 v) {
168 }
169 
// Rotate x left by r bits. Runtime-rotation helper; used when the
// amount is not a compile-time constant.
lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
    return rotlFixed(x, r);
}
173 
// Original code relied upon unaligned lsh_u64 buffer
// Copy one 256-byte message block into the four 8-word sub-message
// buffers: bytes 0..63 -> submsg_e_l, 64..127 -> submsg_e_r,
// 128..191 -> submsg_o_l, 192..255 -> submsg_o_r. Unaligned loads
// are used because the message pointer may not be 16-byte aligned.
inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
{
    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(submsg_e_l+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));

    _mm_storeu_si128(M128_CAST(submsg_e_r+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));

    _mm_storeu_si128(M128_CAST(submsg_o_l+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+176)));

    _mm_storeu_si128(M128_CAST(submsg_o_r+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
}
218 
// Even-step message expansion: permute the 64-bit words of the even
// sub-messages in place (shuffles and unpacks), then add the odd
// sub-messages word-wise (mod 2^64) into the even ones.
inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    __m128i temp;
    // Word permutation of submsg_e_l.
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
    _mm_storeu_si128(M128_CAST(submsg_e_l+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
    // Word permutation of submsg_e_r.
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
    _mm_storeu_si128(M128_CAST(submsg_e_r+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));

    // msg_e = permuted(msg_e) + msg_o, word-wise.
    _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));

    _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}
288 
// Odd-step message expansion: the mirror of msg_exp_even — permute
// the odd sub-messages in place, then add the even sub-messages
// word-wise (mod 2^64) into the odd ones.
inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    __m128i temp;
    // Word permutation of submsg_o_l.
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
    _mm_storeu_si128(M128_CAST(submsg_o_l+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
    // Word permutation of submsg_o_r.
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
    _mm_storeu_si128(M128_CAST(submsg_o_r+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));

    // msg_o = permuted(msg_o) + msg_e, word-wise.
    _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));

    _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}
358 
359 inline void load_sc(const lsh_u64** p_const_v, size_t i)
360 {
361  *p_const_v = &LSH512_StepConstants[i];
362 }
363 
364 inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
365 {
366  CRYPTOPP_ASSERT(i_state != NULLPTR);
367 
368  lsh_u64* submsg_e_l = i_state->submsg_e_l;
369  lsh_u64* submsg_e_r = i_state->submsg_e_r;
370 
371  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
372  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
373  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
374  _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
375  _mm_loadu_si128(CONST_M128_CAST(cv_r)),
376  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
377  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
378  _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
379  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
380  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
381  _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
382  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
383  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
384  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
385  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
386  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
387  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
388  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
389  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
390  _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
391  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
392  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
393  _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
394  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
395 }
396 
397 inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
398 {
399  CRYPTOPP_ASSERT(i_state != NULLPTR);
400 
401  lsh_u64* submsg_o_l = i_state->submsg_o_l;
402  lsh_u64* submsg_o_r = i_state->submsg_o_r;
403 
404  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
405  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
406  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
407  _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
408  _mm_loadu_si128(CONST_M128_CAST(cv_r)),
409  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
410  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
411  _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
412  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
413  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
414  _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
415  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
416  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
417  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
418  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
419  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
420  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
421  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
422  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
423  _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
424  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
425  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
426  _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
427  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
428 }
429 
430 inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
431 {
432  _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
433  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
434  _mm_loadu_si128(CONST_M128_CAST(cv_r))));
435  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
436  _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
437  _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
438  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
439  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
440  _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
441  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
442  _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
443  _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
444 }
445 
// Rotate each 64-bit word of the 8-word vector cv left by R bits.
template <unsigned int R>
inline void rotate_blk(lsh_u64 cv[8])
{
#if defined(CRYPTOPP_XOP_AVAILABLE)
    // XOP provides a native 64-bit rotate instruction.
    _mm_storeu_si128(M128_CAST(cv),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
    _mm_storeu_si128(M128_CAST(cv+2),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
    _mm_storeu_si128(M128_CAST(cv+4),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
    _mm_storeu_si128(M128_CAST(cv+6),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));

#else
    // SSE2 fallback: (x << R) | (x >> (64-R)).
    _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
    _mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
    _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
    _mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
#endif
}
474 
475 inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
476 {
477  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
478  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
479  _mm_loadu_si128(CONST_M128_CAST(const_v))));
480  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
481  _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
482  _mm_loadu_si128(CONST_M128_CAST(const_v+2))));
483  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
484  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
485  _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
486  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
487  _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
488  _mm_loadu_si128(CONST_M128_CAST(const_v+6))));
489 }
490 
// Rotate each 64-bit word of cv_r left by its gamma amount,
// g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 }. Every amount is
// a multiple of 8 bits, so each rotation is a byte permutation and
// two words are handled with a single pshufb. The shuffle masks
// below encode those per-word byte rotations.
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
{
    // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
    _mm_storeu_si128(M128_CAST(cv_r+0),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
            _mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
    _mm_storeu_si128(M128_CAST(cv_r+2),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
            _mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));

    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
            _mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
    _mm_storeu_si128(M128_CAST(cv_r+6),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
            _mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
}
508 
// The word permutation applied after each mix step: interleave the
// 64-bit words within each half (unpacks/shuffles), then exchange
// two-word groups between cv_l and cv_r.
inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
    __m128i temp[2];
    // Interleave words within cv_l.
    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
    _mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
        temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
        temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
    // Interleave words within cv_r (opposite unpack order).
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
    _mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));

    // Exchange two-word groups between the halves.
    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
    temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));

    _mm_storeu_si128(M128_CAST(cv_l+0),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
    _mm_storeu_si128(M128_CAST(cv_l+2),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
    _mm_storeu_si128(M128_CAST(cv_l+4),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
    _mm_storeu_si128(M128_CAST(cv_l+6),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
    _mm_storeu_si128(M128_CAST(cv_r+6),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)));

    _mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
    _mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
}
563 
564 /* -------------------------------------------------------- *
565 * step function
566 * -------------------------------------------------------- */
567 
// One LSH mix (without the following word permutation):
//   cv_l = rot&lt;Alpha&gt;(cv_l + cv_r) ^ const_v
//   cv_r = rot&lt;Beta&gt;(cv_r + cv_l)
//   cv_l = cv_l + cv_r; cv_r = gamma byte-rotation of cv_r
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
{
    add_blk(cv_l, cv_r);
    rotate_blk<Alpha>(cv_l);
    xor_with_const(cv_l, const_v);
    add_blk(cv_r, cv_l);
    rotate_blk<Beta>(cv_r);
    add_blk(cv_l, cv_r);
    rotate_msg_gamma(cv_r);
}
579 
580 /* -------------------------------------------------------- *
581 * compression function
582 * -------------------------------------------------------- */
583 
// Compress one 256-byte message block into the chaining variables.
inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    // The sub-message buffers alias the context's state block;
    // ctx->cv_l is the start of that block.
    LSH512_SSSE3_Internal s_state(ctx->cv_l);
    LSH512_SSSE3_Internal* i_state = &s_state;

    const lsh_u64* const_v = NULL;
    lsh_u64 *cv_l = ctx->cv_l;
    lsh_u64 *cv_r = ctx->cv_r;

    load_msg_blk(i_state, pdMsgBlk);

    // Steps 0 and 1 consume the raw message words.
    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 0);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    // Remaining steps interleave message expansion with mixing.
    for (size_t i = 1; i < NUM_STEPS / 2; i++)
    {
        msg_exp_even(i_state);
        msg_add_even(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        msg_exp_odd(i_state);
        msg_add_odd(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    // Final message injection after the last step.
    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
}
625 
626 /* -------------------------------------------------------- */
627 
628 inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
629 {
630  // The IV's are 32-byte aligned so we can use aligned loads.
631  _mm_storeu_si128(M128_CAST(cv_l+0),
632  _mm_load_si128(CONST_M128_CAST(iv+0)));
633  _mm_storeu_si128(M128_CAST(cv_l+2),
634  _mm_load_si128(CONST_M128_CAST(iv+2)));
635  _mm_storeu_si128(M128_CAST(cv_l+4),
636  _mm_load_si128(CONST_M128_CAST(iv+4)));
637  _mm_storeu_si128(M128_CAST(cv_l+6),
638  _mm_load_si128(CONST_M128_CAST(iv+6)));
639  _mm_storeu_si128(M128_CAST(cv_r+0),
640  _mm_load_si128(CONST_M128_CAST(iv+8)));
641  _mm_storeu_si128(M128_CAST(cv_r+2),
642  _mm_load_si128(CONST_M128_CAST(iv+10)));
643  _mm_storeu_si128(M128_CAST(cv_r+4),
644  _mm_load_si128(CONST_M128_CAST(iv+12)));
645  _mm_storeu_si128(M128_CAST(cv_r+6),
646  _mm_load_si128(CONST_M128_CAST(iv+14)));
647 }
648 
649 inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
650 {
651  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
652  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
653  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
654  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
655  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
656  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
657  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
658  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
659 }
660 
661 inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
662 {
663  lsh_u64* sub_msgs = ctx->sub_msgs;
664 
665  _mm_storeu_si128(M128_CAST(sub_msgs+ 0),
666  _mm_setzero_si128());
667  _mm_storeu_si128(M128_CAST(sub_msgs+ 2),
668  _mm_setzero_si128());
669  _mm_storeu_si128(M128_CAST(sub_msgs+ 4),
670  _mm_setzero_si128());
671  _mm_storeu_si128(M128_CAST(sub_msgs+ 6),
672  _mm_setzero_si128());
673  _mm_storeu_si128(M128_CAST(sub_msgs+ 8),
674  _mm_setzero_si128());
675  _mm_storeu_si128(M128_CAST(sub_msgs+10),
676  _mm_setzero_si128());
677  _mm_storeu_si128(M128_CAST(sub_msgs+12),
678  _mm_setzero_si128());
679  _mm_storeu_si128(M128_CAST(sub_msgs+14),
680  _mm_setzero_si128());
681 }
682 
// Reset state for LSH-512-224: clear sub-messages, load the IV.
inline void init224(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
}
690 
// Reset state for LSH-512-256: clear sub-messages, load the IV.
inline void init256(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
}
698 
// Reset state for LSH-512-384: clear sub-messages, load the IV.
inline void init384(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
}
706 
// Reset state for LSH-512-512: clear sub-messages, load the IV.
inline void init512(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
}
714 
715 /* -------------------------------------------------------- */
716 
717 inline void fin(LSH512_SSSE3_Context* ctx)
718 {
719  CRYPTOPP_ASSERT(ctx != NULLPTR);
720 
721  _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
722  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
723  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
724  _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
725  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
726  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
727  _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
728  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
729  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
730  _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
731  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
732  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
733 }
734 
735 /* -------------------------------------------------------- */
736 
// Copy the digest out of cv_l. The byte length and any trailing
// partial-byte bit count are decoded from the algorithm type code.
inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);
    CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

    lsh_uint alg_type = ctx->alg_type;
    lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
    lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

    // Multiplying by sizeof(lsh_u8) looks odd...
    std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
    // For digests that are not a whole number of bytes, mask off the
    // unused low-order bits of the final byte.
    if (hash_val_bit_len){
        pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
    }
}
753 
754 /* -------------------------------------------------------- */
755 
// Initialize the context for a new hash. The four standard digest
// sizes load a precomputed IV; any other type code derives an IV by
// running the step functions over a zero state seeded with the
// maximum digest byte length and the requested bit length.
lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    lsh_u32 alg_type = ctx->alg_type;
    const lsh_u64* const_v = NULL;
    ctx->remain_databitlen = 0;

    switch (alg_type){
    case LSH_TYPE_512_512:
        init512(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_384:
        init384(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_256:
        init256(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_224:
        init224(ctx);
        return LSH_SUCCESS;
    default:
        break;
    }

    // Nonstandard digest size: generate the IV on the fly.
    lsh_u64* cv_l = ctx->cv_l;
    lsh_u64* cv_r = ctx->cv_r;

    zero_iv(cv_l, cv_r);
    cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
    cv_l[1] = LSH_GET_HASHBIT(alg_type);

    for (size_t i = 0; i < NUM_STEPS / 2; i++)
    {
        //Mix
        load_sc(&const_v, i * 16);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        load_sc(&const_v, i * 16 + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    return LSH_SUCCESS;
}
803 
// Absorb databitlen bits of input (always a whole number of bytes
// here). Partial blocks are buffered in ctx->last_block; complete
// 256-byte blocks are compressed directly from the caller's buffer.
lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(data != NULLPTR);
    CRYPTOPP_ASSERT(databitlen % 8 == 0);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    if (databitlen == 0){
        return LSH_SUCCESS;
    }

    // We are byte oriented. tail bits will always be 0.
    size_t databytelen = databitlen >> 3;
    // lsh_uint pos2 = databitlen & 0x7;
    const size_t pos2 = 0;

    size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }
    if (remain_msg_bit > 0){
        return LSH_ERR_INVALID_DATABITLEN;
    }

    // Not enough for a full block yet: just buffer the input.
    if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
        std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
        ctx->remain_databitlen += (lsh_uint)databitlen;
        remain_msg_byte += (lsh_uint)databytelen;
        if (pos2){
            ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        }
        return LSH_SUCCESS;
    }

    // Complete and compress a previously buffered partial block.
    if (remain_msg_byte > 0){
        size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
        std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
        compress(ctx, ctx->last_block);
        data += more_byte;
        databytelen -= more_byte;
        remain_msg_byte = 0;
        ctx->remain_databitlen = 0;
    }

    while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
    {
        // This call to compress caused some trouble.
        // The data pointer can become unaligned in the
        // previous block.
        compress(ctx, data);
        data += LSH512_MSG_BLK_BYTE_LEN;
        databytelen -= LSH512_MSG_BLK_BYTE_LEN;
    }

    // Buffer any trailing partial block.
    if (databytelen > 0){
        std::memcpy(ctx->last_block, data, databytelen);
        ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
    }

    // pos2 is always 0 here; the branch is retained from the
    // bit-oriented reference implementation.
    if (pos2){
        ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        ctx->remain_databitlen += pos2;
    }
    return LSH_SUCCESS;
}
872 
// Pad the final block (0x80 then zeros), compress it, fold the
// chaining halves together, and write the digest to hashval.
lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(hashval != NULLPTR);

    // We are byte oriented. tail bits will always be 0.
    size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }

    // Append the padding marker. remain_msg_bit is always 0 here, so
    // the whole-byte 0x80 branch is taken.
    if (remain_msg_bit){
        ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
    }
    else{
        ctx->last_block[remain_msg_byte] = 0x80;
    }
    // Zero-fill the remainder of the final block.
    std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

    compress(ctx, ctx->last_block);

    fin(ctx);
    get_hash(ctx, hashval);

    return LSH_SUCCESS;
}
902 
903 ANONYMOUS_NAMESPACE_END
904 
905 NAMESPACE_BEGIN(CryptoPP)
906 
907 extern
908 void LSH512_Base_Restart_SSSE3(word64* state)
909 {
910  state[RemainingBits] = 0;
911  LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
912  lsh_err err = lsh512_init_ssse3(&ctx);
913 
914  if (err != LSH_SUCCESS)
915  throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
916 }
917 
918 extern
919 void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
920 {
921  LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
922  lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);
923 
924  if (err != LSH_SUCCESS)
925  throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
926 }
927 
928 extern
929 void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
930 {
931  LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
932  lsh_err err = lsh512_final_ssse3(&ctx, hash);
933 
934  if (err != LSH_SUCCESS)
935  throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
936 }
937 
938 NAMESPACE_END
939 
940 #endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:164
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:182
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:66
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:72
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:101
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:150
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:155
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
Definition: misc.h:1757
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2417
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1808
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68