Crypto++  8.8
Free C++ class library of cryptographic schemes
sosemanuk.cpp
1 // sosemanuk.cpp - originally written and placed in the public domain by Wei Dai
2 
3 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sosemanuk.cpp" to generate MASM code
4 
5 #include "pch.h"
6 #include "config.h"
7 
8 #if CRYPTOPP_MSC_VERSION
9 # pragma warning(disable: 4702 4731)
10 #endif
11 
12 #ifndef CRYPTOPP_GENERATE_X64_MASM
13 
14 #include "sosemanuk.h"
15 #include "serpentp.h"
16 #include "secblock.h"
17 #include "misc.h"
18 #include "cpu.h"
19 
20 NAMESPACE_BEGIN(CryptoPP)
21 
// Names the implementation that will be used for this object.
// Returns "SSE2" only when all of the following hold:
//   - CRYPTOPP_DISABLE_SOSEMANUK_ASM is not defined,
//   - CRYPTOPP_SSE2_ASM_AVAILABLE is non-zero at compile time,
//   - HasSSE2() reports true at run time.
// In every other case the portable implementation is reported as "C++".
22 std::string SosemanukPolicy::AlgorithmProvider() const
23 {
24 #ifndef CRYPTOPP_DISABLE_SOSEMANUK_ASM
25 # if CRYPTOPP_SSE2_ASM_AVAILABLE
26  if (HasSSE2())
27  return "SSE2";
28 # endif
29 #endif
30  return "C++";
31 }
32 
// Derives the internal key material in m_key from the user key.
//   params  - not consulted (explicitly marked unused).
//   userKey - caller's key bytes, passed straight to Serpent_KeySchedule.
//   keylen  - length of userKey in bytes, passed straight through.
// The literal 24 is Serpent_KeySchedule's second argument; presumably the
// number of Serpent rounds to schedule (CipherResynchronize runs 3 passes of
// 8 rounds over m_key) -- confirm against serpentp.h.
33 void SosemanukPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKey, size_t keylen)
34 {
35  CRYPTOPP_UNUSED(params);
36  Serpent_KeySchedule(m_key, 24, userKey, keylen);
37 }
38 
// Initialises the 12-word cipher state (m_state[0..11]) from a 16-byte IV.
//   keystreamBuffer - not used by this policy.
//   iv              - 16 bytes, read as four little-endian 32-bit words.
//   length          - must be 16 (checked by CRYPTOPP_ASSERT only).
// The IV words are pushed through three passes of eight KX / S-box / LT
// rounds (macros expected from serpentp.h) keyed by m_key, with k advanced
// by 32 words per pass. The state is sampled at three points:
//   after the 12th round -> m_state[6..9]
//   after the 18th round -> m_state[4], m_state[5], m_state[10], m_state[11]
//   after the final KX   -> m_state[0..3]
// m_state[10] and m_state[11] are then advanced once before returning.
39 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
40 {
41  CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(iv), CRYPTOPP_UNUSED(length);
42  CRYPTOPP_ASSERT(length==16);
43 
44  word32 a, b, c, d, e;
45 
    // Listing line 46 was absent from this copy, leaving `Block` undeclared;
    // restored here. The IV is decoded as little-endian word32 values.
46  typedef BlockGetAndPut<word32, LittleEndian> Block;
47  Block::Get(iv)(a)(b)(c)(d);
48 
49  const word32 *k = m_key;
    // Pass counter, 1..3; each pass below performs eight rounds.
50  unsigned int i=1;
51 
52  do
53  {
54  beforeS0(KX); beforeS0(S0); afterS0(LT);
55  afterS0(KX); afterS0(S1); afterS1(LT);
56  if (i == 3) // after 18th round
57  {
58  m_state[4] = b;
59  m_state[5] = e;
60  m_state[10] = c;
61  m_state[11] = a;
62  }
63  afterS1(KX); afterS1(S2); afterS2(LT);
64  afterS2(KX); afterS2(S3); afterS3(LT);
65  if (i == 2) // after 12th round
66  {
67  m_state[6] = c;
68  m_state[7] = d;
69  m_state[8] = b;
70  m_state[9] = e;
71  }
72  afterS3(KX); afterS3(S4); afterS4(LT);
73  afterS4(KX); afterS4(S5); afterS5(LT);
74  afterS5(KX); afterS5(S6); afterS6(LT);
75  afterS6(KX); afterS6(S7); afterS7(LT);
76 
    // The third pass ends here; the trailing key mix is applied after the loop.
77  if (i == 3)
78  break;
79 
    // Re-seat the working words for the next pass. The order is dictated by
    // which variables the serpentp.h macros leave results in (presumably so
    // the beforeS0 naming applies again -- confirm against serpentp.h).
80  ++i;
81  c = b;
82  b = e;
83  e = d;
84  d = a;
85  a = e;
86  k += 32;
87  }
88  while (true);
89 
90  afterS7(KX);
91 
92  m_state[0] = a;
93  m_state[1] = b;
94  m_state[2] = e;
95  m_state[3] = d;
96 
    // XMUX(c, x, y) yields x when the low bit of c is 0 and x ^ y when it is 1.
    // The macro stays defined and is reused by STEP() in OperateKeystream.
97 #define XMUX(c, x, y) (x ^ (y & (0 - (c & 1))))
98  m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
99  m_state[10] = rotlConstant<7>(m_state[10] * 0x54655307);
100 }
101 
// Lookup tables used by the Sosemanuk state update, 512 words in two halves:
//   [0..255]   - read by MUL_A()
//   [256..511] - read by DIV_A() (index 256 + low byte of the operand)
// The first half exists in two variants chosen at compile time. The selecting
// condition is the same one that selects between the two MUL_A() definitions
// in OperateKeystream: on x86/x32/x64 with ASM enabled, MUL_A rotates the word
// and indexes by its low byte; otherwise it shifts and indexes by the top byte.
// The array has C linkage and is also read by the assembly path, which
// addresses the second half at byte offset 1024.
102 extern "C" {
103 word32 s_sosemanukMulTables[512] = {
104 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
105  0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
106  0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
107  0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
108  0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
109  0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
110  0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
111  0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
112  0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
113  0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
114  0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
115  0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
116  0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
117  0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
118  0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
119  0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
120  0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
121  0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
122  0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
123  0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
124  0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
125  0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
126  0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
127  0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
128  0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
129  0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
130  0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
131  0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
132  0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
133  0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
134  0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
135  0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
136  0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
137  0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
138  0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
139  0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
140  0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
141  0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
142  0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
143  0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
144  0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
145  0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
146  0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
147  0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
148  0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
149  0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
150  0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
151  0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
152  0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
153  0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
154  0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
155  0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
156  0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
157  0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
158  0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
159  0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
160  0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
161  0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
162  0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
163  0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
164  0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
165  0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
166  0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
167  0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
168  0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
169 #else
170  0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
171  0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
172  0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
173  0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
174  0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
175  0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
176  0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
177  0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
178  0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
179  0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
180  0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
181  0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
182  0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
183  0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
184  0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
185  0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
186  0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
187  0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
188  0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
189  0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
190  0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
191  0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
192  0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
193  0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
194  0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
195  0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
196  0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
197  0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
198  0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
199  0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
200  0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
201  0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
202  0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
203  0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
204  0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
205  0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
206  0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
207  0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
208  0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
209  0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
210  0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
211  0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
212  0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
213  0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
214  0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
215  0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
216  0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
217  0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
218  0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
219  0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
220  0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
221  0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
222  0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
223  0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
224  0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
225  0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
226  0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
227  0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
228  0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
229  0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
230  0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
231  0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
232  0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
233  0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
234 #endif
235  0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
236  0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
237  0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
238  0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
239  0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
240  0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
241  0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
242  0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
243  0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
244  0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
245  0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
246  0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
247  0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
248  0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
249  0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
250  0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
251  0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
252  0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
253  0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
254  0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
255  0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
256  0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
257  0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
258  0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
259  0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
260  0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
261  0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
262  0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
263  0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
264  0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
265  0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
266  0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
267  0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
268  0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
269  0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
270  0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
271  0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
272  0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
273  0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
274  0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
275  0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
276  0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
277  0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
278  0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
279  0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
280  0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
281  0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
282  0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
283  0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
284  0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
285  0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
286  0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
287  0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
288  0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
289  0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
290  0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
291  0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
292  0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
293  0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
294  0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
295  0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
296  0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
297  0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
298  0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
299 };
300 }
301 
302 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
// Reports the buffer alignment this policy asks for.
// Returns 16 when the SSE2 assembly is compiled in and HasSSE2() is true at
// run time; otherwise GetAlignmentOf<word32>().
// When built with the Intel compiler, a P4 CPU (IsP4()) also gets the word32
// alignment; the in-line note says that compiler's own code is faster there.
// The dangling `else` before `#endif` is deliberate: with SSE2 asm compiled
// out, the if/else disappears and only the final return remains.
303 unsigned int SosemanukPolicy::GetAlignment() const
304 {
305 #if CRYPTOPP_SSE2_ASM_AVAILABLE
306 #ifdef __INTEL_COMPILER
307  if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
308 #else
309  if (HasSSE2())
310 #endif
311  return 16;
312  else
313 #endif
314  return GetAlignmentOf<word32>();
315 }
316 
// Reports the preferred amount of data per call.
// Returns 4*BYTES_PER_ITERATION when the SSE2 assembly is compiled in and
// HasSSE2() is true at run time; otherwise BYTES_PER_ITERATION.
// The run-time test is the same one GetAlignment() and OperateKeystream() use,
// including the Intel-compiler exclusion of P4 CPUs.
// As in GetAlignment(), the `else` before `#endif` vanishes together with the
// `if` when SSE2 asm is not available.
317 unsigned int SosemanukPolicy::GetOptimalBlockSize() const
318 {
319 #if CRYPTOPP_SSE2_ASM_AVAILABLE
320 #ifdef __INTEL_COMPILER
321  if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
322 #else
323  if (HasSSE2())
324 #endif
325  return 4*BYTES_PER_ITERATION;
326  else
327 #endif
328  return BYTES_PER_ITERATION;
329 }
330 #endif
331 
// Declaration of the stand-alone assembly routine, used only when
// CRYPTOPP_X64_MASM_AVAILABLE is defined. Its body is the PROC of the same
// name that OperateKeystream emits when this file is preprocessed with
// CRYPTOPP_GENERATE_X64_MASM. Note the argument order differs from the member
// function: count, input, output, then the state words.
332 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
333 extern "C" {
334 void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state);
335 }
336 #endif
337 
338 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
339 {
340 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
341 
342 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
343  Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data());
344  return;
345 #endif
346 
347 #if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
348 #ifdef CRYPTOPP_GENERATE_X64_MASM
349  ALIGN 8
350  Sosemanuk_OperateKeystream PROC FRAME
351  rex_push_reg rsi
352  push_reg rdi
353  alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
354  save_xmm128 xmm6, 02f0h
355  save_xmm128 xmm7, 0300h
356  .endprolog
357  mov rdi, r8
358  mov rax, r9
359 #else
360 #ifdef __INTEL_COMPILER
361  if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
362 #else
363  if (HasSSE2())
364 #endif
365  {
366 #ifdef __GNUC__
367  #if CRYPTOPP_BOOL_X64
369  #endif
370  __asm__ __volatile__
371  (
372  INTEL_NOPREFIX
373  AS_PUSH_IF86( bx)
374 #else
375  word32 *state = m_state;
376  AS2( mov WORD_REG(ax), state)
377  AS2( mov WORD_REG(di), output)
378  AS2( mov WORD_REG(dx), input)
379  AS2( mov WORD_REG(cx), iterationCount)
380 #endif
381 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
382 
383 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64
384  #define SSE2_workspace %5
385 #else
386  #define SSE2_workspace WORD_REG(sp)
387 #endif
388 
389 #define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ]
390 #define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ]
391 #define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ]
392 #define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ]
393 #define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ]
394 #define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ]
395 #define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ]
396 #define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ
397 #define SSE2_uvStart SSE2_stateCopy + 12*4
398 
399 #if (CRYPTOPP_BOOL_X86) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
400  AS_PUSH_IF86( bp)
401  AS2( mov AS_REG_6, esp)
402  AS2( and esp, -16)
403  AS2( sub esp, 80*4*2+12*4+8*WORD_SZ) // 80 v's, 80 u's, 12 state, 8 locals
404  AS2( mov [esp], AS_REG_6)
405 #endif
406  AS2( mov SSE2_output, WORD_REG(di))
407  AS2( mov SSE2_input, WORD_REG(dx))
408  AS2( mov SSE2_state, WORD_REG(ax))
409 #ifndef CRYPTOPP_MSC_VERSION
410  AS2( mov SSE2_pMulTables, WORD_REG(si))
411 #endif
412  AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
413  AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
414  AS2( mov SSE2_wordsLeft, WORD_REG(si))
415  AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
416  AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
417  AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
418  AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
419  AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
420  AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
421  AS2( psrlq xmm0, 32)
422  AS2( movd AS_REG_6d, xmm0) // s(9)
423  AS2( mov ecx, [WORD_REG(ax)+10*4])
424  AS2( mov edx, [WORD_REG(ax)+11*4])
425  AS2( pcmpeqb xmm7, xmm7) // all ones
426 
427 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
428 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
429 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
430 
431 #define R10 ecx
432 #define R11 edx
433 #define R20 edx
434 #define R21 ecx
435 // workaround bug in GAS 2.15
436 #define R20r WORD_REG(dx)
437 #define R21r WORD_REG(cx)
438 
439 #define SSE2_STEP(i, j) \
440  AS2( mov eax, [s(i+0)])\
441  AS2( mov [v(i)], eax)\
442  AS2( rol eax, 8)\
443  AS2( lea AS_REG_7, [AS_REG_6 + R2##j##r])\
444  AS2( xor AS_REG_7d, R1##j)\
445  AS2( mov [u(i)], AS_REG_7d)\
446  AS2( mov AS_REG_7d, 1)\
447  AS2( and AS_REG_7d, R2##j)\
448  AS1( neg AS_REG_7d)\
449  AS2( and AS_REG_7d, AS_REG_6d)\
450  AS2( xor AS_REG_6d, eax)\
451  AS2( movzx eax, al)\
452  AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
453  AS2( mov eax, [s(i+3)])\
454  AS2( xor AS_REG_7d, [s(i+2)])\
455  AS2( add R1##j, AS_REG_7d)\
456  AS2( movzx AS_REG_7d, al)\
457  AS2( shr eax, 8)\
458  AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
459  AS2( xor AS_REG_6d, eax)\
460  AS2( imul R2##j, AS_HEX(54655307))\
461  AS2( rol R2##j, 7)\
462  AS2( mov [s(i+0)], AS_REG_6d)\
463 
464  ASL(2) // outer loop, each iteration of this processes 80 words
465  AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u
466  AS2( mov WORD_REG(ax), 80)
467  AS2( cmp WORD_REG(si), 80)
468  AS2( cmovg WORD_REG(si), WORD_REG(ax))
469  AS2( mov SSE2_wordsLeft2, WORD_REG(si))
470  AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)]) // use to end first inner loop
471  AS2( mov SSE2_diEnd, WORD_REG(si))
472 #ifdef CRYPTOPP_MSC_VERSION
473  AS2( lea WORD_REG(si), s_sosemanukMulTables)
474 #else
475  AS2( mov WORD_REG(si), SSE2_pMulTables)
476 #endif
477 
478  ASL(0) // first inner loop, 20 words each, 4 iterations
479  SSE2_STEP(0, 0)
480  SSE2_STEP(1, 1)
481  SSE2_STEP(2, 0)
482  SSE2_STEP(3, 1)
483  SSE2_STEP(4, 0)
484  SSE2_STEP(5, 1)
485  SSE2_STEP(6, 0)
486  SSE2_STEP(7, 1)
487  SSE2_STEP(8, 0)
488  SSE2_STEP(9, 1)
489  SSE2_STEP(10, 0)
490  SSE2_STEP(11, 1)
491  SSE2_STEP(12, 0)
492  SSE2_STEP(13, 1)
493  SSE2_STEP(14, 0)
494  SSE2_STEP(15, 1)
495  SSE2_STEP(16, 0)
496  SSE2_STEP(17, 1)
497  SSE2_STEP(18, 0)
498  SSE2_STEP(19, 1)
499  // loop
500  AS2( add WORD_REG(di), 5*4)
501  AS2( cmp WORD_REG(di), SSE2_diEnd)
502  ASJ( jne, 0, b)
503 
504  AS2( mov WORD_REG(ax), SSE2_input)
505  AS2( mov AS_REG_7, SSE2_output)
506  AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u
507  AS2( mov WORD_REG(si), SSE2_wordsLeft2)
508 
509  ASL(1) // second inner loop, 16 words each, 5 iterations
510  AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
511  AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
512  AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
513  AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
514  // S2
515  AS2( movdqa xmm4, xmm0)
516  AS2( pand xmm0, xmm2)
517  AS2( pxor xmm0, xmm3)
518  AS2( pxor xmm2, xmm1)
519  AS2( pxor xmm2, xmm0)
520  AS2( por xmm3, xmm4)
521  AS2( pxor xmm3, xmm1)
522  AS2( pxor xmm4, xmm2)
523  AS2( movdqa xmm1, xmm3)
524  AS2( por xmm3, xmm4)
525  AS2( pxor xmm3, xmm0)
526  AS2( pand xmm0, xmm1)
527  AS2( pxor xmm4, xmm0)
528  AS2( pxor xmm1, xmm3)
529  AS2( pxor xmm1, xmm4)
530  AS2( pxor xmm4, xmm7)
531  // xor with v
532  AS2( pxor xmm2, [WORD_REG(di)+80*4])
533  AS2( pxor xmm3, [WORD_REG(di)+80*5])
534  AS2( pxor xmm1, [WORD_REG(di)+80*6])
535  AS2( pxor xmm4, [WORD_REG(di)+80*7])
536  // exit loop early if less than 16 words left to output
537  // this is necessary because block size is 20 words, and we output 16 words in each iteration of this loop
538  AS2( cmp WORD_REG(si), 16)
539  ASJ( jl, 4, f)
540  // unpack
541  AS2( movdqa xmm6, xmm2)
542  AS2( punpckldq xmm2, xmm3)
543  AS2( movdqa xmm5, xmm1)
544  AS2( punpckldq xmm1, xmm4)
545  AS2( movdqa xmm0, xmm2)
546  AS2( punpcklqdq xmm2, xmm1)
547  AS2( punpckhqdq xmm0, xmm1)
548  AS2( punpckhdq xmm6, xmm3)
549  AS2( punpckhdq xmm5, xmm4)
550  AS2( movdqa xmm3, xmm6)
551  AS2( punpcklqdq xmm6, xmm5)
552  AS2( punpckhqdq xmm3, xmm5)
553 
554  // output keystream
555  AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
556 
557  // loop
558  AS2( add WORD_REG(di), 4*4)
559  AS2( sub WORD_REG(si), 16)
560  ASJ( jnz, 1, b)
561 
562  // outer loop
563  AS2( mov WORD_REG(si), SSE2_wordsLeft)
564  AS2( sub WORD_REG(si), 80)
565  ASJ( jz, 6, f)
566  AS2( mov SSE2_wordsLeft, WORD_REG(si))
567  AS2( mov SSE2_input, WORD_REG(ax))
568  AS2( mov SSE2_output, AS_REG_7)
569  ASJ( jmp, 2, b)
570 
571  ASL(4) // final output of less than 16 words
572  AS2( test WORD_REG(ax), WORD_REG(ax))
573  ASJ( jz, 5, f)
574  AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4])
575  AS2( pxor xmm2, xmm0)
576  AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4])
577  AS2( pxor xmm3, xmm0)
578  AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4])
579  AS2( pxor xmm1, xmm0)
580  AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4])
581  AS2( pxor xmm4, xmm0)
582  AS2( add WORD_REG(ax), 16)
583  ASL(5)
584  AS2( movd dword ptr [AS_REG_7+0*4], xmm2)
585  AS2( movd dword ptr [AS_REG_7+1*4], xmm3)
586  AS2( movd dword ptr [AS_REG_7+2*4], xmm1)
587  AS2( movd dword ptr [AS_REG_7+3*4], xmm4)
588  AS2( sub WORD_REG(si), 4)
589  ASJ( jz, 6, f)
590  AS2( add AS_REG_7, 16)
591  AS2( psrldq xmm2, 4)
592  AS2( psrldq xmm3, 4)
593  AS2( psrldq xmm1, 4)
594  AS2( psrldq xmm4, 4)
595  ASJ( jmp, 4, b)
596 
597  ASL(6) // save state
598  AS2( mov AS_REG_6, SSE2_state)
599  AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
600  AS2( movdqa [AS_REG_6+0*16], xmm0)
601  AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
602  AS2( movdqa [AS_REG_6+1*16], xmm0)
603  AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
604  AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
605  AS2( mov [AS_REG_6+10*4], ecx)
606  AS2( mov [AS_REG_6+11*4], edx)
607 
608  AS_POP_IF86( sp)
609  AS_POP_IF86( bp)
610 
611 #ifdef __GNUC__
612  AS_POP_IF86( bx)
613  ATT_PREFIX
614  :
615  : "a" (m_state.data()), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input)
617  , "r" (workspace.data())
618  : "memory", "cc", "%r9", "%r10", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
619  #else
620  : "memory", "cc"
621  #endif
622  );
623 #endif
624 #ifdef CRYPTOPP_GENERATE_X64_MASM
625  movdqa xmm6, [rsp + 02f0h]
626  movdqa xmm7, [rsp + 0300h]
627  add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
628  pop rdi
629  pop rsi
630  ret
631  Sosemanuk_OperateKeystream ENDP
632 #else
633  }
634  else
635 #endif
636 #endif
637 #ifndef CRYPTOPP_GENERATE_X64_MASM
638  {
639 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
640 #define MUL_A(x) (x = (rotlConstant<8>(x)), x ^ s_sosemanukMulTables[byte(x)])
641 #else
642 #define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
643 #endif
644 
645 #define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
646 
647 #define r1(i) ((i%2) ? reg2 : reg1)
648 #define r2(i) ((i%2) ? reg1 : reg2)
649 
650 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
651  u = (s##x9 + r2(x0)) ^ r1(x0);\
652  t = v = s##x0;\
653  s##x0 = MUL_A(t) ^ DIV_A(s##x3) ^ s##x9;\
654  r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
655  r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
656 
657 #define SOSEMANUK_OUTPUT(x) \
658  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
659  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
660  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
661  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
662 
663 #define OUTPUT4 \
664  S2(0, u0, u1, u2, u3, u4);\
665  CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
666 
667  word32 s0 = m_state[0];
668  word32 s1 = m_state[1];
669  word32 s2 = m_state[2];
670  word32 s3 = m_state[3];
671  word32 s4 = m_state[4];
672  word32 s5 = m_state[5];
673  word32 s6 = m_state[6];
674  word32 s7 = m_state[7];
675  word32 s8 = m_state[8];
676  word32 s9 = m_state[9];
677  word32 reg1 = m_state[10];
678  word32 reg2 = m_state[11];
679  word32 t, u0, u1, u2, u3, u4, v0, v1, v2, v3;
680 
681  do
682  {
683  STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
684  STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
685  STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
686  STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
687  OUTPUT4
688  STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
689  STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
690  STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
691  STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
692  OUTPUT4
693  STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
694  STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
695  STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
696  STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
697  OUTPUT4
698  STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
699  STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
700  STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
701  STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
702  OUTPUT4
703  STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
704  STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
705  STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
706  STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
707  OUTPUT4
708  }
709  while (--iterationCount);
710 
711  m_state[0] = s0;
712  m_state[1] = s1;
713  m_state[2] = s2;
714  m_state[3] = s3;
715  m_state[4] = s4;
716  m_state[5] = s5;
717  m_state[6] = s6;
718  m_state[7] = s7;
719  m_state[8] = s8;
720  m_state[9] = s9;
721  m_state[10] = reg1;
722  m_state[11] = reg2;
723  }
724 }
725 
726 NAMESPACE_END
727 
728 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
Fixed size stack-based SecBlock with 16-byte alignment.
Definition: secblock.h:1259
Interface for retrieving values given their names.
Definition: cryptlib.h:327
A::pointer data()
Provides a pointer to the first element in the memory block.
Definition: secblock.h:857
Library configuration file.
#define CRYPTOPP_BOOL_X64
64-bit x86 platform
Definition: config_cpu.h:48
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:72
word128 dword
Double word used for multiprecision integer arithmetic.
Definition: config_int.h:203
Functions for CPU features and intrinsics.
Utility functions for the Crypto++ library.
Crypto++ library namespace.
Precompiled header file.
Classes and functions for secure memory allocations.
Classes for Sosemanuk stream cipher.
KeystreamOperation
Keystream operation flags.
Definition: strciphr.h:88
static const int BYTES_PER_ITERATION
Number of bytes for an iteration.
Definition: strciphr.h:211
Access a block of memory.
Definition: misc.h:3053
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68