Print this page
first pass
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/crypto/aes/amd64/aes_intel.s
+++ new/usr/src/common/crypto/aes/amd64/aes_intel.s
1 1 /*
2 2 * ====================================================================
3 3 * Written by Intel Corporation for the OpenSSL project to add support
4 4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 5 * in source and binary forms are granted according to the OpenSSL
6 6 * license.
7 7 *
8 8 * Author: Huang Ying <ying.huang at intel dot com>
9 9 * Vinodh Gopal <vinodh.gopal at intel dot com>
10 10 * Kahraman Akdemir
11 11 *
12 12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 13 * instructions that are going to be introduced in the next generation
14 14 * of Intel processor, as of 2009. These instructions enable fast and
15 15 * secure data encryption and decryption, using the Advanced Encryption
16 16 * Standard (AES), defined by FIPS Publication number 197. The
17 17 * architecture introduces six instructions that offer full hardware
18 18 * support for AES. Four of them support high performance data
19 19 * encryption and decryption, and the other two instructions support
20 20 * the AES key expansion procedure.
21 21 * ====================================================================
22 22 */
23 23
24 24 /*
25 25 * ====================================================================
26 26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
27 27 *
28 28 * Redistribution and use in source and binary forms, with or without
29 29 * modification, are permitted provided that the following conditions
30 30 * are met:
31 31 *
32 32 * 1. Redistributions of source code must retain the above copyright
33 33 * notice, this list of conditions and the following disclaimer.
34 34 *
35 35 * 2. Redistributions in binary form must reproduce the above copyright
36 36 * notice, this list of conditions and the following disclaimer in
37 37 * the documentation and/or other materials provided with the
38 38 * distribution.
39 39 *
40 40 * 3. All advertising materials mentioning features or use of this
41 41 * software must display the following acknowledgment:
42 42 * "This product includes software developed by the OpenSSL Project
43 43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
44 44 *
45 45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 46 * endorse or promote products derived from this software without
47 47 * prior written permission. For written permission, please contact
48 48 * openssl-core@openssl.org.
49 49 *
50 50 * 5. Products derived from this software may not be called "OpenSSL"
51 51 * nor may "OpenSSL" appear in their names without prior written
52 52 * permission of the OpenSSL Project.
53 53 *
54 54 * 6. Redistributions of any form whatsoever must retain the following
55 55 * acknowledgment:
56 56 * "This product includes software developed by the OpenSSL Project
57 57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
58 58 *
59 59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 71 * ====================================================================
72 72 */
73 73
74 74 /*
75 75 * ====================================================================
↓ open down ↓ |
75 lines elided |
↑ open up ↑ |
76 76 * OpenSolaris OS modifications
77 77 *
78 78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
79 79 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
82 82 *
83 83 * This OpenSolaris version has these major changes from the original source:
84 84 *
85 85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 - * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START
87 - * and EXPORT DELETE END markers, and dummy C function definitions for lint.
86 + * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 + * definitions for lint.
88 88 *
89 89 * 2. Formatted code, added comments, and added #includes and #defines.
90 90 *
91 91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 92 * calling kpreempt_disable() and kpreempt_enable().
93 93 * If the TS bit is not set, save and restore %xmm registers at the beginning
94 94 * and end of function calls (%xmm* registers are not saved and restored
95 95 * during kernel thread preemption).
96 96 *
97 97 * 4. Renamed functions, reordered parameters, and changed return value
98 98 * to match OpenSolaris:
99 99 *
100 100 * OpenSSL interface:
101 101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 102 * const int bits, AES_KEY *key);
103 103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 104 * const int bits, AES_KEY *key);
105 105 * Return values for above are non-zero on error, 0 on success.
106 106 *
107 107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 108 * const AES_KEY *key);
109 109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 110 * const AES_KEY *key);
111 111 * typedef struct aes_key_st {
112 112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
113 113 * int rounds;
114 114 * unsigned int pad[3];
115 115 * } AES_KEY;
116 116 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
117 117 * (ks32) instead of 64-bit (ks64)).
118 118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
119 119 *
120 120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 122 * const uint32_t cipherKey[], uint64_t keyBits);
123 123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 124 * const uint32_t cipherKey[], uint64_t keyBits);
125 125 * Return values for above are 0 on error, number of rounds on success.
126 126 *
127 127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 128 * const uint32_t pt[4], uint32_t ct[4]);
129 129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 130 * const uint32_t pt[4], uint32_t ct[4]);
131 131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
133 133 *
134 134 * typedef union {
135 135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 136 * } aes_ks_t;
137 137 * typedef struct aes_key {
138 138 * aes_ks_t encr_ks, decr_ks;
139 139 * long double align128;
140 140 * int flags, nr, type;
141 141 * } aes_key_t;
142 142 *
143 143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 144 * ct is cipher text, and MAX_AES_NR is 14.
145 145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
146 146 *
147 147 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
148 148 *
149 149 * ====================================================================
150 150 */
151 151
152 152 #if defined(lint) || defined(__lint)
153 153
154 154 #include <sys/types.h>
155 155
156 156 /* ARGSUSED */
157 157 void
158 158 aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
159 159 uint32_t ct[4]) {
160 160 } /* empty body: this definition is compiled only under lint */
161 161 /* ARGSUSED */
162 162 void
163 163 aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
164 164 uint32_t pt[4]) {
165 165 } /* empty body: this definition is compiled only under lint */
166 166 /* ARGSUSED */
167 167 int
168 168 rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
169 169 uint64_t keyBits) {
170 170 return (0); /* placeholder value; the real routine is the assembly below */
171 171 }
172 172 /* ARGSUSED */
173 173 int
174 174 rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
175 175 uint64_t keyBits) {
176 176 return (0); /* placeholder value; the real routine is the assembly below */
177 177 }
178 178
179 179
180 180 #else /* lint */
181 181
182 182 #include <sys/asm_linkage.h>
183 183 #include <sys/controlregs.h>
184 184 #ifdef _KERNEL
185 185 #include <sys/machprivregs.h>
186 186 #endif
187 187
188 188 #ifdef _KERNEL
189 189 /*
190 190 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is,
191 191 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
192 192 * uses it to pass P2 to syscall.
193 193 * This also occurs with the STTS macro, but we don't care if
194 194 * P2 (%rsi) is modified just before function exit.
195 195 * The CLTS and STTS macros push and pop P1 (%rdi) already.
196 196 */
197 197 #ifdef __xpv
198 198 #define PROTECTED_CLTS \
199 199 push %rsi; /* keep P2 safe across CLTS */ \
200 200 CLTS; \
201 201 pop %rsi /* restore P2 */
202 202 #else
203 203 #define PROTECTED_CLTS \
204 204 CLTS /* plain CLTS when not built for __xpv */
205 205 #endif /* __xpv */
206 206
207 207 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) /* if CR0_TS is clear, save %xmm0/%xmm1 on the stack; otherwise clear CR0_TS */ \
208 208 push %rbp; \
209 209 mov %rsp, %rbp; /* %rbp keeps the pre-alignment %rsp */ \
210 210 movq %cr0, tmpreg; /* tmpreg holds the entry-time %cr0 for the matching SET_TS_OR_POP macro */ \
211 211 testq $CR0_TS, tmpreg; \
212 212 jnz 1f; /* TS set: skip the save and clear TS instead */ \
213 213 and $-XMM_ALIGN, %rsp; /* round %rsp down to XMM_ALIGN (assumed a power of two) */ \
214 214 sub $[XMM_SIZE * 2], %rsp; /* reserve two XMM_SIZE slots */ \
215 215 movaps %xmm0, 16(%rsp); \
216 216 movaps %xmm1, (%rsp); \
217 217 jmp 2f; \
218 218 1: \
219 219 PROTECTED_CLTS; \
220 220 2:
221 221
222 222 /*
223 223 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
224 224 * otherwise set CR0_TS.
225 225 */
226 226 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
227 227 testq $CR0_TS, tmpreg; /* tmpreg must still hold the %cr0 value read on entry */ \
228 228 jnz 1f; \
229 229 movaps (%rsp), %xmm1; /* reload from the same slots the CLEAR_TS_OR_PUSH macro wrote */ \
230 230 movaps 16(%rsp), %xmm0; \
231 231 jmp 2f; \
232 232 1: \
233 233 STTS(tmpreg); \
234 234 2: \
235 235 mov %rbp, %rsp; /* drop the aligned save area, if any */ \
236 236 pop %rbp
237 237
238 238 /*
239 239 * If CR0_TS is not set, align stack (with push %rbp) and push
240 240 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
241 241 */
242 242 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
243 243 push %rbp; \
244 244 mov %rsp, %rbp; /* %rbp keeps the pre-alignment %rsp */ \
245 245 movq %cr0, tmpreg; /* tmpreg holds the entry-time %cr0 for the matching SET_TS_OR_POP macro */ \
246 246 testq $CR0_TS, tmpreg; \
247 247 jnz 1f; /* TS set: skip the save and clear TS instead */ \
248 248 and $-XMM_ALIGN, %rsp; /* round %rsp down to XMM_ALIGN (assumed a power of two) */ \
249 249 sub $[XMM_SIZE * 7], %rsp; /* reserve seven XMM_SIZE slots */ \
250 250 movaps %xmm0, 96(%rsp); \
251 251 movaps %xmm1, 80(%rsp); \
252 252 movaps %xmm2, 64(%rsp); \
253 253 movaps %xmm3, 48(%rsp); \
254 254 movaps %xmm4, 32(%rsp); \
255 255 movaps %xmm5, 16(%rsp); \
256 256 movaps %xmm6, (%rsp); \
257 257 jmp 2f; \
258 258 1: \
259 259 PROTECTED_CLTS; \
260 260 2:
261 261
262 262
263 263 /*
264 264 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
265 265 * otherwise set CR0_TS.
266 266 */
267 267 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
268 268 testq $CR0_TS, tmpreg; /* tmpreg must still hold the %cr0 value read on entry */ \
269 269 jnz 1f; \
270 270 movaps (%rsp), %xmm6; /* reload from the same slots the CLEAR_TS_OR_PUSH macro wrote */ \
271 271 movaps 16(%rsp), %xmm5; \
272 272 movaps 32(%rsp), %xmm4; \
273 273 movaps 48(%rsp), %xmm3; \
274 274 movaps 64(%rsp), %xmm2; \
275 275 movaps 80(%rsp), %xmm1; \
276 276 movaps 96(%rsp), %xmm0; \
277 277 jmp 2f; \
278 278 1: \
279 279 STTS(tmpreg); \
280 280 2: \
281 281 mov %rbp, %rsp; /* drop the aligned save area, if any */ \
282 282 pop %rbp
283 283
284 284
285 285 #else
286 286 #define PROTECTED_CLTS
287 287 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
288 288 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
289 289 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
290 290 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
291 291 #endif /* _KERNEL */
292 292
293 293
294 294 /*
295 295 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
296 296 * _key_expansion_256a(), _key_expansion_256b()
297 297 *
↓ open down ↓ |
200 lines elided |
↑ open up ↑ |
298 298 * Helper functions called by rijndael_key_setup_enc_intel().
299 299 * Also used indirectly by rijndael_key_setup_dec_intel().
300 300 *
301 301 * Input:
302 302 * %xmm0 User-provided cipher key
303 303 * %xmm1 Round constant
304 304 * Output:
305 305 * (%rcx) AES key
306 306 */
307 307
308 - /* EXPORT DELETE START */
309 308 .align 16
310 309 _key_expansion_128: /* two entry names share the routine below */
311 310 _key_expansion_256a:
312 311 pshufd $0b11111111, %xmm1, %xmm1 /* broadcast dword 3 of %xmm1 to all four lanes */
313 312 shufps $0b00010000, %xmm0, %xmm4 /* relies on lane 0 of %xmm4 being zero on entry */
314 313 pxor %xmm4, %xmm0
315 314 shufps $0b10001100, %xmm0, %xmm4 /* lane 0 of %xmm4 stays zero for the next call */
316 315 pxor %xmm4, %xmm0
317 316 pxor %xmm1, %xmm0 /* fold in the broadcast %xmm1 word */
318 317 movaps %xmm0, (%rcx) /* store %xmm0 as the next round key (aligned store) */
319 318 add $0x10, %rcx /* advance %rcx past the 16 bytes just written */
320 319 ret
321 320 SET_SIZE(_key_expansion_128)
322 321 SET_SIZE(_key_expansion_256a)
323 322
324 323 .align 16
325 324 _key_expansion_192a:
326 325 pshufd $0b01010101, %xmm1, %xmm1 /* broadcast dword 1 of %xmm1 to all four lanes */
327 326 shufps $0b00010000, %xmm0, %xmm4 /* relies on lane 0 of %xmm4 being zero on entry */
328 327 pxor %xmm4, %xmm0
329 328 shufps $0b10001100, %xmm0, %xmm4 /* lane 0 of %xmm4 stays zero for the next call */
330 329 pxor %xmm4, %xmm0
331 330 pxor %xmm1, %xmm0 /* fold in the broadcast %xmm1 word */
332 331
333 332 movaps %xmm2, %xmm5 /* %xmm5 = copy of %xmm2, shifted below */
334 333 movaps %xmm2, %xmm6 /* %xmm6 = copy of the incoming %xmm2, used in the first store */
335 334 pslldq $4, %xmm5 /* byte shift left by 4 (one dword) */
336 335 pshufd $0b11111111, %xmm0, %xmm3 /* broadcast dword 3 of the updated %xmm0 */
337 336 pxor %xmm3, %xmm2
338 337 pxor %xmm5, %xmm2 /* %xmm2 now holds the updated upper key words */
339 338
340 339 movaps %xmm0, %xmm1
341 340 shufps $0b01000100, %xmm0, %xmm6 /* low half: old %xmm2 low qword, high half: new %xmm0 low qword */
342 341 movaps %xmm6, (%rcx) /* first of two 16-byte stores */
343 342 shufps $0b01001110, %xmm2, %xmm1 /* low half: new %xmm0 high qword, high half: new %xmm2 low qword */
344 343 movaps %xmm1, 0x10(%rcx) /* second 16-byte store */
345 344 add $0x20, %rcx /* advance %rcx past the 32 bytes just written */
346 345 ret
347 346 SET_SIZE(_key_expansion_192a)
348 347
349 348 .align 16
350 349 _key_expansion_192b:
351 350 pshufd $0b01010101, %xmm1, %xmm1 /* broadcast dword 1 of %xmm1 to all four lanes */
352 351 shufps $0b00010000, %xmm0, %xmm4 /* relies on lane 0 of %xmm4 being zero on entry */
353 352 pxor %xmm4, %xmm0
354 353 shufps $0b10001100, %xmm0, %xmm4 /* lane 0 of %xmm4 stays zero for the next call */
355 354 pxor %xmm4, %xmm0
356 355 pxor %xmm1, %xmm0 /* fold in the broadcast %xmm1 word */
357 356
358 357 movaps %xmm2, %xmm5 /* %xmm5 = copy of %xmm2, shifted below */
359 358 pslldq $4, %xmm5 /* byte shift left by 4 (one dword) */
360 359 pshufd $0b11111111, %xmm0, %xmm3 /* broadcast dword 3 of the updated %xmm0 */
361 360 pxor %xmm3, %xmm2
362 361 pxor %xmm5, %xmm2 /* %xmm2 is updated here but not stored by this routine */
363 362
364 363 movaps %xmm0, (%rcx) /* single 16-byte store, unlike _key_expansion_192a */
365 364 add $0x10, %rcx /* advance %rcx past the 16 bytes just written */
366 365 ret
367 366 SET_SIZE(_key_expansion_192b)
368 367
369 368 .align 16
370 369 _key_expansion_256b:
↓ open down ↓ |
52 lines elided |
↑ open up ↑ |
371 370 pshufd $0b10101010, %xmm1, %xmm1 /* broadcast dword 2 of %xmm1 to all four lanes */
372 371 shufps $0b00010000, %xmm2, %xmm4 /* same pattern as _key_expansion_256a, but applied to %xmm2 */
373 372 pxor %xmm4, %xmm2
374 373 shufps $0b10001100, %xmm2, %xmm4
375 374 pxor %xmm4, %xmm2
376 375 pxor %xmm1, %xmm2 /* fold in the broadcast %xmm1 word */
377 376 movaps %xmm2, (%rcx) /* store %xmm2 as the next round key (aligned store) */
378 377 add $0x10, %rcx /* advance %rcx past the 16 bytes just written */
379 378 ret
380 379 SET_SIZE(_key_expansion_256b)
381 - /* EXPORT DELETE END */
382 380
383 381
384 382 /*
385 383 * rijndael_key_setup_enc_intel()
386 384 * Expand the cipher key into the encryption key schedule.
387 385 *
388 386 * For kernel code, caller is responsible for ensuring kpreempt_disable()
389 387 * has been called. This is because %xmm registers are not saved/restored.
390 388 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
391 389 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
392 390 * on the stack.
393 391 *
394 392 * OpenSolaris interface:
395 393 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
396 394 * uint64_t keyBits);
397 395 * Return value is 0 on error, number of rounds on success.
398 396 *
399 397 * Original Intel OpenSSL interface:
400 398 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
401 399 * const int bits, AES_KEY *key);
402 400 * Return value is non-zero on error, 0 on success.
403 401 */
404 402
405 403 #ifdef OPENSSL_INTERFACE
406 404 #define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
407 405 #define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
408 406
409 407 #define USERCIPHERKEY rdi /* P1, 64 bits */
410 408 #define KEYSIZE32 esi /* P2, 32 bits */
411 409 #define KEYSIZE64 rsi /* P2, 64 bits */
412 410 #define AESKEY rdx /* P3, 64 bits */
413 411
414 412 #else /* OpenSolaris Interface */
415 413 #define AESKEY rdi /* P1, 64 bits */
416 414 #define USERCIPHERKEY rsi /* P2, 64 bits */
↓ open down ↓ |
25 lines elided |
↑ open up ↑ |
417 415 #define KEYSIZE32 edx /* P3, 32 bits */
418 416 #define KEYSIZE64 rdx /* P3, 64 bits */
419 417 #endif /* OPENSSL_INTERFACE */
420 418
421 419 #define ROUNDS32 KEYSIZE32 /* temp */
422 420 #define ROUNDS64 KEYSIZE64 /* temp */
423 421 #define ENDAESKEY USERCIPHERKEY /* temp */
424 422
425 423
426 424 ENTRY_NP(rijndael_key_setup_enc_intel)
427 - /* EXPORT DELETE START */
428 425 CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10) /* %r10 must stay intact until the matching SET_TS_OR_POP */
429 426
430 427 / NULL pointer sanity check
431 428 test %USERCIPHERKEY, %USERCIPHERKEY
432 429 jz .Lenc_key_invalid_param
433 430 test %AESKEY, %AESKEY
434 431 jz .Lenc_key_invalid_param
435 432
436 433 movups (%USERCIPHERKEY), %xmm0 / user key (first 16 bytes)
437 434 movaps %xmm0, (%AESKEY) /* round key 0 is the first 16 bytes of the user key */
438 435 lea 0x10(%AESKEY), %rcx / key addr
439 436 pxor %xmm4, %xmm4 / xmm4 is assumed 0 in _key_expansion_x
440 437
441 438 cmp $256, %KEYSIZE32
442 439 jnz .Lenc_key192 /* not 256 bits: try 192, then 128 */
443 440
444 441 / AES 256: 14 rounds in encryption key schedule
445 442 #ifdef OPENSSL_INTERFACE
446 443 mov $14, %ROUNDS32
447 444 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 14
448 445 #endif /* OPENSSL_INTERFACE */
449 446
450 447 movups 0x10(%USERCIPHERKEY), %xmm2 / other user key (2nd 16 bytes)
451 448 movaps %xmm2, (%rcx) /* round key 1 is the second 16 bytes of the user key */
452 449 add $0x10, %rcx
453 450
454 451 aeskeygenassist $0x1, %xmm2, %xmm1 / expand the key
455 452 call _key_expansion_256a
456 453 aeskeygenassist $0x1, %xmm0, %xmm1
457 454 call _key_expansion_256b
458 455 aeskeygenassist $0x2, %xmm2, %xmm1 / expand the key
459 456 call _key_expansion_256a
460 457 aeskeygenassist $0x2, %xmm0, %xmm1
461 458 call _key_expansion_256b
462 459 aeskeygenassist $0x4, %xmm2, %xmm1 / expand the key
463 460 call _key_expansion_256a
464 461 aeskeygenassist $0x4, %xmm0, %xmm1
465 462 call _key_expansion_256b
466 463 aeskeygenassist $0x8, %xmm2, %xmm1 / expand the key
467 464 call _key_expansion_256a
468 465 aeskeygenassist $0x8, %xmm0, %xmm1
469 466 call _key_expansion_256b
470 467 aeskeygenassist $0x10, %xmm2, %xmm1 / expand the key
471 468 call _key_expansion_256a
472 469 aeskeygenassist $0x10, %xmm0, %xmm1
473 470 call _key_expansion_256b
474 471 aeskeygenassist $0x20, %xmm2, %xmm1 / expand the key
475 472 call _key_expansion_256a
476 473 aeskeygenassist $0x20, %xmm0, %xmm1
477 474 call _key_expansion_256b
478 475 aeskeygenassist $0x40, %xmm2, %xmm1 / expand the key
479 476 call _key_expansion_256a /* final call: 13 helper calls plus the 2 direct stores give 15 round keys */
480 477
481 478 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
482 479 #ifdef OPENSSL_INTERFACE
483 480 xor %rax, %rax / return 0 (OK)
484 481 #else /* OpenSolaris Interface */
485 482 mov $14, %rax / return # rounds = 14
486 483 #endif
487 484 ret
488 485
489 486 .align 4
490 487 .Lenc_key192:
491 488 cmp $192, %KEYSIZE32
492 489 jnz .Lenc_key128
493 490
494 491 / AES 192: 12 rounds in encryption key schedule
495 492 #ifdef OPENSSL_INTERFACE
496 493 mov $12, %ROUNDS32
497 494 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 12
498 495 #endif /* OPENSSL_INTERFACE */
499 496
500 497 movq 0x10(%USERCIPHERKEY), %xmm2 / other user key
501 498 aeskeygenassist $0x1, %xmm2, %xmm1 / expand the key
502 499 call _key_expansion_192a
503 500 aeskeygenassist $0x2, %xmm2, %xmm1 / expand the key
504 501 call _key_expansion_192b
505 502 aeskeygenassist $0x4, %xmm2, %xmm1 / expand the key
506 503 call _key_expansion_192a
507 504 aeskeygenassist $0x8, %xmm2, %xmm1 / expand the key
508 505 call _key_expansion_192b
509 506 aeskeygenassist $0x10, %xmm2, %xmm1 / expand the key
510 507 call _key_expansion_192a
511 508 aeskeygenassist $0x20, %xmm2, %xmm1 / expand the key
512 509 call _key_expansion_192b
513 510 aeskeygenassist $0x40, %xmm2, %xmm1 / expand the key
514 511 call _key_expansion_192a
515 512 aeskeygenassist $0x80, %xmm2, %xmm1 / expand the key
516 513 call _key_expansion_192b
517 514
518 515 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
519 516 #ifdef OPENSSL_INTERFACE
520 517 xor %rax, %rax / return 0 (OK)
521 518 #else /* OpenSolaris Interface */
522 519 mov $12, %rax / return # rounds = 12
523 520 #endif
524 521 ret
525 522
526 523 .align 4
527 524 .Lenc_key128:
528 525 cmp $128, %KEYSIZE32
529 526 jnz .Lenc_key_invalid_key_bits /* key size is none of 256, 192, 128 */
530 527
531 528 / AES 128: 10 rounds in encryption key schedule
532 529 #ifdef OPENSSL_INTERFACE
533 530 mov $10, %ROUNDS32
534 531 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 10
535 532 #endif /* OPENSSL_INTERFACE */
536 533
537 534 aeskeygenassist $0x1, %xmm0, %xmm1 / expand the key
538 535 call _key_expansion_128
539 536 aeskeygenassist $0x2, %xmm0, %xmm1 / expand the key
540 537 call _key_expansion_128
541 538 aeskeygenassist $0x4, %xmm0, %xmm1 / expand the key
542 539 call _key_expansion_128
543 540 aeskeygenassist $0x8, %xmm0, %xmm1 / expand the key
544 541 call _key_expansion_128
545 542 aeskeygenassist $0x10, %xmm0, %xmm1 / expand the key
546 543 call _key_expansion_128
547 544 aeskeygenassist $0x20, %xmm0, %xmm1 / expand the key
548 545 call _key_expansion_128
549 546 aeskeygenassist $0x40, %xmm0, %xmm1 / expand the key
550 547 call _key_expansion_128
551 548 aeskeygenassist $0x80, %xmm0, %xmm1 / expand the key
552 549 call _key_expansion_128
553 550 aeskeygenassist $0x1b, %xmm0, %xmm1 / expand the key
554 551 call _key_expansion_128
555 552 aeskeygenassist $0x36, %xmm0, %xmm1 / expand the key
556 553 call _key_expansion_128
557 554
558 555 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
559 556 #ifdef OPENSSL_INTERFACE
560 557 xor %rax, %rax / return 0 (OK)
561 558 #else /* OpenSolaris Interface */
562 559 mov $10, %rax / return # rounds = 10
563 560 #endif
564 561 ret
565 562
566 563 .Lenc_key_invalid_param:
567 564 #ifdef OPENSSL_INTERFACE
568 565 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
569 566 mov $-1, %rax / user key or AES key pointer is NULL
570 567 ret
571 568 #else
572 569 /* FALLTHROUGH */
↓ open down ↓ |
135 lines elided |
↑ open up ↑ |
573 570 #endif /* OPENSSL_INTERFACE */
574 571
575 572 .Lenc_key_invalid_key_bits:
576 573 SET_TS_OR_POP_XMM0_TO_XMM6(%r10) /* error paths are reached after the entry macro ran, so undo it here too */
577 574 #ifdef OPENSSL_INTERFACE
578 575 mov $-2, %rax / keysize is invalid
579 576 #else /* OpenSolaris Interface */
580 577 xor %rax, %rax / a key pointer is NULL or invalid keysize
581 578 #endif /* OPENSSL_INTERFACE */
582 579
583 - /* EXPORT DELETE END */
584 580 ret
585 581 SET_SIZE(rijndael_key_setup_enc_intel)
586 582
587 583
588 584 /*
589 585 * rijndael_key_setup_dec_intel()
590 586 * Expand the cipher key into the decryption key schedule.
591 587 *
592 588 * For kernel code, caller is responsible for ensuring kpreempt_disable()
593 589 * has been called. This is because %xmm registers are not saved/restored.
594 590 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
595 591 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
596 592 * on the stack.
597 593 *
598 594 * OpenSolaris interface:
599 595 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
600 596 * uint64_t keyBits);
601 597 * Return value is 0 on error, number of rounds on success.
602 598 * P1->P2, P2->P3, P3->P1
603 599 *
604 600 * Original Intel OpenSSL interface:
605 601 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
606 602 * const int bits, AES_KEY *key);
607 603 * Return value is non-zero on error, 0 on success.
608 604 */
609 605 ENTRY_NP(rijndael_key_setup_dec_intel)
610 - /* EXPORT DELETE START */
611 606 / Generate round keys used for encryption
612 607 call rijndael_key_setup_enc_intel
613 608 test %rax, %rax
614 609 #ifdef OPENSSL_INTERFACE
615 610 jnz .Ldec_key_exit / Failed if returned non-0
616 611 #else /* OpenSolaris Interface */
617 612 jz .Ldec_key_exit / Failed if returned 0
618 613 #endif /* OPENSSL_INTERFACE */
619 614
620 615 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) /* only %xmm0 and %xmm1 are used below */
621 616
622 617 /*
623 618 * Convert round keys used for encryption
624 619 * to a form usable for decryption
625 620 */
626 621 #ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
627 622 mov %rax, %ROUNDS64 / set # rounds (10, 12, or 14)
628 623 / (already set for OpenSSL)
629 624 #endif
630 625
631 626 lea 0x10(%AESKEY), %rcx / key addr
632 627 shl $4, %ROUNDS32 /* rounds * 16 = byte offset of the last round key */
633 628 add %AESKEY, %ROUNDS64 /* ROUNDS64 = address of the last round key */
634 629 mov %ROUNDS64, %ENDAESKEY /* saved as the stop address for the aesimc loop */
635 630
636 631 .align 4
637 632 .Ldec_key_reorder_loop: /* swap round keys pairwise from both ends toward the middle */
638 633 movaps (%AESKEY), %xmm0
639 634 movaps (%ROUNDS64), %xmm1
640 635 movaps %xmm0, (%ROUNDS64)
641 636 movaps %xmm1, (%AESKEY)
642 637 lea 0x10(%AESKEY), %AESKEY /* low pointer moves up one round key */
643 638 lea -0x10(%ROUNDS64), %ROUNDS64 /* high pointer moves down one round key */
644 639 cmp %AESKEY, %ROUNDS64
645 640 ja .Ldec_key_reorder_loop /* unsigned compare: loop while high pointer is above low pointer */
646 641
647 642 .align 4
648 643 .Ldec_key_inv_loop: /* %rcx walks from the second round key up to, but not including, the last */
649 644 movaps (%rcx), %xmm0
650 645 / Convert an encryption round key to a form usable for decryption
651 646 / with the "AES Inverse Mix Columns" instruction
652 647 aesimc %xmm0, %xmm1
↓ open down ↓ |
32 lines elided |
↑ open up ↑ |
653 648 movaps %xmm1, (%rcx)
654 649 lea 0x10(%rcx), %rcx
655 650 cmp %ENDAESKEY, %rcx
656 651 jnz .Ldec_key_inv_loop /* first and last round keys are left without aesimc */
657 652
658 653 SET_TS_OR_POP_XMM0_XMM1(%r10)
659 654
660 655 .Ldec_key_exit:
661 656 / OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
662 657 / OpenSSL: rax = 0 for OK, or non-zero for error
663 - /* EXPORT DELETE END */
664 658 ret
665 659 SET_SIZE(rijndael_key_setup_dec_intel)
666 660
667 661
668 662 /*
669 663 * aes_encrypt_intel()
670 664 * Encrypt a single block (in and out can overlap).
671 665 *
672 666 * For kernel code, caller is responsible for ensuring kpreempt_disable()
673 667 * has been called. This is because %xmm registers are not saved/restored.
674 668 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
675 669 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
676 670 * on the stack.
677 671 *
678 672 * Temporary register usage:
679 673 * %xmm0 State
680 674 * %xmm1 Key
681 675 *
682 676 * Original OpenSolaris Interface:
683 677 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
684 678 * const uint32_t pt[4], uint32_t ct[4])
685 679 *
686 680 * Original Intel OpenSSL Interface:
687 681 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
688 682 * const AES_KEY *key)
689 683 */
690 684
691 685 #ifdef OPENSSL_INTERFACE
692 686 #define aes_encrypt_intel intel_AES_encrypt
693 687 #define aes_decrypt_intel intel_AES_decrypt
694 688
695 689 #define INP rdi /* P1, 64 bits */
696 690 #define OUTP rsi /* P2, 64 bits */
697 691 #define KEYP rdx /* P3, 64 bits */
698 692
699 693 /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
700 694 #define NROUNDS32 ecx /* temporary, 32 bits */
701 695 #define NROUNDS cl /* temporary, 8 bits */
702 696
703 697 #else /* OpenSolaris Interface */
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
704 698 #define KEYP rdi /* P1, 64 bits */
705 699 #define NROUNDS esi /* P2, 32 bits */
706 700 #define INP rdx /* P3, 64 bits */
707 701 #define OUTP rcx /* P4, 64 bits */
708 702 #endif /* OPENSSL_INTERFACE */
709 703
710 704 #define STATE xmm0 /* temporary, 128 bits */
711 705 #define KEY xmm1 /* temporary, 128 bits */
712 706
713 707 ENTRY_NP(aes_encrypt_intel)
714 - /* EXPORT DELETE START */
715 708 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) /* %r10 must stay intact until the matching SET_TS_OR_POP */
716 709
717 710 movups (%INP), %STATE / input
718 711 movaps (%KEYP), %KEY / key
719 712 #ifdef OPENSSL_INTERFACE
720 713 mov 240(%KEYP), %NROUNDS32 / round count
721 714 #else /* OpenSolaris Interface */
722 715 /* Round count is already present as P2 in %rsi/%esi */
723 716 #endif /* OPENSSL_INTERFACE */
724 717
725 718 pxor %KEY, %STATE / round 0
726 719 lea 0x30(%KEYP), %KEYP /* bias KEYP by 0x30 so the shared tail below fits AES-128 */
727 720 cmp $12, %NROUNDS /* below 12: AES-128, equal: AES-192, above: AES-256 */
728 721 jb .Lenc128
729 722 lea 0x20(%KEYP), %KEYP /* total bias 0x50; lea leaves the cmp flags intact for the je */
730 723 je .Lenc192
731 724
732 725 / AES 256
733 726 lea 0x20(%KEYP), %KEYP /* total bias 0x70 for AES-256 */
734 727 movaps -0x60(%KEYP), %KEY
735 728 aesenc %KEY, %STATE
736 729 movaps -0x50(%KEYP), %KEY
737 730 aesenc %KEY, %STATE
738 731
739 732 .align 4
740 733 .Lenc192:
741 734 / AES 192 and 256
742 735 movaps -0x40(%KEYP), %KEY
743 736 aesenc %KEY, %STATE
744 737 movaps -0x30(%KEYP), %KEY
745 738 aesenc %KEY, %STATE
746 739
747 740 .align 4
748 741 .Lenc128:
749 742 / AES 128, 192, and 256
750 743 movaps -0x20(%KEYP), %KEY
751 744 aesenc %KEY, %STATE
752 745 movaps -0x10(%KEYP), %KEY
753 746 aesenc %KEY, %STATE
754 747 movaps (%KEYP), %KEY
755 748 aesenc %KEY, %STATE
756 749 movaps 0x10(%KEYP), %KEY
757 750 aesenc %KEY, %STATE
758 751 movaps 0x20(%KEYP), %KEY
759 752 aesenc %KEY, %STATE
760 753 movaps 0x30(%KEYP), %KEY
761 754 aesenc %KEY, %STATE
762 755 movaps 0x40(%KEYP), %KEY
↓ open down ↓ |
38 lines elided |
↑ open up ↑ |
763 756 aesenc %KEY, %STATE
764 757 movaps 0x50(%KEYP), %KEY
765 758 aesenc %KEY, %STATE
766 759 movaps 0x60(%KEYP), %KEY
767 760 aesenc %KEY, %STATE
768 761 movaps 0x70(%KEYP), %KEY
769 762 aesenclast %KEY, %STATE / last round
770 763 movups %STATE, (%OUTP) / output
771 764
772 765 SET_TS_OR_POP_XMM0_XMM1(%r10)
773 - /* EXPORT DELETE END */
774 766 ret
775 767 SET_SIZE(aes_encrypt_intel)
776 768
777 769
778 770 /*
779 771 * aes_decrypt_intel()
780 772 * Decrypt a single block (in and out can overlap).
781 773 *
782 774 * For kernel code, caller is responsible for ensuring kpreempt_disable()
783 775 * has been called. This is because %xmm registers are not saved/restored.
784 776 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
785 777 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
786 778 * on the stack.
787 779 *
788 780 * Temporary register usage:
789 781 * %xmm0 State
790 782 * %xmm1 Key
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
791 783 *
792 784 * Original OpenSolaris Interface:
793 785 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
794 786 * const uint32_t pt[4], uint32_t ct[4])
795 787 *
796 788 * Original Intel OpenSSL Interface:
797 789 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
798 790 * const AES_KEY *key);
799 791 */
800 792 ENTRY_NP(aes_decrypt_intel)
801 - /* EXPORT DELETE START */
802 793 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) /* %r10 must stay intact until the matching SET_TS_OR_POP */
803 794
804 795 movups (%INP), %STATE / input
805 796 movaps (%KEYP), %KEY / key
806 797 #ifdef OPENSSL_INTERFACE
807 798 mov 240(%KEYP), %NROUNDS32 / round count
808 799 #else /* OpenSolaris Interface */
809 800 /* Round count is already present as P2 in %rsi/%esi */
810 801 #endif /* OPENSSL_INTERFACE */
811 802
812 803 pxor %KEY, %STATE / round 0
813 804 lea 0x30(%KEYP), %KEYP /* bias KEYP by 0x30 so the shared tail below fits AES-128 */
814 805 cmp $12, %NROUNDS /* below 12: AES-128, equal: AES-192, above: AES-256 */
815 806 jb .Ldec128
816 807 lea 0x20(%KEYP), %KEYP /* total bias 0x50; lea leaves the cmp flags intact for the je */
817 808 je .Ldec192
818 809
819 810 / AES 256
820 811 lea 0x20(%KEYP), %KEYP /* total bias 0x70 for AES-256 */
821 812 movaps -0x60(%KEYP), %KEY
822 813 aesdec %KEY, %STATE
823 814 movaps -0x50(%KEYP), %KEY
824 815 aesdec %KEY, %STATE
825 816
826 817 .align 4
827 818 .Ldec192:
828 819 / AES 192 and 256
829 820 movaps -0x40(%KEYP), %KEY
830 821 aesdec %KEY, %STATE
831 822 movaps -0x30(%KEYP), %KEY
832 823 aesdec %KEY, %STATE
833 824
834 825 .align 4
835 826 .Ldec128:
836 827 / AES 128, 192, and 256
837 828 movaps -0x20(%KEYP), %KEY
838 829 aesdec %KEY, %STATE
839 830 movaps -0x10(%KEYP), %KEY
840 831 aesdec %KEY, %STATE
841 832 movaps (%KEYP), %KEY
842 833 aesdec %KEY, %STATE
843 834 movaps 0x10(%KEYP), %KEY
844 835 aesdec %KEY, %STATE
845 836 movaps 0x20(%KEYP), %KEY
846 837 aesdec %KEY, %STATE
847 838 movaps 0x30(%KEYP), %KEY
848 839 aesdec %KEY, %STATE
849 840 movaps 0x40(%KEYP), %KEY
850 841 aesdec %KEY, %STATE
↓ open down ↓ |
39 lines elided |
↑ open up ↑ |
851 842 movaps 0x50(%KEYP), %KEY
852 843 aesdec %KEY, %STATE
853 844 movaps 0x60(%KEYP), %KEY
854 845 aesdec %KEY, %STATE
855 846 movaps 0x70(%KEYP), %KEY
856 847 aesdeclast %KEY, %STATE / last round
857 848 movups %STATE, (%OUTP) / output
858 849
859 850 SET_TS_OR_POP_XMM0_XMM1(%r10)
860 851 ret
861 - /* EXPORT DELETE END */
862 852 SET_SIZE(aes_decrypt_intel)
863 853
864 854 #endif /* lint || __lint */
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX