1 /*
   2  * ====================================================================
   3  * Written by Intel Corporation for the OpenSSL project to add support
   4  * for Intel AES-NI instructions. Rights for redistribution and usage
   5  * in source and binary forms are granted according to the OpenSSL
   6  * license.
   7  *
   8  *   Author: Huang Ying <ying.huang at intel dot com>
   9  *           Vinodh Gopal <vinodh.gopal at intel dot com>
  10  *           Kahraman Akdemir
  11  *
  12  * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
  13  * instructions that are going to be introduced in the next generation
  14  * of Intel processor, as of 2009. These instructions enable fast and
  15  * secure data encryption and decryption, using the Advanced Encryption
  16  * Standard (AES), defined by FIPS Publication number 197. The
  17  * architecture introduces six instructions that offer full hardware
  18  * support for AES. Four of them support high performance data
  19  * encryption and decryption, and the other two instructions support
  20  * the AES key expansion procedure.
  21  * ====================================================================
  22  */
  23 
  24 /*
  25  * ====================================================================
  26  * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
  27  *
  28  * Redistribution and use in source and binary forms, with or without
  29  * modification, are permitted provided that the following conditions
  30  * are met:
  31  *
  32  * 1. Redistributions of source code must retain the above copyright
  33  *    notice, this list of conditions and the following disclaimer.
  34  *
  35  * 2. Redistributions in binary form must reproduce the above copyright
  36  *    notice, this list of conditions and the following disclaimer in
  37  *    the documentation and/or other materials provided with the
  38  *    distribution.
  39  *
  40  * 3. All advertising materials mentioning features or use of this
  41  *    software must display the following acknowledgment:
  42  *    "This product includes software developed by the OpenSSL Project
  43  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  44  *
  45  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  46  *    endorse or promote products derived from this software without
  47  *    prior written permission. For written permission, please contact
  48  *    openssl-core@openssl.org.
  49  *
  50  * 5. Products derived from this software may not be called "OpenSSL"
  51  *    nor may "OpenSSL" appear in their names without prior written
  52  *    permission of the OpenSSL Project.
  53  *
  54  * 6. Redistributions of any form whatsoever must retain the following
  55  *    acknowledgment:
  56  *    "This product includes software developed by the OpenSSL Project
  57  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  58  *
  59  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  60  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
  63  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  64  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  65  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  66  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  67  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  68  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  69  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  70  * OF THE POSSIBILITY OF SUCH DAMAGE.
  71  * ====================================================================
  72  */
  73 
  74 /*
  75  * ====================================================================
  76  * OpenSolaris OS modifications
  77  *
  78  * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
   79  * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
  80  * Huang Ying of Intel to the openssl-dev mailing list under the subject
  81  * of "Add support to Intel AES-NI instruction set for x86_64 platform".
  82  *
  83  * This OpenSolaris version has these major changes from the original source:
  84  *
  85  * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
  86  * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
  87  * definitions for lint.
  88  *
  89  * 2. Formatted code, added comments, and added #includes and #defines.
  90  *
  91  * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
  92  * calling kpreempt_disable() and kpreempt_enable().
   93  * If the TS bit is not set, save and restore %xmm registers at the beginning
   94  * and end of function calls (%xmm* registers are not saved and restored
   95  * during kernel thread preemption).
  96  *
  97  * 4. Renamed functions, reordered parameters, and changed return value
  98  * to match OpenSolaris:
  99  *
 100  * OpenSSL interface:
 101  *      int intel_AES_set_encrypt_key(const unsigned char *userKey,
 102  *              const int bits, AES_KEY *key);
 103  *      int intel_AES_set_decrypt_key(const unsigned char *userKey,
 104  *              const int bits, AES_KEY *key);
 105  *      Return values for above are non-zero on error, 0 on success.
 106  *
 107  *      void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 108  *              const AES_KEY *key);
 109  *      void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 110  *              const AES_KEY *key);
 111  *      typedef struct aes_key_st {
 112  *              unsigned int    rd_key[4 *(AES_MAXNR + 1)];
 113  *              int             rounds;
 114  *              unsigned int    pad[3];
 115  *      } AES_KEY;
  116  * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
  117  * (ks32) instead of 64-bit (ks64)).
 118  * Number of rounds (aka round count) is at offset 240 of AES_KEY.
 119  *
 120  * OpenSolaris OS interface (#ifdefs removed for readability):
 121  *      int rijndael_key_setup_dec_intel(uint32_t rk[],
 122  *              const uint32_t cipherKey[], uint64_t keyBits);
 123  *      int rijndael_key_setup_enc_intel(uint32_t rk[],
 124  *              const uint32_t cipherKey[], uint64_t keyBits);
 125  *      Return values for above are 0 on error, number of rounds on success.
 126  *
 127  *      void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 128  *              const uint32_t pt[4], uint32_t ct[4]);
  129  *      void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
  130  *              const uint32_t ct[4], uint32_t pt[4]);
 131  *      typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 132  *               uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 133  *
 134  *      typedef union {
 135  *              uint32_t        ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 136  *      } aes_ks_t;
 137  *      typedef struct aes_key {
 138  *              aes_ks_t        encr_ks, decr_ks;
 139  *              long double     align128;
 140  *              int             flags, nr, type;
 141  *      } aes_key_t;
 142  *
  143  * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
  144  * ct is cipher text, and MAX_AES_NR is 14.
 145  * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 146  *
  147  * Note2: aes_ks_t must be aligned on a 0 mod 128-bit (16-byte) boundary,
  147  * because the key schedule is accessed with movaps.
 148  *
 149  * ====================================================================
 150  */
 151 
 152 #if defined(lint) || defined(__lint)
 153 
 154 #include <sys/types.h>
 155 
 156 /* ARGSUSED */
 157 void
 158 aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
 159     uint32_t ct[4]) {
 160 }
 161 /* ARGSUSED */
 162 void
 163 aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
 164     uint32_t pt[4]) {
 165 }
 166 /* ARGSUSED */
 167 int
 168 rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 169     uint64_t keyBits) {
 170         return (0);
 171 }
 172 /* ARGSUSED */
 173 int
 174 rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 175    uint64_t keyBits) {
 176         return (0);
 177 }
 178 
 179 
 180 #else   /* lint */
 181 
 182 #include <sys/asm_linkage.h>
 183 #include <sys/controlregs.h>
 184 #ifdef _KERNEL
 185 #include <sys/machprivregs.h>
 186 #endif
 187 
 188 #ifdef _KERNEL
        /*
         * PROTECTED_CLTS: invoke the CLTS macro without losing P2 (%rsi).
         *
         * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
         * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
         * uses it to pass P2 to syscall.
         * This also occurs with the STTS macro, but we don't care if
         * P2 (%rsi) is modified just before function exit.
         * The CLTS and STTS macros push and pop P1 (%rdi) already.
         *
         * Under __xpv %rsi is therefore saved and restored around CLTS;
         * otherwise PROTECTED_CLTS is simply CLTS.
         */
#ifdef __xpv
#define PROTECTED_CLTS \
        push    %rsi; \
        CLTS; \
        pop     %rsi
#else
#define PROTECTED_CLTS \
        CLTS
#endif  /* __xpv */
 206 
 207 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
 208         push    %rbp; \
 209         mov     %rsp, %rbp; \
 210         movq    %cr0, tmpreg; \
 211         testq   $CR0_TS, tmpreg; \
 212         jnz     1f; \
 213         and     $-XMM_ALIGN, %rsp; \
 214         sub     $[XMM_SIZE * 2], %rsp; \
 215         movaps  %xmm0, 16(%rsp); \
 216         movaps  %xmm1, (%rsp); \
 217         jmp     2f; \
 218 1: \
 219         PROTECTED_CLTS; \
 220 2:
 221 
 222         /*
 223          * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
 224          * otherwise set CR0_TS.
 225          */
 226 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
 227         testq   $CR0_TS, tmpreg; \
 228         jnz     1f; \
 229         movaps  (%rsp), %xmm1; \
 230         movaps  16(%rsp), %xmm0; \
 231         jmp     2f; \
 232 1: \
 233         STTS(tmpreg); \
 234 2: \
 235         mov     %rbp, %rsp; \
 236         pop     %rbp
 237 
 238         /*
 239          * If CR0_TS is not set, align stack (with push %rbp) and push
 240          * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
 241          */
 242 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
 243         push    %rbp; \
 244         mov     %rsp, %rbp; \
 245         movq    %cr0, tmpreg; \
 246         testq   $CR0_TS, tmpreg; \
 247         jnz     1f; \
 248         and     $-XMM_ALIGN, %rsp; \
 249         sub     $[XMM_SIZE * 7], %rsp; \
 250         movaps  %xmm0, 96(%rsp); \
 251         movaps  %xmm1, 80(%rsp); \
 252         movaps  %xmm2, 64(%rsp); \
 253         movaps  %xmm3, 48(%rsp); \
 254         movaps  %xmm4, 32(%rsp); \
 255         movaps  %xmm5, 16(%rsp); \
 256         movaps  %xmm6, (%rsp); \
 257         jmp     2f; \
 258 1: \
 259         PROTECTED_CLTS; \
 260 2:
 261 
 262 
 263         /*
 264          * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
 265          * otherwise set CR0_TS.
 266          */
 267 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
 268         testq   $CR0_TS, tmpreg; \
 269         jnz     1f; \
 270         movaps  (%rsp), %xmm6; \
 271         movaps  16(%rsp), %xmm5; \
 272         movaps  32(%rsp), %xmm4; \
 273         movaps  48(%rsp), %xmm3; \
 274         movaps  64(%rsp), %xmm2; \
 275         movaps  80(%rsp), %xmm1; \
 276         movaps  96(%rsp), %xmm0; \
 277         jmp     2f; \
 278 1: \
 279         STTS(tmpreg); \
 280 2: \
 281         mov     %rbp, %rsp; \
 282         pop     %rbp
 283 
 284 
 285 #else
/*
 * Non-kernel build: the CR0.TS handling and %xmm save/restore macros
 * expand to nothing, so the functions below use the %xmm registers
 * directly without any prologue/epilogue from these macros.
 */
#define PROTECTED_CLTS
#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
 291 #endif  /* _KERNEL */
 292 
 293 
 294 /*
  295  * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
  296  * _key_expansion_256a(), _key_expansion_256b()
  297  *
  298  * Helper functions called by rijndael_key_setup_enc_intel().
 299  * Also used indirectly by rijndael_key_setup_dec_intel().
 300  *
 301  * Input:
 302  * %xmm0        User-provided cipher key
 303  * %xmm1        Round constant
 304  * Output:
 305  * (%rcx)       AES key
 306  */
 307 
 308 .align  16
 309 _key_expansion_128:
 310 _key_expansion_256a:
 311         pshufd  $0b11111111, %xmm1, %xmm1
 312         shufps  $0b00010000, %xmm0, %xmm4
 313         pxor    %xmm4, %xmm0
 314         shufps  $0b10001100, %xmm0, %xmm4
 315         pxor    %xmm4, %xmm0
 316         pxor    %xmm1, %xmm0
 317         movaps  %xmm0, (%rcx)
 318         add     $0x10, %rcx
 319         ret
 320         SET_SIZE(_key_expansion_128)
 321         SET_SIZE(_key_expansion_256a)
 322 
 323 .align 16
 324 _key_expansion_192a:
 325         pshufd  $0b01010101, %xmm1, %xmm1
 326         shufps  $0b00010000, %xmm0, %xmm4
 327         pxor    %xmm4, %xmm0
 328         shufps  $0b10001100, %xmm0, %xmm4
 329         pxor    %xmm4, %xmm0
 330         pxor    %xmm1, %xmm0
 331 
 332         movaps  %xmm2, %xmm5
 333         movaps  %xmm2, %xmm6
 334         pslldq  $4, %xmm5
 335         pshufd  $0b11111111, %xmm0, %xmm3
 336         pxor    %xmm3, %xmm2
 337         pxor    %xmm5, %xmm2
 338 
 339         movaps  %xmm0, %xmm1
 340         shufps  $0b01000100, %xmm0, %xmm6
 341         movaps  %xmm6, (%rcx)
 342         shufps  $0b01001110, %xmm2, %xmm1
 343         movaps  %xmm1, 0x10(%rcx)
 344         add     $0x20, %rcx
 345         ret
 346         SET_SIZE(_key_expansion_192a)
 347 
 348 .align 16
 349 _key_expansion_192b:
 350         pshufd  $0b01010101, %xmm1, %xmm1
 351         shufps  $0b00010000, %xmm0, %xmm4
 352         pxor    %xmm4, %xmm0
 353         shufps  $0b10001100, %xmm0, %xmm4
 354         pxor    %xmm4, %xmm0
 355         pxor    %xmm1, %xmm0
 356 
 357         movaps  %xmm2, %xmm5
 358         pslldq  $4, %xmm5
 359         pshufd  $0b11111111, %xmm0, %xmm3
 360         pxor    %xmm3, %xmm2
 361         pxor    %xmm5, %xmm2
 362 
 363         movaps  %xmm0, (%rcx)
 364         add     $0x10, %rcx
 365         ret
 366         SET_SIZE(_key_expansion_192b)
 367 
 368 .align 16
 369 _key_expansion_256b:
 370         pshufd  $0b10101010, %xmm1, %xmm1
 371         shufps  $0b00010000, %xmm2, %xmm4
 372         pxor    %xmm4, %xmm2
 373         shufps  $0b10001100, %xmm2, %xmm4
 374         pxor    %xmm4, %xmm2
 375         pxor    %xmm1, %xmm2
 376         movaps  %xmm2, (%rcx)
 377         add     $0x10, %rcx
 378         ret
 379         SET_SIZE(_key_expansion_256b)
 380 
 381 
 382 /*
 383  * rijndael_key_setup_enc_intel()
 384  * Expand the cipher key into the encryption key schedule.
 385  *
 386  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 387  * has been called.  This is because %xmm registers are not saved/restored.
 388  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 389  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 390  * on the stack.
 391  *
 392  * OpenSolaris interface:
 393  * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 394  *      uint64_t keyBits);
 395  * Return value is 0 on error, number of rounds on success.
 396  *
 397  * Original Intel OpenSSL interface:
 398  * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 399  *      const int bits, AES_KEY *key);
 400  * Return value is non-zero on error, 0 on success.
 401  */
 402 
/*
 * Register aliases for the key setup routines.  The two interfaces pass
 * the same three values in a different parameter order, so the code below
 * refers to them by role (AESKEY, USERCIPHERKEY, KEYSIZE32/64) rather
 * than by register name.  With OPENSSL_INTERFACE the function names are
 * also remapped to the intel_AES_set_*_key names.
 */
#ifdef  OPENSSL_INTERFACE
#define rijndael_key_setup_enc_intel    intel_AES_set_encrypt_key
#define rijndael_key_setup_dec_intel    intel_AES_set_decrypt_key

#define USERCIPHERKEY           rdi     /* P1, 64 bits */
#define KEYSIZE32               esi     /* P2, 32 bits */
#define KEYSIZE64               rsi     /* P2, 64 bits */
#define AESKEY                  rdx     /* P3, 64 bits */

#else   /* OpenSolaris Interface */
#define AESKEY                  rdi     /* P1, 64 bits */
#define USERCIPHERKEY           rsi     /* P2, 64 bits */
#define KEYSIZE32               edx     /* P3, 32 bits */
#define KEYSIZE64               rdx     /* P3, 64 bits */
#endif  /* OPENSSL_INTERFACE */

/*
 * Temporaries that reuse parameter registers once the parameter value
 * is no longer needed.
 */
#define ROUNDS32                KEYSIZE32       /* temp */
#define ROUNDS64                KEYSIZE64       /* temp */
#define ENDAESKEY               USERCIPHERKEY   /* temp */
 422 
 423 
 424 ENTRY_NP(rijndael_key_setup_enc_intel)
 425         CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)
 426 
 427         / NULL pointer sanity check
 428         test    %USERCIPHERKEY, %USERCIPHERKEY
 429         jz      .Lenc_key_invalid_param
 430         test    %AESKEY, %AESKEY
 431         jz      .Lenc_key_invalid_param
 432 
 433         movups  (%USERCIPHERKEY), %xmm0 / user key (first 16 bytes)
 434         movaps  %xmm0, (%AESKEY)
 435         lea     0x10(%AESKEY), %rcx     / key addr
 436         pxor    %xmm4, %xmm4            / xmm4 is assumed 0 in _key_expansion_x
 437 
 438         cmp     $256, %KEYSIZE32
 439         jnz     .Lenc_key192
 440 
 441         / AES 256: 14 rounds in encryption key schedule
 442 #ifdef OPENSSL_INTERFACE
 443         mov     $14, %ROUNDS32
 444         movl    %ROUNDS32, 240(%AESKEY)         / key.rounds = 14
 445 #endif  /* OPENSSL_INTERFACE */
 446 
 447         movups  0x10(%USERCIPHERKEY), %xmm2     / other user key (2nd 16 bytes)
 448         movaps  %xmm2, (%rcx)
 449         add     $0x10, %rcx
 450 
 451         aeskeygenassist $0x1, %xmm2, %xmm1      / expand the key
 452         call    _key_expansion_256a
 453         aeskeygenassist $0x1, %xmm0, %xmm1
 454         call    _key_expansion_256b
 455         aeskeygenassist $0x2, %xmm2, %xmm1      / expand the key
 456         call    _key_expansion_256a
 457         aeskeygenassist $0x2, %xmm0, %xmm1
 458         call    _key_expansion_256b
 459         aeskeygenassist $0x4, %xmm2, %xmm1      / expand the key
 460         call    _key_expansion_256a
 461         aeskeygenassist $0x4, %xmm0, %xmm1
 462         call    _key_expansion_256b
 463         aeskeygenassist $0x8, %xmm2, %xmm1      / expand the key
 464         call    _key_expansion_256a
 465         aeskeygenassist $0x8, %xmm0, %xmm1
 466         call    _key_expansion_256b
 467         aeskeygenassist $0x10, %xmm2, %xmm1     / expand the key
 468         call    _key_expansion_256a
 469         aeskeygenassist $0x10, %xmm0, %xmm1
 470         call    _key_expansion_256b
 471         aeskeygenassist $0x20, %xmm2, %xmm1     / expand the key
 472         call    _key_expansion_256a
 473         aeskeygenassist $0x20, %xmm0, %xmm1
 474         call    _key_expansion_256b
 475         aeskeygenassist $0x40, %xmm2, %xmm1     / expand the key
 476         call    _key_expansion_256a
 477 
 478         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 479 #ifdef  OPENSSL_INTERFACE
 480         xor     %rax, %rax                      / return 0 (OK)
 481 #else   /* Open Solaris Interface */
 482         mov     $14, %rax                       / return # rounds = 14
 483 #endif
 484         ret
 485 
 486 .align 4
 487 .Lenc_key192:
 488         cmp     $192, %KEYSIZE32
 489         jnz     .Lenc_key128
 490 
 491         / AES 192: 12 rounds in encryption key schedule
 492 #ifdef OPENSSL_INTERFACE
 493         mov     $12, %ROUNDS32
 494         movl    %ROUNDS32, 240(%AESKEY) / key.rounds = 12
 495 #endif  /* OPENSSL_INTERFACE */
 496 
 497         movq    0x10(%USERCIPHERKEY), %xmm2     / other user key
 498         aeskeygenassist $0x1, %xmm2, %xmm1      / expand the key
 499         call    _key_expansion_192a
 500         aeskeygenassist $0x2, %xmm2, %xmm1      / expand the key
 501         call    _key_expansion_192b
 502         aeskeygenassist $0x4, %xmm2, %xmm1      / expand the key
 503         call    _key_expansion_192a
 504         aeskeygenassist $0x8, %xmm2, %xmm1      / expand the key
 505         call    _key_expansion_192b
 506         aeskeygenassist $0x10, %xmm2, %xmm1     / expand the key
 507         call    _key_expansion_192a
 508         aeskeygenassist $0x20, %xmm2, %xmm1     / expand the key
 509         call    _key_expansion_192b
 510         aeskeygenassist $0x40, %xmm2, %xmm1     / expand the key
 511         call    _key_expansion_192a
 512         aeskeygenassist $0x80, %xmm2, %xmm1     / expand the key
 513         call    _key_expansion_192b
 514 
 515         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 516 #ifdef  OPENSSL_INTERFACE
 517         xor     %rax, %rax                      / return 0 (OK)
 518 #else   /* OpenSolaris Interface */
 519         mov     $12, %rax                       / return # rounds = 12
 520 #endif
 521         ret
 522 
 523 .align 4
 524 .Lenc_key128:
 525         cmp $128, %KEYSIZE32
 526         jnz .Lenc_key_invalid_key_bits
 527 
 528         / AES 128: 10 rounds in encryption key schedule
 529 #ifdef OPENSSL_INTERFACE
 530         mov     $10, %ROUNDS32
 531         movl    %ROUNDS32, 240(%AESKEY)         / key.rounds = 10
 532 #endif  /* OPENSSL_INTERFACE */
 533 
 534         aeskeygenassist $0x1, %xmm0, %xmm1      / expand the key
 535         call    _key_expansion_128
 536         aeskeygenassist $0x2, %xmm0, %xmm1      / expand the key
 537         call    _key_expansion_128
 538         aeskeygenassist $0x4, %xmm0, %xmm1      / expand the key
 539         call    _key_expansion_128
 540         aeskeygenassist $0x8, %xmm0, %xmm1      / expand the key
 541         call    _key_expansion_128
 542         aeskeygenassist $0x10, %xmm0, %xmm1     / expand the key
 543         call    _key_expansion_128
 544         aeskeygenassist $0x20, %xmm0, %xmm1     / expand the key
 545         call    _key_expansion_128
 546         aeskeygenassist $0x40, %xmm0, %xmm1     / expand the key
 547         call    _key_expansion_128
 548         aeskeygenassist $0x80, %xmm0, %xmm1     / expand the key
 549         call    _key_expansion_128
 550         aeskeygenassist $0x1b, %xmm0, %xmm1     / expand the key
 551         call    _key_expansion_128
 552         aeskeygenassist $0x36, %xmm0, %xmm1     / expand the key
 553         call    _key_expansion_128
 554 
 555         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 556 #ifdef  OPENSSL_INTERFACE
 557         xor     %rax, %rax                      / return 0 (OK)
 558 #else   /* OpenSolaris Interface */
 559         mov     $10, %rax                       / return # rounds = 10
 560 #endif
 561         ret
 562 
 563 .Lenc_key_invalid_param:
 564 #ifdef  OPENSSL_INTERFACE
 565         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 566         mov     $-1, %rax       / user key or AES key pointer is NULL
 567         ret
 568 #else
 569         /* FALLTHROUGH */
 570 #endif  /* OPENSSL_INTERFACE */
 571 
 572 .Lenc_key_invalid_key_bits:
 573         SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
 574 #ifdef  OPENSSL_INTERFACE
 575         mov     $-2, %rax       / keysize is invalid
 576 #else   /* Open Solaris Interface */
 577         xor     %rax, %rax      / a key pointer is NULL or invalid keysize
 578 #endif  /* OPENSSL_INTERFACE */
 579 
 580         ret
 581         SET_SIZE(rijndael_key_setup_enc_intel)
 582 
 583 
 584 /*
 585  * rijndael_key_setup_dec_intel()
 586  * Expand the cipher key into the decryption key schedule.
 587  *
 588  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 589  * has been called.  This is because %xmm registers are not saved/restored.
 590  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 591  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 592  * on the stack.
 593  *
 594  * OpenSolaris interface:
 595  * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 596  *      uint64_t keyBits);
 597  * Return value is 0 on error, number of rounds on success.
 598  * P1->P2, P2->P3, P3->P1
 599  *
 600  * Original Intel OpenSSL interface:
 601  * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 602  *      const int bits, AES_KEY *key);
 603  * Return value is non-zero on error, 0 on success.
 604  */
 605 ENTRY_NP(rijndael_key_setup_dec_intel)
 606         / Generate round keys used for encryption
 607         call    rijndael_key_setup_enc_intel
 608         test    %rax, %rax
 609 #ifdef  OPENSSL_INTERFACE
 610         jnz     .Ldec_key_exit  / Failed if returned non-0
 611 #else   /* OpenSolaris Interface */
 612         jz      .Ldec_key_exit  / Failed if returned 0
 613 #endif  /* OPENSSL_INTERFACE */
 614 
 615         CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
 616 
 617         /*
 618          * Convert round keys used for encryption
 619          * to a form usable for decryption
 620          */
 621 #ifndef OPENSSL_INTERFACE               /* OpenSolaris Interface */
 622         mov     %rax, %ROUNDS64         / set # rounds (10, 12, or 14)
 623                                         / (already set for OpenSSL)
 624 #endif
 625 
 626         lea     0x10(%AESKEY), %rcx     / key addr
 627         shl     $4, %ROUNDS32
 628         add     %AESKEY, %ROUNDS64
 629         mov     %ROUNDS64, %ENDAESKEY
 630 
 631 .align 4
 632 .Ldec_key_reorder_loop:
 633         movaps  (%AESKEY), %xmm0
 634         movaps  (%ROUNDS64), %xmm1
 635         movaps  %xmm0, (%ROUNDS64)
 636         movaps  %xmm1, (%AESKEY)
 637         lea     0x10(%AESKEY), %AESKEY
 638         lea     -0x10(%ROUNDS64), %ROUNDS64
 639         cmp     %AESKEY, %ROUNDS64
 640         ja      .Ldec_key_reorder_loop
 641 
 642 .align 4
 643 .Ldec_key_inv_loop:
 644         movaps  (%rcx), %xmm0
 645         / Convert an encryption round key to a form usable for decryption
 646         / with the "AES Inverse Mix Columns" instruction
 647         aesimc  %xmm0, %xmm1
 648         movaps  %xmm1, (%rcx)
 649         lea     0x10(%rcx), %rcx
 650         cmp     %ENDAESKEY, %rcx
 651         jnz     .Ldec_key_inv_loop
 652 
 653         SET_TS_OR_POP_XMM0_XMM1(%r10)
 654 
 655 .Ldec_key_exit:
 656         / OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
 657         / OpenSSL: rax = 0 for OK, or non-zero for error
 658         ret
 659         SET_SIZE(rijndael_key_setup_dec_intel)
 660 
 661 
 662 /*
 663  * aes_encrypt_intel()
 664  * Encrypt a single block (in and out can overlap).
 665  *
 666  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 667  * has been called.  This is because %xmm registers are not saved/restored.
 668  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 669  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 670  * on the stack.
 671  *
 672  * Temporary register usage:
 673  * %xmm0        State
 674  * %xmm1        Key
 675  *
 676  * Original OpenSolaris Interface:
 677  * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 678  *      const uint32_t pt[4], uint32_t ct[4])
 679  *
 680  * Original Intel OpenSSL Interface:
 681  * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 682  *      const AES_KEY *key)
 683  */
 684 
/*
 * Register aliases for aes_encrypt_intel() and aes_decrypt_intel().
 * The two interfaces pass the input, output and key schedule pointers in
 * different parameter positions, so the code refers to them by role
 * (INP, OUTP, KEYP).  With OPENSSL_INTERFACE the function names are also
 * remapped to the intel_AES_* names, and the round count is not a
 * parameter: it is loaded from offset 240 of the key into %ecx and
 * compared through its low byte %cl.
 */
#ifdef  OPENSSL_INTERFACE
#define aes_encrypt_intel       intel_AES_encrypt
#define aes_decrypt_intel       intel_AES_decrypt

#define INP             rdi     /* P1, 64 bits */
#define OUTP            rsi     /* P2, 64 bits */
#define KEYP            rdx     /* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define NROUNDS32       ecx     /* temporary, 32 bits */
#define NROUNDS         cl      /* temporary,  8 bits */

#else   /* OpenSolaris Interface */
#define KEYP            rdi     /* P1, 64 bits */
#define NROUNDS         esi     /* P2, 32 bits */
#define INP             rdx     /* P3, 64 bits */
#define OUTP            rcx     /* P4, 64 bits */
#endif  /* OPENSSL_INTERFACE */

/* %xmm scratch registers shared by the encrypt and decrypt routines */
#define STATE           xmm0    /* temporary, 128 bits */
#define KEY             xmm1    /* temporary, 128 bits */
 706 
 707 ENTRY_NP(aes_encrypt_intel)
 708         CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
 709 
 710         movups  (%INP), %STATE                  / input
 711         movaps  (%KEYP), %KEY                   / key
 712 #ifdef  OPENSSL_INTERFACE
 713         mov     240(%KEYP), %NROUNDS32          / round count
 714 #else   /* OpenSolaris Interface */
 715         /* Round count is already present as P2 in %rsi/%esi */
 716 #endif  /* OPENSSL_INTERFACE */
 717 
 718         pxor    %KEY, %STATE                    / round 0
 719         lea     0x30(%KEYP), %KEYP
 720         cmp     $12, %NROUNDS
 721         jb      .Lenc128
 722         lea     0x20(%KEYP), %KEYP
 723         je      .Lenc192
 724 
 725         / AES 256
 726         lea     0x20(%KEYP), %KEYP
 727         movaps  -0x60(%KEYP), %KEY
 728         aesenc  %KEY, %STATE
 729         movaps  -0x50(%KEYP), %KEY
 730         aesenc  %KEY, %STATE
 731 
 732 .align 4
 733 .Lenc192:
 734         / AES 192 and 256
 735         movaps  -0x40(%KEYP), %KEY
 736         aesenc  %KEY, %STATE
 737         movaps  -0x30(%KEYP), %KEY
 738         aesenc  %KEY, %STATE
 739 
 740 .align 4
 741 .Lenc128:
 742         / AES 128, 192, and 256
 743         movaps  -0x20(%KEYP), %KEY
 744         aesenc  %KEY, %STATE
 745         movaps  -0x10(%KEYP), %KEY
 746         aesenc  %KEY, %STATE
 747         movaps  (%KEYP), %KEY
 748         aesenc  %KEY, %STATE
 749         movaps  0x10(%KEYP), %KEY
 750         aesenc  %KEY, %STATE
 751         movaps  0x20(%KEYP), %KEY
 752         aesenc  %KEY, %STATE
 753         movaps  0x30(%KEYP), %KEY
 754         aesenc  %KEY, %STATE
 755         movaps  0x40(%KEYP), %KEY
 756         aesenc  %KEY, %STATE
 757         movaps  0x50(%KEYP), %KEY
 758         aesenc  %KEY, %STATE
 759         movaps  0x60(%KEYP), %KEY
 760         aesenc  %KEY, %STATE
 761         movaps  0x70(%KEYP), %KEY
 762         aesenclast       %KEY, %STATE           / last round
 763         movups  %STATE, (%OUTP)                 / output
 764 
 765         SET_TS_OR_POP_XMM0_XMM1(%r10)
 766         ret
 767         SET_SIZE(aes_encrypt_intel)
 768 
 769 
 770 /*
 771  * aes_decrypt_intel()
 772  * Decrypt a single block (in and out can overlap).
 773  *
 774  * For kernel code, caller is responsible for ensuring kpreempt_disable()
 775  * has been called.  This is because %xmm registers are not saved/restored.
 776  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 777  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 778  * on the stack.
 779  *
 780  * Temporary register usage:
 781  * %xmm0        State
 782  * %xmm1        Key
 783  *
 784  * Original OpenSolaris Interface:
  785  * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
  786  *      const uint32_t ct[4], uint32_t pt[4])
 787  *
 788  * Original Intel OpenSSL Interface:
 789  * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 790  *      const AES_KEY *key);
 791  */
 792 ENTRY_NP(aes_decrypt_intel)
 793         CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
 794 
 795         movups  (%INP), %STATE                  / input
 796         movaps  (%KEYP), %KEY                   / key
 797 #ifdef  OPENSSL_INTERFACE
 798         mov     240(%KEYP), %NROUNDS32          / round count
 799 #else   /* OpenSolaris Interface */
 800         /* Round count is already present as P2 in %rsi/%esi */
 801 #endif  /* OPENSSL_INTERFACE */
 802 
 803         pxor    %KEY, %STATE                    / round 0
 804         lea     0x30(%KEYP), %KEYP
 805         cmp     $12, %NROUNDS
 806         jb      .Ldec128
 807         lea     0x20(%KEYP), %KEYP
 808         je      .Ldec192
 809 
 810         / AES 256
 811         lea     0x20(%KEYP), %KEYP
 812         movaps  -0x60(%KEYP), %KEY
 813         aesdec  %KEY, %STATE
 814         movaps  -0x50(%KEYP), %KEY
 815         aesdec  %KEY, %STATE
 816 
 817 .align 4
 818 .Ldec192:
 819         / AES 192 and 256
 820         movaps  -0x40(%KEYP), %KEY
 821         aesdec  %KEY, %STATE
 822         movaps  -0x30(%KEYP), %KEY
 823         aesdec  %KEY, %STATE
 824 
 825 .align 4
 826 .Ldec128:
 827         / AES 128, 192, and 256
 828         movaps  -0x20(%KEYP), %KEY
 829         aesdec  %KEY, %STATE
 830         movaps  -0x10(%KEYP), %KEY
 831         aesdec  %KEY, %STATE
 832         movaps  (%KEYP), %KEY
 833         aesdec  %KEY, %STATE
 834         movaps  0x10(%KEYP), %KEY
 835         aesdec  %KEY, %STATE
 836         movaps  0x20(%KEYP), %KEY
 837         aesdec  %KEY, %STATE
 838         movaps  0x30(%KEYP), %KEY
 839         aesdec  %KEY, %STATE
 840         movaps  0x40(%KEYP), %KEY
 841         aesdec  %KEY, %STATE
 842         movaps  0x50(%KEYP), %KEY
 843         aesdec  %KEY, %STATE
 844         movaps  0x60(%KEYP), %KEY
 845         aesdec  %KEY, %STATE
 846         movaps  0x70(%KEYP), %KEY
 847         aesdeclast      %KEY, %STATE            / last round
 848         movups  %STATE, (%OUTP)                 / output
 849 
 850         SET_TS_OR_POP_XMM0_XMM1(%r10)
 851         ret
 852         SET_SIZE(aes_decrypt_intel)
 853 
 854 #endif  /* lint || __lint */