1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
/*
 * Selects the reordered encryption loop in arcfour_crypt().  When this is
 * not defined, the straightforward reference loop is compiled instead.
 */
#define ARCFOUR_LOOP_OPTIMIZED
  27 
  28 #ifndef _KERNEL
  29 #include <stdint.h>
  30 #endif  /* _KERNEL */
  31 
  32 #include "arcfour.h"
  33 
  34 #if defined(__amd64)
/*
 * ARCFour_key.flag values
 *
 * arcfour_crypt() hands the request to arcfour_crypt_asm() when the flag is
 * ARCFOUR_ON_AMD64 and otherwise runs its C loop.
 * NOTE(review): the code that stores the flag is not in this part of the
 * file; presumably it is derived from arcfour_crypt_on_intel() -- confirm.
 */
#define ARCFOUR_ON_INTEL        1
#define ARCFOUR_ON_AMD64        0
  38 
  39 #ifdef _KERNEL
  40 #include <sys/x86_archext.h>
  41 #include <sys/cpuvar.h>
  42 
  43 #else
  44 #include <sys/auxv.h>
  45 #endif  /* _KERNEL */
  46 #endif  /* __amd64 */
  47 
  48 #ifndef __amd64
  49 /*
  50  * Initialize the key stream 'key' using the key value.
  51  *
  52  * Input:
  53  * keyval       User-provided key
  54  * keyvallen    Length, in bytes, of keyval
  55  * Output:
  56  * key          Initialized ARCFOUR key schedule, based on keyval
  57  */
  58 void
  59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
  60 {
  61         uchar_t ext_keyval[256];
  62         uchar_t tmp;
  63         int i, j;
  64 
  65         /* Normalize key length to 256 */
  66         for (i = j = 0; i < 256; i++, j++) {
  67                 if (j == keyvallen)
  68                         j = 0;
  69                 ext_keyval[i] = keyval[j];
  70         }
  71 
  72         for (i = 0; i < 256; i++)
  73                 key->arr[i] = (uchar_t)i;
  74 
  75         j = 0;
  76         for (i = 0; i < 256; i++) {
  77                 j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
  78                 tmp = key->arr[i];
  79                 key->arr[i] = key->arr[j];
  80                 key->arr[j] = tmp;
  81         }
  82         key->i = 0;
  83         key->j = 0;
  84 }
  85 #endif  /* !__amd64 */
  86 
  87 
  88 /*
  89  * Encipher 'in' using 'key'.
  90  *
  91  * Input:
  92  * key          ARCFOUR key, initialized by arcfour_key_init()
  93  * in           Input text
  94  * out          Buffer to contain output text
  95  * len          Length, in bytes, of the in and out buffers
  96  *
  97  * Output:
  98  * out          Buffer containing output text
  99  *
 100  * Note: in and out can point to the same location
 101  */
 102 void
 103 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
 104 {
 105 #ifdef  __amd64
 106         if (key->flag == ARCFOUR_ON_AMD64) {
 107                 arcfour_crypt_asm(key, in, out, len);
 108         } else { /* Intel EM64T */
 109 #endif  /* amd64 */
 110 
 111         size_t          ii;
 112         uchar_t         i, j, ti, tj;
 113 #ifdef ARCFOUR_LOOP_OPTIMIZED
 114         uchar_t         arr_ij;
 115 #endif
 116 #ifdef __amd64
 117         uint32_t        *arr;
 118 #else
 119         uchar_t         *arr;
 120 #endif
 121 
 122 #ifdef  sun4u
 123         /*
 124          * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
 125          * the cases where the input and output buffers are aligned on
 126          * a multiple of 8-byte boundary.
 127          */
 128         int             index;
 129         uchar_t         tmp;
 130 
 131         index = (((uint64_t)(uintptr_t)in) & 0x7);
 132 
 133         /* Get the 'in' on an 8-byte alignment */
 134         if (index > 0) {
 135                 i = key->i;
 136                 j = key->j;
 137                 for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
 138                     (index-- > 0) && len > 0;
 139                     len--, in++, out++) {
 140                         ++i;
 141                         j = j + key->arr[i];
 142                         tmp = key->arr[i];
 143                         key->arr[i] = key->arr[j];
 144                         key->arr[j] = tmp;
 145                         tmp = key->arr[i] + key->arr[j];
 146                         *out = *in ^ key->arr[tmp];
 147                 }
 148                 key->i = i;
 149                 key->j = j;
 150         }
 151 
 152         if (len == 0)
 153                 return;
 154 
 155         /* See if we're fortunate and 'out' got aligned as well */
 156 
 157         if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
 158 #endif  /* sun4u */
 159 
 160         i = key->i;
 161         j = key->j;
 162         arr = key->arr;
 163 
 164 #ifndef ARCFOUR_LOOP_OPTIMIZED
 165         /*
 166          * This loop is hasn't been reordered, but is kept for reference
 167          * purposes as it's more readable
 168          */
 169         for (ii = 0; ii < len; ++ii) {
 170                 ++i;
 171                 ti = arr[i];
 172                 j = j + ti;
 173                 tj = arr[j];
 174                 arr[j] = ti;
 175                 arr[i] = tj;
 176                 out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
 177         }
 178 
 179 #else
 180         /*
 181          * This for loop is optimized by carefully spreading out
 182          * memory access and storage to avoid conflicts,
 183          * allowing the processor to process operations in parallel
 184          */
 185 
 186         /* for loop setup */
 187         ++i;
 188         ti = arr[i];
 189         j = j + ti;
 190         tj = arr[j];
 191         arr[j] = ti;
 192         arr[i] = tj;
 193         arr_ij = arr[(ti + tj) & 0xff];
 194         --len;
 195 
 196         for (ii = 0; ii < len; ) {
 197                 ++i;
 198                 ti = arr[i];
 199                 j = j + ti;
 200                 tj = arr[j];
 201                 arr[j] = ti;
 202                 arr[i] = tj;
 203 
 204                 /* save result from previous loop: */
 205                 out[ii] = in[ii] ^ arr_ij;
 206 
 207                 ++ii;
 208                 arr_ij = arr[(ti + tj) & 0xff];
 209         }
 210         /* save result from last loop: */
 211         out[ii] = in[ii] ^ arr_ij;
 212 #endif
 213 
 214         key->i = i;
 215         key->j = j;
 216 
 217 #ifdef  sun4u
 218         } else {
 219                 arcfour_crypt_aligned(key, len, in, out);
 220         }
 221 #endif  /* sun4u */
 222 #ifdef  __amd64
 223         }
 224 #endif  /* amd64 */
 225 }
 226 
 227 
 228 #ifdef  __amd64
 229 /*
 230  * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
 231  * Cache the result, as the CPU can't change.
 232  *
 233  * Note: the userland version uses getisax() and checks for an AMD-64-only
 234  * feature.  The kernel version uses cpuid_getvendor().
 235  */
 236 int
 237 arcfour_crypt_on_intel(void)
 238 {
 239         static int      cached_result = -1;
 240 
 241         if (cached_result == -1) { /* first time */
 242 #ifdef _KERNEL
 243                 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
 244 #else
 245                 uint_t  ui;
 246 
 247                 (void) getisax(&ui, 1);
 248                 cached_result = ((ui & AV_386_AMD_MMX) == 0);
 249 #endif  /* _KERNEL */
 250         }
 251 
 252         return (cached_result);
 253 }
 254 #endif  /* __amd64 */