1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /
  27 / In-line functions for amd64 kernels.
  28 /
  29 
  30 /
  31 / return current thread pointer
  32 /
  33 / NOTE: the "0x18" should be replaced by the computed value of the
  34 /       offset of "cpu_thread" from the beginning of the struct cpu.
  35 /       Including "assym.h" does not work, however, since that stuff
  36 /       is PSM-specific and is only visible to the 'unix' build anyway.
/       Same with current cpu pointer, where "0x10" should be replaced
/       by the computed value of the offset of "cpu_self".
  39 /       Ugh -- what a disaster.
  40 /
        /*
         * threadp: load the 64-bit value stored at offset 0x18 from the
         * %gs segment base and leave it in %rax.
         */
        .inline threadp,0
        movq    %gs:0x18, %rax
        .end
  44 
/
/ return current cpu pointer
/
/ Loads the 64-bit value stored at offset 0x10 from the %gs segment base
/ and leaves it in %rax.
/ NOTE(review): 0x10 is presumably the offset of "cpu_self" within
/ struct cpu; it is hard-coded here, so confirm it against the struct.
/
        .inline curcpup,0
        movq    %gs:0x10, %rax
        .end
  51 
/
/ return caller
/
/ Loads the 64-bit word at 8(%rbp) into %rax.
/ NOTE(review): this is presumably the return address saved just above
/ the frame pointer, which only holds if the enclosing function keeps a
/ standard %rbp frame; confirm the build never omits frame pointers.
/
        .inline caller,0
        movq    8(%rbp), %rax
        .end
  58 
/
/ convert ipl to spl.  This is the identity function for i86
/
/ The first argument (%rdi) is copied unchanged into %rax.
/
        .inline ipltospl,0
        movq    %rdi, %rax
        .end
  65 
  66 /
  67 / find the low order bit in a word
  68 /
  69         .inline lowbit,4
  70         movq    $-1, %rax
  71         bsfq    %rdi, %rax
  72         incq    %rax
  73         .end
  74 
  75 /
  76 / Networking byte order functions (too bad, Intel has the wrong byte order)
  77 /
  78 
        /*
         * htonll: copy the 64-bit argument (%rdi) into %rax and reverse
         * the order of its eight bytes.
         */
        .inline htonll,4
        movq    %rdi, %rax
        bswapq  %rax
        .end
  83 
        /*
         * ntohll: copy the 64-bit argument (%rdi) into %rax and reverse
         * the order of its eight bytes; the instruction sequence is the
         * same as htonll's.
         */
        .inline ntohll,4
        movq    %rdi, %rax
        bswapq  %rax
        .end
  88 
        /*
         * htonl: copy the 32-bit argument (%edi) into %eax and reverse
         * the order of its four bytes.
         */
        .inline htonl,4
        movl    %edi, %eax
        bswap   %eax
        .end
  93 
        /*
         * ntohl: copy the 32-bit argument (%edi) into %eax and reverse
         * the order of its four bytes; the instruction sequence is the
         * same as htonl's.
         */
        .inline ntohl,4
        movl    %edi, %eax
        bswap   %eax
        .end
  98 
  99         .inline htons,4
 100         movl    %edi, %eax
 101         bswap   %eax
 102         shrl    $16, %eax
 103         .end
 104 
 105         .inline ntohs,4
 106         movl    %edi, %eax
 107         bswap   %eax
 108         shrl    $16, %eax
 109         .end
 110 
 111 /*
 * multiply two long numbers and yield a u_longlong_t result
 113  * Provided to manipulate hrtime_t values.
 114  */
 115         /* XX64 These don't work correctly with SOS9 build 13.0 yet
 116         .inline mul32, 8
 117         xorl    %edx, %edx
 118         movl    %edi, %eax
 119         mull    %esi
 120         shlq    $32, %rdx
 121         orq     %rdx, %rax
 122         ret
 123         .end
 124         */
/*
 * Unlock hres_lock and increment the count value. (See clock.h)
 *
 * Implemented as one lock-prefixed 32-bit increment of the global symbol
 * hres_lock.
 * NOTE(review): presumably the lock flag occupies the low-order bit, so
 * that adding 1 both clears it and carries into the count -- confirm
 * against clock.h.
 */
        .inline unlock_hres_lock, 0
        lock
        incl    hres_lock
        .end
 132 
        /*
         * atomic_orb: OR the low byte of the second argument (%esi,
         * staged through %al) into the byte addressed by the first
         * argument (%rdi), using a lock-prefixed read-modify-write.
         */
        .inline atomic_orb,8
        movl    %esi, %eax
        lock
        orb     %al,(%rdi)
        .end
 138 
        /*
         * atomic_andb: AND the low byte of the second argument (%esi,
         * staged through %al) into the byte addressed by the first
         * argument (%rdi), using a lock-prefixed read-modify-write.
         */
        .inline atomic_andb,8
        movl    %esi, %eax
        lock
        andb    %al,(%rdi)
        .end
 144 
 145 /*
 146  * atomic inc/dec operations.
 147  *      void atomic_inc16(uint16_t *addr) { ++*addr; }
 148  *      void atomic_dec16(uint16_t *addr) { --*addr; }
 149  */
        /*
         * atomic_inc16: lock-prefixed increment of the 16-bit word
         * addressed by the first argument (%rdi).
         */
        .inline atomic_inc16,4
        lock
        incw    (%rdi)
        .end
 154 
        /*
         * atomic_dec16: lock-prefixed decrement of the 16-bit word
         * addressed by the first argument (%rdi).
         */
        .inline atomic_dec16,4
        lock
        decw    (%rdi)
        .end
 159 
 160 /*
 161  * atomic bit clear
 162  */
 163         .inline atomic_btr32,8
 164         lock
 165         btrl %esi, (%rdi)
 166         setc %al
 167         .end
 168 
/*
 * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
 * a hint that the code sequence is a busy spin-wait loop.  Without a pause
 * instruction in these loops, the P4 Xeon processor may suffer a severe
 * penalty when exiting the loop because the processor detects a possible
 * memory order violation.  Inserting the pause instruction significantly
 * reduces the likelihood of a memory order violation, improving performance.
 * The pause instruction is a NOP on all other IA-32 processors.
 *
 * The template body is the single pause instruction; it reads no
 * argument registers and writes no result register.
 */
        .inline ht_pause, 0
        pause
        .end
 181 
 182 /*
 183  * inlines for update_sregs().
 184  */
        /*
         * __set_ds: load the low 16 bits of the first argument (%di)
         * into the %ds segment register.
         */
        .inline __set_ds, 0
        movw    %di, %ds
        .end
 188 
        /*
         * __set_es: load the low 16 bits of the first argument (%di)
         * into the %es segment register.
         */
        .inline __set_es, 0
        movw    %di, %es
        .end
 192 
        /*
         * __set_fs: load the low 16 bits of the first argument (%di)
         * into the %fs segment register.
         */
        .inline __set_fs, 0
        movw    %di, %fs
        .end
 196 
        /*
         * __set_gs: load the low 16 bits of the first argument (%di)
         * into the %gs segment register.
         */
        .inline __set_gs, 0
        movw    %di, %gs
        .end
 200 
        /*
         * __swapgs: execute swapgs, preceded by a full memory fence.
         *
         * OPTERON_ERRATUM_88 requires mfence, so the fence must stay
         * immediately ahead of the swapgs instruction.
         */
        .inline __swapgs, 0
        mfence
        swapgs
        .end
 208 
 209 /*
 210  * prefetch 64 bytes
 211  */
 212 
        /*
         * prefetch_read_many: issue prefetcht0 hints for the address in
         * the first argument (%rdi) and for the address 32 bytes past it.
         */
        .inline prefetch_read_many,8
        prefetcht0      (%rdi)
        prefetcht0      32(%rdi)
        .end
 217 
        /*
         * prefetch_read_once: issue prefetchnta (non-temporal) hints for
         * the address in the first argument (%rdi) and for the address
         * 32 bytes past it.
         */
        .inline prefetch_read_once,8
        prefetchnta     (%rdi)
        prefetchnta     32(%rdi)
        .end
 222 
        /*
         * prefetch_write_many: issue prefetcht0 hints for the address in
         * the first argument (%rdi) and for the address 32 bytes past it.
         * The instructions are the same as in prefetch_read_many.
         */
        .inline prefetch_write_many,8
        prefetcht0      (%rdi)
        prefetcht0      32(%rdi)
        .end
 227 
        /*
         * prefetch_write_once: issue prefetcht0 hints for the address in
         * the first argument (%rdi) and for the address 32 bytes past it.
         * NOTE(review): unlike prefetch_read_once this uses prefetcht0
         * rather than prefetchnta -- confirm that is intentional.
         */
        .inline prefetch_write_once,8
        prefetcht0      (%rdi)
        prefetcht0      32(%rdi)
        .end